/// <summary>
/// Click handler: downloads the GameRankings browse page, joins the inner text of the
/// nodes matched by the XPath query (one per line) and assigns the combined string to Info.
/// </summary>
private void ShowStatistics_Click(object sender, RoutedEventArgs e)
{
// The document created here is replaced by the one hw.Load returns two lines below.
HtmlAgilityPack.HtmlDocument doc = new HtmlDocument();
HtmlWeb hw = new HtmlWeb();
doc = hw.Load("http://www.gamerankings.com/browse.html");
// NOTE(review): "//a/" ends with a slash, so the location path is incomplete; this is
// consistent with the reported "expression should be NodeSet" error. "//a" selects every <a>.
HtmlNodeCollection nodes= doc.DocumentNode.SelectNodes("//a/");
string result = "";
// Append each matched node's text followed by a line break.
foreach (var item in nodes)
{
result += item.InnerText+Environment.NewLine;
}
// NOTE(review): result is a single string, yet it is assigned to ItemsSource; if Info is
// a text box, Info.Text is presumably what was intended - confirm against the UI definition.
Info.ItemsSource = result;
}
By pressing the button I want to load information from the web page into a text box called Info.
After pressing the button I get an exception saying that the result of the expression should be a NodeSet. What should I do? I'm using HtmlAgilityPack.
Your XPath is wrong: the trailing slash in "//a/" leaves the expression incomplete. If you want to get all hyperlink elements, you can use this instead:
// Enumerates every <a> element below the document node directly, with no XPath string involved.
var nodes = doc.DocumentNode.Descendants("a");
In addition to @Hung Cao's answer, you can shorten this by looping over the result of SelectNodes directly:
// Iterates the XPath matches directly, without storing the collection in a local first.
// NOTE(review): SelectNodes presumably returns null when nothing matches - confirm, and
// add a null check before the loop if so.
foreach (HtmlAgilityPack.HtmlNode node in doc.DocumentNode.SelectNodes("Selector here")){
//your code here
}
Related
Here is my C# code. I am trying to scrape data from a website using HtmlAgilityPack, but it prints "nothing found" every time. I don't know what I am doing wrong.
HtmlAgilityPack.HtmlWeb webb = new HtmlAgilityPack.HtmlWeb();
// Restrict outgoing connections to TLS 1.2 before the page is fetched.
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
HtmlAgilityPack.HtmlDocument doc = webb.Load("mywebsite");
// Attribute tests in XPath are written with '@' ('#' is not valid there): select every <a>
// below an <li> below a <ul> whose class attribute is exactly "unstyled".
HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//ul[@class='unstyled']//li//a");
if (nodes != null)
{
    foreach (HtmlNode n in nodes)
    {
        // Decode HTML entities and strip surrounding whitespace before printing.
        q = System.Net.WebUtility.HtmlDecode(n.InnerText).Trim();
        Console.WriteLine(q);
    }
}
else
{
    // The query produced no node collection for the downloaded markup.
    Console.WriteLine("nothing found");
}
Here is a picture of the tag I am trying to capture data from; I need the data from the <a> tag.
The XPath used to select the tag is incorrect.
// '@class' is the XPath attribute test; '/li/a' matches only anchors that are direct
// children of an <li> that is itself a direct child of the matching <ul>.
HtmlNodeCollection nodes =
    doc.DocumentNode.SelectNodes("//ul[@class='unstyled']/li/a");
This should select all the anchor nodes and then you can loop through the nodes to get the InnerHtml.
Working sample shown below
// Self-contained sample: an outer list wrapping two inner <ul class='unstyled'> lists,
// each holding a single link.
string s = "<ul class='unstyle no-overflow'><li><ul class='unstyled'><li><a href='http://www.smsconnexion.com'>SMS ConneXion</a></li></ul><ul class='unstyled'><li><a href='http://www.celusion.com'>Celusion</a></li></ul></li></ul>";
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(s);
// '@class' compares the whole attribute value, so the outer 'unstyle no-overflow' list
// is not matched; only the two inner lists are.
HtmlNodeCollection nodes =
    doc.DocumentNode.SelectNodes("//ul[@class='unstyled']/li/a");
foreach (var node in nodes)
{
    // Print the link target of each matched anchor.
    Console.WriteLine(node.Attributes["href"].Value);
}
// Keep the console window open until Enter is pressed.
Console.ReadLine();
I want to extract not the whole web page but only the text of one element, the td with class="result-neutral", and I don't know what is wrong with my code. This is the HTML:
<td class="result-neutral" xseid="xz1nBfht">3 - 2 </td>
And this is C# code:
// Loads a page, keeps the <td> elements whose class attribute contains "result-neutral",
// appends each one's OuterHtml (one per line) to result and shows it in Info.Text.
// NOTE(review): the snippet is incomplete as shown - the URL literal passed to hw.Load is
// cut off, `result` is not declared here, and the final brace closes a scope opened elsewhere.
HtmlAgilityPack.HtmlDocument doc = new HtmlDocument();
HtmlWeb hw = new HtmlWeb();
doc = hw.Load("htt
var scoreNodes = doc.DocumentNode.Descendants("td").Where(d =>d.Attributes.Contains("class")&&d.Attributes["class"].Value.Contains("result-neutral"));
foreach (var item in scoreNodes)
{
result += item.OuterHtml + Environment.NewLine;
}
Info.Text = result;
}
OuterHtml returns the HTML including the element's own start and end tags. Don't you want InnerHtml or InnerText instead?
EDIT:
This snippet works for me:
// Verbatim string literal ('@' prefix); the markup uses single quotes, so nothing needs escaping.
const string html = @"<html><body><table><tr><td class='result-neutral' xseid='xz1nBfht'><a href='/hockey/russia/khl/ska-st-petersburg-metallurg-magnitogorsk-xz1nBfht/'>3 - 2</a></td></tr></table></body></html>";
var doc = new HtmlDocument();
doc.LoadHtml(html);
// Keep only <td> elements whose class attribute contains "result-neutral".
var scoreNodes = doc.DocumentNode.Descendants("td").Where(d => d.Attributes.Contains("class") && d.Attributes["class"].Value.Contains("result-neutral"));
string result = "";
foreach (var item in scoreNodes)
{
    // InnerText drops the tags (including the nested <a>) and keeps only the text.
    result += item.InnerText + Environment.NewLine;
}
// Remove the line break appended after the last entry.
result = result.TrimEnd(); // the result is "3 - 2"
<div class="form-field wide-80 normal">1997-09-15</div>
I am trying to select the date inside it, 1997-09-15. I tried this code, but it fails with an "Xpath Exception was Unhandled" error. What is wrong with the code? Please help.
// Downloads the whois page and shows, in a message box, the trimmed text of the first
// child of every node matched by the XPath query.
string Url = "http://whois.domaintools.com/google.com";
HtmlWeb web = new HtmlWeb();
HtmlAgilityPack.HtmlDocument doc = web.Load(Url);
// NOTE(review): the predicate is not valid XPath as written - the attribute test should
// read @class, and the value has to be quoted: 'form-field wide-80 normal'.
var SpanNodes = doc.DocumentNode.SelectNodes("//div[#class=form-field wide-80 normal]");
if (SpanNodes != null)
{
foreach (HtmlNode SN in SpanNodes)
{
// Reads the text of the node's first child only, not of the whole node.
string text = SN.FirstChild.InnerText.Trim();
MessageBox.Show(text);
}
}
You forgot the quotes (') around the attribute value:
// '@class' is the XPath attribute test, and the value it is compared with is quoted.
var SpanNodes =
    doc.DocumentNode.SelectNodes("//div[@class='form-field wide-80 normal']");
In a web page there are several nodes having class='inner', but I need only the 3rd node having class='inner'. If I use
string x = textBox1.Text;
string q = "";
HtmlWeb web = new HtmlWeb();
// The text box content is appended to the base link to form the address to load.
HtmlAgilityPack.HtmlDocument doc = web.Load("myweb_link" + x);
// '@class' is the XPath attribute test; this matches every <div> whose class is exactly "inner".
HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//div[@class='inner']");
if (nodes != null)
{
    foreach (HtmlNode n in nodes)
    {
        // Decode HTML entities and strip surrounding whitespace before showing the text.
        q = System.Net.WebUtility.HtmlDecode(n.InnerText).Trim();
        MessageBox.Show(q);
    }
}
else
{
    MessageBox.Show("nothing found ");
}
it gives me all the nodes having class='inner', which I understand.
But I want only the 3rd node. How can I get that?
Get the third node from the nodes variable using the indexer:
// Index 2 is the third match. Check that the collection exists and holds at least three
// nodes before indexing; thirdNode is null when it does not.
var thirdNode = (nodes != null && nodes.Count > 2) ? nodes[2] : null;
There are 5 STRONG Tags inside my SPAN Tag from my Html document.
I want to know how to get the text from the first STRONG Tag inside the SPAN TAG?
Here is my code so far.
var web = new HtmlWeb();
var doc = web.Load(url);
// '@class' is the XPath attribute test: every <strong> that is a direct child of a
// <span> whose class is exactly "advisory_link".
var nodes = doc.DocumentNode.SelectNodes("//span[@class='advisory_link']/strong");
foreach (var node in nodes)
{
    // Each pass overwrites Text, so the box ends up showing the last <strong>.
    richTextBox1.Text = node.InnerHtml;
}
// '@class' is the XPath attribute test; [1] keeps a <strong> only when it is the first
// <strong> child of its parent.
var nodes = doc.DocumentNode.SelectNodes("//span[@class='advisory_link']//strong[1]");
// Nothing matched: there is no text to return.
if (nodes == null || nodes.Count == 0)
{
    return null;
}
// Only the first match is wanted, so no loop is needed.
return nodes[0].InnerHtml;