I am trying to replace the inner HTML of a specific div. The div shows up in the Gecko browser and in view-source, but when I send the collection of elements to a text box, the div is nowhere to be found. Here are some of the methods I have tried. Any help is greatly appreciated.
//NODES
// Attempt 1: ask the document for every node whose class name is the literal string "*".
// NOTE(review): "*" is presumably meant as a match-all wildcard; that convention belongs to
// tag-name lookups rather than class-name lookups — confirm against the GeckoFX/DOM docs.
GeckoNodeCollection nodes2 = mainbrowser.Document.GetElementsByClassName("*");
foreach (GeckoNode node in nodes2)
{
// Safe cast (null when the node is not a GeckoElement); nothing is done with the result.
GeckoElement element3 = node as GeckoElement;
}
// Attempt 2: look nodes up by the class name "minin-class" and cast each element node.
GeckoNodeCollection nod = mainbrowser.Document.GetElementsByClassName("minin-class");
foreach (GeckoNode node in nod)
{
// Only element nodes are considered.
if (NodeType.Element == node.NodeType)
{
try
{
GeckoElement ele = (GeckoElement)node;
}
catch (Exception ex)
{
// Fallback when the first cast throws: the exception text is captured but never used,
// and a second, different cast is tried. Neither "ele" is used after its cast.
string ep = ex.ToString();
GeckoHtmlElement ele = (GeckoHtmlElement)node;
}
}
}
//ELEMENT COLLECTION
// Walk every <div> in the document and overwrite the content of the one whose id is "minin".
GeckoElementCollection divs = mainbrowser.Document.GetElementsByTagName("div");
foreach (GeckoHtmlElement div in divs)
{
    // Skip every div that is not the target.
    if (div.GetAttribute("id") != "minin")
    {
        continue;
    }

    div.InnerHtml = "hello";
}
//DOM
// Look the element up by id and treat it as a generic HTML element. A <div> must not be
// wrapped in GeckoLinkElement: that wrapper type is meant for <link> tags.
GeckoHtmlElement element6 = mainbrowser.Document.GetElementById("minin") as GeckoHtmlElement;
if (element6 != null)
{
    // Only assign when the lookup (and the cast) actually produced an element.
    element6.InnerHtml = "Hello";
}
//XPATH-ATTEMPT
//var r = mainbrowser.Document.EvaluateXPath("//*[@id='minin']");
//Assert.AreEqual(1, r.GetNodes().Count());
//GeckoElementCollection nodes = mainbrowser.Document.EvaluateXPath(x.ToString()).GetNodes();
//foreach (GeckoNode node in nodes)
//{
// GeckoElement element4 = node as GeckoElement;
//}
Of course it can be done, and it is also very easy:
// Replace the content of <div id="minin"> by walking every <div> under <body>.
foreach (var n in mainbrowser.Document.Body.GetElementsByTagName("div"))
{
    // "as" yields null for anything that is not a GeckoHtmlElement, so check before use.
    GeckoHtmlElement hnode = n as GeckoHtmlElement;
    if (hnode == null || hnode.Id != "minin")
    {
        continue;
    }

    // Suppose this DOM: <div id="minin"><span> something </span></div>
    hnode.InnerHtml = "<div><h2>hi!</h2></div>";
    // outerHtml is now: <div id="minin"><div><h2>hi!</h2></div></div>

    hnode.TextContent = "hi";
    // outerHtml is now: <div id="minin">hi</div>

    hnode.SetAttribute("style", "color: red;");
    // outerHtml is now: <div style="color: red;" id="minin">hi</div>
}
Or, if you prefer, you can do it with JavaScript:
// Set the text of the element with id "minin" by evaluating a script in the page's JS context.
using (AutoJSContext java = new AutoJSContext(geckoWebBrowser1.Window))
{
    string value = "hello there";
    string outp = "";

    // The value is spliced into a single-quoted JavaScript string literal, so escape the
    // characters that would otherwise terminate or corrupt that literal.
    string escaped = value
        .Replace("\\", "\\\\")
        .Replace("'", "\\'")
        .Replace("\r", "\\r")
        .Replace("\n", "\\n");

    java.EvaluateScript("document.getElementById('minin').innerText ='" + escaped + "'", out outp);
}
Note: the GeckoHtmlElement class has an Id property that holds the value of the HTML tag's id attribute:
<tagName id = "myid">
I don't think it's possible, so I am using Selenium WebDriver instead.
Related
I'm working on getting some values from an RSS feed, but I am having difficulty getting a value whose element tag has a namespace prefix. I've tried adding the namespace to the lookup of the value, but I always get null.
Any idea on how this is achieved?
Feed
https://wegotthiscovered.com/movies/feed/
Namespace
xmlns:content="http://purl.org/rss/1.0/modules/content/"
Element
content:encoded
/// <summary>
/// Clears the stored movie news, then downloads each configured RSS feed and stores its items.
/// </summary>
/// <returns>
/// <c>true</c> when every feed was processed (a feed is skipped while there is no internet
/// connection); <c>false</c> as soon as loading or storing a feed throws.
/// </returns>
public async Task<bool> GetNewsFeeds()
{
    Database db = new Database();

    // Feed URL -> feed name stored with each item.
    Dictionary<string, string> dictionary = new Dictionary<string, string>
    {
        { "https://wegotthiscovered.com/movies/feed/", "Movie" },
        { "https://wegotthiscovered.com/blu-ray/feed/", "Blu-ray" },
        { "https://wegotthiscovered.com/reviews/feed/", "Reviews" },
        { "https://wegotthiscovered.com/featured/feed/", "Featured" },
        { "https://wegotthiscovered.com/galleries/feed/", "Galleries" }
    };

    // <content:encoded> belongs to the RSS "content" module namespace, so it must be looked up
    // with a namespace-qualified name; the plain RSS elements are in no namespace.
    XNamespace nsSys = "http://purl.org/rss/1.0/modules/content/";

    // First URL-looking token in an item's description; it is used as the item's image URL.
    // Built once instead of once per item.
    Regex urlPattern = new Regex(@"((http|ftp|https):\/\/)?([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-])?");

    db.DeletMovieNews();

    foreach (var pair in dictionary)
    {
        try
        {
            if (!PhysicalDevice.HasInternetConnection())
            {
                continue;
            }

            XDocument doc = XDocument.Load(pair.Key);

            // The explicit XElement -> string conversion yields null for a missing element,
            // whereas .Value would throw a NullReferenceException.
            List<Movie_News> newsCollection =
                (from item in doc.Descendants("item")
                 select new Movie_News
                 {
                     Content = (string)item.Element(nsSys + "encoded"),
                     Link = (string)item.Element("link"),
                     PublishedDate = (string)item.Element("pubDate"),
                     Title = (string)item.Element("title"),
                     Description = (string)item.Element("description"),
                     GroupName = "News",
                     FeedName = pair.Value
                 }).ToList();

            if (newsCollection.Count == 0)
            {
                continue;
            }

            using (var rateGate = new RateGate(40, TimeSpan.FromSeconds(10)))
            {
                // NOTE(review): the gate is waited on once per feed, not once per item —
                // confirm that this is the intended throttling granularity.
                rateGate.WaitToProceed();

                foreach (Movie_News item in newsCollection)
                {
                    string description = item.Description ?? string.Empty;

                    Match match = urlPattern.Match(description);
                    if (match.Success)
                    {
                        item.ImageUrl = match.Value;
                        item.B64Image = await DownloadImage(item.ImageUrl);
                    }

                    // Drop everything before the first </div>, then strip the </div> tags.
                    // IndexOf returns -1 when there is no </div>; Remove(0, -1) would throw,
                    // so the text is left untouched in that case.
                    int divEnd = description.IndexOf("</div>", StringComparison.Ordinal);
                    if (divEnd >= 0)
                    {
                        description = description.Remove(0, divEnd);
                    }

                    item.Description = description.Replace("</div>", "");
                    db.InsertNewsData(item);
                }
            }

            // Deliberately no return here: returning inside the loop would stop after the
            // first feed and never load the remaining ones.
        }
        catch (Exception ex)
        {
            // Keep the failure visible to a debugger instead of discarding it silently.
            System.Diagnostics.Debug.WriteLine(ex);
            return false;
        }
    }

    return true;
}
}
Typical: as soon as I completed the write-up, it started working.
I have an AutoCompleteExtender which works fine when I set the innerHTML of its items to the text received from the web service, and I can read the selected value of that text in the selectedItem function. But when I put HTML elements inside the innerHTML of a child node, I lose the value of that node in the selectedItem function (keep in mind that I can still see the value that was set in the OnClientPopulated function).
here is OnClientPopulated
// OnClientPopulated handler: every completion item's text is parsed as JSON, and the item's
// markup is replaced by entry 1 of the parsed data with the first "##" marker removed.
function PopulateDataForAce_Locations(sender) {
    $("#tbxLocation").css("background-image", "none");

    var items = $find("aceYer").get_completionList().childNodes;
    var position = 0;
    while (position < items.length) {
        var item = items[position];
        var fields = JSON.parse(item.innerText);
        var icon = fields[0];

        // Items whose text carries "##", or whose value is "0000", get the "Ilce" classes;
        // all other items get the "Il" classes.
        var isIlce = fields[1].indexOf("##") > -1 || item._value == "0000";
        var imageClass = isIlce ? "imageStyleIlce" : "imageStyleIl";
        var textClass = isIlce ? "textStyleIlce" : "textStyleIl";

        var label = fields[1].replace("##", "");
        var secondary = fields[2];
        // item.innerHTML = "<font class=\"" + textClass + "\">" + label + "</font>";
        item.innerHTML = label;

        position++;
    }
}
When I use the commented-out line, I lose the value of the node.
Any help is appreciated.
The only way I found to get the value in the selected-item function was:
// Item-selected handler: copies the _value of the currently selected completion item into
// the hfYer element.
function aceLocation_itemSelected(sender, e) {
    var extender = $find("aceYer");
    var selectedIndex = extender._selectIndex;
    var selectedItem = extender.get_completionList().childNodes[selectedIndex];
    $get('<%= hfYer.ClientID %>').value = selectedItem._value;
}
e.get_value() did not work when the item contained HTML.
I have a XML loaded from a URL like this:
// Download the feed XML and show it in label1; show "FAILED" when the download fails.
// WebClient is IDisposable, so it lives in a using block and is released when done.
using (WebClient client = new WebClient())
{
    client.Encoding = Encoding.UTF8;
    try
    {
        string reply = client.DownloadString("http://Example.com/somefile.xml");
        label1.Text = reply;
    }
    catch (WebException)
    {
        // Network or HTTP-level failure while downloading.
        label1.Text = "FAILED";
    }
}
That XML belongs to an RSS feed. I want label1.Text to show just the titles from that XML. How can I achieve that?
Example of label1.Text
This is my first title - This is my 2nd title - And this is my last title
You can load your XML into an XmlDocument and then use XPath to Get the value of each node you're targeting.
// Collect the value of every matching node and show them all, separated by " - ".
XmlDocument doc = new XmlDocument();
doc.LoadXml(reply);

// Replace the XPath with the nodes you are targeting.
XmlNodeList nodes = doc.SelectNodes("//NodeToSelect");
var values = new System.Collections.Generic.List<string>();
foreach (XmlNode node in nodes)
{
    //If the value you want is the content of the node
    values.Add(node.InnerText);
    //If the value you want is an attribute of the node, use this line instead:
    //values.Add(node.Attributes["AttributeName"].Value);
}

// Assigning label1.Text inside the loop would keep only the last value, so join once here.
label1.Text = string.Join(" - ", values.ToArray());
If you are not familiar with XPath you can always check here :
http://www.w3schools.com/xpath/xpath_syntax.asp
// Show one "name: value" line for every descendant element that has a non-empty value.
var root = XElement.Parse(reply);
var lines = from element in root.Descendants()
            where !string.IsNullOrEmpty(element.Value)
            select string.Format("{0}: {1}", element.Name, element.Value);
label1.Text = string.Join(Environment.NewLine, lines.ToArray());
You probably need to parse the RSS XML manually to get the title. Here is some sample code for your reference:
/// <summary>
/// Parses an Atom feed document and builds one <see cref="FeedsItem"/> per entry element.
/// The entry count and each item are also written to the console.
/// </summary>
/// <param name="feedsXml">The feed XML text.</param>
/// <returns>The parsed entries; an empty list when the feed has no entry elements.</returns>
private static List<FeedsItem> ParseFeeds(string feedsXml)
{
    XDocument xDoc = XDocument.Parse(feedsXml);
    XNamespace xmlns = "http://www.w3.org/2005/Atom";

    // Materialize once: the query is deferred, so counting it and then looping over it
    // would otherwise run it (and re-create every item) twice.
    List<FeedsItem> items =
        (from entry in xDoc.Descendants(xmlns + "entry")
         select new FeedsItem
         {
             // The explicit XElement/XAttribute -> string conversions return null for a
             // missing node, whereas .Value would throw a NullReferenceException.
             Id = (string)entry.Element(xmlns + "id"),
             Title = (string)entry.Element(xmlns + "title"),
             // First link marked rel="alternate"; null when the entry has none.
             AlternateLink = entry.Descendants(xmlns + "link")
                 .Where(link => (string)link.Attribute("rel") == "alternate")
                 .Select(link => (string)link.Attribute("href"))
                 .FirstOrDefault()
         }).ToList();

    Console.WriteLine("Count = {0}", items.Count);
    foreach (FeedsItem item in items)
    {
        Console.WriteLine(item);
    }

    // Hand the parsed entries back to the caller.
    return items;
}
<html>
<A NAME="doc_id_1"></A>
<div class="find1">
Iam here, extract me.
</div>
<div class = "find2">
iam here also, extract me as well.
</div>
<A NAME="doc_id_2"></A>
</html>
I have used below code to extract data :
// The anchor's name is an attribute, so the XPath predicate addresses it as "@name".
var nodes = doc.DocumentNode.SelectNodes("//a[@name = 'doc_id_1']");
var nodes1 = doc.DocumentNode.SelectNodes("//a[@name = 'doc_id_2']");
// Guard against a null result (no matching anchor) before iterating.
if (nodes != null)
{
    foreach (HtmlNode node in nodes)
    {
        string yourText1 = node.InnerText;
        //var yourText2 = node.NextSibling.SelectNodes("//div");
        // NextSibling is null when the anchor has no following sibling.
        string yourText2 = node.NextSibling != null ? node.NextSibling.InnerHtml : string.Empty;
        //foreach (HtmlNode var in yourText2)
        //{
        // string yourText3 = var.InnerHtml;
        //}
    }
}
I don't want to rely on the class names of those divs, because I am writing generic code. Any help will be appreciated.
One option, using Linq:
var doc = new HtmlDocument();
doc.LoadHtml(html: Resources.Html);

// Both anchors are located by their name attribute ("@name" in XPath).
var startNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_1']");
var endNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_2']");
var parent = startNode.ParentNode;
var nodesYouWant = parent.ChildNodes
    .SkipWhile(node => node != startNode) // skip all nodes up to the start node
    .Skip(1)                              // skip the start node
    .TakeWhile(node => node != endNode)   // take all nodes up to the next anchor
    .Where(node => node.Name == "div");   // select only div nodes
Or:
// Locate both anchors by their name attribute ("@name" in XPath), then keep only the <div>
// nodes that sit between them.
var currentNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_1']");
var endNode = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_2']");
var nodesYouWant = GetEnclosedNodes(currentNode, endNode).Where(node => node.Name == "div");
/// <summary>
/// Lazily yields the siblings that follow <paramref name="currentNode"/>, stopping before
/// <paramref name="endNode"/> or when there are no more siblings, whichever comes first.
/// </summary>
/// <param name="currentNode">Node whose following siblings are enumerated (not yielded itself).</param>
/// <param name="endNode">Node at which enumeration stops (not yielded).</param>
/// <returns>The sibling nodes strictly between the two nodes.</returns>
private static IEnumerable<HtmlNode> GetEnclosedNodes(HtmlNode currentNode, HtmlNode endNode)
{
    for (HtmlNode cursor = currentNode.NextSibling;
         cursor != null && cursor != endNode;
         cursor = cursor.NextSibling)
    {
        yield return cursor;
    }
}
I'm assuming you will know the name value of the two anchor tags.
var doc = new HtmlDocument();
// NOTE: load the markup into doc (for example with doc.LoadHtml) before querying it;
// a freshly created document contains no anchors.
var firstAnchor = doc.DocumentNode.SelectSingleNode("//a[@name = 'doc_id_1']");
var div = firstAnchor != null ? firstAnchor.NextSibling : null;
// Stop at the second anchor or when the siblings run out. The anchor is recognised by its
// name attribute: node.Name is the tag name ("a"), so it can never equal "doc_id_2".
while (div != null && div.GetAttributeValue("name", "") != "doc_id_2")
{
    var divText = div.InnerText; //do whatever with this
    div = div.NextSibling;
}
I wonder how I could remove the HTML tags listed below using HtmlAgilityPack?
// Parse the description markup so that individual tags can be located in it.
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(Description);

// Names of the markups to be removed.
List<string> markups = new List<string>();
markups.Add("br");
markups.Add("ol");
markups.Add("ul");
markups.Add("li");
thanks
you can use this method
/// <summary>
/// Strips everything that looks like an HTML tag from <paramref name="content"/> and collapses
/// each run of whitespace into a single space.
/// </summary>
/// <param name="content">The markup to clean; may be null or empty.</param>
/// <returns>The text without tags; an empty string when <paramref name="content"/> is null or empty.</returns>
public static string RemoveHTMLTags(string content)
{
    // Regex.Replace throws on null input; return the empty result explicitly instead of
    // relying on a catch-all handler to produce it.
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    // "<" up to the next ">" — or up to the end of the input, which also removes a trailing
    // tag that was never closed.
    Regex tagRemove = new Regex(@"<[^>]*(>|$)");
    Regex compressSpaces = new Regex(@"[\s\r\n]+");

    string textOnly = tagRemove.Replace(content, string.Empty);
    return compressSpaces.Replace(textOnly, " ");
}
// Markups to be removed.
List<string> markups = new List<string> { "br", "ol", "ul", "li" };

// A single XPath union with one "//tag" term per listed markup.
string xpath = String.Join(" | ", from tag in markups select "//" + tag);

// The query result is checked for null; with a null result nothing is removed.
IEnumerable<HtmlNode> matches = htmlDoc.DocumentNode.SelectNodes(xpath);
foreach (HtmlNode match in matches ?? Enumerable.Empty<HtmlNode>())
{
    match.Remove();
}