I have a bit of a pickle. There are a list of images I want to grab on a website. I know how to do that much, but I have to filter out the location of the images.
Such as I'd want to grab the images in a div tag with an id "theseImages", but there are another set of images within another div tag with an id called "notTheseImages". Looping through every tag into ah HtmlElementCollection with the tag "img" would ignore the divs, because it'd also grab the images from "notTheseImages."
Is there a way I could loop through the images while doing a check to see where those images are located in the div tags?
This could help you to do the selection of your current HTML and maybe for future occassions :)
protected HtmlElement[] GetElementsByParent(HtmlDocument document, HtmlElement baseElement = null, params string[] singleSelectors)
{
if (singleSelectors == null || singleSelectors.Length == 0)
{
throw new Exception("Please give at least 1 selector!");
}
IList<HtmlElement> result = new List<HtmlElement>();
bool last = singleSelectors.Length == 1;
string singleSelector = singleSelectors[0];
if (string.IsNullOrWhiteSpace(singleSelector) || string.IsNullOrWhiteSpace(singleSelector.Trim()))
{
return null;
}
singleSelector = singleSelector.Trim();
if (singleSelector.StartsWith("#"))
{
var item = document.GetElementById(singleSelector.Substring(1));
if (item == null)
{
return null;
}
if (last)
{
result.Add(item);
}
else
{
var results = GetElementsByParent(document, item, singleSelectors.Skip(1).ToArray());
if (results != null && results.Length > 0)
{
foreach (var res in results)
{
result.Add(res);
}
}
}
}
else if (singleSelector.StartsWith("."))
{
if (baseElement == null)
{
baseElement = document.Body;
}
foreach (HtmlElement child in baseElement.Children)
{
string cls;
if (!string.IsNullOrWhiteSpace((cls = child.GetAttribute("class"))))
{
if (cls.Split(' ').Contains(singleSelector.Substring(1)))
{
if (last)
{
result.Add(child);
}
else
{
var results = GetElementsByParent(document, child, singleSelectors.Skip(1).ToArray());
if (results != null && results.Length > 0)
{
foreach (var res in results)
{
result.Add(res);
}
}
}
}
}
}
}
else
{
HtmlElementCollection elements = null;
if (baseElement != null)
{
elements = baseElement.GetElementsByTagName(singleSelector);
}
else
{
elements = document.GetElementsByTagName(singleSelector);
}
foreach (HtmlElement item in elements)
{
if (last)
{
result.Add(item);
}
else
{
var results = GetElementsByParent(document, item, singleSelectors.Skip(1).ToArray());
if (results != null && results.Length > 0)
{
foreach (var res in results)
{
result.Add(res);
}
}
}
}
}
return result.ToArray();
}
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
// here we can query
var result = GetElementsByParent(webBrowser1.Document, null, "#theseImages", "img");
}
result would then contain the images that are under #theseImages
Mind you the GetElementsByParent is fairly untested, I just tested it for your use case and it seemed to be ok.
Don't forget to only start the query once you are sure the document is completed ;)
Related
To change the Content-Control Text in the Document Body I'm doing this:
var elements = doc.MainDocumentPart.Document
.Descendants<SdtElement>()
.Where(s => s.SdtProperties.ChildElements.Count > 0 &&
s.SdtProperties.GetFirstChild<Tag>()?.Val == contentControlTag);
foreach (var element in elements)
{
if (element == null)
{
continue;
}
var elementText = element.Descendants<Text>();
if (elementText != null)
{
var elementTextValue = elementText.FirstOrDefault();
if (elementTextValue != null)
{
elementTextValue.Text = text;
}
elementText.Skip(1).ToList().ForEach(t => t.Remove());
}
}
Sadly this doesn't change the Header/Footer part.
I want to check each page's header/Footer part for a specific Content-Control-Tag and change its text similar to the code above. How can I achieve this?
The headers and footers aren't in the MainDocumentPart.Document, they are subparts in the HeaderParts and FooterParts collections off the MainDocumentPart. So you'll need to process each part separately. Fortunately, all of the parts share a common base class, OpenXmlPart, so you can put them together into a list and process them in a loop. Since you'll be dealing with the base class, you'll need to use RootElement property instead of Document as a starting point to get the Descendants.
So the end result would look something like this:
var partsToUpdate = new List<OpenXmlPart> { doc.MainDocumentPart }
.Concat(doc.MainDocumentPart.HeaderParts)
.Concat(doc.MainDocumentPart.FooterParts);
foreach (var part in partsToUpdate)
{
var elements = part.RootElement
.Descendants<SdtElement>()
.Where(s => s.SdtProperties.ChildElements.Count > 0 &&
s.SdtProperties.GetFirstChild<Tag>()?.Val == contentControlTag);
foreach (var element in elements)
{
if (element == null)
{
continue;
}
var elementText = element.Descendants<Text>();
if (elementText != null)
{
var elementTextValue = elementText.FirstOrDefault();
if (elementTextValue != null)
{
elementTextValue.Text = text;
}
elementText.Skip(1).ToList().ForEach(t => t.Remove());
}
}
}
I'm comparing one XML & one XSD collection in WPF with TreeViewItem.
I finally got the missing result to show in red.
But how to filter the result from XSD collection to a listbox, so that it can show only the lines(values) which are in red ?
private void CompareTrees(ItemCollection xml, ItemCollection xsd, List<string> path)
{
foreach (TreeViewItem tvixsd in xsd.OfType<TreeViewItem>())
{
path.Add(tvixsd.Header.ToString());
if (tvixsd.HasItems)
{
CompareTrees(xml, tvixsd.Items, path);
}
else
{
TreeViewItem res = xml.OfType<TreeViewItem>().FirstOrDefault();
foreach (var point in path.Skip(1))
{
res = res.Items.OfType<TreeViewItem>().FirstOrDefault(i => i.Header.ToString() == point);
if (res == null)
{
treeViewCom.Items.Add(res);
tvixsd.Foreground = Brushes.Red;
break;
}
if (!res.HasItems)
{
break;
}
}
}
if (path.Count != 0)
{
path.RemoveAt(path.Count - 1);
}
}
}
Thanks
Set the Filter property of the ItemsCollection:
xsd.Filter = (obj) =>
{
TreeViewItem tvi = obj as TreeViewItem;
return tvi != null && tvi.Foreground == Brushes.Red;
};
I know you can't modify a collection during a foreach, but I should be able to set variable values of the underlying iterator through it. For some reason the method below, every time it executes is giving be the "Collection was modified..." error:
private static IInstrument AdjustForSimpleInstrument(DateTime valueDate, IInstrument temp)
{
var instr = temp;
foreach (var component in instr.Components)
{
component.Schedule.ScheduleRows.RemoveAll(
sr =>
((sr.Payment != null) && (sr.Payment.PaymentDate != null) &&
(sr.Payment.PaymentDate.AdjustedDate.Date <= valueDate.Date)));
if (
!component.ScheduleInputs.ScheduleType.In(ComponentType.Floating, ComponentType.FloatingLeg,
ComponentType.Cap, ComponentType.Floor)) continue;
foreach (var row in component.Schedule.ScheduleRows)
{
var clearRate = false;
if (row.Payment.CompoundingPeriods != null)
{
if (row.Payment.CompoundingPeriods.Count > 0)
{
foreach (
var period in
row.Payment.CompoundingPeriods.Where(
period => ((FloatingRate)period.Rate).ResetDate.FixingDate > valueDate))
{
period.Rate.IndexRate = null;
clearRate = true;
}
}
}
else if (row.Payment.PaymentRate is FloatingRate)
{
if (((FloatingRate)row.Payment.PaymentRate).ResetDate.FixingDate > valueDate)
clearRate = true;
}
else if (row.Payment.PaymentRate is MultipleResetRate)
{
if (
((MultipleResetRate)row.Payment.PaymentRate).ChildRates.Any(
rate => rate.ResetDate.FixingDate > valueDate))
{
clearRate = true;
}
}
if (clearRate)
{
row.Payment.PaymentRate.IndexRate = null;
}
}
}
return temp;
}
Am I just missing something easy here? The loop that is causing the exception is the second, this one:
foreach (var row in component.Schedule.ScheduleRows)
I suspect this is not .NET-framework stuff, so I assume that row is connected to its collection. Modifying the contents of the row, might shift its place inside its collection, thus modifying the collection, which is not allowed during some foreach-operations.
The solution is simple: create a copy of the collection (by using LINQ).
foreach (var row in component.Schedule.ScheduleRows.ToList())
...
I want to convert this recursive call to LINQ but I am not sure how to do the last two conditions. Please advise on how to add these last two conditions.
private void findGoogleOrganic(HtmlNode node)
{
if (node.Attributes["class"] != null)
{
if (node.Attributes["class"].Value.ToString().Contains("r ld"))
{
String tmp;
tmp = node.ParentNode.InnerHtml.ToString();
bool condition1 = false;
bool condition2 = false;
if (tmp != null)
{
**condition1 = tmp.Contains("qcp47e");
condition2 = tmp.Contains("r ld");**
}
**if (condition1 == false && condition2 == true)**
{
GoogleOrganicResults.Add(new Result(URLGoogleOrganic, Listing, node, SearchEngine.Google, SearchType.Organic, ResultType.Website));
}
}
}
if (node.HasChildNodes)
{
foreach (HtmlNode children in node.ChildNodes)
{
findGoogleOrganic(children);
}
}
}
Here is My first attempt without the last two conditions:
private void findGoogleOrganicLINQ(HtmlNode node)
{
var results = node.Descendants()
.Where(x => x.Attributes["class"] != null &&
x.Attributes["class"].Value.Contains("r ld"))
.Select(x => new Result(URLGoogleLocal, Listing, x, SearchEngine.Google, SearchType.Local, ResultType.GooglePlaces));
foreach (Result x in results)
{
GoogleOrganicResults.Add(x);
}
}
if(node.HasChildNodes)
{
node.ChildNodes.ForEach(findGoogleOrganic);
}
Im using Treeview control of System.Web.UI class, to display Category and subcategory of an item.
I have tried the following code but no hope
protected void tvwOrganisation_TreeNodeCheckChanged(object sender, TreeNodeEventArgs e)
{
if (tvwOrganisation.CheckedNodes.Count > 0)
{
// the selected nodes.
foreach (TreeNode node in tvwOrganisation.CheckedNodes)
{
if (node.ChildNodes.Count > 0)
{
foreach (TreeNode childNode in node.ChildNodes)
{
childNode.Checked = true;
}
}
}
}
}
Is there a way I can do this, I have tried javascript too.
Also what is the replace of AfterCheck - windows formd event in web forms.
The tree tag was as follows,
<asp:TreeView ID="tvwRegionCountry" runat="server" ShowCheckBoxes="All" ExpandDepth="0" AfterClientCheck="CheckChildNodes();" PopulateNodesFromClient="true" ShowLines="true" ShowExpandCollapse="true" OnTreeNodeCheckChanged="tvwRegionCountry_TreeNodeCheckChanged"
onclick="OnTreeClick(event)">
</asp:TreeView>
Added following JS as mentioned in asp.net treeview checkbox selection
<script language="javascript" type="text/javascript">
function OnTreeClick(evt) {
var src = window.event != window.undefined ? window.event.srcElement : evt.target;
var isChkBoxClick = (src.tagName.toLowerCase() == "input" && src.type == "checkbox");
if (isChkBoxClick) {
var parentTable = GetParentByTagName("table", src);
var nxtSibling = parentTable.nextSibling;
if (nxtSibling && nxtSibling.nodeType == 1)//check if nxt sibling is not null & is an element node
{
if (nxtSibling.tagName.toLowerCase() == "div") //if node has children
{
//check or uncheck children at all levels
CheckUncheckChildren(parentTable.nextSibling, src.checked);
}
}
//check or uncheck parents at all levels
CheckUncheckParents(src, src.checked);
}
}
function CheckUncheckChildren(childContainer, check) {
var childChkBoxes = childContainer.getElementsByTagName("input");
var childChkBoxCount = childChkBoxes.length;
for (var i = 0; i < childChkBoxCount; i++) {
childChkBoxes[i].checked = check;
}
}
function CheckUncheckParents(srcChild, check) {
var parentDiv = GetParentByTagName("div", srcChild);
var parentNodeTable = parentDiv.previousSibling;
if (parentNodeTable) {
var checkUncheckSwitch;
if (check) //checkbox checked
{
var isAllSiblingsChecked = AreAllSiblingsChecked(srcChild);
if (isAllSiblingsChecked)
checkUncheckSwitch = true;
else
return; //do not need to check parent if any(one or more) child not checked
}
else //checkbox unchecked
{
checkUncheckSwitch = false;
}
var inpElemsInParentTable = parentNodeTable.getElementsByTagName("input");
if (inpElemsInParentTable.length > 0) {
var parentNodeChkBox = inpElemsInParentTable[0];
parentNodeChkBox.checked = checkUncheckSwitch;
//do the same recursively
CheckUncheckParents(parentNodeChkBox, checkUncheckSwitch);
}
}
}
function AreAllSiblingsChecked(chkBox) {
var parentDiv = GetParentByTagName("div", chkBox);
var childCount = parentDiv.childNodes.length;
for (var i = 0; i < childCount; i++) {
if (parentDiv.childNodes[i].nodeType == 1) //check if the child node is an element node
{
if (parentDiv.childNodes[i].tagName.toLowerCase() == "table") {
var prevChkBox = parentDiv.childNodes[i].getElementsByTagName("input")[0];
//if any of sibling nodes are not checked, return false
if (!prevChkBox.checked) {
return false;
}
}
}
}
return true;
}
//utility function to get the container of an element by tagname
function GetParentByTagName(parentTagName, childElementObj) {
var parent = childElementObj.parentNode;
while (parent.tagName.toLowerCase() != parentTagName.toLowerCase()) {
parent = parent.parentNode;
}
return parent;
}
</script>
and it worked...