This code access iframe and gets me source code.
string content = null;
var iframe = browser.Document.GetElementsByTagName("iframe").FirstOrDefault() as Gecko.DOM.GeckoIFrameElement;
if (iframe != null)
{
var html = iframe.ContentDocument.DocumentElement as GeckoHtmlElement;
if (html != null)
content = html.OuterHtml;
textBox1.Text = content;
}
I tried puting some code
string content = null;
var iframe = browser.Document.GetElementsByTagName("iframe").FirstOrDefault() as Gecko.DOM.GeckoIFrameElement;
if (iframe != null)
{
var html = iframe.ContentDocument.DocumentElement as GeckoHtmlElement;
if (html != null)
content = html.OuterHtml;
textBox1.Text = content;
GeckoElementCollection elements = browser.Document.GetElementsByName("username");
foreach (var element in elements)
{
GeckoInputElement input = (GeckoInputElement)element;
input.Value = "Auto filled!";
}
}
But it wont work as code dont find elements. Any ideas?
Tried searching google for any iframe examples but seems that there isnt any good documentation for it.
Why are you looking for in the main document? You should look for in a frame.
string content = null;
var iframe = browser.Document.GetElementsByTagName("iframe").FirstOrDefault() as Gecko.DOM.GeckoIFrameElement;
if (iframe != null)
{
var html = iframe.ContentDocument.DocumentElement as GeckoHtmlElement;
if (html != null)
content = html.OuterHtml;
textBox1.Text = content;
GeckoElementCollection elements = iframe.ContentDocument.GetElementsByName("username");
foreach (var element in elements)
{
GeckoInputElement input = (GeckoInputElement)element;
input.Value = "Auto filled!";
}
}
Related
I was using this piece of code till today and it was working fine:
for (int page = 1; page <= reader.NumberOfPages; page++)
{
var cpage = reader.GetPageN(page);
var content = cpage.Get(PdfName.CONTENTS);
var ir = (PRIndirectReference)content;
var value = reader.GetPdfObject(ir.Number);
if (value.IsStream())
{
PRStream stream = (PRStream)value;
var streamBytes = PdfReader.GetStreamBytes(stream);
var tokenizer = new PRTokeniser(new RandomAccessFileOrArray(streamBytes));
try
{
while (tokenizer.NextToken())
{
if (tokenizer.TokenType == PRTokeniser.TK_STRING)
{
string strs = tokenizer.StringValue;
if (!(br = excludeList.Any(st => strs.Contains(st))))
{
//strfor += tokenizer.StringValue;
if (!string.IsNullOrWhiteSpace(strs) &&
!stringsList.Any(i => i == strs && excludeHeaders.Contains(strs)))
stringsList.Add(strs);
}
}
}
}
finally
{
tokenizer.Close();
}
}
}
But today I got an exception for some pdf file: Unable to cast object of type 'iTextSharp.text.pdf.PdfArray' to type 'iTextSharp.text.pdf.PRIndirectReference
On debugging I got to know that the error is at this line: var ir = (PRIndirectReference)content;. That's because the pdf content that I'm extracting, I get it in the form of ArrayList, as you can see from the below image:
It would be really grateful if anyone can help me with this. Thanks in advance.
EDIT :
The pdf contents are paragraphs, tables, headers & footers, images in few cases. But I'm not bothered of images as I'm bypassing them.
As you can see from the code I'm trying to add the words into a string list, so I expect the output as plain text; words to be specific.
That was real easy! Don't know why I couldn't make out.
PdfReader reader = new PdfReader(name);
List<string> stringsList = new List<string>();
for (int page = 1; page <= reader.NumberOfPages; page++)
{
//directly get the contents into a byte stream
var streamByte = reader.GetPageContent(page);
var tokenizer = new PRTokeniser(new RandomAccessFileOrArray(streamByte));
var sb = new StringBuilder(); //use a string builder instead
try
{
while (tokenizer.NextToken())
{
if (tokenizer.TokenType == PRTokeniser.TK_STRING)
{
var currentText = tokenizer.StringValue;
currentText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));
sb.Append(tokenizer.StringValue);
}
}
}
finally
{
//add appended strings into a string list
if(sb != null)
stringsList.Add(sb.ToString());
tokenizer.Close();
}
}
imgs = doc.DocumentNode.SelectNodes("//img");
HtmlNode img in imgs
string imageIdString = image.Id.ToString();
img.SetAttributeValue("src", "/ImageBrowser/ImageById/" + imageIdString);
I get a proper value for the ID, but the img source stays unchanged and I can't find why
tried to manage it like here:
Need to replace an img src attrib with new value
Edit1: The requested code
string input = sectionEditModel.Content;
string htmlstring = sectionEditModel.Content;
string htmlstringdecoded = HttpUtility.HtmlDecode(htmlstring);
HtmlDocument doc = new HtmlDocument();
List<string> urls = new List<string>();
DbImgBrowser.Models.Image image = null;
doc.LoadHtml(htmlstringdecoded);
var files = new FilesRepository();
HtmlNodeCollection imgs = new HtmlNodeCollection(doc.DocumentNode);
imgs = doc.DocumentNode.SelectNodes("//img");
if (imgs != null && imgs.Count > 0)
{
foreach (HtmlNode img in imgs)
{
HtmlAttribute srcs = img.Attributes[#"src"];
urls.Add(srcs.Value);
{
foreach (string Value in urls){
string AttrVal = img.GetAttributeValue("src", null);
if(AttrVal.Contains("base64"))
{
byte[] data = Convert.FromBase64String(Value.Substring(Value.IndexOf(",") + 1));
var pFolder = files.GetFolderByPath(string.Empty);
if (pFolder != null)
{
image = new DbImgBrowser.Models.Image()
{
Name = Guid.NewGuid().ToString(),
Folder = pFolder,
Image1 = data
};
files.Db.Images.Add(image);
files.Db.SaveChanges();
string imageIdString = image.Id.ToString();
img.SetAttributeValue("src", "/ImageBrowser/ImageById/" + imageIdString);
files.Db.SaveChanges();
}
}
Edit2: Example paths: before base64 example image
Path by Url example /ImageBrowser/Image?path=Test2.PNG
Wanted Result src="ImageBrowser/ImageById/"ID" (1-1000)
Edit3: Still all src is not changed
The answer is very simple.
I was on a local doc but I had to return it to the content and save the section
SectionsRepository.SaveSection(Section sec)
I have a problem with extracting text between nodes.It shows me the entire span node.I would like to get value of hours e.g 4:45;5:15 e.t.c.
var html = #"https://programtv.onet.pl/";
HtmlWeb web = new HtmlWeb();
var htmldoc=web.Load(html);
var findhours = htmldoc.DocumentNode.SelectNodes("//div[#id='boxTV1']//div[#class='hours']//span[#class='hour']");
if (findhours != null)
{
foreach (var x in findhours )
{
Console.WriteLine(x.OuterHtml);
}
}
else
{
Console.WriteLine("node = null");
}
Console.ReadLine();
Application window
You can simply use the InnerText property of your HtmlNode object. Checkout the following documentation.
foreach (var x in findhours )
{
Console.WriteLine(x.InnerText);
}
I'm using HtmlAgilityPack. Does it have a function similar to jQuery closest? (closest parent that matches a CSS selector). I tried google and the website http://html-agility-pack.net - and both don't appear to have an answer.
As there is no built-in method currently, you can write a Extension method to achieve this.
I have written a simple extension method which can be used to find elements with tagName, ID and class names that you can use.
Anyways it can be further extended easily to match other selectors.
public static class HtmlAgilityPackExtensions
{
public static HtmlNode Closest(this HtmlNode node, string jQuerySelector)
{
if (node == null) return null;
string tagName = "", id = "";
var classes = new List<string>();
if (jQuerySelector.Contains("."))
{
var parts = jQuerySelector.Split('.');
if (!string.IsNullOrWhiteSpace(parts[0]))
{
tagName = parts[0];
}
for (int i = 1; i < parts.Length; i++)
{
classes.Add(parts[i]);
}
}
if (jQuerySelector.Contains("#"))
{
var parts = jQuerySelector.Split('#');
if (!string.IsNullOrWhiteSpace(parts[0]))
{
tagName = parts[0];
}
id = parts[1];
}
if (string.IsNullOrWhiteSpace(tagName) && string.IsNullOrWhiteSpace(id) && classes.Count == 0)
{
tagName = jQuerySelector;
}
HtmlNode closestParent = null;
while (node.ParentNode != null && closestParent == null)
{
var isClosest = true;
node = node.ParentNode;
if (!string.IsNullOrWhiteSpace(tagName))
{
isClosest = node.Name == tagName;
}
if (isClosest && !string.IsNullOrWhiteSpace(id))
{
isClosest = node.Id == id;
}
if (isClosest && classes.Count > 0)
{
var classNames = node.GetAttributeValue("class", "");
if (!string.IsNullOrWhiteSpace(classNames))
{
foreach (string c in classes)
{
isClosest = classNames.Contains(c);
if (!isClosest) break;
}
}
}
if (isClosest)
{
closestParent = node;
}
}
return closestParent;
}
}
Test Code
var html = "<div><div id='parent1' class='parent'><span id='parent2' class='parent'><div id='parent3' class='parent'><div id='TestNode' class='child'>Test node</div></div></span></div></div>";
var htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(html);
var testNode1 = htmlDoc.DocumentNode.SelectSingleNode("//div[#id='TestNode']");
if (testNode1 != null)
{
var parent1 = testNode1.Closest(".parent");
var parent2 = testNode1.Closest("#parent1");
var parent3 = testNode1.Closest("span.parent");
var nonExistingParent = testNode1.Closest("span.parent1");
}
I needed the same thing, but couldn't find any, so I wrote my own Closest function:
public static HtmlNode Closest(this HtmlNode node, string search)
{
search = search.ToLower();
while (node.ParentNode != null)
{
if (node.ParentNode.Name.ToLower() == search) return node.ParentNode;
node = node.ParentNode;
}
return null;
}
This one only works for tag names (as I needed) you can extend it to classes, attributes, and ...
How to assign src with content id in dynamically in c#
How to embed multiple images in email body using .NET
But not getting good idea.
When I submit then I would able to get html source and how to assign cid dynmically
> >
> > <html>><body><img src="~/Upload/1.jpg"><br><img src="~/Upload/1.jpg" /><br><img
> src="~/Upload/3.jpg"/><br><br>
> > thanks!!!</body></html>
Need to convert
I want to send email with multiple images
<html>><body><img src=cid:c1 /><br><img src=cid:c2 /><br><img
src=cid:c3/><br><br>
thanks!!!</body></html>
then
if (htmlString == null) return null;
var doc = new HtmlDocument();
doc.LoadHtml(htmlString);
HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//img");
if (nodes == null) return null;
foreach (HtmlNode node in nodes)
{
if (node.Attributes.Contains("src"))
{
string data = node.Attributes["src"].Value;
string base64Data = Regex.Match(data, #"data:image/(?<type>.+?),(?<data>.+)").Groups["data"].Value;
if (base64Data != "")
{
string cid = Guid.NewGuid().ToString();
byte[] binData = Convert.FromBase64String(base64Data);
var stream = new MemoryStream(binData);
string contenttype = "image/" +
Regex.Match(data, #"data:image/(?<type>.+?);(?<data>.+)").Groups["type"]
.Value;
var inline = new Attachment(stream, new ContentType(contenttype));
inline.ContentDisposition.Inline = true;
inline.ContentDisposition.DispositionType = DispositionTypeNames.Inline;
inline.ContentId = cid;
inline.ContentType.MediaType = contenttype;
mailMessage.Attachments.Add(inline);
node.Attributes["src"].Value = "cid:" + cid;
}
}
Any good idea to make dynmically assign and add Linked resources and alternative view dynamically,
the given code is workable for static case
string path = System.Web.HttpContext.Current.Server.MapPath("~/images/Logo.jpg"); // my logo is placed in images folder
//var path=""
var logo = new LinkedResource(path);
logo.ContentId = "companylogo";
logo.ContentType = new ContentType("image/jpeg");
//now do the HTML formatting
AlternateView av1 = AlternateView.CreateAlternateViewFromString(
"<html><body><img src=cid:companylogo/>" +
"<br></body></html>" + strMailContent,
null, MediaTypeNames.Text.Html);
//now add the AlternateView
av1.LinkedResources.Add(logo);
//now append it to the body of the mail
msg.AlternateViews.Add(av1);
Here is the complete solution for you.
string inputHtmlContent = "<Your Html Content containing images goes here>";
string outputHtmlContent = string.Empty;
var myResources = new List<LinkedResource>();
if ((!string.IsNullOrEmpty(inputHtmlContent)))
{
var doc = new HtmlDocument();
doc.LoadHtml(inputHtmlContent);
HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//img");
if (nodes !=null)
{
foreach (HtmlNode node in nodes)
{
if (node.Attributes.Contains("src"))
{
string data = node.Attributes["src"].Value;
string imgPath = System.Web.HttpContext.Current.Server.MapPath(data);
var imgLogo = new LinkedResource(imgPath);
imgLogo.ContentId = Guid.NewGuid().ToString();
imgLogo.ContentType = new ContentType("image/jpeg");
myResources.Add(imgLogo);
node.Attributes["src"].Value = string.Format("cid:{0}", imgLogo.ContentId);
outputHtmlContent = doc.DocumentNode.OuterHtml;
}
}
}
else
{
outputHtmlContent = inputHtmlContent;
}
AlternateView av2 = AlternateView.CreateAlternateViewFromString(outputHtmlContent,
null, MediaTypeNames.Text.Html);
foreach (LinkedResource linkedResource in myResources)
{
av2.LinkedResources.Add(linkedResource);
}
var msg = new MailMessage();
msg.AlternateViews.Add(av2);
msg.IsBodyHtml = true;
<-- Enter Other required Informations and send mail -->
...
}