WebRequest multiple pages and load into StreamReader - c#

I want to go to multiple pages using ASP.NET 4.0, copy all HTML and then finally paste it in a text box. From there I would like to run my parsing function, what is the best way to handle this?
/// <summary>
/// Downloads every page configured for the "BL2" datacenter, shows the combined
/// HTML in <c>originalBox</c>, and writes the lines that contain the text of
/// <c>ipaddressBox</c> (with markup removed) to <c>resultsBox</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void goButton_Click(object sender, EventArgs e)
{
    if (datacenterCombo.Text != "BL2")
    {
        return;
    }

    fwURL = "http://website1.com/index.html";
    l2URL = "http://website2.com/index.html";
    lbURL = "http://website3.com/index.html";
    l3URL = "http://website4.com/index.html";
    coreURL = "http://website5.com/index.html";

    string[] pageUrls = { fwURL, l2URL, lbURL, l3URL, coreURL };
    String[] crString = { "<BR> " };

    var combinedHtml = new System.Text.StringBuilder();
    var matches = new System.Text.StringBuilder();

    foreach (string pageUrl in pageUrls)
    {
        string pageHtml = DownloadPage(pageUrl);
        combinedHtml.AppendLine(pageHtml);

        // Filter page by page so the last line of one page is never glued to
        // the first line of the next one.
        foreach (string line in pageHtml.Split(crString, StringSplitOptions.RemoveEmptyEntries))
        {
            if (line.Contains(ipaddressBox.Text))
            {
                matches.Append(RemoveHTML(line)).Append("\r\n");
            }
        }
    }

    originalBox.Text = combinedHtml.ToString();

    //Print results to textbox
    resultsBox.Text = matches.ToString();
}

/// <summary>
/// Fetches <paramref name="url"/> with the default credentials and returns the
/// response body as a string. The response and reader are disposed before returning.
/// </summary>
private static string DownloadPage(string url)
{
    WebRequest request = WebRequest.Create(url);
    request.Credentials = CredentialCache.DefaultCredentials;

    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Converts an HTML fragment to plain text: non-breaking spaces become ordinary
/// spaces, every line-break tag (<c>&lt;br&gt;</c>, <c>&lt;BR&gt;</c>,
/// <c>&lt;br /&gt;</c>) becomes a line feed, and all remaining tags are removed.
/// </summary>
/// <param name="text">The HTML fragment; may be null or empty.</param>
/// <returns>
/// The text without markup, or an empty string when <paramref name="text"/> is null or empty.
/// </returns>
public static string RemoveHTML(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Cover both the entity and the literal U+00A0 character.
    text = text.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    // Line breaks must be converted before the generic tag removal below,
    // otherwise they would simply disappear.
    text = System.Text.RegularExpressions.Regex.Replace(
        text,
        @"<br\s*/?>",
        "\n",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    return System.Text.RegularExpressions.Regex.Replace(text, "<[^>]+>", string.Empty);
}

Instead of doing all this manually, you should probably use HtmlAgilityPack; then you could do something like this:
// Let HtmlAgilityPack fetch and parse the page in one step.
var htmlWeb = new HtmlWeb();
HtmlDocument page = htmlWeb.Load("http://google.com");

// Keep only nodes without children whose text contains the address we are looking for.
var matchingLeaves =
    from candidate in page.DocumentNode.Descendants()
    where candidate.ChildNodes.Count == 0
          && candidate.InnerText.Contains(someIpAddress)
    select candidate;

foreach (var leaf in matchingLeaves)
{
    //do something
}
If HtmlAgilityPack is not an option for you, simplify at least the download portion of your code and use a WebClient:
// WebClient is disposable, so scope it with a using block.
using (var client = new WebClient())
{
    // Returns the whole response body as a single string.
    string html = client.DownloadString("http://google.com");
}

Related

Read PDF Line By Line using iText7 and Fill on Textboxes Winforms

I am working on a WinForms application. I use a PDF file to reset the password, and the values in the PDF are stored as key-value pairs (email: xxxx@mail.com, pass: 11111).
What I want to do:
Read the PDF file line by line and fill the appropriate textboxes.
What I Have done:
/// <summary>
/// Writes a one-paragraph PDF containing the e-mail address and the secret key.
/// </summary>
/// <param name="location">Path of the PDF file to create.</param>
/// <param name="email">E-mail address written after "Email: ".</param>
/// <param name="key">Secret key written after "Secret Key: ".</param>
/// <returns>
/// <c>true</c> when the file was written; <c>false</c> when any argument is null or empty.
/// </returns>
public bool CreatePDF(string location, string email, string key)
{
    // Null used to slip past the old `!= ""` checks and blow up later.
    if (string.IsNullOrEmpty(location) || string.IsNullOrEmpty(email) || string.IsNullOrEmpty(key))
    {
        return false;
    }

    // The using blocks close the writer and document even when Add throws.
    using (PdfWriter pdfwriter = new PdfWriter(location))
    using (PdfDocument pdf = new PdfDocument(pdfwriter))
    using (Document document = new Document(pdf))
    {
        Paragraph fields = new Paragraph("Email: " + email + "\n" + "Secret Key: " + key);
        document.Add(fields);
    }

    return true;
}
/// <summary>
/// Extracts the text of every page of a PDF file.
/// </summary>
/// <param name="location">Path of the PDF file to read.</param>
/// <returns>The text of all pages, in page order, separated by a line feed.</returns>
public string ReadPDF(string location)
{
    StringBuilder extracted = new StringBuilder();

    using (var pdfDocument = new PdfDocument(new PdfReader(location)))
    {
        int pageCount = pdfDocument.GetNumberOfPages();
        for (int i = 1; i <= pageCount; ++i)
        {
            // Use a fresh strategy for each page: a reused instance keeps the
            // text it has already collected, so earlier pages would be
            // returned again for every later page.
            var strategy = new LocationTextExtractionStrategy();
            string pageText = PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(i), strategy);

            if (extracted.Length > 0)
            {
                extracted.Append('\n');
            }
            extracted.Append(pageText);
        }
    }

    return extracted.ToString();
}
}
Thank you in advance, guys! Any suggestions on CreatePDF are also welcome.
This is what I came up with,
// Extract the text of every page; the using block closes the document afterwards.
StringBuilder processed = new StringBuilder();
using (var pdfDocument = new PdfDocument(new PdfReader("G:\\Encryption_File.pdf")))
{
    for (int i = 1; i <= pdfDocument.GetNumberOfPages(); ++i)
    {
        // A fresh strategy for each page, so text from earlier pages is not returned again.
        var strategy = new LocationTextExtractionStrategy();
        processed.Append(PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(i), strategy));
        processed.Append('\n');
    }
}

// Each line looks like "Key: value"; keep only the value part of every line.
var values = new System.Collections.Generic.List<string>();
string[] pdfLines = processed.ToString().Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
foreach (string pdfLine in pdfLines)
{
    int separator = pdfLine.IndexOf(':');
    if (separator >= 0)
    {
        values.Add(pdfLine.Substring(separator + 1).Trim());
    }
}

// First value is the e-mail, second the secret key; leave a box empty when the line is missing.
textBox1.Text = values.Count > 0 ? values[0] : string.Empty;
textBox2.Text = values.Count > 1 ? values[1] : string.Empty;

Cannot Write Multiple Paragraph in Aspose

I have an issue when I try to write multiple paragraphs to an existing Shape: only the first paragraph is written. I debugged the code and found that the Shape object has all the paragraphs I want, but when I write to the file, only the first one appears. The project code is below.
/// <summary>
/// Console program that fills the "contenu" shape of the "TITRE_CONTENU" layout
/// with one paragraph per non-empty line of a text file and saves the
/// presentation as a new .pptx file.
/// </summary>
class Program
{
    /// <summary>
    /// Opens the presentation configured under "Source", fills the shape and
    /// saves the result into the folder configured under "directoryToSave".
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The "TITRE_CONTENU" layout or its "contenu" shape does not exist.
    /// </exception>
    public static void Run()
    {
        string dataDir = ConfigurationManager.AppSettings["directoryToSave"];
        string srcDir = ConfigurationManager.AppSettings["Source"];

        using (Presentation presentation = new Presentation(srcDir))
        {
            IMasterLayoutSlideCollection layoutSlides = presentation.Masters[0].LayoutSlides;
            ILayoutSlide layoutSlide = null;
            foreach (ILayoutSlide titleAndObjectLayoutSlide in layoutSlides)
            {
                if (titleAndObjectLayoutSlide.Name == "TITRE_CONTENU")
                {
                    layoutSlide = titleAndObjectLayoutSlide;
                    break;
                }
            }

            if (layoutSlide == null)
            {
                throw new InvalidOperationException("Layout slide 'TITRE_CONTENU' was not found.");
            }

            var contenu = File.ReadAllText(@"E:\DemosProject\PF_GEN\PF_GEN\Source\contenu.txt", Encoding.UTF8);

            // NOTE: the shape edited here belongs to the layout slide, and the
            // slide is only created from that layout afterwards. This is the
            // behaviour the question is about: only the first paragraph shows
            // up in the saved file.
            IAutoShape contenuShape = (IAutoShape)layoutSlide.Shapes.SingleOrDefault(r => r.Name.Equals("contenu"));
            if (contenuShape == null)
            {
                throw new InvalidOperationException("Shape 'contenu' was not found on the layout slide.");
            }

            ITextFrame txt = contenuShape.TextFrame;
            txt.Paragraphs.Clear();

            // One paragraph per non-empty line of the text file.
            string[] lines = contenu.Split(new[] { Environment.NewLine }, StringSplitOptions.None).Where(str => !String.IsNullOrEmpty(str)).ToArray();
            foreach (string lineText in lines)
            {
                var portion = new Portion();
                portion.Text = lineText;
                var paragraphe = new Paragraph();
                paragraphe.Portions.Add(portion);
                txt.Paragraphs.Add(paragraphe);
            }

            presentation.Slides.InsertEmptySlide(0, layoutSlide);
            presentation.Save(dataDir + "AddLayoutSlides_out.pptx", SaveFormat.Pptx);
        }
    }

    /// <summary>
    /// Applies the Aspose license, runs <see cref="Run"/> and reports any error on the console.
    /// </summary>
    static void Main(string[] args)
    {
        try
        {
            var path = ConfigurationManager.AppSettings["sourceAsposeLicensePath"];
            License license = new License();
            license.SetLicense(path);
            Run();
        }
        catch (Exception ex)
        {
            Console.WriteLine("Error" + ex.Message);
        }
        finally
        {
            Console.WriteLine("Terminated");
            Console.ReadKey();
        }
    }
}
You can find the PPT file (the source file) in the attachment: https://gofile.io/?c=JpBDS8
Is there any thing missing in my code?
Thanks
I have looked at your requirements and suggest you try the sample code below. In your code, you add the paragraphs to a shape inside the LayoutSlide and only then add a slide based on that LayoutSlide, expecting it to contain the filled shape. This approach is not correct. You need to add the slide based on the LayoutSlide first, and then add the text to the shape on that new slide. The following code shows how.
/// <summary>
/// Creates a slide from the "TITRE_CONTENU" layout of Master.pptx, fills the
/// slide's "contenu" shape with one paragraph per non-empty line of
/// contenu.txt, and saves the result as AddLayoutSlides_out.pptx.
/// </summary>
/// <exception cref="InvalidOperationException">
/// The "TITRE_CONTENU" layout or the "contenu" shape on the new slide does not exist.
/// </exception>
public static void RunParaText()
{
    string path = @"C:\Aspose Data\";
    string dataDir = path;
    string srcDir = path + "Master.pptx";

    using (Presentation presentation = new Presentation(srcDir))
    {
        IMasterLayoutSlideCollection layoutSlides = presentation.Masters[0].LayoutSlides;
        ILayoutSlide layoutSlide = null;
        foreach (ILayoutSlide titleAndObjectLayoutSlide in layoutSlides)
        {
            if (titleAndObjectLayoutSlide.Name == "TITRE_CONTENU")
            {
                layoutSlide = titleAndObjectLayoutSlide;
                break;
            }
        }

        if (layoutSlide == null)
        {
            throw new InvalidOperationException("Layout slide 'TITRE_CONTENU' was not found.");
        }

        var contenu = File.ReadAllText(dataDir + "contenu.txt", Encoding.UTF8);

        // Create the slide first and edit the shape on that slide, not the
        // shape on the layout it was created from.
        var slide = presentation.Slides.InsertEmptySlide(0, layoutSlide);
        IAutoShape contenuShape = (IAutoShape)slide.Shapes.SingleOrDefault(r => r.Name.Equals("contenu"));
        if (contenuShape == null)
        {
            throw new InvalidOperationException("Shape 'contenu' was not found on the new slide.");
        }

        ITextFrame txt = contenuShape.TextFrame;
        txt.Paragraphs.Clear();

        // Accept both Windows and Unix line endings; empty lines are dropped.
        string[] lines = contenu.Split(new[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string lineText in lines)
        {
            var portion = new Portion();
            portion.Text = lineText;
            var paragraphe = new Paragraph();
            paragraphe.Portions.Add(portion);
            txt.Paragraphs.Add(paragraphe);
        }

        //Change font size w.r.t shape size
        contenuShape.TextFrame.TextFrameFormat.AutofitType = TextAutofitType.Normal;
        presentation.Save(dataDir + "AddLayoutSlides_out.pptx", SaveFormat.Pptx);
    }
}
I am working as Support developer/ Evangelist at Aspose.

Reading a specific value from a GitHub text file

I would like to read the value assigned to a specific key from a text file on the Internet.
In the variable "content" I get the complete content of the text file.
But I only want v7.7.3 from the line: version = "v7.7.3".
How can I filter for the version with the StreamReader?
That is the LastVersion.txt file:
[general]
version = "v7.7.3"
messagenew = "Works with June 2018 Update!\n Plus new Smart Farm strategy\n New Siege Machines\n For more information, go to \n https://mybot.run \n Always free and open source."
messageold = "A new version of MyBot (v7.7.3) is available!\nPlease download the latest from:\nhttps://mybot.run"
Updated: That's my current code.
// Raw URL of the LastVersion.txt file that holds the newest MyBot version.
public string myBotNewVersionURL = "https://raw.githubusercontent.com/MyBotRun/MyBot/master/LastVersion.txt";
// Not assigned or read in the code shown here.
public string myBotDownloadURL = null;
// Folder in which the "Choose MyBot.run.exe" dialog opens.
public string userDownloadFolder = @"C:\Users\XXX\Download\";
// Version read from LastVersion.txt, e.g. "v7.7.3".
public string newMyBotVersion = null;
// "v" + file version of the executable the user picked.
public string currentMyBotVersion = null;
// Not assigned or read in the code shown here.
public string currentMyBotFileName = null;
// Not assigned or read in the code shown here.
public string currentMyBotPath = null;
/// <summary>
/// Lets the user pick the installed MyBot executable, stores its path and
/// version in the settings, then downloads LastVersion.txt and stores the
/// published version in <c>newMyBotVersion</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
public void Btn_checkUpdate_Click(object sender, EventArgs e)
{
    OpenFileDialog openCurrentMyBot = new OpenFileDialog();
    openCurrentMyBot.Title = "Choose MyBot.run.exe";
    openCurrentMyBot.Filter = "Application file|*.exe";
    openCurrentMyBot.InitialDirectory = userDownloadFolder;

    if (openCurrentMyBot.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    MyBot_set.SetValue("mybot_path", Path.GetDirectoryName(openCurrentMyBot.FileName));
    MyBot_set.SetValue("mybot_exe", Path.GetFullPath(openCurrentMyBot.FileName));

    string latestMyBotPath = Path.GetFullPath(openCurrentMyBot.FileName);
    var latestMyBotVersionInfo = FileVersionInfo.GetVersionInfo(latestMyBotPath);
    currentMyBotVersion = "v" + latestMyBotVersionInfo.FileVersion;
    MyBot_set.SetValue("mybot_version", currentMyBotVersion);

    // Dispose the client, stream and reader as soon as the text has been read.
    string content;
    using (WebClient myBotNewVersionClient = new WebClient())
    using (Stream stream = myBotNewVersionClient.OpenRead(myBotNewVersionURL))
    using (StreamReader reader = new StreamReader(stream))
    {
        content = reader.ReadToEnd();
    }

    // Stays null when the file has no version entry in its [general] section.
    newMyBotVersion = FindGeneralVersion(content);
}

/// <summary>
/// Returns the unquoted value of the <c>version</c> key in the <c>[general]</c>
/// section of an ini-style text, or null when there is no such entry.
/// </summary>
private static string FindGeneralVersion(string content)
{
    bool inGeneralSection = false;

    using (StringReader lines = new StringReader(content ?? string.Empty))
    {
        string rawLine;
        while ((rawLine = lines.ReadLine()) != null)
        {
            string line = rawLine.Trim();

            if (line.StartsWith("[", StringComparison.Ordinal))
            {
                // Any section header switches the search on or off.
                inGeneralSection = line.StartsWith("[general]", StringComparison.Ordinal);
                continue;
            }

            int separator = line.IndexOf('=');
            if (inGeneralSection && separator > 0 && line.Substring(0, separator).Trim() == "version")
            {
                return line.Substring(separator + 1).Trim().Trim('"');
            }
        }
    }

    return null;
}
Read From local
// Take the line directly after the [general] header and split it at the first '='.
// FirstOrDefault runs the query once; enumerating it twice would read the file twice.
var entry = File.ReadLines("..\\..\\test.ini")
    .SkipWhile(line => !line.StartsWith("[general]"))
    .Skip(1)
    .Take(1)
    .Where(line => line.IndexOf('=') >= 0)
    .Select(line => new
    {
        Key = line.Substring(0, line.IndexOf('=')),
        Value = line.Substring(line.IndexOf('=') + 1)
    })
    .FirstOrDefault();

if (entry == null)
{
    // No [general] header, nothing after it, or the next line has no '='.
    Console.WriteLine("No key/value pair found after [general]");
}
else
{
    Console.WriteLine("Key : " + entry.Key +
                      " Value : " + entry.Value);
}
Updated
To read from GitHub: File.ReadLines does not work with a URL, so download the text first.
string myBotNewVersionURL = "https://raw.githubusercontent.com/MyBotRun/MyBot/master/LastVersion.txt";

string key = null;
string value = null;

// Read the remote file line by line; the using blocks release the connection when done.
using (WebClient myBotNewVersionClient = new WebClient())
using (Stream stream = myBotNewVersionClient.OpenRead(myBotNewVersionURL))
using (StreamReader reader = new StreamReader(stream))
{
    bool headerSeen = false;
    string line;
    while ((line = reader.ReadLine()) != null)
    {
        // ReadLine already removes the line ending; tabs are removed as before.
        line = line.Replace("\t", string.Empty);

        if (!headerSeen)
        {
            headerSeen = line.StartsWith("[general]");
            continue;
        }

        // Only the line directly after the header is considered.
        int separator = line.IndexOf('=');
        if (separator >= 0)
        {
            key = line.Substring(0, separator);
            value = line.Substring(separator + 1);
        }
        break;
    }
}

if (key == null)
{
    Console.WriteLine("No key/value pair found after [general]");
}
else
{
    Console.WriteLine("Key : " + key + " Value : " + value);
}

Copy multiple webpages source code to txt, C#

Below is my code. I have multiple web pages, and I need to save the source code of all of them to one txt file. Is that possible? My code saves the source of just one web page; I can choose which one by changing the number in this part of the code:
string s=WebClient.DownloadString(listBox8.Items[0].ToString());
listBox8 contains multiple web page addresses. Those addresses are taken from the tvrage_db2.txt file.
Can you help me?
// One address per line of tvrage_db2.txt.
List<string> link = new List<string>(System.IO.File.ReadAllLines("tvrage_db2.txt"));
listBox8.DataSource = link;

// The local is named "client" so that it does not hide the WebClient type.
using (WebClient client = new WebClient())
{
    for (int i = 0; i < listBox8.Items.Count; i++)
    {
        // NOTE: every pass requests Items[0], and WriteAllLines replaces the
        // file each time, so only one page ends up in WriteLines.txt. This is
        // the behaviour the question asks about.
        string s = client.DownloadString(listBox8.Items[0].ToString());
        Clipboard.SetText(s, TextDataFormat.Text);
        string[] lines = { s };
        System.IO.File.WriteAllLines(@"WriteLines.txt", lines);
    }
}
}
EDIT:
Problem solved
// The local is named "client" so that it does not hide the WebClient type.
using (WebClient client = new WebClient())
{
    for (int i = 0; i < listBox8.Items.Count; i++)
    {
        // Download the i-th address and append its source to the output file.
        // AppendAllText never truncates, so the file keeps growing across runs.
        string s = client.DownloadString(listBox8.Items[i].ToString());
        System.IO.File.AppendAllText(@"WriteLines.txt", s);
    }
}
You're already looping listBox8 items, so change
string s = WebClient.DownloadString(listBox8.Items[0].ToString());
to
string s = WebClient.DownloadString(listBox8.Items[i].ToString());

Remove all HTML tags and do a carriage return on <BR> in C#

I am creating an HTML-to-text parser. I need to remove all HTML elements, and I want to insert a carriage return every time there is a <BR> and then remove the <BR> as well, so that no HTML tags are left. I then want to search the text for a certain string that is in the combobox. Thank you in advance for your help.
/// <summary>
/// Lets the user pick a configuration file, shows its content in
/// <c>richTextBox1</c>, and lists the trimmed lines that start with the text of
/// <c>comboBox1</c> in <c>textBox1</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void navigateWeb_Click(object sender, EventArgs e)
{
    openFD.Title = "Select your configuration file";
    openFD.InitialDirectory = "C:";
    openFD.FileName = "";
    openFD.Filter = "Config File (*.cfg)|*.cfg|Text File (*.txt)|*.txt|All Files (*.*)|*.*";

    // Without this check a cancelled dialog leaves an empty file name,
    // which the file read below cannot open.
    if (openFD.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    MyURL = openFD.FileName;

    // Read the file once; the previous reader was never closed.
    string content = File.ReadAllText(MyURL);
    richTextBox1.Text = content;

    var lines = new System.Text.StringBuilder();
    using (var lineReader = new System.IO.StringReader(content))
    {
        string line;
        while ((line = lineReader.ReadLine()) != null)
        {
            string trimmed = line.Trim();
            if (trimmed.StartsWith(comboBox1.Text))
            {
                if (lines.Length > 0)
                {
                    lines.Append(Environment.NewLine);
                }
                lines.Append(trimmed);
            }
        }
    }

    textBox1.Text = lines.ToString();
}
*********UPDATE*****
Here is the solution that got the job done:
/// <summary>
/// Converts an HTML fragment to plain text: non-breaking spaces become ordinary
/// spaces, every line-break tag (<c>&lt;br&gt;</c>, <c>&lt;BR&gt;</c>,
/// <c>&lt;br /&gt;</c>) becomes a line feed, and all remaining tags are removed.
/// </summary>
/// <param name="text">The HTML fragment; may be null or empty.</param>
/// <returns>
/// The text without markup, or an empty string when <paramref name="text"/> is null or empty.
/// </returns>
public static string RemoveHTML(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Cover both the entity and the literal U+00A0 character.
    text = text.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    // Line breaks must be converted before the generic tag removal below,
    // otherwise they would simply disappear.
    text = System.Text.RegularExpressions.Regex.Replace(
        text,
        @"<br\s*/?>",
        "\n",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    return System.Text.RegularExpressions.Regex.Replace(text, "<[^>]+>", string.Empty);
}
/// <summary>
/// Asks for a URL or file through the open-file dialog, shows it in the browser
/// control, and lists the lines (split on "&lt;BR&gt; ") that contain "permit"
/// (when <c>permitACL</c> is checked) or the text of <c>comboBox1</c> in
/// <c>textBox1</c>, with markup removed.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void navigateWeb_Click(object sender, EventArgs e)
{
    openFD.Title = "Enter URL in the box below";
    openFD.InitialDirectory = "C:";
    openFD.FileName = "http://msnconf/configtc.aspx?IP=10.6.64.200&m=c";
    openFD.Filter = "HTTP://|*.*|Config File (*.cfg)|*.cfg|Text File (*.txt)|*.txt|All Files (*.*)|*.*";

    if (openFD.ShowDialog() == DialogResult.Cancel)
    {
        return;
    }

    MyURL = openFD.FileName;
    webBrowser1.Visible = true;
    richTextBox1.Visible = false;
    permitACL.Enabled = true;
    webBrowser1.Navigate(MyURL);

    //Open and read file; the using block closes the reader afterwards
    using (var objReader = new System.IO.StreamReader(MyURL))
    {
        richTextBox1.Text = objReader.ReadToEnd();
    }

    //Read all lines of file
    String[] crString = { "<BR> " };
    String[] aLines = richTextBox1.Text.Split(crString, StringSplitOptions.RemoveEmptyEntries);

    var noHtml = new System.Text.StringBuilder();
    foreach (string line in aLines)
    {
        // NOTE(review): a line that matches both filters is added twice, as
        // before. Confirm whether that is intended.
        if (permitACL.Checked && line.Contains("permit"))
        {
            noHtml.Append(RemoveHTML(line)).Append("\r\n");
        }

        if (line.Contains(comboBox1.Text))
        {
            noHtml.Append(RemoveHTML(line)).Append("\r\n");
        }
    }

    //Print results to textbox
    textBox1.Text = noHtml.ToString();
}
I suggest you use the HTML Agility Pack - it is an HTML parser that you can query with using XPath syntax.
/// <summary>
/// Converts an HTML fragment to plain text: non-breaking spaces become ordinary
/// spaces, every line-break tag (<c>&lt;br&gt;</c>, <c>&lt;BR&gt;</c>,
/// <c>&lt;br /&gt;</c>) becomes a line feed, and all remaining tags are removed.
/// </summary>
/// <param name="text">The HTML fragment; may be null or empty.</param>
/// <returns>
/// The text without markup, or an empty string when <paramref name="text"/> is null or empty.
/// </returns>
public static string RemoveHTML(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Cover both the entity and the literal U+00A0 character.
    text = text.Replace("&nbsp;", " ").Replace("\u00A0", " ");

    // Line breaks must be converted before the generic tag removal below,
    // otherwise they would simply disappear.
    text = System.Text.RegularExpressions.Regex.Replace(
        text,
        @"<br\s*/?>",
        "\n",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    return System.Text.RegularExpressions.Regex.Replace(text, "<[^>]+>", string.Empty);
}

Categories

Resources