I'm new on web scraping and I would like to get an "id" and i want webbrowser to navigate next page using this "id" and do what i want to do. For example;
Go to https://www.example.com/
then get "id"
navigate https://www.example.com/id
then get title name
Go to https://www.example.com/
then get "second id"
navigate https://www.example.com/id
then get title name
...
How can i achieve this on c#?
note: this web site has "Secure Hypertext Transfer Protocol(https)"
EDIT: DocumentCompleted firing twice when url navigating to www.example.com/id
[STAThread]
static void Main(string[] args)
{
WB = new WebBrowser();
WB.AllowNavigation = true;
WB.ScriptErrorsSuppressed = true;
WB.Navigate("https://www.example.com/page");
WB.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(WB_DocumentCompleted);
while (completed)
{
Application.DoEvents();
}
}
static async void WB_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
System.Windows.Forms.HtmlDocument doc = WB.Document;
var docy = new HtmlAgilityPack.HtmlDocument();
docy.Load(new StringReader(WB.Document.Body.InnerHtml));
HtmlElementCollection divs = doc.GetElementsByTagName("td");
if (WB.Url.ToString().IndexOf("page") > -1)
{
HtmlNodeCollection prens = docy.DocumentNode.SelectNodes(".//*[#id='mid']//dıv//dıv//table//tbody//tr//td//a");
for (int i = 0; i < prens.Count; i++)
{
HtmlNode nodes= docy.DocumentNode.SelectSingleNode(".//*[#id='mid']//dıv[6]//dıv//table[2]//tbody//tr[" + satir + "]//td[2]//a");
HtmlNode links = nodes;
if (links != null)
{
hrefValue = links.GetAttributeValue("href", string.Empty);
string[] gelenevent = hrefValue .Split('.');
eventid = gelenevent[0].Remove(0, 1);
satir++;
while (WB.IsBusy)
Application.DoEvents();
for (int y = 0; y < 500; y++)
if (WB.ReadyState != WebBrowserReadyState.Complete)
{
Application.DoEvents();
await Task.Delay(5000);
Thread.Sleep(10);
}
else
break;
Application.DoEvents();
WB.Navigate(new Uri("https://www.example.com/" + eventid + ".html"));
break;
}
}
}
if (WB.Url.ToString().IndexOf(eventid) > -1)
{
var node = docy.DocumentNode.SelectSingleNode(".//*[#id='mid']//dıv[6]//dıv//table[4]//tbody//tr[2]//td[1]");
while (WB.IsBusy)
Application.DoEvents();
for (int i = 0; i < 500; i++)
if (WB.ReadyState != WebBrowserReadyState.Complete)
{
Application.DoEvents();
await Task.Delay(5000);
Thread.Sleep(10);
}
else
break;
Application.DoEvents();
WB.Navigate(new Uri("https://www.example.com/page"));
}
}
Related
I created dynamically 2 elements. Now I would like copy data from richTextBox to WebBrowser on event onChange
How can I copy data from richTextBox to WebBrowser when I created it dynamically?
My code
void OpenFile(string path, string filename)
{
StreamReader read = new StreamReader(path);
if(richTextBox1.Text.Length == 0)
{
return;
}
else
{
TabPage tp = new TabPage();
int tc = tabControl1.TabCount + 1;
tp.Text = filename;
tabControl1.TabPages.Add(tp);
RichTextBox rtb = new RichTextBox();
WebBrowser wb = new WebBrowser();
rtb.Width = tp.Width / 2;
rtb.Height = tp.Height;
rtb.TextChanged += new System.EventHandler(setText);
rtb.Text = read.ReadToEnd();
wb.Left = tp.Width / 2;
wb.Width = tp.Width / 2;
wb.Height = tp.Height;
tp.Controls.Add(rtb);
tp.Controls.Add(wb);
return;
}
}
private void setText(object sender, EventArgs e)
{
MessageBox.Show("TEXT CHANGED");
}
Try this
rtb.TextChanged += (s, e) => { wb.DocumentText = rtb.Text; };
Here's my requirement. There is a public website which takes alphanumeric string as input and Retrieves data into a table element (via button click). The table element has couple of labels which gets populated with corresponding data. I need a tool/solution which can check if a particular string exists in the website's database. If so retrieve all the Ids of all the occurrences of that string. Looking at the "view source" of the website (No JavaScript used there), I noted the input element name and the button element name and with the help of existing samples I was able to get a working solution. Below is the code which works but I want to check if there is any better and faster approach. I know the below code has some issues like "infinite loop" issue and others. But I am basically looking at alternate solution which can work quickly for a million records.
namespace SearchWebSite
{
public partial class Form1 : Form
{
bool searched = false;
long i;
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
i = 1;
WebBrowser browser = new WebBrowser();
string target = "http://www.SomePublicWebsite.com";
browser.Navigate(target);
browser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(XYZ);
}
private void XYZ(object sender, WebBrowserDocumentCompletedEventArgs e)
{
WebBrowser b = null;
if (searched == false)
{
b = (WebBrowser)sender;
b.Document.GetElementById("txtId").InnerText = "M" + i.ToString();
b.Document.GetElementById("btnSearch").InvokeMember("click");
searched = true;
}
if (b.ReadyState == WebBrowserReadyState.Complete)
{
if (b.Document.GetElementById("lblName") != null)
{
string IdNo = "M" + i.ToString();
string DateString = b.Document.GetElementById("lblDate").InnerHtml;
string NameString = b.Document.GetElementById("lblName").InnerHtml;
if (NameString != null && (NameString.Contains("XXXX") || NameString.Contains("xxxx")))
{
using (StreamWriter w = File.AppendText("log.txt"))
{
w.WriteLine("Id {0}, Date {1}, Name {2}", IdNo, DateString, NameString);
i = i + 1;
searched = false;
}
}
else
{
i = i + 1;
searched = false;
}
}
else
{
i = i + 1;
searched = false;
}
}
}
}
}
If the page after seach button clicked contains txtId and btnSearch controls than you can use this code snippet, this is not faster but the correct form I think.
public partial class Form1 : Form
{
bool searched = false;
long i = 1;
private string IdNo { get { return "M" + i.ToString(); } }
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
i = 1;
WebBrowser browser = new WebBrowser();
string target = "http://www.SomePublicWebsite.com";
browser.Navigate(target);
browser.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(XYZ);
}
private void XYZ(object sender, WebBrowserDocumentCompletedEventArgs e)
{
WebBrowser b = (WebBrowser)sender;
if (b.ReadyState == WebBrowserReadyState. Complete)
{
if (searched == false)
{
DoSearch(b); return;
}
if (b.Document.GetElementById("lblName") != null)
{
string DateString = b.Document.GetElementById("lblDate").InnerHtml;
string NameString = b.Document.GetElementById("lblName").InnerHtml;
if (NameString != null && (NameString.Contains("XXXX") || NameString.Contains("xxxx")))
using (StreamWriter w = File.AppendText("log.txt"))
w.WriteLine("Id {0}, Date {1}, Name {2}", IdNo, DateString, NameString);
}
i++;
DoSearch(b);
}
}
private void DoSearch(WebBrowser wb)
{
wb.Document.GetElementById("txtId").InnerText = IdNo;
wb.Document.GetElementById("btnSearch").InvokeMember("click");
searched = true;
}
}
I have been looking all over the Internet for this and have found similar, but none of it will work for me. Everything I have found assumes the listbox is on the main form and not the secondary form, or the code is for an older version of C# or Visual Studio (I am using VS2008).
I am creating a web browser that has a button on the main form (called frmMyBrowser) to open a dialog (frmBookmarks) with a listbox (lstBookmark) with bookmarked URLs. I need to be able to double click on an item (the bookmarked URL) and have the text pasted into the address bar (cmbAddress) of the main form.
Any help would be GREATLY appreciated.
I figured out that if I create the window at runtime, I can get it to work. In case anyone is interested, the code I used is below.
public void frmMyBrowser_ShowFavorites(object sender, EventArgs e)
{
frmFavorites.ShowIcon = false;
frmFavorites.ShowInTaskbar = false;
frmFavorites.MinimizeBox = false;
frmFavorites.MaximizeBox = false;
frmFavorites.ControlBox = false;
frmFavorites.Text = "Bookmarks";
frmFavorites.Width = 500;
frmFavorites.Height = 350;
frmFavorites.Controls.Add(lstFavorites);
frmFavorites.Controls.Add(btnRemoveFavorite);
frmFavorites.Controls.Add(btnAddFavorite);
frmFavorites.Controls.Add(btnCloseFavorites);
frmFavorites.Controls.Add(txtCurrentUrl);
lstFavorites.Width = 484;
lstFavorites.Height = 245;
btnRemoveFavorite.Location = new Point(8, 280);
btnAddFavorite.Location = new Point(8, 255);
btnCloseFavorites.Location = new Point(400, 255);
txtCurrentUrl.Location = new Point(110, 255);
txtCurrentUrl.Size = new Size(255, 20);
btnAddFavorite.Text = "Add";
btnRemoveFavorite.Text = "Remove";
btnCloseFavorites.Text = "Close";
txtCurrentUrl.Text = wbBrowser.Url.ToString();
btnAddFavorite.Click += new EventHandler(btnAddFavorite_Click);
btnRemoveFavorite.Click += new EventHandler(btnRemoveFavorite_Click);
frmFavorites.Load += new EventHandler(frmFavorites_Load);
btnCloseFavorites.Click += new EventHandler(btnCloseFavorites_Click);
lstFavorites.MouseDoubleClick += new MouseEventHandler(lstFavorites_MouseDoubleClick);
frmFavorites.Show();
}
public void btnCloseFavorites_Click(object sender, EventArgs e)
{
if (lstFavorites.Items.Count > 0)
{
using (StreamWriter writer = new System.IO.StreamWriter(#Application.StartupPath + "\\favorites.txt"))
{
for (int i = 0; i < lstFavorites.Items.Count; i++)
{
writer.WriteLine(lstFavorites.Items[i].ToString());
}
writer.Close();
}
}
frmFavorites.Hide();
}
public void btnAddFavorite_Click(object sender, EventArgs e)
{
string strFavoriteAddress = wbBrowser.Url.ToString();
if (!lstFavorites.Items.Contains(strFavoriteAddress))
{
lstFavorites.Items.Add(strFavoriteAddress);
MessageBox.Show("Favorite Added", "Message");
}
else if (lstFavorites.Items.Contains(strFavoriteAddress))
{
MessageBox.Show("This site already exists in your Favorites list!", "Error");
}
else
{
}
}
public void btnRemoveFavorite_Click(object sender, EventArgs e)
{
try
{
lstFavorites.Items.RemoveAt(lstFavorites.SelectedIndices[0]);
}
catch
{
MessageBox.Show("You need to select an item", "Error");
}
}
public void frmFavorites_Load(object sender, EventArgs e)
{
try
{
using (StreamReader reader = new System.IO.StreamReader(#Application.StartupPath + "\\favorites.txt"))
{
while (!reader.EndOfStream)
{
for (int i = 0; i < 4; i++)
{
string strListItem = reader.ReadLine();
if (!String.IsNullOrEmpty(strListItem))
{
lstFavorites.Items.Add(strListItem);
}
}
}
reader.Close();
}
}
catch
{
MessageBox.Show("An error has occured", "Error");
}
}
private void lstFavorites_MouseDoubleClick(object sender, MouseEventArgs e)
{
string strSelectedAddress = lstFavorites.Text.ToString();
cmbAddress.Text = strSelectedAddress;
wbBrowser.Navigate(strSelectedAddress);
frmFavorites.Hide();
}
have downloaded page by webbrowser and need to get mail address. But it is generated by javastript. In code i can find this script:
<script type="text/javascript" charset="utf-8">var i='ma'+'il'+'to';var a='impexta#impexta.sk';document.write(''+a+'');</script>
I read everywhere how to Invoke script, by i don't know his name. So what i want is to get "a" variable value.
EDIT: Code before:
...
WebBrowser wb = new WebBrowser();
wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(wb_DocumentCompleted);
wb.Navigate(url);
for (; wb.ReadyState != WebBrowserReadyState.Complete; )
{
System.Windows.Forms.Application.DoEvents();
}
...
void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
WebBrowser wb = sender as WebBrowser;
if (wb != null)
{
if (wb.ReadyState == WebBrowserReadyState.Complete)
{
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.Load(wb.DocumentStream);
}
}
}
I found easy solution. Just finding the right part of string in HTML code:
foreach (HtmlNode link in root.SelectNodes("//script"))
{
if (link.InnerText.Contains("+a+"))
{
string[] strs = new string[] { "var a='", "';document.write" };
strs = link.InnerText.Split(strs, StringSplitOptions.None);
outMail = System.Net.WebUtility.HtmlDecode(strs[1]);
if (outMail != "")
{
break;
}
}
}
I created a webform that allows the user to dynamically add more textboxes. Right now, as the button is clicked to add another field, it postbacks to itself. The controls are added in the Pre_Init. However when the page reconstructs itself the textbox names are different each time so the data is not being retained on each postback.
protected void Page_PreInit(object sender, EventArgs e)
{
MasterPage master = this.Master; //had to do this so that controls could be added.
createNewTextField("initEnumValue",true);
RecreateControls("enumValue", "newRow");
}
private int FindOccurence(string substr)
{
string reqstr = Request.Form.ToString();
return ((reqstr.Length - reqstr.Replace(substr, "").Length) / substr.Length);
}
private void RecreateControls(string ctrlPrefix, string ctrlType)
{
string[] ctrls = Request.Form.ToString().Split('&');
int cnt = FindOccurence(ctrlPrefix);
//Response.Write(cnt.ToString() + "<br>");
if (cnt > 0)
{
for (int k = 1; k <= cnt; k++)
{
for (int i = 0; i < ctrls.Length; i++)
{
if (ctrls[i].Contains(ctrlPrefix + k.ToString()) && !ctrls[i].Contains("EVENTTARGET"))
{
string ctrlID = ctrls[i].Split('=')[0];
if (ctrlType == "newRow")
{
createNewTextField(ctrlID,false);
}
break;
}
}
}
}
}
protected void addEnum_Click(object sender, ImageClickEventArgs e)
{
int cnt = FindOccurence("enumValue");
createNewTextField("enumValue" + Convert.ToString(cnt + 1),false);
// Response.Write(cnt.ToString() + "<br>");
}
private void createNewTextField(string ID, bool button)
{
Response.Write(ID + "<br/>"); //this is where I'm getting different names each time there is a postback
if (ID != "initEnumValue") //create new line starting with the second tb.
{
LiteralControl newLine = new LiteralControl();
newLine.Text = "<br />";
this.plhEnum.Controls.Add(newLine);
}
TextBox newTb = new TextBox();
newTb.ID = ID;
this.plhEnum.Controls.Add(newTb);
if (button) //create the button only on the first one.
{
LiteralControl space = new LiteralControl();
space.Text = " ";
this.plhEnum.Controls.Add(space);
ImageButton imgbutton = new ImageButton();
imgbutton.ID = "addEnum";
imgbutton.ImageUrl = "~/images/add.png";
imgbutton.Click += new ImageClickEventHandler(addEnum_Click);
imgbutton.CausesValidation = false;
this.plhEnum.Controls.Add(imgbutton);
}
//endEnumRow();
//this.plhEnum.Controls.Add(newTb);
}
I have tried this solution How can I get data from dynamic generated controls in ASP .NET MVC?