I want to load HTML for IP protected website like https://www.homedepot.com/
this website can't be accessed in Pakistan.
and I want to get HTML Source. What should i do?
I am trying with this code given below.
public HtmlAgilityPack.HtmlDocument LoadHtml(string url)
{
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";
request.AllowAutoRedirect = false;
HtmlAgilityPack.HtmlDocument htmlDoc = null;
//WebProxy myproxy = new WebProxy("198.12.118.99", 80);
//myproxy.BypassProxyOnLocal = false;
request.AllowAutoRedirect = true;
//myproxy.Credentials = new NetworkCredential("dasani", "amazon1!");
//request.Proxy = myproxy;
try
{
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
htmlDoc = new HtmlAgilityPack.HtmlDocument();
htmlDoc.Load(response.GetResponseStream());
response.Close();
}
catch (WebException ex)
{
ex.ToString();
//MessageBox.Show(ex.ToString());
}
return htmlDoc;
}
Related
I want to try get current DateTime like this :
try {
HttpWebRequest req = (HttpWebRequest) WebRequest.Create("http://www.timestampconvert.com/");
req.Method = "GET";
req.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
req.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36";
req.ContentType = "text/html; charset=utf-8";
req.Referer = string.Empty;
req.KeepAlive = true;
req.Timeout = 25000;
//req.Proxy = proxy;
HttpWebResponse res = (HttpWebResponse) req.GetResponse();
Stream Stream = res.GetResponseStream();
StreamReader reader = new StreamReader(Stream);
string reader_str = reader.ReadToEnd();
HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
htmlDoc.LoadHtml(reader_str);
var divs = from div in htmlDoc.DocumentNode.Descendants("div")
select div;
DateTime dt = DateTime.Now;
foreach(var div in divs) {
if (div.Attributes["class"] != null) {
if (div.Attributes["class"].Value == "remarkrow") {
if (div.InnerText.Contains("Computation based on input date")) {
CultureInfo cultureinfo = new CultureInfo("en-US");
dt = Convert.ToDateTime(div.InnerText.Replace("\n\t*)Computation based on input date ", string.Empty), cultureinfo);
dt = dt.AddHours(2).AddMinutes(30);
break;
}
}
}
}
DateTime dt_ = dt;
}
catch(Exception ex) {
MessageBox.Show(ex.ToString());
}
But it has an exception like below :
The request was aborted: Could not create SSL/TLS secure channel
How can i fix this error?
I'm trying to scrape data from the website in my code below. The site requires that I set a session id to proceed to the second page. I'm trying to extract the session id from the first page and add it as a cookie in the request of the second page but this always returns a 302 error. However if I use a web browser and extract the session id via the developer window and hard code this into the second page request it always works. I've used Fiddler, which generated the below but still no luck in solving this.
private static void Main()
{
try
{
HttpWebResponse response;
string sessionId = "";
if (Request_flow_gassco_no(out response))
{
StreamReader sReade1 = new StreamReader(response.GetResponseStream());
string HTM1 = sReade1.ReadToEnd();
sessionId = response.Headers["Set-Cookie"];
response.Close();
}
sessionId = sessionId.Split('=').GetValue(1).ToString().Trim().Split(';').GetValue(0).ToString().Trim();
//s = "4AEEFECB6A59102D0C2F4AC2DBA4362D";
if (Request_flow_gassco_no_disclaimer(out response, sessionId))
{
StreamReader sReade1 = new StreamReader(response.GetResponseStream());
string HTM1 = sReade1.ReadToEnd();
response.Close();
}
}
}
private static bool Request_flow_gassco_no(out HttpWebResponse response)
{
response = null;
try
{
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://flow.gassco.no/disclaimer");
request.Headers.Add("Upgrade-Insecure-Requests", #"1");
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
request.Headers.Set(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-GB,en-US;q=0.9,en;q=0.8");
response = (HttpWebResponse)request.GetResponse();
}
catch (WebException e)
{
if (e.Status == WebExceptionStatus.ProtocolError) response = (HttpWebResponse)e.Response;
else return false;
}
catch (Exception)
{
if (response != null) response.Close();
return false;
}
return true;
}
private static bool Request_flow_gassco_no_disclaimer(out HttpWebResponse response, string session)
{
response = null;
try
{
Uri target = new Uri("http://flow.gassco.no/");
var cookieContainer = new CookieContainer();
var cookies = new Cookie("JSESSIONID", session) { Domain = target.Host };
cookieContainer.Add(cookies);
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://flow.gassco.no/disclaimer/acceptDisclaimer?");
request.CookieContainer = cookieContainer;
request.AllowAutoRedirect = true;
request.Headers.Add("Upgrade-Insecure-Requests", #"1");
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
request.Referer = "http://flow.gassco.no/disclaimer";
request.Headers.Set(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-GB,en-US;q=0.9,en;q=0.8");
response = (HttpWebResponse)request.GetResponse();
}
catch (WebException e)
{
if (e.Status == WebExceptionStatus.ProtocolError) response = (HttpWebResponse)e.Response;
else return false;
}
catch (Exception)
{
if (response != null) response.Close();
return false;
}
return true;
}
Is there a difference between a session id returned in a web browser compared to one returned in a httpwebresponse?
Found the answer to this. The steps to get it working were as follows:
Make the initial request and extract the cookie from the response. I used a cookie container for this.
Make a request to the acceptdisclaimer page. Assigned your cookie container from the first request and also set your request to NOT allow auto redirect.
Make a request to the root url again assigning the cookie container. The response returns the expected html.
I hope this helps someone in the future.
What is the value of 's' after this line is executed?
s="JSESSIONID="+s.Split('=').GetValue(1).ToString().Trim().Split(';').GetValue(0).ToString().Trim();
From the looks, I think it will be "JSESSIONID=__utma" which is not what you want.
I want to login to Rockstar Social Club page https://pl.socialclub.rockstargames.com
I have this script
public static void Login()
{
string firstUrl = "https://pl.socialclub.rockstargames.com/profile/signin";
string formParams = string.Format("login-field={0}&password-field={1}", "mynickname", "mypassword");
string cookieHeader;
WebRequest req = WebRequest.Create(firstUrl);
req.ContentType = "application/x-www-form-urlencoded";
req.Method = "POST";
byte[] bytes = Encoding.ASCII.GetBytes(formParams);
req.ContentLength = bytes.Length;
using (Stream os = req.GetRequestStream())
{
os.Write(bytes, 0, bytes.Length);
}
WebResponse resp = req.GetResponse();
cookieHeader = resp.Headers["Set-cookie"];
string pageSource;
string getUrl = "https://pl.socialclub.rockstargames.com/games/gtav/pc/career/overview/gtaonline";
WebRequest getRequest = WebRequest.Create(getUrl);
getRequest.Headers.Add("Cookie", cookieHeader);
WebResponse getResponse = getRequest.GetResponse(); //Here returns me this error: System.Net.WebException: 'An error occurred while sending the request"
using (StreamReader sr = new StreamReader(getResponse.GetResponseStream()))
{
pageSource = sr.ReadToEnd();
}
}
Error occures in WebResponse getResponse = getRequest.GetResponse();
System.Net.WebException: 'An error occurred while sending the request'
I don't know how to repair this, and succesfully login to this website.
I have accomplished what you are attempting to do, but on a different website.
Basically - a few years ago, I wanted to create a website that would track my Guild/Company details on Final Fantasy XIV.
They didn't have an API, so I made one.
In order to get the information I required, I needed to use a mix of HtmlAgilityPack along with the C# WebBrowser control.
In order to pass the verification token stage above, you need to run the page source in a Web Browser control.
This will allow dynamic fields and data to be generated.
You then need to take that data, and submit it with your post data.
This is to fool it into thinking the request is coming from the page.
Be warned, when doing your posts - you may need to allow for redirects and you may need to mirror the referrer and host fields to match the website you are emulating.
The specific process I followed was:
Navigate to login page in WebBrowser control
Get page source
Load into HtmlAgilityPack HtmlDocument class
Use XPath to scrape the login form.
Take _verification tokens, csrf tokens etc make note of them.
Post a web-request with the necessary data to the form target destination url.
Read the response
Be aware - sometimes the response will actually be html code that tells it to do a Javascript redirect - in my case with Final Fantasy XIV - it was loading up another form and performing an autopost on page load.
You will also want to use
LoggedInCookies = new CookieContainer();
In your first HttpWebRequest
followed by:
request.CookieContainer = LoggedInCookies;
for each subsequent request.
The cookie container will trap and persist the authentication related cookies, while the WebBrowser control and HtmlAgilityPack will allow you to scrape the fields from the web forms that you need to break through.
Adding some code from wayback when I solved this for Final Fantasy XIV's lodestone website.
This code is very old and may not work anymore, but the process it follows could be adapted for sites that do not use Javascript as part of the login process.
Pay attention to the areas where it allows the request to be redirected, this is because the Server endpoint you are calling may do Action redirects etc
If your request does not allow those redirects, then it will not be emulating the login process.
class LoggedInClient
{
public static CookieContainer LoginCookie(string user, string pass)
{
string sStored = "";
string url = "http://eu.finalfantasyxiv.com/lodestone/account/login/";
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
CookieContainer cookies = new CookieContainer();
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36";
request.CookieContainer = cookies;
HttpWebResponse response1 = (HttpWebResponse)request.GetResponse();
Console.WriteLine(cookies.Count.ToString());
string sPage = "";
using (var vPage = new StreamReader(response1.GetResponseStream()))
{
sPage = vPage.ReadToEnd();
}
HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(sPage);
sStored = doc.DocumentNode.SelectSingleNode("//input[#type='hidden' and #name='_STORED_']").Attributes["value"].Value;
string param = "sqexid="+user+"8&password="+pass+"&_STORED_=" + sStored;
string postURL = doc.DocumentNode.SelectSingleNode("//form[#name='mainForm']").Attributes["action"].Value;
//Console.WriteLine(sStored);
postURL = "https://secure.square-enix.com/oauth/oa/" + postURL;
request.Method = "POST";
byte[] paramAsBytes = Encoding.Default.GetBytes(param);
request = (HttpWebRequest)WebRequest.Create(postURL);
request.ContentType = "application/x-www-form-urlencoded";
request.Method = "POST";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36";
request.CookieContainer = cookies;
request.AllowAutoRedirect = false;
try
{
using (Stream stream = request.GetRequestStream())
{
stream.Write(paramAsBytes, 0, paramAsBytes.Length);
}
}
catch (Exception ee)
{
Console.WriteLine(ee.ToString());
}
string sGETPage = "";
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
using (var vPage = new StreamReader(response.GetResponseStream()))
{
sPage = vPage.ReadToEnd();
sGETPage = response.Headers["Location"];
}
}
// Console.WriteLine(sPage);
request = (HttpWebRequest)WebRequest.Create(sGETPage);
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36";
request.CookieContainer = cookies;
HttpWebResponse response2 = (HttpWebResponse)request.GetResponse();
Console.WriteLine(cookies.Count.ToString());
sPage = "";
using (var vPage = new StreamReader(response2.GetResponseStream()))
{
sPage = vPage.ReadToEnd();
}
// Console.WriteLine(sPage);
doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(sPage);
string _c = doc.DocumentNode.SelectSingleNode("//input[#type='hidden' and #name='_c']").Attributes["value"].Value;
string cis_sessid = doc.DocumentNode.SelectSingleNode("//input[#type='hidden' and #name='cis_sessid']").Attributes["value"].Value;
string action = doc.DocumentNode.SelectSingleNode("//form[#name='mainForm']").Attributes["action"].Value;
string sParams = "_c=" + _c + "&cis_sessid=" + cis_sessid;
byte[] bData = Encoding.Default.GetBytes(sParams);
// Console.WriteLine(sStored);
request = (HttpWebRequest)WebRequest.Create(action);
request.ContentType = "application/x-www-form-urlencoded";
request.Method = "POST";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36";
request.CookieContainer = cookies;
request.AllowAutoRedirect = true;
try
{
using (Stream stream = request.GetRequestStream())
{
stream.Write(bData, 0, bData.Length);
}
}
catch (Exception ee)
{
Console.WriteLine(ee.ToString());
}
string nextPage = "";
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
using (var vPage = new StreamReader(response.GetResponseStream()))
{
nextPage = vPage.ReadToEnd();
}
}
// Console.WriteLine(nextPage);
doc = new HtmlAgilityPack.HtmlDocument();
doc.LoadHtml(nextPage);
string csrf_token = doc.DocumentNode.SelectSingleNode("//input[#type='hidden' and #name='csrf_token']").Attributes["value"].Value;
string cicuid = "51624738";
string timestamp = Convert.ToInt32(DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1)).TotalSeconds).ToString() + "100";
action = "http://eu.finalfantasyxiv.com/lodestone/api/account/select_character/";
sParams = "csrf_token=" + csrf_token + "&cicuid=" + cicuid + "×tamp=" + timestamp;
bData = Encoding.Default.GetBytes(sParams);
request = (HttpWebRequest)WebRequest.Create(action);
request.ContentType = "application/x-www-form-urlencoded";
request.Method = "POST";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36";
request.CookieContainer = cookies;
request.AllowAutoRedirect = true;
try
{
using (Stream stream = request.GetRequestStream())
{
stream.Write(bData, 0, bData.Length);
}
}
catch (Exception ee)
{
Console.WriteLine(ee.ToString());
}
nextPage = "";
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
using (var vPage = new StreamReader(response.GetResponseStream()))
{
nextPage = vPage.ReadToEnd();
}
}
return cookies;
}
}
Everything Working Properly but we click on submitData() button i will show me login page instead of myhomepage,
so very first I navigate login page it will set the cookie, then cookie stored in cookie container code is
CookieContainer cookieJar = new CookieContainer();
public Form1()
{
String captcha;
InitializeComponent();
}
private void BUTTON_LOGIN_Click(object sender, EventArgs e)
{
//Get captcha
var request = (HttpWebRequest)HttpWebRequest.Create("https://www.irctc.co.in/eticketing/loginHome.jsf");
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
request.KeepAlive = true;
request.CookieContainer = cookieJar;
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
var response = request.GetResponse();
using (var stream = response.GetResponseStream())
{
}
// need array, different storage for every cookies
//
foreach (Cookie c in cookieJar.GetCookies(response.ResponseUri))
{
request.CookieContainer.Add(c);
MessageBox.Show("Cookie['" + c.Name + "']: " + c.Value);
}
}
Then I request for captcha using cookie
private void pictureBox1_Click(object sender, EventArgs e) // This Event Refresh thr captcha
{
//Get captch
var request =(HttpWebRequest)HttpWebRequest.Create("https://www.irctc.co.in/eticketing/captchaImage");
request.Accept = "image/webp,image/*,*/*;q=0.8";
WebHeaderCollection myWebHeaderCollection = request.Headers;
myWebHeaderCollection.Add("Accept-Language", "en-US;q=0.8");
myWebHeaderCollection.Add("Upgrade-Insecure-Requests", "1");
request.KeepAlive = true;
Uri target = new Uri("https://www.irctc.co.in");
cookieJar.Add(new Cookie("language", "en_IN") { Domain = target.Host });
request.CookieContainer = cookieJar;
request.Referer = "https://www.irctc.co.in/eticketing/loginHome.jsf";
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
var response = request.GetResponse();
using (var stream = response.GetResponseStream())
{
pictureBox1.Image = Bitmap.FromStream(stream);
}
// need array, different storage for every cookies
//
foreach (Cookie c in cookieJar.GetCookies(response.ResponseUri))
{
request.CookieContainer.Add(c);
MessageBox.Show("Cookie['" + c.Name + "']: " + c.Value);
}
}
private void button1_Click(object sender, EventArgs e)
{
submitData();
}
private void displayHtml(String html)
{
webBrowser1.Navigate("about:blank");
while (webBrowser1.Document == null || webBrowser1.Document.Body == null)
Application.DoEvents();
webBrowser1.Document.OpenNew(true).Write(html);
}
And finally I submit data after filling captcha to text box and here is error this is redirect me login page again so whats going wrong.
private void submitData()
{
try
{
String user = "myusername";
String pass = "mypassword";
String submit = "Submit";
String captcha=richTextBox1.Text.ToString() ;
ASCIIEncoding encoding = new ASCIIEncoding();
// MessageBox.Show(captcha);
string postData = "j_username=" + user + "&j_password=" + pass + "&j_captcha=" + captcha + "&submit=" + submit + "&tneg=" ;
byte[] data = Encoding.UTF8.GetBytes(postData);
Uri target = new Uri("https://www.irctc.co.in");
cookieJar.Add(new Cookie("language", "en_IN"){ Domain = target.Host });
HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create("https://www.irctc.co.in/eticketing/home");
request.CookieContainer = cookieJar;
WebHeaderCollection myWebHeaderCollection = request.Headers;
myWebHeaderCollection.Add("Accept-Language", "en-US;q=0.8");
myWebHeaderCollection.Add("Upgrade-Insecure-Requests", "1");
request.Method = "POST";
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
request.ContentType = "application/x-www-form-urlencode";
request.Referer = "https://www.irctc.co.in/eticketing/loginHome.jsf";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
request.ServicePoint.Expect100Continue = false;
// var bytes = Encoding.UTF8.GetBytes("action=login&from=http%3A%2F%2Fwww.dofus.com%2Ffr&login=user123&password=password1232F&remember=1");
// request.ContentLength = bytes.Length;
request.ContentLength = data.Length;
request.KeepAlive = true;
// request.AutomaticDecompression = DecompressionMethods.GZip;
// request.AutomaticDecompression = DecompressionMethods.Deflate;
// request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate, br");
// request.SendChunked = true;
// request.TransferEncoding = "gzip, deflate, br";
request.Host = "www.irctc.co.in";
//request.Headers.Add
Stream stream = request.GetRequestStream();
stream.Write(data, 0, data.Length);
stream.Close();
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
stream = response.GetResponseStream();
foreach (Cookie c in cookieJar.GetCookies(request.RequestUri))
{
MessageBox.Show("Cookie['" + c.Name + "']: " + c.Value);
}
StreamReader sr = new StreamReader(stream);
//richTextBox1.Text = sr.ReadToEnd();
String myhtml = sr.ReadToEnd().ToString();
displayHtml(myhtml);
sr.Close();
stream.Close();
}
catch (Exception ex)
{
MessageBox.Show("Error : " + ex.Message);
}
}
In the method submitData()
request.ContentType = "application/x-www-form-urlencode"
here is a typo, you have missed one 'd'
it should be like this :-
request.ContentType = "application/x-www-form-urlencoded"
ContentType is "application/x-www-form-urlencoded".
If the server is unable to understand the form data type(in this case "application/x-www-form-urlencode" is unknown to the server),then it will reject the request and redirects the user to login page as you mentioned.
You can also check Form ContentType in HTML Forms(w3.org) for more information.
Hope this helps :)
I am trying to download YouTube HTML in my ASP.Net Application with :
string downloaded = string.Empty;
try
{
String url = String.Format(YouTubeDownloadedVideo, videoID);
WebClient Client = new WebClient();
downloaded = Client.DownloadString(url);
}
catch { }
But i don't get the full HTML, i try the same code in a Console application that i created and it's work perfectly.
Any idea what can be the problem?
Instead of using the webclient you can just use webresponse if you only need the HTML in a string.
private string GetWebPage(string URL)
{
string strHTMLPage = "";
try
{
HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(URL);
webRequest.UserAgent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)";
webRequest.Method = "GET";
HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse();
if (webResponse != null)
{
StreamReader oSR = new StreamReader(webResponse.GetResponseStream());
strHTMLPage = oSR.ReadToEnd();
oSR.Close();
}
}
catch (Exception e)
{
strHTMLPage = "";
}
return strHTMLPage;
}