How to implement SSL with HttpWebRequest? - c#

I'm going to get information from https web pages. But the downloaded data is coded and illegible.
\u001f�\b\0\0\0\0\0\0\0�iw۸�0�Y�\u0015\u0018��(y�z��,v���rܝ\u0019�v�N/�s��P\",3�H]��r��\u007f\u007fQ�\b���N���\u0011Il�B��\0\u0014�^�#�ŋ\u0017����\u007f���Gt��\u0003t����S\u001f9k\u001b\u001b\u007fl�76\u000e/\u000fџ�^~>F[��2v��O�(t����'\u000er��t���qww�~��\u001eţ���{�k\v\n�ǵT)��}�\u001eR�\a�^�E��\u0018�.����?���\a�,vGc�A�(Lq�~p�hm�\u000e���a�߇��>�\u001dGA�b\u001f�'~�\u0013���\u0016�\n\bB�/��\K\u001f&X)���t\u0003z����n���×ˣ��\u000e\"�YkjOh\u001f۲��\0�]\���\rY?\u001b\u0015u�����%��\0w�o��{����\u000fo�u��>8��\u001b�$�\b\"��_\lv6�v6�ެ�o\u000e�q��I҇\0'�
How to get the SSL certificate into the HttpWebRequest class?
class MyHttpWebRequest : IDisposable
{
private static int Count = 0;
private HttpWebRequest request;
private Stream dataStream;
private string Status { get; set; }
private string userAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36";
private string accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
private string acceptLanguage = "Accept-Language: en-US,en;q=0.9";
private string acceptEncoding = "Accept-Encoding: gzip, deflate, br";
private string host;
private X509Certificate2 clientCertificates;
private String referer { get; set; } = "";
private CookieCollection cookieCollection { get; set; } = null;
public MyHttpWebRequest()
{
Count++;
clientCertificates = new X509Certificate2(#"mahan.cer");
//clientCertificates = X509Certificate.CreateFromCertFile(#"mahan.cer");//(#"LocalAuthority.crt");
//ServicePointManager.Expect100Continue = true;
//ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3;
//ServicePointManager.Expect100Continue = false;
//ServicePointManager.SecurityProtocol = SecurityProtocolType.Ssl3 | SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12;
//WebRequestHandler handler = new WebRequestHandler();
//X509Certificate2 certificate = GetMyX509Certificate();
//handler.ClientCertificates.Add(certificate);
//HttpClient client = new HttpClient(handler);
}
public MyHttpWebRequest(string host) : this()
{
this.host = host;
}
~MyHttpWebRequest()
{
Count--;
}
public string GetResponse()
{
try
{
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
if (cookieCollection == null) cookieCollection = new CookieCollection();
cookieCollection.Add(response.Cookies);
this.Status = response.StatusDescription;
dataStream = response.GetResponseStream();
StreamReader reader = new StreamReader(dataStream);
string responseFromServer = reader.ReadToEnd();
// Clean up the streams.
reader.Close();
//dataStream.Close();
response.Close();
//clientCertificates = new X509Certificate2(request.ServicePoint.Certificate);
this.Status = "Successful";
return responseFromServer;
}
catch (Exception ex)
{
this.Status = ex.Message;
return ex.ToString();
}
}
public string SetRequest(string url)
{
try
{
request = (HttpWebRequest)WebRequest.Create(url);
request.Method = "GET";
request.UserAgent = userAgent;
request.Accept = accept;
request.Headers.Add(acceptLanguage);
request.Headers.Add(acceptEncoding);
request.KeepAlive = true;
request.CookieContainer = new CookieContainer();
if (cookieCollection != null)
request.CookieContainer.Add(cookieCollection);
request.ProtocolVersion = HttpVersion.Version11;
request.AllowAutoRedirect = false;
request.ContentType = "application/x-www-form-urlencoded";
request.Referer = referer;
referer = url;
request.Host = host;
//request.ClientCertificates.Clear();
request.ClientCertificates.Add(clientCertificates);
request.PreAuthenticate = true;
this.Status = "Successful";
}
catch (Exception ex)
{
this.Status = ex.ToString();
}
return this.ToString();
}
public string SetRequest(string url, string method)
{
try
{
request = (HttpWebRequest)WebRequest.Create(url);
if (method.Equals("GET") || method.Equals("POST"))
{
request.Method = method;
}
else
{
throw new Exception("Invalid Method Type");
}
request.UserAgent = userAgent;
request.Accept = accept;
request.Headers.Add(acceptLanguage);
request.Headers.Add(acceptEncoding);
request.KeepAlive = true;
request.CookieContainer = new CookieContainer();
if (cookieCollection != null)
request.CookieContainer.Add(cookieCollection);
request.ProtocolVersion = HttpVersion.Version11;
request.AllowAutoRedirect = false;
request.ContentType = "application/x-www-form-urlencoded";
request.Referer = referer;
referer = url;
request.Host = host;
//request.ClientCertificates.Clear();
request.ClientCertificates.Add(clientCertificates);
request.PreAuthenticate = true;
this.Status = "Successful";
}
catch (Exception ex)
{
this.Status = ex.ToString();
}
return this.ToString();
}
public string SetRequest(string url, string method, string data)
{
try
{
request = (HttpWebRequest)WebRequest.Create(url);
if (method.Equals("GET") || method.Equals("POST"))
{
request.Method = method;
}
else
{
throw new Exception("Invalid Method Type");
}
request.UserAgent = userAgent;
request.Accept = accept;
request.Headers.Add(acceptLanguage);
request.Headers.Add(acceptEncoding);
request.KeepAlive = true;
request.CookieContainer = new CookieContainer();
if (cookieCollection != null)
request.CookieContainer.Add(cookieCollection);
request.ProtocolVersion = HttpVersion.Version11;
request.AllowAutoRedirect = false;
request.ContentType = "application/x-www-form-urlencoded";
request.Referer = referer;
referer = url;
request.Host = host;
//request.ClientCertificates.Clear();
request.ClientCertificates.Add(clientCertificates);
request.PreAuthenticate = true;
byte[] byteArray = Encoding.UTF8.GetBytes(data);
request.ContentLength = byteArray.Length;
dataStream = request.GetRequestStream();
dataStream.Write(byteArray, 0, byteArray.Length);
dataStream.Close();
this.Status = "Successful";
}
catch (Exception ex)
{
this.Status = ex.Message;
}
return this.Status;
}
public string SetRequest(string url, string method, string data, string contentType)
{
try
{
request = (HttpWebRequest)WebRequest.Create(url);
if (method.Equals("GET") || method.Equals("POST"))
{
request.Method = method;
}
else
{
throw new Exception("Invalid Method Type");
}
request.UserAgent = userAgent;
request.Accept = accept;
request.Headers.Add(acceptLanguage);
request.Headers.Add(acceptEncoding);
request.KeepAlive = true;
request.CookieContainer = new CookieContainer();
if (cookieCollection != null)
request.CookieContainer.Add(cookieCollection);
request.ProtocolVersion = HttpVersion.Version11;
request.AllowAutoRedirect = false;
request.ContentType = contentType;
request.Referer = referer;
referer = url;
request.Host = host;
//request.ClientCertificates.Clear();
request.ClientCertificates.Add(clientCertificates);
request.PreAuthenticate = true;
byte[] byteArray = Encoding.UTF8.GetBytes(data);
request.ContentLength = byteArray.Length;
dataStream = request.GetRequestStream();
dataStream.Write(byteArray, 0, byteArray.Length);
dataStream.Close();
this.Status = "Successful";
}
catch (Exception ex)
{
this.Status = ex.Message;
}
return this.Status;
}
public void Dispose()
{
request.Abort();
request = null;
dataStream.Close();
dataStream.Dispose();
dataStream = null;
}
}

How to get the SSL certificate into the HttpWebRequest class?
Your problem is not related to SSL at all.
private string acceptEncoding = "Accept-Encoding: gzip, deflate, br";
...
request.Headers.Add(acceptEncoding);
With this code you explicitly tell the server that you'll support various content compression algorithms. Only, this is a lie since you don't deal with compression when reading the response.
\u001f�\b\0\0\0\0\0\0\0
This looks the beginning of a gzip data stream. It starts with hex \x1f\x8b but you interpret this wrongly as UTF-8 which it is not.
The easiest way to fix this part is to remove the Accept-Encoding from your request or use Accept-Encoding: identity to signal that you don't accept any compression. This way a well behaving server will send you the body without any compression.

HttpWebRequest request = (HttpWebRequest)WebRequest.Create("https://yourwebsite.nld");
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
response.Close();
X509Certificate certV1 = request.ServicePoint.Certificate;
Certificate is a X509Certificate not a X509Certificate2.
To convert add
X509Certificate2 certV2 = new X509Certificate2(certV1);

Related

c# HttpWebResponse just work with Italy IP

I want call API with HttpWebRequest but just with Italy IP return "OK". With other countries IP return "not implemented".
What can I do to work with IP of all countries?
private void MakeRequests()
{
HttpWebResponse response;
if (Request_onlineplus_mofid_com(out response))
{
using (Stream responseStream = response.GetResponseStream())
{
StreamReader reader = new StreamReader(responseStream, Encoding.UTF8);
WriteToLog(reader.ReadToEnd());
label1.Text = response.StatusCode.ToString();
}
response.Close();
}
}
private bool Request_onlineplus_mofid_com(out HttpWebResponse response)
{
response = null;
try
{
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("https://******.com/Customer/SendOrder");
request.UserAgent = "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0";
request.Accept = "*/*";
request.Headers.Set(HttpRequestHeader.AcceptLanguage, "en-US,en;q=0.5");
request.Headers.Set(HttpRequestHeader.AcceptEncoding, "gzip, deflate, br");
request.Headers.Add("X-Requested-With", #"XMLHttpRequest");
request.ContentType = "application/json";
request.Headers.Add("Origin", #"https://********.com");
request.KeepAlive = true;
request.Referer = "https://*******.com/Home/Default/page-1";
request.Headers.Set(HttpRequestHeader.Cookie, #"GuidedTourVersion=1; SiteVersion=3.7.4; _ga=GA1.2.1641357646.1586548762; _gid=GA1.2.898069884.1586548762; crisp-client%2Fsession%2Fe95056ad-2681-452d-976d-0c2a304165c9=session_747ab3aa-5e67-4895-bf1f-1ebc4228674d; Token=cffb7683-749f-4778-95f1-445f30844f24; silverse=caiqiybzlgduowvc4bfpaip0; .ASPXAUTH=8BB49592D10170B8AC065895D255222E76D2D7D5B7F60DFE945F4E401568D8C0261BA121CA9D99FC95121378A7F0F7F94E11557B8D127A58D822802D70700CFB4872C885A45F969A514731895DE691040C961713A44634ADF984900888BA8F8ADF7AF0FD7BA1A8BAD1F5F5AABD68A3CD611686198DD4AB5BBF0D3200618169D8");
request.SendChunked = true;
request.TransferEncoding = "Trailers";
request.Method = "POST";
request.ServicePoint.Expect100Continue = false;
string body = #"{""IsSymbolCautionAgreement"":false,""CautionAgreementSelected"":false,""IsSymbolSepahAgreement"":false,""SepahAgreementSelected"":false,""orderCount"":1,""orderPrice"":52570,""FinancialProviderId"":1,""minimumQuantity"":"""",""maxShow"":0,""orderId"":0,""isin"":""IRO1GARN0001"",""orderSide"":65,""orderValidity"":74,""orderValiditydate"":null,""shortSellIsEnabled"":false,""shortSellIncentivePercent"":0}";
byte[] postBytes = System.Text.Encoding.UTF8.GetBytes(body);
request.ContentLength = postBytes.Length;
Stream stream = request.GetRequestStream();
stream.Write(postBytes, 0, postBytes.Length);
stream.Close();
response = (HttpWebResponse)request.GetResponse();
}
catch (WebException e)
{
if (e.Status == WebExceptionStatus.ProtocolError) response = (HttpWebResponse)e.Response;
else return false;
}
catch (Exception)
{
if (response != null) response.Close();
return false;
}
return true;
}
The above codes are generated by fiddler.
this api with restsharp always retuen 401. but in postman retuen 200.

C# HttpWebRequest Use cookie

I have a little problem with cookie handling in C#
So on my web site, I have a login page, once logged in, I am redirected to the home page. I get with HttpWebRequest to connect and follow the redirection, I created a class, here it is :
class webReq
{
private string urlConnection;
private string login;
private string password;
private CookieCollection cookieContainer;
private long executionTime = 0;
public webReq(string urlCo, string login, string pass)
{
this.urlConnection = urlCo;
this.login = login;
this.password = pass;
this.cookieContainer = null;
}
public void StartConnection()
{
string WriteHTML = "D:/REM/Connection.html";
List<string> datas = new List<string>();
datas.Add("Username=" + this.login);
datas.Add("Password=" + this.password);
datas.Add("func=ll.login");
datas.Add("NextURL=/admin/livelink.exe");
datas.Add("loginbutton=Sign in");
string postData = "";
postData = string.Join("&", datas);
var buffer = Encoding.ASCII.GetBytes(postData);
try
{
var watch = System.Diagnostics.Stopwatch.StartNew();
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(this.urlConnection);
request.AllowAutoRedirect = true;
request.Method = "POST";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1003.1 Safari/535.19";
request.Accept = "text/html, application/xhtml+xml, */*";
request.ContentType = "application/x-www-form-urlencoded";
request.ContentLength = buffer.Length;
request.CookieContainer = new CookieContainer();
Stream stream = request.GetRequestStream();
stream.Write(buffer, 0, buffer.Length);
stream.Close();
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
stream = response.GetResponseStream();
watch.Stop();
this.executionTime = watch.ElapsedMilliseconds;
StreamReader reader = new StreamReader(stream);
System.IO.File.WriteAllText(WriteHTML, reader.ReadToEnd());
this.cookieContainer = new CookieCollection();
foreach (Cookie cookie in response.Cookies)
{
this.cookieContainer.Add(cookie);
}
}
catch (WebException ex)
{
Console.WriteLine(ex.GetBaseException().ToString());
}
}
}
I load the home page well, and I manage to get a cookie.
So I developed a function to use my cookie to browse the website :
public void connectUrl(string url, int numeroTest)
{
string WriteHTML = "D:/REM/Page"+numeroTest+".html";
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
//Add cookie to request.CookieContainer
request.CookieContainer = new CookieContainer();
request.CookieContainer.Add(this.cookieContainer);
var watch = System.Diagnostics.Stopwatch.StartNew();
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
Stream stream = response.GetResponseStream();
watch.Stop();
this.executionTime = watch.ElapsedMilliseconds;
StreamReader reader = new StreamReader(stream);
System.IO.File.WriteAllText(WriteHTML, reader.ReadToEnd());
}
Normally, I have to retrieve three cookies, like on the website :
Only, I can't navigate on the website, I end up on the login page, the cookies are not good, and that I'm in debug, I only loaded one cookie(BrowseSettings) out of the three(LLCookie & LLTZCookie) :
I don't understand why I can't retrieve all the cookies on the website.... If anyone has a solution!
I found the reason why I can't get all the cookies, even if I can't find exactly why it works by disabling redirection, in my StartConnection() method :
request.AllowAutoRedirect = true;

How to shim HttpWebRequest Headers?

I am trying to Shim the following code:
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
request.Method = "GET";
request.Headers.Add("Authorization", "Bearer " + authToken.token.access_token);
request.Accept = "application/json";
But running the Unit Test throws an exception in this part: request.Headers.Add() because request.Headers is null. This, in spite of initializing Headers in my test:
ShimHttpWebRequest request = new ShimHttpWebRequest();
ShimWebRequest.CreateString = (urio) => {
request.Instance.Headers = new WebHeaderCollection {
{"Authorization", "Bearer abcd1234"}
};
//also tried initilizing it like this:
//WebHeaderCollection headers = new WebHeaderCollection();
//headers[HttpRequestHeader.Authorization] = "Bearer abcd1234";
//request.Instance.Headers = headers;
return request.Instance;
};
But request.Instance.Headers is still null.
What am I missing?
I solved this by creating a getter for Headers so that it would return a WebHeaderCollection instead of null.
ShimHttpWebRequest request = new ShimHttpWebRequest();
ShimWebRequest.CreateString = (urio) => request.Instance;
request.HeadersGet = () => {
WebHeaderCollection headers = new WebHeaderCollection();
headers.Add("Authorization", "Bearer abcd1234");
return headers;
};
I solved this by instantiating Header property of ShimHttpWebRequest as follows,
var httpWebRequest = new ShimHttpWebRequest() { HeadersGet = () => new WebHeaderCollection() };
ShimWebRequest.CreateString = (arg1) => httpWebRequest.Instance;
This is my code, you can try it:
public static string HttpPostWebRequest(string requestUrl, int timeout, string requestXML, bool isPost, string encoding, out string msg)
{
msg = string.Empty;
string result = string.Empty;
try
{
byte[] bytes = System.Text.Encoding.GetEncoding(encoding).GetBytes(requestXML);
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(requestUrl);
request.ContentType = "application/x-www-form-urlencoded";
request.Referer = requestUrl;
request.Method = isPost ? "POST" : "GET";
request.ContentLength = bytes.Length;
request.Timeout = timeout * 1000;
using (Stream requestStream = request.GetRequestStream())
{
requestStream.Write(bytes, 0, bytes.Length);
requestStream.Close();
}
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
Stream responseStream = response.GetResponseStream();
if (responseStream != null)
{
StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.GetEncoding(encoding));
result = reader.ReadToEnd();
reader.Close();
responseStream.Close();
request.Abort();
response.Close();
return result.Trim();
}
}
catch (Exception ex)
{
msg = ex.Message + ex.StackTrace;
}
return result;
}

Login to instagram programmatically

I am trying to log in to instagram using web requests. I am having a bad time understanding what's going on. Getting this: The remote server returned an error: (403) Forbidden. What I have so far:
public static string csrf;
CookieContainer c1 = new CookieContainer();
private void button1_Click(object sender, EventArgs e)
{
string PostData = String.Format("csrfmiddlewaretoken={0}&username=ra123&password=ra12345678",getToken());
HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create("https://instagram.com/accounts/login/");
req.Method = "POST";
req.ContentType = "application/x-www-form-urlencoded";
req.KeepAlive = true;
req.AllowAutoRedirect = true;
req.CookieContainer = c1;
byte[] byteArray = Encoding.ASCII.GetBytes(PostData);
req.ContentLength = byteArray.Length;
Stream dataStream = req.GetRequestStream();
dataStream.Write(byteArray, 0, byteArray.Length);
dataStream.Flush();
dataStream.Close();
HttpWebResponse webResp = (HttpWebResponse)req.GetResponse();
Stream datastream = webResp.GetResponseStream();
StreamReader reader = new StreamReader(datastream);
string s = reader.ReadToEnd();
MessageBox.Show(s);
if (s.Contains("ra123"))
{
MessageBox.Show("Loggedin");
}
else
{
MessageBox.Show("Not");
}
}
string getToken()
{
string p = "<input type=\"hidden\" name=\"csrfmiddlewaretoken\" value=\"(.*)\"/>";
HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create("https://instagram.com/accounts/login/");
req.Method = "GET";
req.CookieContainer = c1;
HttpWebResponse resp = (HttpWebResponse)req.GetResponse();
Stream data = resp.GetResponseStream();
StreamReader sr = new StreamReader(data);
string src = sr.ReadToEnd();
Match m = Regex.Match(src, p);
if (m.Success)
{
return (m.Groups[1].Value.ToString());
}
return false.ToString();
}
The problem with the login is that the request needs to set the cookie at the header, and the container is not setting it since is changes at every login when you access from an unknown explorer. Here is what you can do:
WebResponse Response;
HttpWebRequest Request;
Uri url = new Uri("http://thewebpage.com:port/login/");
CookieContainer cookieContainer = new CookieContainer();
Request = (HttpWebRequest)WebRequest.Create(url);
Request.Method = "GET";
Request.CookieContainer = cookieContainer;
// Get the first response to obtain the cookie where you will find the "csrfmiddlewaretoken" value
Response = Request.GetResponse();
string Parametros = "csrfmiddlewaretoken=" + cookieContainer.GetCookies(url)["csrftoken"].Value + "&username=USER&password=PASSWORD&next="; // This whill set the correct url to access
Request = (HttpWebRequest)WebRequest.Create(url); // it is important to use the same url used for the first request
Request.Method = "POST";
Request.ContentType = "application/x-www-form-urlencoded";
Request.UserAgent = "Other";
// Place the cookie container to obtain the new cookies for further access
Request.CookieContainer = cookieContainer;
Request.Headers.Add("Cookie",Response.Headers.Get("Set-Cookie")); // This is the most important step, you have to place the cookies at the header (without this line you will get the 403 Forbidden exception
byte[] byteArray = Encoding.UTF8.GetBytes(Parametros);
Request.ContentLength = byteArray.Length;
Stream dataStream = Request.GetRequestStream();
dataStream.Responseite(byteArray, 0, byteArray.Length);
dataStream.Close();
Response = Request.GetResponse();
FYI, this won't solve your problem, but you need to learn to place your Stream and other objects that implement IDisposable into using blocks:
public static string csrf;
CookieContainer c1 = new CookieContainer();
private void button1_Click(object sender, EventArgs e)
{
string PostData = String.Format("csrfmiddlewaretoken={0}&username=ra123&password=ra12345678", getToken());
HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create("https://instagram.com/accounts/login/");
req.Method = "POST";
req.ContentType = "application/x-www-form-urlencoded";
req.KeepAlive = true;
req.AllowAutoRedirect = true;
req.CookieContainer = c1;
byte[] byteArray = Encoding.ASCII.GetBytes(PostData);
req.ContentLength = byteArray.Length;
using (Stream dataStream = req.GetRequestStream())
{
dataStream.Write(byteArray, 0, byteArray.Length);
dataStream.Flush();
dataStream.Close();
}
string s;
using (HttpWebResponse webResp = (HttpWebResponse)req.GetResponse())
{
using (Stream datastream = webResp.GetResponseStream())
{
using (StreamReader reader = new StreamReader(datastream))
{
s = reader.ReadToEnd();
}
}
}
MessageBox.Show(s);
if (s.Contains("ra123"))
{
MessageBox.Show("Loggedin");
}
else
{
MessageBox.Show("Not");
}
}
string getToken()
{
string p = "<input type=\"hidden\" name=\"csrfmiddlewaretoken\" value=\"(.*)\"/>";
HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create("https://instagram.com/accounts/login/");
req.Method = "GET";
req.CookieContainer = c1;
string src;
using (HttpWebResponse resp = (HttpWebResponse)req.GetResponse())
{
using (Stream data = resp.GetResponseStream())
{
using (StreamReader sr = new StreamReader(data))
{
src = sr.ReadToEnd();
}
}
}
Match m = Regex.Match(src, p);
if (m.Success)
{
return (m.Groups[1].Value.ToString());
}
return false.ToString();
}

different view source content in browser and from httpwebrequest in c#

I am trying to scrape content from this page: https://www.google.com/search?hl=en&biw=1920&bih=956&tbm=shop&q=Xenon+12640&oq=Xenon+12640&aq=f&gs_l=serp.3...3743.3743.0.3905.1.1.0.0.0.0.0.0..0.0.ekh..0.0.Hq3XS7AxFDU&sei=Dr_MT_WOM6nO2AWE25mTCA&gbv=2
The problem I am experiencing is that opening that url in a browser I get everything I need to scrape but scraping the same link in the code, two (important) pieces are missing, the reviews number and the ratings, below the price and the seller info.
Here is the screenshot from the internal web client in c#: http://gyazo.com/908a37c7f70712fba1f82ec90a604d4d.png?1338822369
Here is the code with which I am trying to get the content:
public string navGet(string inURL, CookieContainer inCookieContainer, bool GZip, string proxyAddress, int proxyPort,string proxyUserName, string proxyPassword)
{
try
{
this.currentUrl = inURL;
HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(inURL);
webRequest.Timeout = this.TimeOutSetting;
webRequest.CookieContainer = inCookieContainer;
if (proxyAddress == "0" || proxyPort == 0)
{ }
else
{
webRequest.Proxy = new WebProxy(proxyAddress, proxyPort);
// Use login credentials to access proxy
NetworkCredential networkCredential = new NetworkCredential(proxyUserName, proxyPassword);
webRequest.Proxy.Credentials = networkCredential;
}
Uri destination = webRequest.Address;
webRequest.KeepAlive = true;
webRequest.Method = "GET";
webRequest.Accept = "*/*";
webRequest.Headers.Add("Accept-Language", "en-us");
if (GZip)
{
webRequest.Headers.Add("Accept-Encoding", "gzip, deflate");
}
webRequest.AllowAutoRedirect = true;
webRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
webRequest.ContentType = "text/xml";
//webRequest.CookieContainer.Add(inCookieContainer.GetCookies(destination));
try
{
string strSessionID = inCookieContainer.GetCookies(destination)["PHPSESSID"].Value;
webRequest.Headers.Add("Cookie", "USER_OK=1;PHPSESSID=" + strSessionID);
}
catch (Exception ex2)
{
}
HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse();
if (webRequest.HaveResponse)
{
// First handle cookies
foreach(Cookie retCookie in webResponse.Cookies)
{
bool cookieFound = false;
foreach(Cookie oldCookie in inCookieContainer.GetCookies(destination))
{
if (retCookie.Name.Equals(oldCookie.Name))
{
oldCookie.Value = retCookie.Value;
cookieFound = true;
}
}
if (!cookieFound)
inCookieContainer.Add(retCookie);
}
// Read response
Stream responseStream = responseStream = webResponse.GetResponseStream();
if (webResponse.ContentEncoding.ToLower().Contains("gzip"))
{
responseStream = new GZipStream(responseStream, CompressionMode.Decompress);
}
else if (webResponse.ContentEncoding.ToLower().Contains("deflate"))
{
responseStream = new DeflateStream(responseStream, CompressionMode.Decompress);
}
StreamReader stream = new StreamReader(responseStream, System.Text.Encoding.Default);
string responseString = stream.ReadToEnd();
stream.Close();
this.currentUrl = webResponse.ResponseUri.ToString();
this.currentAddress = webRequest.Address.ToString();
setViewState(responseString);
return responseString;
}
throw new Exception("No response received from host.");
return "An error was encountered";
}
catch(Exception ex)
{
//MessageBox.Show("NavGet:" + ex.Message);
return ex.Message;
}
}
Looks like it happens because the reviews number and the ratings are generated dynamically using Java Script (probably AJAX or something else). In this case you need to analyze additional traffic that takes place when the page is loaded in the browser and find where this data is transfered or analize JavaScript code to see how it's generated.

Categories

Resources