Error with C# Web crawler - c#

Can someone please assist me with this web crawler? I keep getting the error:
Cannot implicitly convert type
'System.Collections.Generic.ISet<string>' to 'string'.
This error is on the line String Links = GetNewLinks(Rstring); can someone please help? Here is my code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace Crawler
{
public partial class Crawler : Form
{
// Body of the page most recently downloaded by button1_Click.
String Rstring;
public Crawler()
{
InitializeComponent();
}
// Click handler: downloads the URL typed into txt1, then tries to show the raw
// response in txt2 and the links extracted from it in txt3.
// NOTE(review): txt1/txt2/txt3 are not declared in this listing; presumably they
// are text boxes from the designer half of this partial class.
private void button1_Click(object sender, EventArgs e)
{
WebRequest myWebRequest;
WebResponse myWebResponse;
String URL = txt1.Text;
myWebRequest = WebRequest.Create(URL);
myWebResponse = myWebRequest.GetResponse();
Stream streamResponse = myWebResponse.GetResponseStream();
StreamReader sreader = new StreamReader(streamResponse);
// Read the whole response body into the Rstring field.
Rstring = sreader.ReadToEnd();
// Compile error CS0029 is reported on the next line: GetNewLinks is declared to
// return ISet<string>, and there is no implicit conversion from that to String.
String Links = GetNewLinks(Rstring);
txt2.Text = Rstring;
txt3.Text = Links;
// The Close calls are not inside a finally/using, so they are skipped if any
// statement above throws.
sreader.Close();
streamResponse.Close();
myWebResponse.Close();
}
/// <summary>
/// Collects the distinct href values of the anchor tags found in <paramref name="content"/>.
/// </summary>
/// <param name="content">Markup to scan; null or empty yields an empty set.</param>
/// <returns>A set holding each matched href value once.</returns>
public ISet<string> GetNewLinks(string content)
{
    ISet<string> newLinks = new HashSet<string>();
    if (string.IsNullOrEmpty(content))
    {
        // Regex.Matches rejects null input; an empty set is the natural "no links" answer.
        return newLinks;
    }

    // The lookbehind anchors on <a href= plus an opening quote, and the lookahead on the
    // closing quote, so each match value is only the text between the quotes.
    Regex regexL = new Regex("(?<=<a\\s*?href=(?:'|\"))[^'\"]*?(?=(?:'|\"))");
    foreach (Match match in regexL.Matches(content))
    {
        // HashSet<T>.Add already ignores duplicates, so no Contains check is needed.
        newLinks.Add(match.Value);
    }

    return newLinks;
}
}
}

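GetNewLinks() returns a set of strings (ISet<string>), not a single string. So if you want to assign the result to a single string (String Links), you have to either select one string from the set, e.g. using First(), or combine all of them into one string, e.g. with string.Join(Environment.NewLine, GetNewLinks(Rstring)).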

Related

Webclient doesnt like UrlEncoded slash. How to make it work?

I'm trying to pass a URL to an API using a .NET 2.0 WebClient (unable to upgrade). The WebClient call only works if there are no slashes in the encoded value. Any idea why it is failing and how to make it work?
using System.Net;
using System.Text;
using System.Web;
namespace ConsoleAppWebClient
{
// Repro for the failing case: the URL-encoded value contains encoded slashes.
class Program
{
static void Main(string[] args)
{
using (var client = new WebClient())
{
client.Encoding = Encoding.UTF8;
client.Headers[HttpRequestHeader.Accept] = "application/xml";
// The value being encoded contains ':' and two '/' characters, all of which
// HttpUtility.UrlEncode percent-encodes.
var requestUrl = HttpUtility.UrlEncode("https://www.somewebsite.com");
// The encoded value is concatenated onto the path of the API URL (after the
// final '/'), not passed as a query-string parameter.
var stringResult = client.DownloadString("https://localhost:12345/api/getstuff/" + requestUrl);
}
}
}
}
The above doesn't work, but the version below, where the value passed to UrlEncode contains no slashes, works just fine:
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Xml.Serialization;
using System.Web;
namespace ConsoleAppWebClient
{
// Repro for the working case: the URL-encoded value contains no slashes.
class Program
{
static void Main(string[] args)
{
using (var client = new WebClient())
{
client.Encoding = Encoding.UTF8;
client.Headers[HttpRequestHeader.Accept] = "application/xml";
// This input has a ':' but no '/', so the encoded value carries no escaped slash.
var requestUrl = HttpUtility.UrlEncode("https:www.somewebsite.com");
// The encoded value is concatenated onto the path of the API URL (after the
// final '/'), not passed as a query-string parameter.
var stringResult = client.DownloadString("https://localhost:12345/api/getstuff/" + requestUrl);
}
}
}
}
It looks like requestUrl is meant to be a query parameter, but you're adding it to the URL's path.
The result is
https://localhost:12345/api/getstuff/https%3A%2F%2Fwww.somewebsite.com
"%" is an unsafe character which can lead to unpredictable results.
Instead, try making it a querystring parameter:
// Encode the full target URL (slashes included) and send it as a query-string value,
// so the escaped slashes never appear in the path portion of the request URL.
var requestUrl = HttpUtility.UrlEncode("https://www.somewebsite.com");
var stringResult = client.DownloadString(
"https://localhost:12345/api/getstuff/?requestUrl=" + requestUrl);
Now that the URL-encoded parameter is in the querystring instead of the path it should be okay.

Rest URL consumption guidance required

I have a REST URL provided by a client. It looks something like www.baseurl/api/<EntityName>/<EntityID>. I can't get a result when I try to consume it from a console application; I get error 503.
However, I am able to browse to it from my browser, and it returns the proper JSON of the entity details. Please help me with this.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp15
{
/// <summary>
/// Console client that issues a single GET against <see cref="URL"/> and writes the
/// response body, or the failure details, to standard output.
/// </summary>
public class Class1
{
    // Placeholder endpoint taken from the post. WebRequest.Create needs an absolute URI,
    // so the real value must include the scheme (http:// or https://).
    private const string URL = "www.baseurl/api/<EntityName>/<EntityID>";

    static void Main(string[] args)
    {
        Class1.CreateObject();
    }

    /// <summary>
    /// Sends the GET request and prints the body; on failure prints a separator, the
    /// exception message and, when the server answered, the HTTP status it returned.
    /// </summary>
    private static void CreateObject()
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
        request.Method = "GET";
        // A GET carries no body, so Content-Type describes nothing; Accept is the header
        // that tells the server a JSON representation is wanted.
        request.Accept = "application/json";
        try
        {
            // Dispose the response as well as the stream so the connection is released.
            using (WebResponse webResponse = request.GetResponse())
            using (Stream webStream = webResponse.GetResponseStream() ?? Stream.Null)
            using (StreamReader responseReader = new StreamReader(webStream))
            {
                string response = responseReader.ReadToEnd();
                Console.Out.WriteLine(response);
            }
        }
        catch (WebException e)
        {
            Console.Out.WriteLine("-----------------");
            Console.Out.WriteLine(e.Message);
            // When the server did respond (e.g. with 503), show the status it sent back.
            HttpWebResponse errorResponse = e.Response as HttpWebResponse;
            if (errorResponse != null)
            {
                Console.Out.WriteLine("HTTP status: " + (int)errorResponse.StatusCode + " " + errorResponse.StatusDescription);
                errorResponse.Close();
            }
        }
        catch (Exception e)
        {
            Console.Out.WriteLine("-----------------");
            Console.Out.WriteLine(e.Message);
        }
    }
}
}

C# Grabbing text from a span

I tried this:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Net.Sockets;
namespace ConsoleApplication1
{
// Question listing: reads a phrase from the console, requests a Google Translate URL
// built from it, and scans the response line by line. It does not compile as posted;
// the two offending lines are marked below.
class Program
{
static void Main(string[] args)
{
Console.WriteLine("Danish to English: ");
string tittyfuck = Console.ReadLine();
Console.Beep();
// Compile error: WebRequest.Create is a method, so "new WebRequest.Create(...)"
// uses it like a type.
WebRequest webRequest = new WebRequest.Create("http://translate.google.com/#da/en/" + tittyfuck);
WebResponse webResponse = webRequest.GetResponse();
Stream data = webResponse.GetResponseStream();
// Declared but never assigned or read.
string html;
using (StreamReader streamReader = new StreamReader(data))
{
string line;
// Compile error: the parentheses group "streamReader.ReadLine() != null" first,
// so a bool is assigned to the string variable line.
while ((line = streamReader.ReadLine() != null))
{
// Matches only when the entire line is exactly this tag.
if (line == "<span class=\"hps\">")
{
Console.Beep();
Console.WriteLine(line);
}
}
}
}
}
}
Okay, so I try that but I get these errors:
Error 1 'System.Net.WebRequest.Create(System.Uri)' is a 'method' but is used like a 'type' C:\Users\Dylan\AppData\Local\Temporary Projects\ConsoleApplication1\Program.cs 18 52 ConsoleApplication1
and
Error 2 Cannot implicitly convert type 'bool' to 'string' C:\Users\Dylan\AppData\Local\Temporary Projects\ConsoleApplication1\Program.cs 27 32 ConsoleApplication1
As you can probably tell, I'm trying to open a request to translate.google.com with the text appended to the link, and then grab the text that's printed in the <span class="hps"> element, which is the translated text. It's basically a translator.
Please help.
Line 18:
WebRequest webRequest = WebRequest.Create(new Uri("http://translate.google.com/#da/en/" + tittyfuck));
Line 27:
while ((line = streamReader.ReadLine()) != null)
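Remove the new keyword in front of WebRequest.Create (it is a static method, not a type), and move the closing parenthesis so that the assignment to line happens before the comparison with null.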
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Net.Sockets;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample: reads a Danish phrase, requests the Google Translate page for it,
    /// and echoes every response line that contains the result span.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine("Danish to English: ");
            string danishText = Console.ReadLine();
            Console.Beep();

            // WebRequest.Create is a static factory method, so it is called without "new".
            WebRequest webRequest = WebRequest.Create("http://translate.google.com/#da/en/" + danishText);

            // Dispose the response and its stream even if reading throws.
            using (WebResponse webResponse = webRequest.GetResponse())
            using (Stream data = webResponse.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(data))
            {
                string line;
                // The inner parentheses make the assignment happen before the null test.
                while ((line = streamReader.ReadLine()) != null)
                {
                    // ReadLine returns whole lines, so test for the tag inside the line;
                    // an exact-equality test could only ever print the bare tag itself.
                    if (line.Contains("<span class=\"hps\">"))
                    {
                        Console.Beep();
                        Console.WriteLine(line);
                    }
                }
            }
        }
    }
}
You can't use Google Translate this way, because the translation is requested by JavaScript after the page loads. You can try a WebBrowser control instead, or pay for a character quota on the Translate API.
Another way is to parse the result of the underlying request (http://translate.google.com/translate_a/t?....), which is returned in a JSON-like format.

c# changing this line so it outputs the string as a variable

UPDATE TO POST (THE ORIGINAL POST FOLLOWS THIS CODE) --- this code is an update to what David has been helping me with. It is throwing one error that I need help with:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections.Specialized;
using System.Net;
using System.IO;
namespace ConsoleApplication1
{
// Question listing: posts two form values (machine name and public IP) to a local PHP
// page. It does not compile as posted; see the notes on the ip() call and declaration.
class Program
{
static void Main(string[] args)
{
string URL = "http://localhost/test2.php";
WebClient webClient = new WebClient();
NameValueCollection formData = new NameValueCollection();
// The doubled 'formData["var1"] =' assigns the same value to the same key twice.
formData["var1"] = formData["var1"] = string.Format("MachineName: {0}", System.Environment.MachineName);
// Compile error CS0120: Main is static, but ip() is declared as an instance method,
// so it cannot be called here without an object. ip() is also declared void, so
// even once callable it yields no value to store in formData["var2"].
formData["var2"] = ip();
byte[] responseBytes = webClient.UploadValues(URL, "POST", formData);
string responsefromserver = Encoding.UTF8.GetString(responseBytes);
Console.WriteLine(responsefromserver);
webClient.Dispose();
System.Threading.Thread.Sleep(5000);
}
// Downloads the checkip.dyndns.org page, cuts out the text between "Address: " and
// "</body>", and prints it. Declared non-static and void: it writes the value to the
// console instead of returning it to the caller.
public void ip()
{
String publicIP = "";
System.Net.WebRequest request = System.Net.WebRequest.Create("http://checkip.dyndns.org/");
using (System.Net.WebResponse response = request.GetResponse())
{
using (System.IO.StreamReader stream = new System.IO.StreamReader(response.GetResponseStream()))
{
publicIP = stream.ReadToEnd();
}
}
//Search for the ip in the html
// 9 is the length of "Address: ", so first points just past that marker.
int first = publicIP.IndexOf("Address: ") + 9;
int last = publicIP.LastIndexOf("</body>");
publicIP = publicIP.Substring(first, last - first);
Console.WriteLine(publicIP);
System.Threading.Thread.Sleep(5000);
}
}
}
This is the error I am getting:
Error 2 - An object reference is required for the non-static field, method, or property 'ConsoleApplication1.Program.ip()'
I am trying to create a function whose output will be sent as var2.
I have this c# script
using System;
using System.Collections.Generic;
using System.Text;
namespace ConsoleApplication4
{
    /// <summary>
    /// Prints the local machine name, then keeps the console window open for five seconds.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            const int PauseMilliseconds = 5000;
            string machineName = System.Environment.MachineName;

            Console.WriteLine("MachineName: {0}", machineName);
            System.Threading.Thread.Sleep(PauseMilliseconds);
        }
    }
}
How can I change this so that it stores the string in a variable, say "VAR2", so that I can use it in this script?
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections.Specialized;
using System.Net;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Posts a single form field to a local PHP page and prints the server's reply.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            string URL = "http://localhost/test2.php";
            NameValueCollection formData = new NameValueCollection();
            formData["var1"] = "THIS IS WHERE VAR2 NEEDS TO BE ";

            // The using block disposes the client even when UploadValues throws, which a
            // trailing Dispose() call would not.
            using (WebClient webClient = new WebClient())
            {
                byte[] responseBytes = webClient.UploadValues(URL, "POST", formData);
                string responsefromserver = Encoding.UTF8.GetString(responseBytes);
                Console.WriteLine(responsefromserver);
            }

            // Keep the console window open long enough to read the reply.
            System.Threading.Thread.Sleep(5000);
        }
    }
}
SO HOW CAN I ADD THE MACHINE NAME SCRIPT TO THIS FUNCTION AND THEN USE IT AS VAR2
any help would be brilliant
using System;
using System.Collections.Generic;
using System.Text;
namespace ConsoleApplication5
{
    /// <summary>
    /// Looks up this machine's public IP address via checkip.dyndns.org and prints it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the lookup page, extracts the address and writes it to the console.
        /// </summary>
        /// <returns>0 when an address was printed; 1 when none could be found in the page.</returns>
        public static int Main(string[] args)
        {
            string html;
            System.Net.WebRequest request = System.Net.WebRequest.Create("http://checkip.dyndns.org/");
            using (System.Net.WebResponse response = request.GetResponse())
            using (System.IO.StreamReader stream = new System.IO.StreamReader(response.GetResponseStream()))
            {
                html = stream.ReadToEnd();
            }

            string publicIP = ExtractAddress(html);
            if (publicIP == null)
            {
                Console.Error.WriteLine("Could not find an IP address in the response.");
                return 1;
            }

            Console.WriteLine(publicIP);
            System.Threading.Thread.Sleep(5000);
            return 0;
        }

        // Returns the text between "Address: " and the last "</body>", or null when the
        // page does not contain both markers in that order.
        private static string ExtractAddress(string html)
        {
            const string marker = "Address: ";
            if (string.IsNullOrEmpty(html))
            {
                return null;
            }

            int markerIndex = html.IndexOf(marker, StringComparison.Ordinal);
            if (markerIndex < 0)
            {
                // Without this check, -1 + marker.Length would point into unrelated text.
                return null;
            }

            int first = markerIndex + marker.Length;
            int last = html.LastIndexOf("</body>", StringComparison.Ordinal);
            if (last < first)
            {
                // Covers both a missing closing tag (-1) and markers in the wrong order,
                // either of which would make Substring throw.
                return null;
            }

            return html.Substring(first, last - first);
        }
    }
}
Here is the update, David. I would like to include this script in my other script so that it looks like this:
formData["var1"] = formData["var1"] = string.Format("MachineName: {0}", System.Environment.MachineName);
formData["var2"] = "this is where this script needs to be ";
Why does it need to be in a separate application? If one application's output is going to be the command-line argument for another application's input, then they're both running on the same machine. Which means, in this case, they'd both get the same value from System.Environment.MachineName.
You can just get the value in the application where it's needed:
formData["var1"] = string.Format("MachineName: {0}", System.Environment.MachineName);

how to convert regex match to integer? (returns "Input string was not in a correct format")

I have a problem converting a regex match (which is basically a string) to an integer.
Match results = Regex.Match(websr.ReadToEnd(),
"\\d+\\S+\\d+ results", RegexOptions.Singleline);
var count = Regex.Match(results.ToString(), "\\d+\\S+\\d+");
These two lines are the regexes. I want to extract the number of results; "count" shows the correct number of results. In the next step I want to convert it to an integer type.
I tried {int countN = int.Parse(count.ToString())} and {Int32.TryParse(count,out countN)} and many other variations, but it either throws "Input string was not in a correct format" or shows 0 in the list box.
I'm really confused by this. I have tried many tricks without success.
thanks for help :)
edit :
here is the code :
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Web;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
namespace bing
{
// Question listing: a form whose button runs a Bing search for the text in textBox1 and
// adds the reported result count to listBox1.
// NOTE(review): textBox1 and listBox1 are not declared here; presumably they come from
// the designer half of this partial class.
public partial class Form1 : Form
{
public Form1()
{
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
// First request: fetches the Bing home page and keeps the cookies it returns.
CookieCollection cookies = new CookieCollection();
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.bing.com/");
request.CookieContainer = new CookieContainer();
request.CookieContainer.Add(cookies);
HttpWebResponse response = (HttpWebResponse)request.GetResponse();
// response1 is created but never read from or closed in this method.
StreamReader response1 = new StreamReader(response.GetResponseStream());
// The collected cookies are not attached to the search request below.
cookies = response.Cookies;
try
{
string web = "http://www.bing.com";
// Build the search URL. No Method is set on the request below, so it is not sent
// as a POST; textBox1.Text is appended without URL-encoding.
string getUrl = (web + "/search?q=" + textBox1.Text);
HttpWebRequest webreq = (HttpWebRequest)WebRequest.Create(getUrl);
HttpWebResponse webrep = (HttpWebResponse)webreq.GetResponse();
StreamReader websr = new StreamReader(webrep.GetResponseStream());
// Finds text shaped like "<digits><non-whitespace><digits> results" in the page.
Match results = Regex.Match(websr.ReadToEnd(), "\\d+\\S+\\d+ results", RegexOptions.Singleline);
// Strips the trailing " results"; \S+ lets separators such as ',' stay in the value.
var count = Regex.Match(results.ToString(), "\\d+\\S+\\d+");
// This is the line that fails with "Input string was not in a correct format":
// int.Parse(string) rejects a value that still contains separators between digits.
int countN = int.Parse(count.Value);
listBox1.Items.Add(countN.ToString());
}
catch (Exception ex)
{
MessageBox.Show("Error: " + ex.Message);
}
}
}
}
You should use:
var count = int.Parse(Regex.Match(results.ToString(), "\\d+\\S+\\d+").Value);
Verify that the input string contains only digits.
solved
When I extracted the result number, it was something like 123,456,789, and the problem was the "," between the digits.
Match results = Regex.Match(websr.ReadToEnd(), "\\d+\\S+\\d+ results", RegexOptions.Singleline);
Match count = Regex.Match(results.ToString(), "\\d+\\S+\\d+");
string countN = count.Value.Replace(",", "");
int countM = int.Parse(countN);
and countM shows 123456789.
Thanks to all the friends who answered :)

Categories

Resources