How to get the first N result from google search using C#? - c#

How can I get the first N results from a Google search using C#?
// Download the raw HTML of the search results page for a fixed query URL.
const string url = "https://www.google.com.au/search?num=100&q=my+search+term";
using (var client = new WebClient())
{
    // The page source is only held in this local; nothing is done with it yet.
    var page = client.DownloadString(url);
}
Update:
How can I find out at which positions, and how many times, a specific URL appears in the results?

The following fetches the first 100 results for the search 'my search term' and prints the 1-based positions of the results that contain the specified target 'mytarget'.
/// <summary>
/// Loads a search results page with HtmlAgilityPack and prints the 1-based positions
/// of the result blocks whose markup contains "mytarget".
/// </summary>
internal class Program
{
    private const string Url = "http://www.google.com/search?num=100&q=my+search+term";

    private static void Main(string[] args)
    {
        var result = new HtmlWeb().Load(Url);

        // XPath attribute predicates are written with '@'; every <div class="g"> is
        // treated as one result block.
        var nodes = result.DocumentNode.SelectNodes("//html//body//div[@class='g']");

        // A null node list (nothing selected) is reported as the single value 0.
        var indexes = nodes == null
            ? new List<int> { 0 }
            : nodes.Select((x, i) => new { i, x.InnerHtml })
                .Where(x => x.InnerHtml.Contains("mytarget"))
                .Select(x => x.i + 1) // convert the 0-based index to a 1-based position
                .ToList();

        Console.WriteLine(String.Join(", ", indexes));
        Console.ReadLine();
    }
}
Another way to do it, using a regular expression:
// Download the results page; searchUrl is expected to be defined by the surrounding code.
string html;
using (var webClient = new WebClient())
{
    html = webClient.DownloadString(searchUrl);
}

// Each non-greedy match is treated as one result block. Note that '.' does not
// cross line breaks unless RegexOptions.Singleline is supplied.
var regex = new Regex("<div class=\"g\">(.*?)</div>");
var matches = regex.Matches(html).Cast<Match>().ToList();

// Test the matched HTML itself (Match.Value) rather than the string form of the
// anonymous wrapper object, then convert the 0-based index to a 1-based position.
var indexes = matches.Select((x, i) => new { i, x })
    .Where(x => x.x.Value.Contains("mytarget"))
    .Select(x => x.i + 1)
    .ToList();

Related

Elasticsearch Nest client Search

I am trying to compose a dynamic query with NEST (the Elasticsearch library for .NET 5.0), but only the first of the following variants works:
Case 1: (Work)
// Case 1: fluent bool/must query with a single wildcard clause. The field is
// given as the expression f => f.Title and the value is the title wrapped in '*'.
var response1 = await client.SearchAsync<VideoManifestElasticDto>(s =>
s.Query(q => q
.Bool(b => b
.Must(mu => mu
.Wildcard(f => f.Title, '*' + dtoSearch.Title + '*')
))));
var aaa1 = response1.Documents;
This returns 4 documents, which is correct.
Case 2: (Not Work)
// Case 2: the same bool/must shape, but the WildcardQuery object is built by hand
// and its field is the plain string nameof(VideoManifestElasticDto.Title), i.e. "Title".
// NOTE(review): Case 1 derives the field name from an expression instead; if the client
// rewrites inferred names (e.g. camel-casing them), the two cases target different
// field names — confirm by comparing the request JSON of both cases.
var response2 = await client.SearchAsync<VideoManifestElasticDto>(s =>
s.Query(q => q
.Bool(b => b
.Must(mu => new WildcardQuery() { Field = nameof(VideoManifestElasticDto.Title), CaseInsensitive = true, Value = '*' + dtoSearch.Title + '*' }
))));
var aaa2 = response2.Documents;
This returns 0 documents. Why?
Case 3: (Not Work)
This last case is my actual goal: I want to create the query dynamically.
// Case 3: the whole search descriptor is produced by the helper Blah from the search DTO.
var response3 = await client.SearchAsync<VideoManifestElasticDto>(Blah(dtoSearch));
var aaa3 = response3.Documents;
/// <summary>
/// Builds a search descriptor whose bool query requires every clause produced by
/// <c>Orso</c> for the given DTO.
/// </summary>
/// <param name="videoManifestElasticDto">Search criteria; passed straight through to <c>Orso</c>.</param>
/// <returns>The configured search descriptor.</returns>
public static SearchDescriptor<VideoManifestElasticDto> Blah(VideoManifestElasticDto videoManifestElasticDto)
{
    var descriptor = new SearchDescriptor<VideoManifestElasticDto>();
    return descriptor.Query(query =>
        query.Bool(boolQuery =>
            boolQuery.Must(Orso(videoManifestElasticDto))));
}
/// <summary>
/// Produces the query clauses for the given criteria. At most one clause is returned:
/// the first non-blank property among Title, Description and VideoId decides which.
/// </summary>
/// <param name="videoManifestElasticDto">Search criteria; may be null.</param>
/// <returns>An array with zero or one query container.</returns>
public static QueryContainer[] Orso(VideoManifestElasticDto videoManifestElasticDto)
{
    var clauses = new List<QueryContainer>();

    if (videoManifestElasticDto != null)
    {
        if (!string.IsNullOrWhiteSpace(videoManifestElasticDto.Title))
        {
            // Title: case-insensitive "contains" search via a wildcard on both sides.
            clauses.Add(new WildcardQuery
            {
                Field = nameof(VideoManifestElasticDto.Title),
                CaseInsensitive = true,
                Value = $"*{videoManifestElasticDto.Title}*"
            });
        }
        else if (!string.IsNullOrWhiteSpace(videoManifestElasticDto.Description))
        {
            clauses.Add(new MatchQuery
            {
                Field = "Description",
                Query = videoManifestElasticDto.Description
            });
        }
        else if (!string.IsNullOrWhiteSpace(videoManifestElasticDto.VideoId))
        {
            clauses.Add(new MatchQuery
            {
                Field = "VideoId",
                Query = videoManifestElasticDto.VideoId
            });
        }
    }

    return clauses.ToArray();
}

Make Elasticsearch diacritics insensitive

I am using Elasticsearch 6.6.0 and NEST in a .NET MVC project.
I am indexing some products using this code:
// Configure the client: default index plus a per-type index name for SearchableProduct,
// using the property "Id" as the document id.
var esSettings = new ConnectionSettings(node);
esSettings = esSettings.DefaultIndex(IndexInstanceName);
esSettings = esSettings
.DefaultMappingFor<SearchableProduct>(s => s.IdProperty("Id").IndexName(IndexInstanceName + "-products-" + ConfigurationManager.AppSettings["DefaultCulture"]));
var elastic = new ElasticClient(esSettings);
// The mapping request is issued here, before the index-exists check further down.
var mapResponse = elastic.Map<SearchableProduct>(x => x.AutoMap().Index(IndexInstanceName + "-products-" + culture));
// Index settings carrying one custom analyzer named "nospecialchars".
var indexState = new IndexState
{
Settings = new IndexSettings()
};
indexState.Settings.Analysis = new Analysis
{
Analyzers = new Analyzers()
};
// Standard tokenizer followed by the standard, lowercase, stop and asciifolding token filters.
indexState.Settings.Analysis.Analyzers.Add("nospecialchars", new CustomAnalyzer
{
Tokenizer = "standard",
Filter = new List<string> { "standard", "lowercase", "stop", "asciifolding" }
});
// Products index: create it only when it does not exist yet.
if (!elastic.IndexExists(IndexInstanceName + "-products-" + culture).Exists)
{
// NOTE(review): the mapping below is a plain AutoMap(); nothing visible here assigns the
// "nospecialchars" analyzer to any field — confirm against SearchableProduct's attributes.
var response = elastic.CreateIndex(
IndexInstanceName + "-products-" + culture,
s => s.InitializeUsing(indexState)
.Mappings(m => m.Map<SearchableProduct>(sc => sc.AutoMap())));
}
// Index the products, then refresh the products index.
await this.IndexProductsAsync(context, products, elastic, culture);
await elastic.RefreshAsync(new RefreshRequest(IndexInstanceName + "-products-" + culture));
and for the search I use the below code:
// query_string search over several product fields with AND as the default operator
// and a fuzziness edit distance of 0; results are sorted ascending by score.
// NOTE(review): the numeric second argument of Field(...) is presumably the field boost — confirm.
ISearchResponse<SearchableProduct> result = await elastic.SearchAsync<SearchableProduct>(s => s
.Index(elasticIndexName + "-products-" + culture)
.Take(DefaultPageSize)
.Source(src => src.IncludeAll())
.Query(query =>
query.QueryString(qs =>
qs.Query(q).DefaultOperator(Operator.And).Fuzziness(Fuzziness.EditDistance(0)).Fields(x => x.Field(d => d.Name, 2)
.Field(d => d.MetaTitle, 1)
.Field(d => d.Image, 1)
.Field(d => d.SystemId, 2)
.Field(d => d.Manufacturer, 1)
)
))
.Sort(d => d.Ascending(SortSpecialField.Score))
);
When I search for a Greek word with an accent (e.g. παγωτό), I get results (because the product is indexed with the accent), but when I search for the same word without the accent (e.g. παγωτο), I get no results.
Is anything wrong with the indexing settings or the search code?
Can I index my data without accents or alternatively index them as is but make the search or index accent insensitive?
Creating a field with a greek analyzer will make sure indexed text and query string pass the same analysis path. For παγωτό that means, during indexing, the text will be tokenized to παγωτ as well as during making the query request.
Please check my example which creates a field with greek analyzer and the example outputs both documents with παγωτό and παγωτο when looking for παγωτό or παγωτο.
/// <summary>
/// Demo: recreates the index "index_name", stores the accented and unaccented spelling
/// of the same word, then runs a match query for each spelling and prints the hits.
/// </summary>
class Program
{
    static async Task Main(string[] args)
    {
        var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
        var connection = new ConnectionSettings(pool)
            .DefaultIndex("index_name")
            .DisableDirectStreaming()
            .PrettyJson();
        var client = new ElasticClient(connection);

        // Start from a clean index whose mapping is derived from the Document type.
        await client.Indices.DeleteAsync("index_name");
        await client.Indices.CreateAsync("index_name",
            index => index.Map(mapping => mapping.AutoMap<Document>()));

        var samples = new[]
        {
            new Document {Id = 1, Text = "παγωτό"},
            new Document {Id = 2, Text = "παγωτο"},
        };
        await client.IndexManyAsync(samples);
        await client.Indices.RefreshAsync();

        // Match query against the Text field for the given search text.
        Task<ISearchResponse<Document>> Find(string text) =>
            client.SearchAsync<Document>(s => s
                .Query(q => q.Match(m => m.Field(f => f.Text).Query(text))));

        const string accented = "παγωτό";
        var accentedHits = await Find(accented);
        Console.OutputEncoding = Encoding.UTF8;
        Print(accented, accentedHits);

        const string unaccented = "παγωτο";
        var unaccentedHits = await Find(unaccented);
        Print(unaccented, unaccentedHits);
    }

    /// <summary>Writes the query text followed by one line per returned document.</summary>
    private static void Print(string query, ISearchResponse<Document> searchResponse)
    {
        Console.WriteLine($"For {query} found:");
        foreach (var hit in searchResponse.Documents)
        {
            Console.WriteLine($"Document {hit.Id} {hit.Text}");
        }
    }
}

/// <summary>Sample document; Text is declared as a text field with the "greek" analyzer.</summary>
public class Document
{
    public int Id { get; set; }

    [Text(Analyzer = "greek")]
    public string Text { get; set; }
}
Prints:
For παγωτό found:
Document 1 παγωτό
Document 2 παγωτο
For παγωτο found:
Document 1 παγωτό
Document 2 παγωτο
Hope that helps.

Having error while using storedFields from searchResponse in NEST (ElasticSearch)

I need to bind the search results from NEST (Elasticsearch) to a GridView in ASP.NET (Web Forms).
This is the code I use to get the results from Elasticsearch with NEST:
/// <summary>Document type used for the search; SN is declared as an int and JLN as a string.</summary>
public class Address
{
public int SN { get; set; }
public string JLN { get; set; }
}
/// <summary>
/// Click handler: runs a query_string phrase search on the JLN field for the text
/// in txtSearchValue and binds the returned documents to ESGridview.
/// </summary>
protected void BtnSearch_Clicked(object sender, EventArgs e)
{
string SearchValue = txtSearchValue.Text;
// Connection details come from appSettings; es_index is read but not used below.
string es_host = System.Configuration.ConfigurationManager.AppSettings["cnStringIP"];
string es_port = System.Configuration.ConfigurationManager.AppSettings["cnStringPort"];
string es_index = System.Configuration.ConfigurationManager.AppSettings["cnStringIndex"];
var settings = new ConnectionSettings(new Uri("http://" + es_host + ":" + es_port + ""))
.DefaultIndex("masterlist*");
var client = new ElasticClient(settings);
// First page of at most 10 hits from the indices matching "masterlist*".
var searchResponse = client.Search<Address>(s => s
.Index("masterlist*")
.From(0)
.Size(10)
.Query(q => q
.QueryString(qs => qs
.Query("JLN:\""+ SearchValue +"\"")
)
)
);
// Materialize the documents and hand them to the grid.
var address = searchResponse.Documents.ToList();
ESGridview.DataSource = address;
ESGridview.DataBind();
}
With this code, the GridView auto-generates the two columns with the correct headers ("SN" and "JLN") and 10 rows (I limit the result size to 10 in the search request), but the cells in those columns are empty.
I did found another POST with this link
https://www.elastic.co/guide/en/elasticsearch/client/net-api/6.x/returned-fields.html#returned-fields
After check with this link,
I changed my code to:
// Second attempt: request SN and JLN as stored fields and read them from searchResponse.Fields.
string SearchValue = txtSearchValue.Text;
string es_host = System.Configuration.ConfigurationManager.AppSettings["cnStringIP"];
string es_port = System.Configuration.ConfigurationManager.AppSettings["cnStringPort"];
string es_index = System.Configuration.ConfigurationManager.AppSettings["cnStringIndex"];
var settings = new ConnectionSettings(new Uri("http://" + es_host + ":" + es_port + ""))
.DefaultIndex("masterlist*");
var client = new ElasticClient(settings);
var searchResponse = client.Search<Address>(s => s
.StoredFields(sf => sf
.Fields(
f => f.SN,
f => f.JLN
)
)
.From(0)
.Size(10)
.Query(q => q
.QueryString(qs => qs
.Query("JLN:\""+ SearchValue +"\"")
)
)
);
// NOTE(review): stored_fields presumably only returns fields that are mapped as stored;
// a hit without such field values is a likely source of the NullReferenceException
// reported for this loop — confirm against the index mapping.
foreach (var fieldValues in searchResponse.Fields)
{
// The anonymous object is built and then discarded on every iteration.
var document = new
{
SN = fieldValues.ValueOf<Address, int>(p => p.SN),
JLN = fieldValues.Values<Address, string>(p => p.JLN)
};
}
// The grid is still bound to Documents, not to the values read in the loop above.
var address = searchResponse.Documents;
var count = "MaxScore" + searchResponse.MaxScore;
ESGridview.DataSource = address;
ESGridview.DataBind();
But when I run the code, I get the following error, raised for the whole foreach (var ...) block:
System.NullReferenceException:'Object reference not set to an instance of an object.'
Can anyone tell me how to solve this problem, or what I am doing wrong?
Many many thanks ~
ElasticSearch 7.0.1
NEST 7.0.0
C#
ASP.NET (Webform)
I have solved my problem.
The code below is how to get the searchResult from ElasticSearch and bind the data into Gridview in ASP.NET by using NEST.
/// <summary>
/// Document type for the search results. Both properties are strings here and carry a
/// Text attribute that names the field explicitly ("SN" and "JLN").
/// </summary>
public class Address
{
[Text(Name = "SN")]
public string SN { get; set; }
[Text(Name = "JLN")]
public string JLN { get; set; }
}
/// <summary>
/// Click handler: runs a query_string phrase search on the JLN field for the text
/// typed into txtSearchValue and binds the returned documents to ESGridview.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
protected void BtnSearch_Clicked(object sender, EventArgs e)
{
    string SearchValue = txtSearchValue.Text;
    string es_host = System.Configuration.ConfigurationManager.AppSettings["cnStringIP"];
    string es_port = System.Configuration.ConfigurationManager.AppSettings["cnStringPort"];

    // The user text is embedded in a quoted phrase; escape backslashes first and then
    // double quotes so the input cannot terminate or distort the phrase.
    string escapedValue = SearchValue.Replace("\\", "\\\\").Replace("\"", "\\\"");

    var settings = new ConnectionSettings(new Uri("http://" + es_host + ":" + es_port + ""))
        .DefaultIndex("masterlist*");
    var client = new ElasticClient(settings);

    // First page of at most 100 hits from the default index pattern.
    var searchResponse = client.Search<Address>(s => s
        .From(0)
        .Size(100)
        .Query(q => q
            .QueryString(qs => qs
                .Query("JLN:\"" + escapedValue + "\"")
            )
        )
    );

    // Materialize the documents and hand them to the grid.
    var address = searchResponse.Documents.ToList();
    ESGridview.DataSource = address;
    ESGridview.DataBind();
}

LINQ to XML Get (children?) of element?

How would I go about getting the ID information (the child elements of &lt;relations&gt;) using LINQ? I'm trying to add the values to an array of int.
<FactionAttributes>
<name>Player</name>
<id>0</id>
<relationModifier>1</relationModifier>
<relations>
<id0>100</id0>
<id1>50</id1>
<id2>50</id2>
<id3>50</id3>
<id4>50</id4>
<id5>50</id5>
</relations>
</FactionAttributes>
That is my XML.
Here is the code I'm using so far.
// Loads factiondata.xml and, for every faction, fills the name and relation arrays
// describing all other factions.
void InitFactions()
{
int count = 0;
string filepath = Application.dataPath + "/Resources/factiondata.xml";
XDocument factionXML = XDocument.Load(filepath);
// One anonymous record per <FactionAttributes> child of the root element.
var factionNames = from factionName in factionXML.Root.Elements("FactionAttributes")
select new {
factionName_XML = (string)factionName.Element("name"),
factionID_XML = (int)factionName.Element("id"),
factionRelations_XML = factionName.Element("relations")// Need to turn this into array.
};
// Count the factions by enumerating the query once.
foreach ( var factionName in factionNames)
++count;
foreach ( var factionName in factionNames)
{
Factions f = new Factions();
f.otherFactionsName = new string[count];
f.otherFactionsRelation = new int[count];
int others = 0;
f.FactionName = factionName.factionName_XML;
Debug.Log(factionName.factionRelations_XML);
// Adds Rivals, not self to other list.
foreach (var factionName2 in factionNames)
{
if (factionName.factionID_XML == factionName2.factionID_XML)
continue;
// Both arrays are indexed by the other faction's id.
f.otherFactionsName[(int)factionName2.factionID_XML] = factionName2.factionName_XML;
// THIS IS WHERE IM ADDING THE RELATIONS IN //
// NOTE(review): factionRelations_XML is the <relations> element selected in the query
// above, yet it is indexed here as if it were an int array.
f.otherFactionsRelation[(int)factionName2.factionID_XML] = factionName.factionRelations_XML[(int)factionName2.factionID_XML];
Debug.Log(f.FactionName + " adds: " + factionName2.factionName_XML);
++others;
}
}
}
I have made multiple attempts using nodes and what not. I can't seem to figure out the correct syntax.
XDocument doc = XDocument.Load(Path);
// Locate the first <FactionAttributes> element wherever it sits in the document, so the
// lookup works whether it is the root element or a child of a wrapper root.
var faction = doc.Descendants("FactionAttributes").First();
//To get <id>
var MyIds = faction.Element("id").Value;
//To get <id0>, <id1>, etc.
var result = faction
    .Element("relations")
    .Elements()
    .Where(E => E.Name.ToString().Contains("id"))
    .Select(E => new { IdName = E.Name, Value = E.Value});
If you want an array of ints, replace the Select with this:
.Select(E => Convert.ToInt32(E.Value)).ToArray();
If you are just after the relations Ids use this simple query
var doc = XDocument.Load("c:\\tmp\\test.xml");
var ids = doc.Descendants("relations").Elements().Select(x => x.Value);
If you want the Id and the relations ids in one array use this
var id = doc.Descendants("id").Select(x=>x.Value).Concat(doc.Descendants("relations").Elements().Select(x => x.Value));

WebClient download many files error — Invalid URI: The URI is empty

I want to download all the album covers for an artist from last.fm and name each cover file after its album title.
But when start the program, I get an error: "Invalid URI: The URI is empty."
program code:
/// <summary>
/// Loads the top-albums XML for one artist and, for every value in the rank array,
/// starts an asynchronous download of that album's extra-large cover image into
/// X:\Code\T\ under the album's name.
/// </summary>
public static void GetXML()
{
    string url = @"http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&artist=radiohead&api_key=b25b959554ed76058ac220b7b2e0a026";
    string albumName = String.Empty;
    string coverLink = String.Empty;

    // Indices 1..29 receive their own value; index 0 keeps its default value 0.
    int[] numArr = new int[30];
    for (int x = 1; x < numArr.Length; x++)
    {
        numArr[x] = x;
    }

    XDocument xml = XDocument.Load(url);
    foreach (var c in numArr)
    {
        // XPath attribute predicates are written with '@'.
        var name = xml.XPathSelectElements(String.Format("//album[@rank='{0}']", c))
            .Select(x => x.Element("name").Value)
            .ToList();
        foreach (var item in name)
        {
            albumName = item.ToString();
        }

        var covers = xml.XPathSelectElements(String.Format("//album[@rank='{0}']/image[@size='extralarge']", c))
            .Select(x => x.Value)
            .ToList();
        foreach (var item in covers)
        {
            coverLink = item.ToString();
        }

        // albumName and coverLink keep their previous values when a rank matches nothing.
        WebClient web = new WebClient();
        web.DownloadFileCompleted += new System.ComponentModel.AsyncCompletedEventHandler(web_DownloadFileCompleted);
        web.DownloadFileAsync(new Uri(coverLink), @"X:\Code\T\" + albumName + ".jpg");
    }
}
You initialize your array as
for (int x = 1; x < numArr.Length; x++)
{
numArr[x] = x;
}
but you never set numArr[0], so it keeps its default value 0, which is then used in foreach (var c in numArr) (and there is no album with rank=0).
Isn't this much easier?
// Project every <album> element into its rank, name and extra-large cover URL.
var albums = xml.Descendants("album")
    .Select(a => new
    {
        Rank = (int)a.Attribute("rank"),
        Name = a.Element("name").Value,
        // XPath attribute predicates are written with '@'.
        ImageUrl = a.XPathSelectElement("image[@size='extralarge']").Value
    })
    .ToList();

Categories

Resources