Elastic Search to search for words that starts with phrase - c#

I'm trying to create a search function for my website using Elastic Search and NEST. You can see my code below; I get results if I search for complete (and almost complete) words.
Ie, if I search for "Buttermilk" or "Buttermil" I get a hit on my document containing the word "Buttermilk".
However, what I am trying to accomplish is this: if I search for "Butter", I should get all three documents, since each of them contains a word that starts with "Butter". I thought this was solved by using FuzzyLikeThis?
Can anyone see what I'm doing wrong and point me in the right direction?
I created a console-app and the complete code you can see here:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nest;
using Newtonsoft.Json;
// Console sample: creates an Elasticsearch index, stores three TestItem documents and
// searches them with a FuzzyLikeThis query built from a phrase typed by the user.
namespace ElasticSearchTest
{
class Program
{
// Entry point: creates the "elastictesting" index with a Snowball (English) analyzer
// registered under the name "text-en", indexes three sample items, then reads a search
// phrase from the console and prints the number of hits followed by each hit's Name.
static void Main(string[] args)
{
var indexSettings = new IndexSettings();
indexSettings.Analysis.Analyzers["text-en"] = new SnowballAnalyzer { Language = "English" };
ElasticClient.CreateIndex("elastictesting", indexSettings);
// Three sample documents whose names all begin with "Butter".
var testItem1 = new TestItem {
Id = 1,
Name = "Buttermilk"
};
ElasticClient.Index(testItem1, "elastictesting", "TestItem", testItem1.Id);
var testItem2 = new TestItem {
Id = 2,
Name = "Buttercream"
};
ElasticClient.Index(testItem2, "elastictesting", "TestItem", testItem2.Id);
var testItem3 = new TestItem {
Id = 3,
Name = "Butternut"
};
ElasticClient.Index(testItem3, "elastictesting", "TestItem", testItem3.Id);
Console.WriteLine("Write search phrase:");
var searchPhrase = Console.ReadLine();
var searchResults = Search(searchPhrase);
Console.WriteLine("Number of search results: " + searchResults.Count());
foreach (var item in searchResults) {
Console.WriteLine(item.Name);
}
Console.WriteLine("Press any key to exit");
Console.ReadKey();
}
// Builds the query for searchPhrase, runs it and returns the matching documents as a list.
private static List<TestItem> Search(string searchPhrase)
{
var query = BuildQuery(searchPhrase);
// Select(d => d) is an identity projection.
// NOTE(review): TestItem does not override Equals/GetHashCode, so Distinct()
// presumably compares by reference here - confirm this is intended.
var result = ElasticClient
.Search(query)
.Documents
.Select(d => d)
.Distinct()
.ToList();
return result;
}
// Returns a client for http://localhost:9200 with "elastictesting" as the default index.
// A new ConnectionSettings and ElasticClient are constructed on every access.
public static ElasticClient ElasticClient
{
get
{
var localhost = new Uri("http://localhost:9200");
var setting = new ConnectionSettings(localhost);
setting.SetDefaultIndex("elastictesting");
return new ElasticClient(setting);
}
}
// Builds the search request: the words of searchPhrase are joined with " AND " and used as
// the LikeText of a FuzzyLikeThis query on the "name" field, wrapped in a bool filter.
// The request targets index "elastictesting", type "TestItem", with a size of 500.
private static SearchDescriptor<TestItem> BuildQuery(string searchPhrase)
{
var querifiedKeywords = string.Join(" AND ", searchPhrase.Split(' '));
var filters = new BaseFilter[1];
filters[0] = Filter<TestItem>.Bool(b => b.Should(m => m.Query(q =>
q.FuzzyLikeThis(flt =>
flt.OnFields(new[] {
"name"
}).LikeText(querifiedKeywords)
.PrefixLength(2)
.MaxQueryTerms(1)
.Boost(2))
)));
var searchDescriptor = new SearchDescriptor<TestItem>()
.Filter(f => f.Bool(b => b.Must(filters)))
.Index("elastictesting")
.Type("TestItem")
.Size(500);
// jsons is assigned but never read in this method (presumably kept for debugging).
var jsons = JsonConvert.SerializeObject(searchDescriptor, new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });
return searchDescriptor;
}
}
// Document type stored in the index; Name is mapped as an analyzed field using the "text-en" analyzer.
class TestItem {
public int Id { get; set; }
[ElasticProperty(Analyzer = "text-en", Index = FieldIndexOption.analyzed)]
public string Name { get; set; }
}
}
Edited 2014-04-01 11:18
Well, I ended up using MultiMatch and QueryString, so this is how my code looks now. I hope it may help someone in the future. Also, I added a Description property to my TestItem to illustrate the multi-match.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Nest;
using Newtonsoft.Json;
namespace ElasticSearchTest
{
    /// <summary>
    /// Console sample: creates an index, stores three <see cref="TestItem"/> documents and runs a
    /// phrase-prefix multi-match search over their name and description.
    /// </summary>
    class Program
    {
        private const string IndexName = "elastictesting";
        private const string TypeName = "TestItem";

        // Built once and reused: the original getter created new connection settings and a
        // new client on every access (several times per run).
        private static readonly ElasticClient SharedClient = CreateClient();

        /// <summary>
        /// Creates the index, indexes the sample items, then prints every hit for a phrase
        /// read from the console.
        /// </summary>
        static void Main(string[] args)
        {
            var indexSettings = new IndexSettings();
            ElasticClient.CreateIndex(IndexName, indexSettings);

            var seedItems = new[]
            {
                new TestItem { Id = 1, Name = "Buttermilk", Description = "butter with milk" },
                new TestItem { Id = 2, Name = "Buttercream", Description = "Butter with cream" },
                new TestItem { Id = 3, Name = "Butternut", Description = "Butter with nut" }
            };
            foreach (var seedItem in seedItems)
            {
                ElasticClient.Index(seedItem, IndexName, TypeName, seedItem.Id);
            }

            Console.WriteLine("Write search phrase:");
            // ReadLine returns null when standard input is closed; search for an empty phrase instead.
            var searchPhrase = Console.ReadLine() ?? string.Empty;
            var searchResults = Search(searchPhrase);
            Console.WriteLine("Number of search results: " + searchResults.Count);
            foreach (var item in searchResults)
            {
                Console.WriteLine(item.Name);
                Console.WriteLine(item.Description);
            }
            Console.WriteLine("Press any key to exit");
            Console.ReadKey();
        }

        /// <summary>Runs the query built for <paramref name="searchPhrase"/> and returns the matching documents.</summary>
        /// <param name="searchPhrase">Text typed by the user.</param>
        /// <returns>The documents returned by the search, as a list.</returns>
        private static List<TestItem> Search(string searchPhrase)
        {
            var query = BuildQuery(searchPhrase);
            return ElasticClient
                .Search(query)
                .Documents
                .Distinct()
                .ToList();
        }

        /// <summary>Client for http://localhost:9200 with "elastictesting" as the default index.</summary>
        public static ElasticClient ElasticClient
        {
            get { return SharedClient; }
        }

        /// <summary>Creates the client instance that <see cref="ElasticClient"/> hands out.</summary>
        private static ElasticClient CreateClient()
        {
            var localhost = new Uri("http://localhost:9200");
            var setting = new ConnectionSettings(localhost);
            setting.SetDefaultIndex(IndexName);
            return new ElasticClient(setting);
        }

        /// <summary>
        /// Builds a multi-match request of type PHRASE_PREFIX over the "name" and "description"
        /// fields, limited to 500 results.
        /// </summary>
        /// <param name="searchPhrase">Text to match.</param>
        private static SearchDescriptor<TestItem> BuildQuery(string searchPhrase)
        {
            // The unused JSON serialization of the descriptor was removed; it had no effect on the result.
            return new SearchDescriptor<TestItem>()
                .Query(q => q
                    .MultiMatch(m =>
                        m.OnFields(new[] {
                            "name",
                            "description"
                        }).QueryString(searchPhrase).Type(TextQueryType.PHRASE_PREFIX)
                    )
                )
                .Index(IndexName)
                .Type(TypeName)
                .Size(500);
        }
    }

    /// <summary>Document type stored in the index.</summary>
    class TestItem
    {
        public int Id { get; set; }
        public string Name { get; set; }
        public string Description { get; set; }
    }
}

Instead of using a FuzzyLikeThis query, use a prefix query; it is faster and more accurate.
For example:
curl -XPOST "http://localhost:9200/try/indextype/_search" -d'
{
"query": {
"prefix": {
"field": {
"value": "Butter"
}
}
}
}'
create above query in NEST and try again..!

This has nothing to do with FuzzyLikeThis.
You can use a prefix query, as suggested by @BlackPOP, out of the box.
You could also opt for EdgeNGrams, which tokenize your input at index time. The result is faster query performance compared to a prefix query, at the cost of an increased index size.
One thing to keep in mind is that a prefix query only works on non-analyzed fields, so if you want to do any analyzing at index time, you're probably better off using EdgeNGrams.
Please read up on analyzers etc. if you don't know what they are.
Some refs:
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/analysis-analyzers.html
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/analysis-edgengram-tokenizer.html
See How can I do a prefix search in ElasticSearch in addition to a generic query string? for a similar question.

Related

How to filter a list using another list in LINQ C#

The list needs to filter is having data like: '1000', '1000A', '1000B', '2000', '2000C', '2003', '2006A'
The list by which I am filtering having data like: '1000', '2000', '2003'
Expected output: '1000', '1000A', '1000B', '2000', '2000C', '2003'
(output is expected like we do in SQL server LIKE operator)
Suppose you are having two class like below,
/// <summary>Item of the list that is being filtered.</summary>
public class MainClass
{
    /// <summary>Value that is compared against the filter descriptions.</summary>
    public string ActualValue { get; set; }
}
/// <summary>Item of the list that supplies the filter values.</summary>
public class FilterClass
{
    /// <summary>Text that a main-list value is matched against.</summary>
    public string Description { get; set; }
}
I am loading some dummy data like this,
// Values to be filtered.
List<MainClass> mainList = new List<MainClass>
{
    new MainClass { ActualValue = "1000" },
    new MainClass { ActualValue = "1000A" },
    new MainClass { ActualValue = "1002F" },
    new MainClass { ActualValue = "1002A" },
    new MainClass { ActualValue = "1003" }
};
// Values to filter by.
List<FilterClass> filterList = new List<FilterClass>
{
    new FilterClass { Description = "1003" },
    new FilterClass { Description = "1002" }
};
The following query gives the output you require:
// Keep every main entry whose value contains at least one of the filter descriptions.
var output1 = (from entry in mainList
               where filterList.Any(filter => entry.ActualValue.Contains(filter.Description))
               select entry).ToList();
Try with regex, like this:
var list1 = new List<string>{"1000", "1000A", "1000B","2000","2000A","3000BV"};
var list2 = new List<string>{"1000","2000"};
// Regex.Escape makes each filter value match literally, so values containing regex
// metacharacters (".", "+", "(" ...) cannot change the pattern. IsMatch already finds a
// match anywhere in the input, so no surrounding ".*" is needed.
var result = list1.Where(x => list2.Any(y => Regex.IsMatch(x, Regex.Escape(y)))).ToList();
Note: .* are the equivalent of % in SQL.
you could use linq in this way :
// Values to be filtered.
var filterList = new List<string> { "1000", "1000A", "1000B", "2000", "2000C", "2003", "2006A" };
// Values to filter by.
var filterLikeList = new List<string> { "1000", "2000", "2003" };
// Lazily yields every value that contains at least one of the filter values.
var results =
    from candidate in filterList
    where filterLikeList.Any(pattern => candidate.Contains(pattern))
    select candidate;

How to split string and assign as column and row in table

I have a input strings example
str1 = ""Type":"#Microsoft.Azure","Email":"abc#tmail.com","DisplayName":"abc","Dpt":"home"";
str2 = ""Type":"#Microsoft.Azure","Email":"xyz#tmail.com","DisplayName":"xyz","Dpt":"home"";
In compileable form it looks like this:
// Verbatim string literal: the prefix is '@' and every embedded double quote is doubled.
string str = @"""Type"":""#Microsoft.Azure"",""Email"":""abc#tmail.com"",""DisplayName"":""abc"",""Dpt"":""home""";
Can i split on "," delimiter and assign left to ":" as columns and right to ":" as rows in table.
Example:
"Type" "Email" "DisplayName" "Dpt"
"#Microsoft.Azure" "abc#tmail.com" "abc" "home"
"#Microsoft.Azure" "xyz#tmail.com" "xyz" "home"
i tried something like this
// Sample input: comma-separated "key":"value" pairs (a string needs double quotes, not single quotes).
string str = @" ""name"":""abd"","""":""""m"""":"""" ";
// First split the input into pairs ...
string[] strS1 = str.Split(',');
foreach (string S1 in strS1)
{
    // ... then split each pair (S1, not the whole input) into key and value.
    string[] strS2 = S1.Split(':');
    foreach (string S2 in strS2)
    {
        Console.Write(S2);
    }
}
You can try something like this : Demo
The Json Way.
Assume the input is JSON.
Join the lines with },{ and add a leading [{ and a trailing }]. That gives you a valid JSON array.
Deserialize it to your custom type (I ignored the Type property here):
JsonConvert.DeserializeObject<List<CsvItem>>(myJSON);
/// <summary>One parsed input line; the "Type" key of the input has no matching property.</summary>
public class CsvItem
{
    /// <summary>Value of the "Email" key.</summary>
    public string Email { get; set; }

    /// <summary>Value of the "DisplayName" key.</summary>
    public string DisplayName { get; set; }

    /// <summary>Value of the "Dpt" key.</summary>
    public string Dpt { get; set; }
}
Your parsing way: String Split.
After the S1.Split(':'), you end up with a small array the first value is the property name the second the value.
Trim the ", compare and assign.
if (strS2[0].Trim('"') == "Email") temp.Email = strS2[1].Trim('"');
if (strS2[0].Trim('"') == "DisplayName") temp.DisplayName = strS2[1].Trim('"');
if (strS2[0].Trim('"') == "Dpt") temp.Dpt = strS2[1].Trim('"');
Same thing in LinQ:
At this point it's not faster, easier to maintain, not even easier to read. It's just compact
lines
.Select(x => x.Split(','))
.Select(x =>
new CsvItem
{
Email = x[1].Split(':')[1].Trim('"'),
DisplayName = x[2].Split(':')[1].Trim('"'),
Dpt = x[3].Split(':')[1].Trim('"')
})
What's left?
Regex like : "(\w+)":"(.+?)", could easy replace the split. With a more detailed regex you can catch only the value you need.
What solution in the end?
It depends on the quality of the file: whether it is human-generated and can contain errors.
It also depends on how you want to handle errors: do you reject the whole file when an error occurs? Do you return only the list of valid data? etc.
I would choose either solution #1 or #2: #1 for files that are sometimes broken, #2 for meticulous error handling and debugging.
using System;
using System.Text.RegularExpressions;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Newtonsoft.Json;
/// <summary>
/// Parses lines of comma-separated "key":"value" pairs into <see cref="CsvItem"/> objects in
/// three ways: by wrapping the lines into a JSON array, by manual string splitting, and by a
/// LINQ projection that relies on the position of each pair.
/// </summary>
public class Program
{
    // NOTE(review): Dump() is not defined in this file; presumably it is an extension method
    // supplied by the hosting environment (e.g. LINQPad / .NET Fiddle) - confirm.
    public static void Main()
    {
        // Verbatim literal ('@' prefix) spanning two input lines.
        string inputCSV = @"""Type"":""#Microsoft.Azure"",""Email"":""abc#tmail.com"",""DisplayName"":""abc"",""Dpt"":""home""
""Type"":""#Microsoft.Azure"",""Email"":""xyz#tmail.com"",""DisplayName"":""xyz"",""Dpt"":""home""";
        // ReadAllLines mock. The line break inside the literal is whatever the source file was
        // saved with, which need not equal Environment.NewLine, so accept both forms.
        string[] lines = inputCSV.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);
        { // The Json Way
            var bringJsonBack = "[\n{" + string.Join("},\n{", lines) + "}\n]";
            var results = JsonConvert.DeserializeObject<List<CsvItem>>(bringJsonBack);
            results.Dump();
        }
        { // Your working way
            var results = new List<CsvItem>();
            foreach (var line in lines)
            {
                var temp = new CsvItem();
                foreach (string pair in line.Split(','))
                {
                    // Split on the first ':' only, so a value that itself contains ':' stays intact.
                    string[] parts = pair.Split(new[] { ':' }, 2);
                    if (parts.Length < 2)
                    {
                        continue; // no key/value separator in this segment
                    }
                    // The part before the ':' names the property, the part after it is the value.
                    string value = parts[1].Trim('"');
                    switch (parts[0].Trim('"'))
                    {
                        case "Email":
                            temp.Email = value;
                            break;
                        case "DisplayName":
                            temp.DisplayName = value;
                            break;
                        case "Dpt":
                            temp.Dpt = value;
                            break;
                    }
                }
                results.Add(temp);
            }
            results.Dump();
        }
        { // LinQ Version of your algo: relies on the pairs appearing in a fixed order.
            var results = lines
                .Select(x => x.Split(','))
                .Select(x =>
                    new CsvItem
                    {
                        Email = x[1].Split(':')[1].Trim('"'),
                        DisplayName = x[2].Split(':')[1].Trim('"'),
                        Dpt = x[3].Split(':')[1].Trim('"')
                    })
                .ToList();
            results.Dump();
        }
    }

    /// <summary>One parsed input line; the "Type" key is ignored.</summary>
    public class CsvItem
    {
        public string Email { get; set; }
        public string DisplayName { get; set; }
        public string Dpt { get; set; }
    }
}
Your original string looks suspiciously like it was extracted from a JSON response. You should just deserialize the original JSON response directly into a DataTable with Newtonsoft.Json, ala:
//Install-Package Newtonsoft.Json
using Newtonsoft.Json;
using System.Data;
namespace Split_string_and_assign_as_table
{
    /// <summary>Deserializes a JSON array of flat objects directly into a <see cref="DataTable"/>.</summary>
    class Program
    {
        static void Main(string[] args)
        {
            // Verbatim literal ('@' prefix); embedded double quotes are doubled.
            string json = @"[
{
""Type"": ""#Microsoft.Azure"",
""Email"": ""abc#tmail.com"",
""DisplayName"": ""abc"",
""Dpt"": ""home""
},
{
""Type"": ""#Microsoft.Azure"",
""Email"": ""xyz#tmail.com"",
""DisplayName"": ""xyz"",
""Dpt"": ""home""
}
]";
            // The JSON property names become the table's columns and each object becomes a row.
            var dataTable = JsonConvert.DeserializeObject<DataTable>(json);
        }
    }
}

How to iterate through JObject Properties via Foreach/LINQ

I have an established JObject object. Trying to loop through it to acquire a Key/value based on anothers Key/value (example of json below with code currently stuck on)
For a tad more detail - looking to loop through "value", get the "KeyID" based on "MailState"
definitely feel like I am missing the step of filtering by MailState/ColName apparently - I have searched through threads a bunch but if someone knows of one that answered this that i was unable to find i will happily pull this down/reference it
// JSON DATA
{
"odata.metadata": "https://..com/odata/$metadata#JCJMCDXes",
"value": [
{
"KeyID": "10379",
"MailCity": "Chicago",
"MailState": "IL"
},
{
"KeyID": "9846",
"MailCity": "Chicago",
"MailState": "IL"
},
{
"KeyID": "2234",
"MailCity": "Madison",
"MailState": "WI"
}]
}
// Current code example
// class in play
/// <summary>Result row holding a key together with the column value it was collected for.</summary>
public class datastorage
{
    /// <summary>Identifier of the row.</summary>
    public string ID { get; set; }

    /// <summary>Column value associated with the identifier.</summary>
    public string Col { get; set; }
}
/// <summary>Lookup entry carrying a single column name.</summary>
public class listData
{
    /// <summary>Name of the column.</summary>
    public string ColName { get; set; }
}
// getVPData is a string response from a call to an API
// NOTE(review): string.Replace returns a new string and the result is not assigned here,
// so getVPData itself is left unchanged by this statement.
getVPData.Replace(System.Environment.NewLine, "");
JObject jobj = (JObject)Newtonsoft.Json.JsonConvert.DeserializeObject(getVPData);
List<datastorage> data = new List<datastorage>();
// Loop
// NOTE(review): listData is the name of the class declared above; presumably a collection
// of listData instances is meant here - confirm.
foreach(var r in listData) // has distinct State abbreviations so only 1 occurrence
{
// NOTE(review): this enumerates jobj itself; presumably the entries of its "value" array
// are what should be visited - confirm.
foreach (var j in jobj) // This the right path?
{
//add KeyID into ID
// NOTE(review): List<T> exposes Add (capital A); "add" as written will not compile.
data.add(new datastorage
{
// Placeholder: the value for ID has not been filled in yet.
ID = ,//add KeyID into ID
Col = r.ColName
});
}
}
You can use Newtonsoft.Json library to parse and loop to the items of value
here is a sample code:
// Deserialize without a target type and use dynamic access to walk the result.
dynamic json = JsonConvert.DeserializeObject(getVPData);
foreach (dynamic entry in json["value"])
{
    // Each entry of "value" exposes its fields by name; print them just for showing.
    Console.WriteLine(
        "KeyID:{0}, MailCity:{1}, MailState:{2}",
        entry["KeyID"],
        entry["MailCity"],
        entry["MailState"]);
}
Let me know if the snippet works.
Straightforward ways are:
using System;
using System.Linq;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
namespace ConsoleApp7
{
    /// <summary>
    /// Shows two ways to find, for each mail state, the first KeyID among the entries of the
    /// "value" array of a JSON document.
    /// </summary>
    internal class Program
    {
        private static void Main(string[] args)
        {
            var mailStates = new[] {"IL", "WI"};
            var jObject = (JObject) JsonConvert.DeserializeObject(json);
            var values = (JArray) jObject["value"];
            // 1st way: LINQ over the array entries.
            foreach (var mailState in mailStates)
            {
                var key = values
                    // Value<string>("MailState") yields null for an entry without that property,
                    // whereas SelectToken("MailState").Value<string>() would throw on it.
                    .Where(v => mailState == v.Value<string>("MailState"))
                    .Select(v => v.Value<string>("KeyID"))
                    .FirstOrDefault();
                Console.WriteLine($"1st case: {mailState} - {key}");
            }
            /* 2nd way based on JSONPath
             * api: https://www.newtonsoft.com/json/help/html/QueryJsonSelectTokenJsonPath.htm
             * dox: https://support.smartbear.com/alertsite/docs/monitors/api/endpoint/jsonpath.html
             * tester: https://jsonpath.curiousconcept.com/
             */
            foreach (var mailState in mailStates)
            {
                // In a JSONPath filter expression '@' refers to the element currently being tested.
                var key = values.SelectTokens($"$[?(@.MailState == '{mailState}')].KeyID")
                    .Select(v => v.ToString())
                    .FirstOrDefault();
                Console.WriteLine($"2nd case: {mailState} - {key}");
            }
            Console.ReadKey();
        }

        // Sample payload as a verbatim literal ('@' prefix; embedded double quotes are doubled).
        private static string json = @"
{
""odata.metadata"": ""https://cdxapiclient.palmercg.com/odata/$metadata#JCJMCDXes"",
""value"": [
{
""KeyID"": ""10379"",
""MailCity"": ""Chicago"",
""MailState"": ""IL""
},
{
""KeyID"": ""9846"",
""MailCity"": ""Chicago"",
""MailState"": ""IL""
},
{
""KeyID"": ""2234"",
""MailCity"": ""Madison"",
""MailState"": ""WI""
}]
}";
    }
}

variables not working inside Lambdas in Mono.Csharp script

It throws a Mono.CSharp.InternalError exception. The InnerException basically says "object reference not set". Any ideas, folks? Code used:
using System;
using System.IO;
using Mono.CSharp;
using System.Reflection;
using System.Collections.Generic;
namespace TestMonoCSharp
{
    /// <summary>Sample model referenced by the evaluated script.</summary>
    public class testmodel
    {
        public string a {get;set;}
        public double b {get;set;}
    }

    /// <summary>
    /// Evaluates a C# lambda given as text with Mono.CSharp's Evaluator and invokes the
    /// resulting delegate on a small list of <see cref="testmodel"/> items.
    /// </summary>
    class MainClass
    {
        public static void Main (string[] args)
        {
            // Compiler reports are written to an in-memory stream; dispose the writer when done.
            using (var tw = new StreamWriter(new MemoryStream()))
            {
                tw.AutoFlush = true;
                CompilerContext c = new CompilerContext(new CompilerSettings(), new StreamReportPrinter(tw));
                var csc = new Evaluator(c);
                // Make this assembly (and therefore testmodel) visible to the script.
                csc.ReferenceAssembly(Assembly.GetExecutingAssembly());
                csc.Run("using System;");
                csc.Run("using System.Linq;");
                csc.Run("using System.Collections.Generic;");
                csc.Run("using TestMonoCSharp;");
                // Script source as a verbatim literal ('@' prefix).
                var query = @"new System.Func<IEnumerable<testmodel>, IEnumerable<testmodel>>((pos) =>
{
var avg = pos.Average(x=>x.b);
//return pos.Where(x=>x.b < 3 ).ToArray(); //works
return pos.Where(x=>x.b < avg ).ToArray(); //doesn't work
});";
                var list = new List<testmodel> () {new testmodel{ a = "a", b = 3}, new testmodel{ a = "a", b = 2} };
                var func = csc.Evaluate(query) as Func<IEnumerable<testmodel>, IEnumerable<testmodel>>;
                // 'as' yields null when the script result is not the expected delegate type;
                // report that instead of failing with a NullReferenceException on invocation.
                if (func == null)
                {
                    Console.Error.WriteLine("The script did not evaluate to the expected delegate type.");
                    return;
                }
                var val = func(list);
            }
        }
    }
}
You are trying to evaluate a Delegate...
new System.Func<IEnumerable<string>, IEnumerable<string>>((pos) => {
var avg = pos.Average(x=>x.Length);
return pos.Where(x=>x.Length < avg ).ToArray(); //doesn't work
});
This causes an internal System.NullReferenceException in the mcs compiler, because nothing can be dynamically evaluated by executing that statement: there is no context for it.
Cut/Paste this into Mono's csharp repl:
public class testmodel
{
public string a {get;set;}
public double b {get;set;}
}
testmodel[] list = {new testmodel{a="1",b=1}, new testmodel{a="22",b=2}, new testmodel{a="333",b=3}, new testmodel{a="4444",b=4}, new testmodel{a="55555", b=5}}
var averageEvaluator = new System.Func<IEnumerable<testmodel>, IEnumerable<testmodel>>((pos) => {
var avg = pos.Average(x=>x.b);
return pos.Where(x=>x.b < avg ).ToArray();
})
var results = averageEvaluator(list)
foreach(var x in results){ print(x.a);}

C# Linq Groupby and Where

I have a custom list, shown below.
/// <summary>An action entry with a name, a date and an event code/description pair.</summary>
class ActionToDo
{
    /// <summary>Name of the action.</summary>
    public string Name { get; set; }

    /// <summary>Date of the action.</summary>
    public DateTime dtDate { get; set; }

    /// <summary>Code of the event, e.g. "AQ".</summary>
    public string EventCode { get; set; }

    /// <summary>Description of the event.</summary>
    public string EventDescription { get; set; }
}
What I would like to do is find items which have the same date & same event description and where the EventCode = "AQ". I am guessing LINQ is the best way to achieve this? I do not know how to do this though using LINQ.
In my head using Sql I think it would roughly be something like below.
SELECT * FROM SomeTable
WHERE [EventDescription] = 'AQ'
GROUP BY [dtDate], [EventDescription]
.Where(x=> x.EventDescription == "AQ")
.GroupBy(x => new { x.dtDate.Date, x.EventDescription})
So let's assume you have a
List<ActionToDo> actionToDo = new List<ActionToDo>();
// Filter first, then group what is left by calendar day and description.
// '==' compares; the single '=' used before is an assignment and does not compile here.
// NOTE(review): the question asks only for EventCode == "AQ"; the extra test on
// EventDescription mirrors the SQL sketch in the question - confirm it is wanted.
var groups = actionToDo
    .Where(i => i.EventDescription == "AQ" && i.EventCode == "AQ")
    .GroupBy(i => new { i.dtDate.Date, i.EventDescription });
Here is a LINQ query expressed using query syntax that meets your requirements. In addition, it also sorts the grouping properties dtDate and EventDescription and then the members of each group by Name:
var actions = new List<ActionToDo>();
// populate 'actions'
// Group on the calendar day (dtDate.Date) so that entries on the same date but with
// different times of day still land in the same group.
var results =
    from a in actions
    where a.EventCode == "AQ"
    orderby a.dtDate, a.EventDescription, a.Name
    group a by new { a.dtDate.Date, a.EventDescription };
To demonstrate this query, I created a program with some sample ActionToDo data in random order. Please see below for the formatted output from the program followed by the program itself.
Demonstration Program Output
[2014-02-12] [Desc.AQ.12]
AQ.12a
AQ.12b
[2014-02-13] [Desc.AQ.13]
AQ.13a
AQ.13b
AQ.13c
[2014-02-14] [Desc.AQ.14]
AQ.14a
AQ.14b
Demonstration Program
using System;
using System.Collections.Generic;
using System.Linq;
/// <summary>
/// Demonstrates filtering actions by event code and grouping them by date and description.
/// </summary>
class GroupByDemo
{
    /// <summary>
    /// Builds sample data in random order, keeps the "AQ" actions, sorts them, groups them by
    /// calendar day and description, and prints each group followed by its member names.
    /// </summary>
    public static void Main(string[] args)
    {
        var actions = new List<ActionToDo>()
        {
            new ActionToDo("AQ.14b", "2014-02-14", "AQ", "Desc.AQ.14"),
            new ActionToDo("AQ.12a", "2014-02-12", "AQ", "Desc.AQ.12"),
            new ActionToDo("AQ.13b", "2014-02-13", "AQ", "Desc.AQ.13"),
            new ActionToDo("XX.01", "2014-02-01", "XX", "Desc.XX.01"),
            new ActionToDo("AQ.14a", "2014-02-14", "AQ", "Desc.AQ.14"),
            new ActionToDo("AQ.12b", "2014-02-12", "AQ", "Desc.AQ.12"),
            new ActionToDo("AQ.13a", "2014-02-13", "AQ", "Desc.AQ.13"),
            new ActionToDo("XX.02", "2014-02-02", "XX", "Desc.XX.02"),
            new ActionToDo("AQ.13c", "2014-02-13", "AQ", "Desc.AQ.13"),
            new ActionToDo("XX.03", "2014-02-03", "XX", "Desc.XX.03")
        };
        // Group on the calendar day (dtDate.Date) so that entries on the same date but with
        // different times of day still land in the same group.
        var results =
            from a in actions
            where a.EventCode == "AQ"
            orderby a.dtDate, a.EventDescription, a.Name
            group a by new { a.dtDate.Date, a.EventDescription };
        foreach (var group in results)
        {
            // Format with the invariant culture so the printed year does not depend on the
            // calendar of the machine's current culture.
            Console.WriteLine("[{0}] [{1}]",
                group.Key.Date.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture),
                group.Key.EventDescription);
            foreach (var action in group)
            {
                Console.WriteLine(" {0}", action.Name);
            }
        }
    }
}
/// <summary>An action entry with a name, a date and an event code/description pair.</summary>
class ActionToDo
{
    public string Name {get;set;}
    public DateTime dtDate {get;set;}
    public string EventCode {get;set;}
    public string EventDescription {get;set;}

    /// <summary>Creates an action from its textual parts.</summary>
    /// <param name="name">Name of the action.</param>
    /// <param name="dtDateString">Date text such as "2014-02-14".</param>
    /// <param name="eventCode">Event code, e.g. "AQ".</param>
    /// <param name="eventDescription">Event description.</param>
    /// <exception cref="FormatException">Thrown by DateTime.Parse when the date text cannot be parsed.</exception>
    public ActionToDo(
        string name,
        string dtDateString,
        string eventCode,
        string eventDescription)
    {
        this.Name = name;
        // Parse with the invariant culture so the same text yields the same date on every
        // machine, regardless of the current culture's date conventions.
        this.dtDate = DateTime.Parse(dtDateString, System.Globalization.CultureInfo.InvariantCulture);
        this.EventCode = eventCode;
        this.EventDescription = eventDescription;
    }
}

Categories

Resources