ElasticSearch & Nest - Wrong conversion - c#

I have this pure call to Elastic that works. It returns 29 docs:
GET /idxsearch-test/movies/_search
{
"size": 20,
"query": {
"bool": {
"must": {
"bool" : {
"should": [
{"term": {"tag.name": "Paris"}},
{"multi_match" : {
"operator": "and",
"query" : "Paris",
"fields": ["movie_title.default^10",
"movie_title.snowball^2",
"movie_title.shingles^2",
"movie_title.ngrams"]}}
]
}
},
"filter": {
"term": { "is_adult": false }
}
}
},
"_source": ["id_content", "movie_title", "vote_average", "tag.name", "is_adult"]
}
I need to use Nest, so I converted to this. It returns 0:
var vod = client.Search<dynamic>(s => s
.Size(10)
.From(1)
.Index(defaultIndex)
.Type("movies")
.Query(qry1 => qry1
.FunctionScore(fs =>
fs.Query(qry2 =>
qry2.Bool(bool1 =>
bool1.Must(must1 =>
must1.Bool(bool2 =>
bool2.Should(should1 =>
{
QueryContainer qc = null;
qc &= should1.Term(tt => tt.Field("tag.name").Value(keywords));
qc &= should1.MultiMatch(mm1 => mm1.Fields(ff => ff
.Field("movie_title.default^10")
.Field("movie_title.snowball^2")
.Field("movie_title.shingles^2")
.Field("movie_title.ngrams"))
.Operator(Operator.And)
.Query(keywords));
return qc;
})
)
)
.Filter(fil =>
{
QueryContainer query = null;
if (!includeAdult)
query &= fil.Terms(fil2 => fil2.Field("is_adult").Terms(false));
return query;
})
)
)
.BoostMode(FunctionBoostMode.Sum)
.Functions(ff => ff.FieldValueFactor(fv => fv.Field("vote_average")
.Factor(0.5)
.Modifier(FieldValueFactorModifier.Log1P)))
))
);
But the results are different... What am I missing?
Is there a way to make the same call in a cleaner and correct way?
cheers

The first query can be written as
//Fluent
// Mirrors the raw query: bool { must: bool { should: [term, multi_match] }, filter: term }.
client.Search<dynamic>(s => s
    .Index("idxsearch-test")
    .Type("movies")
    .Take(20)
    .Query(q => q
        .Bool(b => b
            // `||` combines the two clauses into a nested bool query with "should" clauses.
            .Must(m => m.Term(t => t.Field("tag.name").Value("Paris")) ||
                       m.MultiMatch(mm => mm
                           .Fields(f => f
                               .Field("movie_title.default", 10)
                               .Field("movie_title.snowball", 2)
                               .Field("movie_title.shingles", 2)
                               .Field("movie_title.ngrams"))
                           // The raw query sets both of these; without Query the
                           // multi_match has nothing to search for.
                           .Operator(Operator.And)
                           .Query("Paris")))
            // Restrict to non-adult titles only when adult content is excluded.
            // Returning null leaves the filter clause out of the request.
            .Filter(f => !includeAdult ? f.Term(t => t.Field("is_adult").Value(false)) : null)))
    .Source(sc => sc.Includes(i => i
        .Field("id_content")
        .Field("movie_title")
        .Field("vote_average")
        .Field("tag.name")
        .Field("is_adult"))));
//Object
// Same request as the fluent form, built with the object initializer syntax.
client.Search<dynamic>(new SearchRequest<dynamic>("idxsearch-test", "movies")
{
    Size = 20,
    Query = new BoolQuery
    {
        Must = new QueryContainer[]
        {
            new BoolQuery
            {
                Should = new QueryContainer[]
                {
                    new TermQuery { Field = "tag.name", Value = "Paris" },
                    new MultiMatchQuery
                    {
                        Fields = new[] { "movie_title.default^10", "movie_title.snowball^2", "movie_title.shingles^2", "movie_title.ngrams" },
                        // Both are present in the raw query; without Query the
                        // multi_match has nothing to search for.
                        Operator = Operator.And,
                        Query = "Paris"
                    }
                }
            }
        },
        // Restrict to non-adult titles only when adult content is excluded.
        Filter = !includeAdult
            ? new QueryContainer[]
            {
                new TermQuery { Field = "is_adult", Value = false }
            }
            : null
    },
    Source = new Union<bool, ISourceFilter>(new SourceFilter { Includes = new[] { "id_content", "movie_title", "vote_average", "tag.name", "is_adult" } })
});

Related

How can I generate filtered nested aggregation using Nest Fluent DSL?

I have the following ElasticSearch query:
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"fuzzy": {
"productAttributes.name": {
"value": "SS",
"fuzziness": 3
}
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}
I am using Nest as client library and here how I generate nested aggregation query using Fluent DSL:
var searchResponse = elasticClient.Search<ProductType>(s => s
.Index(indices)
.Type(Types.Type(typeof(ProductType)))
.Size(0)
.Aggregations(a => a
.Nested("product_attribute_nested_agg", n => n
.Path(Infer.Field<ProductType>(ff => ff.ProductAttributes))
.Aggregations(aa => aa
.Terms("terms_nested_agg", t => t
.Field(p => p.ProductAttributes.Suffix("name"))
)
)
)
)
);
But how can I generate the inner filter using Fluent DSL syntax?
After some investigation and searching, I was able to generate filtered nested aggregation using Nest fluent DSL:
// Builds the aggregation tree: nested("product_attribute_nested_agg")
//   -> filter("inner", fuzzy on the attribute name) -> terms("terms_nested_agg").
var searchResponse = elasticClient.Search<ProductType>(s => s
.Index(indices)
.Type(Types.Type(typeof(ProductType)))
// Size 0: no hits are requested, matching "size": 0 in the raw query.
.Size(0)
.Aggregations(a => a
.Nested("product_attribute_nested_agg", n => n
.Path(Infer.Field<ProductType>(ff => ff.ProductAttributes))
.Aggregations(a1 => a1
// Filter aggregation named "inner"; its own sub-aggregations are declared after the filter.
.Filter("inner", ia => ia
.Filter(f => f
.Fuzzy(fuzzy => fuzzy
.Field(Infer.Field<ProductType>(ff => ff.ProductAttributes.First().Name))
.Value(productAttribute)
.Fuzziness(Fuzziness.EditDistance(3))
)
).Aggregations(na => na
// Terms aggregation on the attribute name, nested under the "inner" filter.
.Terms("terms_nested_agg", t => t
.Field(p => p.ProductAttributes.Suffix("name"))
)
)
)
)
)
)
);
// Collect the term buckets into suggestion results.
// Each level is looked up by name (nested -> filter "inner" -> terms) instead of
// taking whichever sub-aggregation comes first, and a missing level yields an
// empty result instead of a NullReferenceException.
var result = new List<ProductAttributesSuggestionResult>();
var nestedAgg = searchResponse.Aggregations.Nested("product_attribute_nested_agg");
var inner = nestedAgg?.Filter("inner");
var termsAgg = inner?.Terms("terms_nested_agg");
if (termsAgg != null)
{
    foreach (var bucket in termsAgg.Buckets)
    {
        result.Add(new ProductAttributesSuggestionResult
        {
            Name = bucket.Key,
            // DocCount is nullable; a missing count is reported as zero.
            ProductsCount = bucket.DocCount ?? 0
        });
    }
}
I hope my answer helps others having the same issue.

LINQ - sum of column in hierarchical data

My data looks like this:
I am trying to create a JSON output (using JSON .NET) which will have the sum of the Value column by levels.
For example:
{
'id': 'AB',
'sum': '53',
'level2' : [
{
'id' : 'CD',
'sum' : '23',
'level3' : [
{
'id' : 'd1',
'sum' : '12'
},
{
'id' : 'd2',
'sum' : '11'
}
]
...
I am trying to use LINQ to create this. So far I have the following code:
var query = reader.SelectRows(r => new
{
level1 = r["level1"].ToString(),
sum = r["sum"] != DBNull.Value ? Convert.ToDouble(r["sum"]) : 0,
level2 = new
{
level2 = r["level2"].ToString(),
sum = r["sum "] != DBNull.Value ? Convert.ToDouble(r["sum"]) : 0,
level3 = new
{
level3 = r["level3 "].ToString(),
sum = r["sum"] != DBNull.Value ? Convert.ToDouble(r["sum"]) : 0
}
}
})
.GroupBy(r => new { r.level1 })
.Select(g => new
{
id = g.Key.level1,
sum = g.Sum(x => x.sum),
level2 = g.GroupBy(l => new { l.level2.level2 })
.Select(l => new
{
id = l.Key.level2,
sum = g.Sum(y => y.sum),
level3 = l.GroupBy(m => new { m.level2.level3.level3 })
.Select(m => new
{
id = m.Key.level3,
sum = g.Sum(z => z.sum),
})
})
});
retJSON = JsonConvert.SerializeObject(new { data = query }, Formatting.Indented);
The SelectRows function is like this:
// Adapted from this answer https://stackoverflow.com/a/1202973
// To https://stackoverflow.com/questions/1202935/convert-rows-from-a-data-reader-into-typed-results
// By https://stackoverflow.com/users/3043/joel-coehoorn
/// <summary>
/// Lazily projects each remaining row of <paramref name="reader"/> through
/// <paramref name="select"/>.
/// </summary>
/// <typeparam name="T">Type produced for each row.</typeparam>
/// <param name="reader">Reader to advance; it is read forward as the result is enumerated.</param>
/// <param name="select">Projection applied to the reader's current record.</param>
/// <returns>A deferred sequence with one element per row read.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="reader"/> or <paramref name="select"/> is null.
/// </exception>
public static IEnumerable<T> SelectRows<T>(this IDataReader reader, Func<IDataRecord, T> select)
{
    // Validate here rather than inside the iterator: an iterator body does not run
    // until the first MoveNext, which would delay the failure to enumeration time.
    if (reader == null)
    {
        throw new ArgumentNullException(nameof(reader));
    }

    if (select == null)
    {
        throw new ArgumentNullException(nameof(select));
    }

    return SelectRowsIterator(reader, select);
}

// Deferred part of SelectRows: yields one projected value per successful Read().
private static IEnumerable<T> SelectRowsIterator<T>(IDataReader reader, Func<IDataRecord, T> select)
{
    while (reader.Read())
    {
        yield return select(reader);
    }
}
However, I am getting sum repeated at every level, i.e. the same value. Any direction on how to achieve this will be greatly appreciated.
I think your query is nearly there; it needs just 2 changes (shown by the comments below).
.GroupBy(r => new { r.level1 })
.Select(g => new
{
id = g.Key.level1,
sum = g.Sum(x => x.sum),
level2 = g.GroupBy(l => new { l.level2.level2 })
.Select(l => new
{
id = l.Key.level2,
sum = l.Sum(y => y.sum), //l not g
level3 = l.GroupBy(m => new { m.level2.level3.level3 })
.Select(m => new
{
id = m.Key.level3,
sum = m.Sum(z => z.sum), //m not g
})
})
});

How to prioritize based on range/filter of Elastic Search DSL, such that a list can be filtered, first to show with availability > 60% first and then <

The applicants need to be sorted by relevance, based on their availability % in the month.
First should come the applicants with availability of more than 60%, and then the applicants with availability of less than 60%.
The Fluent DSL query using ElasticSearch.net which I am trying with
var response = await
_elasticClient.SearchAsync<ApplicantsWithDetailsResponse>(s =>
s.Aggregations(a => a
.Filter("higer_average", f => f.Filter(fd => fd.Range(r => r.Field(p
=> p.AvailablePercentage).GreaterThanOrEquals(60).Boost(5))))
.Filter("lower_average", f => f.Filter(fd => fd.Range(r => r.Field(p
=> p.AvailablePercentage).GreaterThan(0).LessThan(60).Boost(3)))
)));
or
var response = await _elasticClient.SearchAsync<ApplicantsWithDetailsResponse>(
s => s
.Query(q => q
.Bool(p =>
p.Must(queryFilter => queryFilter.MatchAll())
.Filter(f => f.Range(r => r.Field("AvailablePercentage").GreaterThanOrEquals(60)))
.Boost(5)
.Filter(f => f.Range(r => r.Field("AvailablePercentage").GreaterThan(0).LessThan(60)))
.Boost(1.2)
)));
The list of applicants returned does not follow this logic; they get mixed.
Even if I try to filter to show only values greater than 60, that does not work either.
Your query is not correct; it serializes to
{
"query": {
"bool": {
"boost": 1.2,
"filter": [
{
"range": {
"AvailablePercentage": {
"gt": 0.0,
"lt": 60.0
}
}
}
],
"must": [
{
"match_all": {}
}
]
}
}
}
the boost is applied to the entire bool query
the last Filter assigned overwrites any previous filters
Filters are anded, so all would need to be satisfied for a match
It's useful during development to be able to observe what JSON the client sends to Elasticsearch. There are numerous ways that you can do this, and one that is useful is
// Client configured to echo every request and response to the console.
// DisableDirectStreaming() is set so that the request/response bytes read below
// are populated when the OnRequestCompleted callback runs.
var defaultIndex = "default-index";
var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
var settings = new ConnectionSettings(pool)
    .DefaultIndex(defaultIndex)
    .DisableDirectStreaming()
    .PrettyJson()
    .OnRequestCompleted(callDetails =>
    {
        // Request: method and URI, followed by the body when one was sent.
        var requestLine = $"{callDetails.HttpMethod} {callDetails.Uri}";
        var requestBytes = callDetails.RequestBodyInBytes;
        Console.WriteLine(requestBytes == null
            ? requestLine
            : $"{requestLine} \n{Encoding.UTF8.GetString(requestBytes)}");
        Console.WriteLine();

        // Response: status, optional body, then a separator line.
        var statusLine = $"Status: {callDetails.HttpStatusCode}\n";
        var separator = $"{new string('-', 30)}\n";
        var responseBytes = callDetails.ResponseBodyInBytes;
        Console.WriteLine(responseBytes == null
            ? statusLine + separator
            : statusLine + $"{Encoding.UTF8.GetString(responseBytes)}\n" + separator);
    });
var client = new ElasticClient(settings);
This will write all requests and responses out to the Console. Note that you may not want to do this in production for every request, as there is a performance overhead in buffering requests and responses in this way.
Your query should look something like
var response = client.Search<ApplicantsWithDetailsResponse>(s => s
.Query(q => q
.Bool(p => p
.Must(queryFilter => queryFilter
.MatchAll()
)
.Should(f => f
.Range(r => r
.Field("AvailablePercentage")
.GreaterThanOrEquals(60)
.Boost(5)
), f => f
.Range(r => r
.Field("AvailablePercentage")
.GreaterThan(0)
.LessThan(60)
.Boost(1.2)
)
)
.MinimumShouldMatch(1)
)
)
);
Which emits the following query
{
"query": {
"bool": {
"minimum_should_match": 1,
"must": [
{
"match_all": {}
}
],
"should": [
{
"range": {
"AvailablePercentage": {
"boost": 5.0,
"gte": 60.0
}
}
},
{
"range": {
"AvailablePercentage": {
"boost": 1.2,
"gt": 0.0,
"lt": 60.0
}
}
}
]
}
}
}
Combine range queries with should clauses and specify that at least one must match using MinimumShouldMatch. This is needed because of the presence of a must clause, which means that the should clauses act as a boosting signal to documents, but a document does not have to satisfy any of the clauses to be considered a match. With MinimumShouldMatch set to 1, at least one of the should clauses has to be satisfied to be considered a match.
Since the must clause is a match_all query in this case, we could simply omit it and remove MinimumShouldMatch. A should clause without a must clause implies that at least one of the clauses must match.
We can also combine queries using operator overloading, for brevity. The final query would look like
var response = client.Search<ApplicantsWithDetailsResponse>(s => s
.Query(q => q
.Range(r => r
.Field("AvailablePercentage")
.GreaterThanOrEquals(60)
.Boost(5)
) || q
.Range(r => r
.Field("AvailablePercentage")
.GreaterThan(0)
.LessThan(60)
.Boost(1.2)
)
)
);
which emits the query
{
"query": {
"bool": {
"should": [
{
"range": {
"AvailablePercentage": {
"boost": 5.0,
"gte": 60.0
}
}
},
{
"range": {
"AvailablePercentage": {
"boost": 1.2,
"gt": 0.0,
"lt": 60.0
}
}
}
]
}
}
}

NEST FunctionScore() returns all indexed items before adding functions, throws exceptions after adding them

Alright, so the query works perfectly in Sense in Chrome. I use the following query:
{
"size":127,
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"prefix": {
"name": {
"value": "incomp"
}
}
},
{
"match": {
"name": "a word that is"
}
}
]
}
},
"functions": [
{
"exp": {
"date": {
"origin": "now/d",
"scale": "3w",
"offset": "10d",
"decay": "0.88"
}
}
}
]
}
}
}
In short, I match on the indexed "name" property of a custom type in ES, giving priority to recently added items and supporting "suggestions as you type" - thus the prefix query. It works perfectly well, tuned as it is, so my next step would be to reproduce in NEST.
However, I'm facing some issues with the .NET NEST code below:
var results4 = _client.Search<customDataType>(
s => s.Size(5030)
.Query(q => q
.FunctionScore(fs => fs
.Name("another_named_query")
.BoostMode(FunctionBoostMode.Multiply)
.ScoreMode(FunctionScoreMode.Multiply)
.Query(qu => qu
.Bool(b => b
.Must(m => m
.Prefix(p => p
.Field(ff => ff.Name)
.Value(prefixVal)))
.Must(m2 => m2
.Match(mh => mh
.Field(f2 => f2.Name)
.Query(stringBeforePrefixVal)))))
/*.Functions( fcs => fcs.ExponentialDate(
exp => exp
.Origin(DateMath.Now)
.Scale(new Time(1814400000))
.Offset(new Time(864000000))
.Decay(0.88d))
)*/)));
I can't figure out why any attempt to use the "FunctionScore" method results in what a MatchAll() would do - all records are returned.
Meanwhile, when adding the Functions (commented above) I get an UnexpectedElasticsearchClientException with a NullReference inner exception at Nest.FieldResolver.Resolve(Field field) in C:\code\elasticsearch-net\src\Nest\CommonAbstractions\Infer\Field\FieldResolver.cs:line 31.
I'm baffled by all of this, and there don't seem to be similar problems that I can use as a starting point. Is there anything I can do to get the query above running, or should I resort to manually doing a restful API call?
Almost correct, but you're missing the field on which the exponential date decay function should run. Assuming your POCO looks like
/// <summary>
/// Example POCO for the indexed type used by the function_score query in this answer.
/// </summary>
public class customDataType
{
/// <summary>Field targeted by the prefix and match queries.</summary>
public string Name { get; set; }
/// <summary>Field the exponential date decay function is run on.</summary>
public DateTime Date { get; set; }
}
the query would be
var prefixVal = "incomp";
var stringBeforePrefixVal = "a word that is";
var results4 = client.Search<customDataType>(s => s
    .Size(5030)
    .Query(q => q
        .FunctionScore(fs => fs
            .Name("another_named_query")
            .BoostMode(FunctionBoostMode.Multiply)
            .ScoreMode(FunctionScoreMode.Multiply)
            .Query(qu => qu
                .Bool(b => b
                    // Pass both clauses to a single Must call. Chaining
                    // .Must(...).Must(...) keeps only the last call, which would
                    // silently drop the prefix clause from the request.
                    .Must(m => m
                        .Prefix(p => p
                            .Field(ff => ff.Name)
                            .Value(prefixVal)
                        ), m2 => m2
                        .Match(mh => mh
                            .Field(f2 => f2.Name)
                            .Query(stringBeforePrefixVal)
                        )
                    )
                )
            )
            .Functions(fcs => fcs
                .ExponentialDate(exp => exp
                    // The decay function needs the field it runs on.
                    .Field(f => f.Date)
                    .Origin("now/d")
                    .Scale("3w")
                    .Offset("10d")
                    .Decay(0.88)
                )
            )
        )
    )
);
which yields
{
  "size": 5030,
  "query": {
    "function_score": {
      "_name": "another_named_query",
      "query": {
        "bool": {
          "must": [
            {
              "prefix": {
                "name": {
                  "value": "incomp"
                }
              }
            },
            {
              "match": {
                "name": {
                  "query": "a word that is"
                }
              }
            }
          ]
        }
      },
      "functions": [
        {
          "exp": {
            "date": {
              "origin": "now/d",
              "scale": "3w",
              "offset": "10d",
              "decay": 0.88
            }
          }
        }
      ],
      "score_mode": "multiply",
      "boost_mode": "multiply"
    }
  }
}
You can take advantage of operator overloading in NEST to shorten the bool query further, by &&ing the prefix and match query
var results4 = client.Search<customDataType>(s => s
.Size(5030)
.Query(q => q
.FunctionScore(fs => fs
.Name("another_named_query")
.BoostMode(FunctionBoostMode.Multiply)
.ScoreMode(FunctionScoreMode.Multiply)
.Query(qu => qu
.Prefix(p => p
.Field(ff => ff.Name)
.Value(prefixVal)
) && qu
.Match(mh => mh
.Field(f2 => f2.Name)
.Query(stringBeforePrefixVal)
)
)
.Functions(fcs => fcs
.ExponentialDate(exp => exp
.Field(f => f.Date)
.Origin("now/d")
.Scale("3w")
.Offset("10d")
.Decay(0.88)
)
)
)
)
);

Match and Match_phrase on multiple fields with the same query string- Elastic Search(Nest)

I have to search for a keyword on multiple fields for the same query string.
"bool": {
"should": [
{
"match": {
"ABC": "Apple"
}
},
{
"match": {
"XYZ": "Apple"
}
}
]
}
When I wrote the query DSL, it was translated to a multi_match query (I am not sure whether the JSON above and the DSL below are equivalent):
.Bool(b => b
.Should(sh => sh
.MultiMatch(c => c
.Fields(f => f.Field(p => p.ABC).Field("XYZ"))
.Query(keyz)))))
Similarly, I want to write a DSL query, but I want to perform a match_phrase operation. Can someone help me solve this?
TIA
Given a document type
/// <summary>
/// Example document type for the match queries in this answer.
/// </summary>
public class Document
{
/// <summary>First field the keyword is matched against.</summary>
public string ABC { get; set; }
/// <summary>Second field the keyword is matched against.</summary>
public string XYZ { get; set; }
}
This would be
var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
var defaultIndex = "default-index";
var connectionSettings = new ConnectionSettings(pool)
.DefaultIndex(defaultIndex)
.DefaultFieldNameInferrer(p => p);
var client = new ElasticClient(connectionSettings);
var keyz = "Apple";
client.Search<Document>(s => s
.Query(q => q
.Bool(b => b
.Should(sh => sh
.Match(c => c
.Field(p => p.ABC)
.Query(keyz)
),
sh => sh
.Match(c => c
.Field(p => p.XYZ)
.Query(keyz)
)
)
)
)
);
You can shorten this by taking advantage of operator overloading
client.Search<Document>(s => s
.Query(q => q
.Match(c => c
.Field(p => p.ABC)
.Query(keyz)
)
|| q.Match(c => c
.Field(p => p.XYZ)
.Query(keyz)
)
)
);
Both produce
{
"query": {
"bool": {
"should": [
{
"match": {
"ABC": {
"query": "Apple"
}
}
},
{
"match": {
"XYZ": {
"query": "Apple"
}
}
}
]
}
}
}

Categories

Resources