@@ -58,7 +58,9 @@ template<typename Invalid, typename Term, typename Levenshtein>
5858inline auto executeLevenshtein (byte_type max_distance,
5959 by_edit_distance_options::pdp_f provider,
6060 bool with_transpositions,
61- Invalid inv, Term t, Levenshtein lev) {
61+ const bytes_ref prefix, const bytes_ref target,
62+ Invalid&& inv, Term&& t, Levenshtein&& lev) {
63+
6264 if (!provider) {
6365 provider = &default_pdp;
6466 }
@@ -74,7 +76,7 @@ inline auto executeLevenshtein(byte_type max_distance,
7476 return inv ();
7577 }
7678
77- return lev (d);
79+ return lev (d, prefix, target );
7880}
7981
8082template <typename StatesType>
@@ -186,17 +188,19 @@ template<typename Collector>
186188bool collect_terms (
187189 const index_reader& index,
188190 const string_ref& field,
191+ const bytes_ref& prefix,
189192 const bytes_ref& term,
190193 const parametric_description& d,
191194 Collector& collector) {
192- const auto acceptor = make_levenshtein_automaton (d, term);
195+ const auto acceptor = make_levenshtein_automaton (d, prefix, term);
193196
194197 if (!validate (acceptor)) {
195198 return false ;
196199 }
197200
198201 auto matcher = make_automaton_matcher (acceptor);
199- const uint32_t utf8_term_size = std::max (1U , uint32_t (utf8_utils::utf8_length (term)));
202+ const uint32_t utf8_term_size = std::max (1U , uint32_t (utf8_utils::utf8_length (prefix)) +
203+ uint32_t (utf8_utils::utf8_length (term)));
200204 const byte_type max_distance = d.max_distance () + 1 ;
201205
202206 for (auto & segment : index) {
@@ -217,6 +221,7 @@ filter::prepared::ptr prepare_levenshtein_filter(
217221 const order::prepared& order,
218222 boost_t boost,
219223 const string_ref& field,
224+ const bytes_ref& prefix,
220225 const bytes_ref& term,
221226 size_t terms_limit,
222227 const parametric_description& d) {
@@ -228,13 +233,13 @@ filter::prepared::ptr prepare_levenshtein_filter(
228233 all_terms_collector<decltype (states)> term_collector (states, field_stats, term_stats);
229234 term_collector.stat_index (0 ); // aggregate stats from different terms
230235
231- if (!collect_terms (index, field, term, d, term_collector)) {
236+ if (!collect_terms (index, field, prefix, term, d, term_collector)) {
232237 return filter::prepared::empty ();
233238 }
234239 } else {
235240 top_terms_collector term_collector (terms_limit, field_stats);
236241
237- if (!collect_terms (index, field, term, d, term_collector)) {
242+ if (!collect_terms (index, field, prefix, term, d, term_collector)) {
238243 return filter::prepared::empty ();
239244 }
240245
@@ -267,23 +272,27 @@ DEFINE_FACTORY_DEFAULT(by_edit_distance)
267272
268273/* static*/ field_visitor by_edit_distance::visitor (const options_type::filter_options& opts) {
269274 return executeLevenshtein (
270- opts.max_distance , opts.provider , opts.with_transpositions ,
275+ opts.max_distance , opts.provider , opts.with_transpositions , opts. prefix , opts. term ,
271276 []() -> field_visitor {
272277 return [](const sub_reader&, const term_reader&, filter_visitor&){};
273278 },
274279 [&opts]() -> field_visitor {
275280 // must copy term as it may point to temporary string
276- return [term = opts.term ](
281+ return [target = opts. prefix + opts.term ](
277282 const sub_reader& segment,
278283 const term_reader& field,
279284 filter_visitor& visitor){
280- return by_term::visit (segment, field, term , visitor);
285+ return by_term::visit (segment, field, target , visitor);
281286 };
282287 },
283- [&opts](const parametric_description& d) -> field_visitor {
288+ [](const parametric_description& d,
289+ const bytes_ref prefix,
290+ const bytes_ref term) -> field_visitor {
284291 struct automaton_context : util::noncopyable {
285- automaton_context (const parametric_description& d, const bytes_ref& term)
286- : acceptor(make_levenshtein_automaton(d, term)),
292+ automaton_context (const parametric_description& d,
293+ const bytes_ref& prefix,
294+ const bytes_ref& term)
295+ : acceptor(make_levenshtein_automaton(d, prefix, term)),
287296 matcher (make_automaton_matcher(acceptor)) {
288297 }
289298
@@ -292,13 +301,14 @@ DEFINE_FACTORY_DEFAULT(by_edit_distance)
292301 };
293302
294303 // FIXME
295- auto ctx = memory::make_shared<automaton_context>(d, opts. term);
304+ auto ctx = memory::make_shared<automaton_context>(d, prefix, term);
296305
297306 if (!validate(ctx->acceptor)) {
298307 return [](const sub_reader&, const term_reader&, filter_visitor&){};
299308 }
300309
301- const uint32_t utf8_term_size = std::max(1U , uint32_t (utf8_utils::utf8_length(opts.term)));
310+ const uint32_t utf8_term_size = std::max(1U , uint32_t (utf8_utils::utf8_length(prefix) +
311+ utf8_utils::utf8_length (term)));
302312 const byte_type max_distance = d.max_distance() + 1 ;
303313
304314 return [ctx, utf8_term_size, max_distance](
@@ -321,18 +331,30 @@ DEFINE_FACTORY_DEFAULT(by_edit_distance)
321331 size_t scored_terms_limit,
322332 byte_type max_distance,
323333 options_type::pdp_f provider,
324- bool with_transpositions) {
334+ bool with_transpositions,
335+ const bytes_ref& prefix) {
336+
325337 return executeLevenshtein (
326- max_distance, provider, with_transpositions,
338+ max_distance, provider, with_transpositions, prefix, term,
327339 []() -> filter::prepared::ptr {
328340 return prepared::empty ();
329341 },
330- [&index, &order, boost, &field, &term]() -> filter::prepared::ptr {
331- return by_term::prepare (index, order, boost, field, term);
342+ [&index, &order, boost, &field, &prefix, &term]() -> filter::prepared::ptr {
343+ if (!prefix.empty () && !term.empty ()) {
344+ bstring target;
345+ target.reserve (prefix.size () + term.size ());
346+ target += prefix;
347+ target += term;
348+ return by_term::prepare (index, order, boost, field, target);
349+ }
350+
351+ return by_term::prepare (index, order, boost, field, prefix.empty () ? term : prefix);
332352 },
333- [&field, &term, scored_terms_limit, &index, &order, boost](
334- const parametric_description& d) -> filter::prepared::ptr {
335- return prepare_levenshtein_filter (index, order, boost, field, term, scored_terms_limit, d);
353+ [&field, scored_terms_limit, &index, &order, boost](
354+ const parametric_description& d,
355+ const bytes_ref prefix,
356+ const bytes_ref term) -> filter::prepared::ptr {
357+ return prepare_levenshtein_filter (index, order, boost, field, prefix, term, scored_terms_limit, d);
336358 }
337359 );
338360}
0 commit comments