8000 Feature/search 213 optimize levenshtein startswith by Dronplane · Pull Request #14744 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Feature/search 213 optimize levenshtein startswith #14744

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Sep 10, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
64944e5
group conjunction nodes
Dronplane Sep 3, 2021
c6d0ab3
Merge branch 'devel' into feature/SEARCH-213-Optimize-Levenshtein-sta…
Dronplane Sep 6, 2021
4664cc4
wip
Dronplane Sep 6, 2021
b545d56
tests
Dronplane Sep 6, 2021
b026fa4
tests
Dronplane Sep 7, 2021
9dfe7b9
tests
Dronplane Sep 7, 2021
0abc986
Merge branch 'devel' into feature/SEARCH-213-Optimize-Levenshtein-sta…
Dronplane Sep 7, 2021
5b8eda2
moar tests
Dronplane Sep 7, 2021
6f11b5d
add option to explicitly block merging
Dronplane Sep 7, 2021
337cd90
moar corner cases
Dronplane Sep 8, 2021
17ea36a
empty case optimization
Dronplane Sep 8, 2021
55c038c
optimization
Dronplane Sep 8, 2021
229c972
cleanup
Dronplane Sep 8, 2021
f5d8684
fix startsWith boost
Dronplane Sep 8, 2021
af37ee0
cleanup
Dronplane Sep 8, 2021
eae7ebb
wip
Dronplane Sep 8, 2021
26386d4
wip
Dronplane Sep 8, 2021
37be60d
comments
Dronplane Sep 8, 2021
bcd2093
add node serialization tests
Dronplane Sep 8, 2021
d649660
Update arangod/IResearch/IResearchFilterFactory.cpp
Dronplane Sep 9, 2021
4cd8331
adress review comments
Dronplane Sep 9, 2021
a969139
address review comments
Dronplane Sep 9, 2021
0e6b6cf
add js test
Dronplane Sep 9, 2021
0c6d9ca
adress review comments
Dronplane Sep 9, 2021
09c678c
Update CHANGELOG
Dronplane Sep 9, 2021
1708a0a
Merge branch 'devel' into feature/SEARCH-213-Optimize-Levenshtein-sta…
Dronplane Sep 9, 2021
d234738
Merge branch 'devel' into feature/SEARCH-213-Optimize-Levenshtein-sta…
Dronplane Sep 10, 2021
f89293e
None -> NONE renaming
Dronplane Sep 10, 2021
340087d
Merge branch 'feature/SEARCH-213-Optimize-Levenshtein-startswith' of …
Dronplane Sep 10, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
empty case optimization
  • Loading branch information
Dronplane committed Sep 8, 2021
commit 17ea36a45fa3640c542688f4d912d0d6dc3aeaf7
15 changes: 10 additions & 5 deletions arangod/IResearch/IResearchFilterFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3600,7 +3600,7 @@ Result fromFuncStartsWith(
return {};
}
} else {
// maybe we could enlarge prefix to cover us?
// maybe we could enlarge prefix to cover us?
if (std::memcmp(options->prefix.data(), startsWith.c_str(),
options->prefix.size()) == 0) {
// looks promising - beginning of the levenshtein prefix is ok
Expand All @@ -3616,12 +3616,17 @@ Result fromFuncStartsWith(
return {};
}
}
// FIXME: if levenshtein term.size() + prefix.size() + max_distance < startsWith.size()
// this is impossible condition so we could replace whole conjunction with an empty
// filter. But current API does not allow filters removal
// Also we could check if there is a contradiction in prefixes and make an empty filter.
}
}
if ((options->term.size() +
options->prefix.size() +
options->max_distance) < startsWith.size()) {
// last optimization effort - we can't fulfill this conjunction.
// make it empty
filter->clear();
filter->add<irs::empty>();
return {};
}
}
}
}
Expand Down
22 changes: 22 additions & 0 deletions tests/IResearch/IResearchQueryOptimization-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12139,6 +12139,28 @@ TEST_P(IResearchQueryOptimizationTest, mergeLevenshteinStartsWith) {
"RETURN d",
expected);
}
// make it empty with prefix
{
irs::Or expected;
auto& andFilter = expected.add<irs::And>().add<irs::empty>();
assertFilterOptimized(
vocbase(),
"FOR d IN testView SEARCH STARTS_WITH(d.name, 'foobar12345')"
"AND LEVENSHTEIN_MATCH(d.name, 'bar', 2, false, 63, 'foo')"
"RETURN d",
expected);
}
// make it empty
{
irs::Or expected;
auto& andFilter = expected.add<irs::And>().add<irs::empty>();
assertFilterOptimized(
vocbase(),
"FOR d IN testView SEARCH STARTS_WITH(d.name, 'foobar12345')"
"AND LEVENSHTEIN_MATCH(d.name, 'foobar', 2, false, 63)"
"RETURN d",
expected);
}
// empty prefix case - not match
{
irs::Or expected;
Expand Down
7 changes: 7 additions & 0 deletions tests/IResearch/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ namespace iresearch {
return os;
}

std::ostream& operator<<(std::ostream& os, empty const&) {
os << "EMPTY[]";
return os;
}

std::ostream& operator<<(std::ostream& os, by_edit_distance const& lev) {
os << "LEVENSHTEIN_MATCH[";
os << lev.field() << ", '";
Expand Down Expand Up @@ -180,6 +185,8 @@ namespace iresearch {
return os << static_cast<by_edit_distance const&>(filter);
} else if (type == irs::type<by_prefix>::id()) {
return os << static_cast<by_prefix const&>(filter);
} else if (type == irs::type<empty>::id()) {
return os << static_cast<empty const&>(filter);
} else {
return os << "[Unknown filter]";
}
Expand Down
0