bug-fix/internal-issue-#731 (#12457) · adamjm/arangodb@75dd04f · GitHub
[go: up one dir, main page]

Skip to content

Commit 75dd04f

Browse files
authored
bug-fix/internal-issue-#731 (arangodb#12457)
* fall back to LEVENSHTEIN_DISTANCE for the Damerau-Levenshtein case
* make Damerau-Levenshtein distance the default for LEVENSHTEIN_MATCH
1 parent 52496a8 commit 75dd04f

File tree

6 files changed

+59
-21
lines changed

6 files changed

+59
-21
lines changed

arangod/Aql/Functions.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1779,7 +1779,7 @@ AqlValue Functions::LevenshteinMatch(ExpressionContext* ctx, transaction::Method
17791779
return arangodb::aql::AqlValue{arangodb::aql::AqlValueHintNull{}};
17801780
}
17811781

1782-
bool withTranspositionsValue = false;
1782+
bool withTranspositionsValue = true;
17831783
int64_t maxDistanceValue = maxDistance.toInt64();
17841784

17851785
if (args.size() > 3) {
@@ -1794,13 +1794,13 @@ AqlValue Functions::LevenshteinMatch(ExpressionContext* ctx, transaction::Method
17941794
}
17951795

17961796
if (maxDistanceValue < 0 ||
1797-
(withTranspositionsValue &&
1798-
maxDistanceValue > arangodb::iresearch::MAX_DAMERAU_LEVENSHTEIN_DISTANCE)) {
1797+
(!withTranspositionsValue &&
1798+
maxDistanceValue > arangodb::iresearch::MAX_LEVENSHTEIN_DISTANCE)) {
17991799
registerInvalidArgumentWarning(ctx, AFN);
18001800
return AqlValue{AqlValueHintNull{}};
18011801
}
18021802

1803-
if (!withTranspositionsValue && maxDistanceValue > arangodb::iresearch::MAX_LEVENSHTEIN_DISTANCE) {
1803+
if (withTranspositionsValue && maxDistanceValue > arangodb::iresearch::MAX_DAMERAU_LEVENSHTEIN_DISTANCE) {
18041804
// fallback to LEVENSHTEIN_DISTANCE
18051805
auto const dist = Functions::LevenshteinDistance(ctx, trx, args);
18061806
TRI_ASSERT(dist.isNumber());

arangod/IResearch/IResearchFilterFactory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2402,7 +2402,7 @@ arangodb::Result getLevenshteinArguments(char const* funcName, bool isFilter,
24022402
}
24032403

24042404
// optional (3 - First) argument defines transpositions
2405-
bool withTranspositions = false;
2405+
bool withTranspositions = true;
24062406
if (3 - First < argc) {
24072407
res = ElementTraits::evaluateArg(withTranspositions, tmpValue, funcName, args, 3 - First, isFilter, ctx);
24082408

tests/Aql/LevenshteinMatchFunctionTest.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,11 @@ TEST(LevenshteinMatchFunctionTest, test) {
136136
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{3}), &Damerau);
137137
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{4}), &Levenshtein);
138138
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{4}));
139-
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{5}), &Levenshtein);
139+
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{4}), &Damerau);
140+
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{5}));
141+
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{5}), &Damerau);
142+
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{6}));
143+
assertLevenshteinMatch(true, AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{6}), &Damerau);
140144
assertLevenshteinMatch(true, AqlValue(AqlValueHintNull{}), AqlValue("aa"), AqlValue(AqlValueHintInt{2}), &Levenshtein);
141145
assertLevenshteinMatch(false, AqlValue(AqlValueHintEmptyArray{}), AqlValue("aa"), AqlValue(AqlValueHintInt{1}), &Levenshtein);
142146
assertLevenshteinMatch(true, AqlValue(AqlValueHintEmptyObject{}), AqlValue("aa"), AqlValue(AqlValueHintInt{2}), &Levenshtein);
@@ -162,6 +166,6 @@ TEST(LevenshteinMatchFunctionTest, test) {
162166
assertLevenshteinMatchFail(AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{-1}));
163167
assertLevenshteinMatchFail(AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{-1}), &Damerau);
164168
assertLevenshteinMatchFail(AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{-1}), &Levenshtein);
165-
assertLevenshteinMatchFail(AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{4}), &Damerau);
166-
assertLevenshteinMatchFail(AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{5}), &Damerau);
169+
assertLevenshteinMatchFail(AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{5}), &Levenshtein);
170+
assertLevenshteinMatchFail(AqlValue("aa"), AqlValue("aaaa"), AqlValue(AqlValueHintInt{6}), &Levenshtein);
167171
}

tests/IResearch/IResearchFilterFunction-test.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6024,7 +6024,7 @@ TEST_F(IResearchFilterFunctionTest, levenshteinMatch) {
60246024
*filter.mutable_field() = mangleStringIdentity("name");
60256025
auto* opts = filter.mutable_options();
60266026
opts->max_distance = 1;
6027-
opts->with_transpositions = false;
6027+
opts->with_transpositions = true;
60286028
opts->term = irs::ref_cast<irs::byte_type>(irs::string_ref("foo"));
60296029
opts->max_terms = arangodb::iresearch::FilterConstants::DefaultLevenshteinTermsLimit;
60306030

@@ -6038,7 +6038,7 @@ TEST_F(IResearchFilterFunctionTest, levenshteinMatch) {
60386038
expected);
60396039
}
60406040

6041-
// LEVENSHTEIN_MATCH(d.name, 'foo', 1, 42)
6041+
// LEVENSHTEIN_MATCH(d.name, 'foo', 1, false, 42)
60426042
{
60436043
irs::Or expected;
60446044
auto& filter = expected.add<irs::by_edit_distance>();
@@ -6084,6 +6084,10 @@ TEST_F(IResearchFilterFunctionTest, levenshteinMatch) {
60846084
vocbase(),
60856085
"FOR d IN myView FILTER ANALYZER(LEVENSHTEIN_MATCH(d.name[_FORWARD_('foo')], 'fooo', 0, true), 'test_analyzer') RETURN d",
60866086
expected, &ctx);
6087+
assertFilterSuccess(
6088+
vocbase(),
6089+
"FOR d IN myView FILTER ANALYZER(LEVENSHTEIN_MATCH(d.name[_FORWARD_('foo')], 'fooo', 0), 'test_analyzer') RETURN d",
6090+
expected, &ctx);
60876091
assertFilterSuccess(
60886092
vocbase(),
60896093
"LET y='o' LET transp=true LET dist=1 LET x='foo' FOR d IN myView FILTER ANALYZER(LEVENSHTEIN_MATCH(d.name[x], CONCAT('foo', y), dist-1, transp), 'test_analyzer') RETURN d",
@@ -6118,10 +6122,6 @@ TEST_F(IResearchFilterFunctionTest, levenshteinMatch) {
61186122
vocbase(),
61196123
"FOR d IN myView FILTER BOOST(ANALYZER(LEVENSHTEIN_MATCH(d.name[4], 'fooo', 2, false), 'test_analyzer'), 0.5) RETURN d",
61206124
expected, &ctx);
6121-
assertFilterSuccess(
6122-
vocbase(),
6123-
"FOR d IN myView FILTER BOOST(ANALYZER(LEVENSHTEIN_MATCH(d.name[4], 'fooo', 2), 'test_analyzer'), 0.5) RETURN d",
6124-
expected, &ctx);
61256125
assertFilterSuccess(
61266126
vocbase(),
61276127
"FOR d IN myView FILTER ANALYZER(BOOST(LEVENSHTEIN_MATCH(d.name[4], 'fooo', 2, false), 0.5), 'test_analyzer') RETURN d",
@@ -6130,10 +6130,6 @@ TEST_F(IResearchFilterFunctionTest, levenshteinMatch) {
61306130
vocbase(),
61316131
"FOR d IN myView FILTER BOOST(ANALYZER(LEVENSHTEIN_MATCH(d.name[_FORWARD_(4)], 'fooo', 2, false), 'test_analyzer'), 0.5) RETURN d",
61326132
expected, &ctx);
6133-
assertFilterSuccess(
6134-
vocbase(),
6135-
"FOR d IN myView FILTER BOOST(ANALYZER(LEVENSHTEIN_MATCH(d.name[_FORWARD_(4)], 'fooo', 2), 'test_analyzer'), 0.5) RETURN d",
6136-
expected, &ctx);
61376133
assertFilterSuccess(
61386134
vocbase(),
61396135
"LET y='o' LET transp=false LET dist=1 LET x='foo' FOR d IN myView FILTER ANALYZER(BOOST(LEVENSHTEIN_MATCH(d.name[x], CONCAT('foo', y), dist+1, transp), 0.5), 'test_analyzer') RETURN d",

tests/IResearch/IResearchQueryLevenshteinMatch-test.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,29 @@ TEST_F(IResearchQueryLevenhsteinMatchTest, test) {
249249
EXPECT_EQ(i, expected.size());
250250
}
251251

252+
// distance 1, default limit, default damerau
253+
{
254+
std::vector<arangodb::velocypack::Slice> expected = {
255+
insertedDocs[29].slice(),
256+
};
257+
auto result = arangodb::tests::executeQuery(
258+
vocbase,
259+
"FOR d IN testView SEARCH LEVENSHTEIN_MATCH(d.title, 'cba', 1) RETURN d");
260+
ASSERT_TRUE(result.result.ok());
261+
auto slice = result.data->slice();
262+
ASSERT_TRUE(slice.isArray());
263+
size_t i = 0;
264+
265+
for (arangodb::velocypack::ArrayIterator itr(slice); itr.valid(); ++itr) {
266+
auto const resolved = itr.value().resolveExternals();
267+
ASSERT_TRUE(i < expected.size());
268+
EXPECT_TRUE((0 == arangodb::basics::VelocyPackHelper::compare(expected[i++],
269+
resolved, true)));
270+
}
271+
272+
EXPECT_EQ(i, expected.size());
273+
}
274+
252275
// distance 2, default limit
253276
{
254277
std::vector<arangodb::velocypack::Slice> expected = {
@@ -341,7 +364,7 @@ TEST_F(IResearchQueryLevenhsteinMatchTest, test) {
341364
};
342365
auto result = arangodb::tests::executeQuery(
343366
vocbase,
344-
"FOR d IN testView SEARCH LEVENSHTEIN_MATCH(d.title, 'ababab', 3) RETURN d");
367+
"FOR d IN testView SEARCH LEVENSHTEIN_MATCH(d.title, 'ababab', 3, false) RETURN d");
345368
ASSERT_TRUE(result.result.ok());
346369
auto slice = result.data->slice();
347370
ASSERT_TRUE(slice.isArray());
@@ -805,6 +828,14 @@ TEST_F(IResearchQueryLevenhsteinMatchTest, test) {
805828
ASSERT_TRUE(result.result.is(TRI_ERROR_BAD_PARAMETER));
806829
}
807830

831+
// test max Damerau-Levenshtein distance
832+
{
833+
auto result = arangodb::tests::executeQuery(
834+
vocbase,
835+
"FOR d IN testView SEARCH LEVENSHTEIN_MATCH(d.value, 'foo', 4) RETURN d");
836+
ASSERT_TRUE(result.result.is(TRI_ERROR_BAD_PARAMETER));
837+
}
838+
808839
// test missing value
809840
{
810841
auto result = arangodb::tests::executeQuery(

tests/js/common/aql/aql-view-arangosearch-noncluster.js

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1932,14 +1932,14 @@ function iResearchAqlTestSuite () {
19321932
},
19331933

19341934
testLevenshteinMatch0 : function() {
1935-
var res = db._query("FOR doc IN UnitTestsView SEARCH ANALYZER(LEVENSHTEIN_MATCH(doc.text, 'lazi', 0), 'text_en') OPTIONS { waitForSync : true } SORT doc.name RETURN doc").toArray();
1935+
var res = db._query("FOR doc IN UnitTestsView SEARCH ANALYZER(LEVENSHTEIN_MATCH(doc.text, 'lazi', 0, false), 'text_en') OPTIONS { waitForSync : true } SORT doc.name RETURN doc").toArray();
19361936
assertEqual(2, res.length);
19371937
assertEqual("full", res[0].name);
19381938
assertEqual("half", res[1].name);
19391939
},
19401940

19411941
testLevenshteinMatch1 : function() {
1942-
var res = db._query("FOR doc IN UnitTestsView SEARCH ANALYZER(LEVENSHTEIN_MATCH(doc.text, 'lzi', 1), 'text_en') OPTIONS { waitForSync : true } SORT doc.name RETURN doc").toArray();
1942+
var res = db._query("FOR doc IN UnitTestsView SEARCH ANALYZER(LEVENSHTEIN_MATCH(doc.text, 'lzi', 1, false), 'text_en') OPTIONS { waitForSync : true } SORT doc.name RETURN doc").toArray();
19431943
assertEqual(2, res.length);
19441944
assertEqual("full", res[0].name);
19451945
assertEqual("half", res[1].name);
@@ -1952,6 +1952,13 @@ function iResearchAqlTestSuite () {
19521952
assertEqual("half", res[1].name);
19531953
},
19541954

1955+
testLevenshteinDamerauMatch1Default : function() {
1956+
var res = db._query("FOR doc IN UnitTestsView SEARCH ANALYZER(LEVENSHTEIN_MATCH(doc.text, 'lzai', 1), 'text_en') OPTIONS { waitForSync : true } SORT doc.name RETURN doc").toArray();
1957+
assertEqual(2, res.length);
1958+
assertEqual("full", res[0].name);
1959+
assertEqual("half", res[1].name);
1960+
},
1961+
19551962
testLevenshteinDamerauMatch1NoLimit : function() {
19561963
var res = db._query("FOR doc IN UnitTestsView SEARCH ANALYZER(LEVENSHTEIN_MATCH(doc.text, 'lzai', 1, true), 'text_en') OPTIONS { waitForSync : true } SORT doc.name RETURN doc").toArray();
19571964
assertEqual(2, res.length);

0 commit comments

Comments
 (0)
0