Feature/ngram similarity function by Dronplane · Pull Request #11276 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

Feature/ngram similarity function #11276

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 16, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Adressed review comments
  • Loading branch information
Dronplane committed Mar 16, 2020
commit 69cb1612ebd06a91dc07a5a9848d204310782303
91 changes: 45 additions & 46 deletions arangod/Aql/Functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1586,56 +1586,55 @@ AqlValue Functions::LevenshteinDistance(ExpressionContext*, transaction::Methods


namespace {

/// @brief Shared implementation behind the ngram similarity AQL functions.
///
/// Validates the three leading arguments, converts both strings into
/// sequences of character codes via basics::StringUtils::characterCodes and
/// delegates the actual scoring to irs::ngram_similarity.
///
/// @tparam search_semantics forwarded unchanged as the second template
///         argument of irs::ngram_similarity.
///         NOTE(review): the precise meaning of this flag is defined by
///         IResearch and is not visible here - confirm against its docs.
/// @param AFN  name of the calling AQL function, used when registering warnings
/// @param ctx  expression context that receives the warnings
/// @param trx  transaction handle (not used by this helper)
/// @param args function arguments: [0] attribute string, [1] target string,
///             [2] ngram size (a number, must be >= 1)
/// @return the similarity wrapped as a double AqlValue; a null AqlValue (after
///         registering a warning) if fewer than 3 arguments were given, an
///         argument has the wrong type, or the ngram size is below 1
template<bool search_semantics>
AqlValue NgramSimilarityHelper(char const* AFN, ExpressionContext* ctx, transaction::Methods* trx,
                               VPackFunctionParameters const& args) {
  if (args.size() < 3) {
    registerWarning(
        ctx, AFN,
        arangodb::Result{ TRI_ERROR_QUERY_FUNCTION_ARGUMENT_NUMBER_MISMATCH,
                          "Minimum 3 arguments are expected." });
    return AqlValue(AqlValueHintNull());
  }

  // Argument 0: the attribute value, must be a string.
  auto const& attribute = extractFunctionParameterValue(args, 0);
  if (ADB_UNLIKELY(!attribute.isString())) {
    arangodb::aql::registerInvalidArgumentWarning(ctx, AFN);
    return arangodb::aql::AqlValue{ arangodb::aql::AqlValueHintNull{} };
  }
  auto const attributeValue = arangodb::iresearch::getStringRef(attribute.slice());

  // Argument 1: the target value, must be a string.
  auto const& target = extractFunctionParameterValue(args, 1);
  if (ADB_UNLIKELY(!target.isString())) {
    arangodb::aql::registerInvalidArgumentWarning(ctx, AFN);
    return arangodb::aql::AqlValue{ arangodb::aql::AqlValueHintNull{} };
  }
  auto const targetValue = arangodb::iresearch::getStringRef(target.slice());

  // Argument 2: the ngram size, must be a number.
  auto const& ngramSize = extractFunctionParameterValue(args, 2);
  if (ADB_UNLIKELY(!ngramSize.isNumber())) {
    arangodb::aql::registerInvalidArgumentWarning(ctx, AFN);
    return arangodb::aql::AqlValue{ arangodb::aql::AqlValueHintNull{} };
  }
  auto const ngramSizeValue = ngramSize.toInt64();

  // Zero and negative sizes are rejected before the value reaches IResearch.
  if (ADB_UNLIKELY(ngramSizeValue < 1)) {
    arangodb::aql::registerWarning(ctx, AFN,
                                   arangodb::Result{TRI_ERROR_BAD_PARAMETER,
                                                    "Invalid ngram size. Should be 1 or greater"});
    return arangodb::aql::AqlValue{ arangodb::aql::AqlValueHintNull{} };
  }

  // Similarity is computed on character codes (uint32_t elements), not on
  // the raw bytes of the string references.
  auto utf32Attribute = basics::StringUtils::characterCodes(attributeValue.c_str(), attributeValue.size());
  auto utf32Target = basics::StringUtils::characterCodes(targetValue.c_str(), targetValue.size());

  // Note the argument order: the target sequence is passed first, then the
  // attribute sequence.
  auto const similarity =
      irs::ngram_similarity<uint32_t, search_semantics>(
          utf32Target.data(), utf32Target.size(),
          utf32Attribute.data(), utf32Attribute.size(),
          ngramSizeValue);
  return AqlValue(AqlValueHintDouble(similarity));
}

}  // namespace

/// Executes NGRAM_SIMILARITY based on binary ngram similarity
Expand Down Expand Up @@ -6756,7 +6755,7 @@ AqlValue Functions::ReplaceNth(ExpressionContext* expressionContext, transaction
registerInvalidArgumentWarning(expressionContext, AFN);
return AqlValue(AqlValueHintNull());
}

if (offset.isNull(true)) {
THROW_ARANGO_EXCEPTION_PARAMS(TRI_ERROR_QUERY_FUNCTION_ARGUMENT_TYPE_MISMATCH, AFN);
}
Expand All @@ -6776,7 +6775,7 @@ AqlValue Functions::ReplaceNth(ExpressionContext* expressionContext, transaction
AqlValueMaterializer materializer(trx);
VPackSlice arraySlice = materializer.slice(baseArray, false);
VPackSlice replaceValue = materializer.slice(newValue, false);

transaction::BuilderLeaser builder(trx);
builder->openArray();

Expand Down
2 changes: 1 addition & 1 deletion arangod/IResearch/IResearchFilterFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ struct FilterFactory {
}; // FilterFactory


class FilterConstants {
struct FilterConstants {
FilterConstants() = delete;
public:
// Defaults
Expand Down
0