8000 Feature/internal issue #654 (#10456) · arangodb/arangodb@31a580d · GitHub
[go: up one dir, main page]

Skip to content

Commit 31a580d

Browse files
Dronplanegnusi
authored and committed
Feature/internal issue #654 (#10456)
* Implemented PHRASE * Added more tests * Implemented array recursion * Fixed tests * Fixed tests * Minor comments added
1 parent f58c1f8 commit 31a580d

File tree

5 files changed

+478
-150
lines changed

5 files changed

+478
-150
lines changed

arangod/IResearch/IResearchFilterFactory.cpp

Lines changed: 88 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1908,9 +1908,88 @@ arangodb::Result fromFuncMinMatch(irs::boolean_filter* filter, QueryContext cons
19081908
return {};
19091909
}
19101910

1911+
1912+
////////////////////////////////////////////////////////////////////////////////
/// @brief validates the value/offset arguments of the 'PHRASE' AQL function
///        and, when a phrase filter is supplied, appends the terms to it
///
/// Members [valueArgsBegin, valueArgsEnd) of 'valueArgs' are consumed as a
/// sequence of values (strings, or arrays when 'allowRecursion' is set)
/// separated by numeric offsets.
///
/// @param phrase phrase filter to fill, nullptr if only validation is
///        requested; in that case processing stops successfully at the first
///        non-constant argument
/// @param ctx query context used to evaluate the arguments
/// @param filterCtx filter context (forwarded to nested calls, otherwise
///        not used here)
/// @param valueArgs node holding the arguments to process
/// @param valueArgsBegin index of the first member to process
/// @param valueArgsEnd index one past the last member to process
/// @param analyzer analyzer used to tokenize values, must be set whenever
///        'phrase' is not nullptr
/// @param offset offset to apply to the first value encountered
/// @param allowDefaultOffset if true a value may directly follow another
///        value (the omitted offset is treated as 0)
/// @param allowRecursion if true array members are expanded by a nested call
///        (with 'allowDefaultOffset' enabled and further nesting disabled),
///        otherwise an array member is reported as an error
/// @return OK, or TRI_ERROR_BAD_PARAMETER with a message naming the position
///         of the offending argument
////////////////////////////////////////////////////////////////////////////////
arangodb::Result processPhraseArgs(
    irs::by_phrase* phrase, QueryContext const& ctx,
    FilterContext const& filterCtx, arangodb::aql::AstNode const& valueArgs,
    size_t valueArgsBegin, size_t valueArgsEnd, irs::analysis::analyzer::ptr& analyzer,
    size_t offset, bool allowDefaultOffset, bool allowRecursion) {
  irs::string_ref value;
  bool expectingOffset = false;  // the very first argument has to be a value
  for (size_t idx = valueArgsBegin; idx < valueArgsEnd; ++idx) {
    auto currentArg = valueArgs.getMemberUnchecked(idx);
    if (!currentArg) {
      auto message = "'PHRASE' AQL function: Unable to parse argument on position "s + std::to_string(idx);
      LOG_TOPIC("44bed", WARN, arangodb::iresearch::TOPIC) << message;
      return { TRI_ERROR_BAD_PARAMETER, message };
    }
    if (currentArg->isArray() && (!expectingOffset || allowDefaultOffset)) {
      // an array argument is processed with possible default 0 offsets - to be
      // easily compatible with the TOKENS function.
      // Only one level of arrays is expanded: arrays nested inside an array are
      // rejected. This could be allowed, but just looks tangled. Anyone
      // interested could use FLATTEN to explicitly request processing of all
      // nested arrays as one array
      if (allowRecursion) {
        auto subRes = processPhraseArgs(phrase, ctx, filterCtx, *currentArg, 0,
                                        currentArg->numMembers(), analyzer,
                                        offset, true, false);
        if (subRes.fail()) {
          return subRes;
        }
        // the whole array counts as a value, so an offset may follow
        expectingOffset = true;
        offset = 0;
        continue;
      } else {
        auto message = "'PHRASE' AQL function: recursive arrays not allowed at position "s + std::to_string(idx);
        LOG_TOPIC("66c24", WARN, arangodb::iresearch::TOPIC) << message;
        return { TRI_ERROR_BAD_PARAMETER, message };
      }
    }
    ScopedAqlValue currentValue(*currentArg);
    if (phrase || currentValue.isConstant()) {
      if (!currentValue.execute(ctx)) {
        auto message = "'PHRASE' AQL function: Unable to parse argument on position " + std::to_string(idx);
        LOG_TOPIC("d819d", WARN, arangodb::iresearch::TOPIC) << message;
        return { TRI_ERROR_BAD_PARAMETER, message };
      }
      if (arangodb::iresearch::SCOPED_VALUE_TYPE_DOUBLE == currentValue.type() && expectingOffset) {
        // NOTE(review): a negative number is converted to a very large
        // unsigned offset here - confirm whether it should be rejected instead
        offset = static_cast<uint64_t>(currentValue.getInt64());
        expectingOffset = false;
        continue;  // got offset let`s go search for value
      } else if ((arangodb::iresearch::SCOPED_VALUE_TYPE_STRING != currentValue.type() ||
                  !currentValue.getString(value)) ||  // value is not a string at all
                 (expectingOffset && !allowDefaultOffset)) {  // offset is expected mandatory but got value
        std::string expectedValue;
        if (expectingOffset && allowDefaultOffset) {
          expectedValue = " as a value or offset";
        } else if (expectingOffset) {
          expectedValue = " as an offset";
        } else {
          expectedValue = " as a value";
        }
        auto message = "'PHRASE' AQL function: Unable to parse argument on position " + std::to_string(idx) + expectedValue;
        LOG_TOPIC("ac06b", WARN, arangodb::iresearch::TOPIC) << message;
        return { TRI_ERROR_BAD_PARAMETER, message };
      }
    } else {
      // in case of non const node encountered while parsing we can not decide
      // if current and following args are correct before execution
      // so at this stage we say all is ok
      return {};
    }
    if (phrase) {
      TRI_ASSERT(analyzer);
      appendTerms(*phrase, value, *analyzer, offset);
    }
    offset = 0;
    expectingOffset = true;
  }
  if (!expectingOffset) {
    // either the last argument is numeric (no term to apply the offset to) or
    // the range is empty (no value at all). For an empty range report the
    // position where a value was expected instead of underflowing
    // 'valueArgsEnd - 1'
    auto const position = valueArgsEnd > valueArgsBegin ? valueArgsEnd - 1 : valueArgsBegin;
    auto message = "'PHRASE' AQL function: Unable to parse argument on position " + std::to_string(position) + " as a value"s;
    LOG_TOPIC("5fafe", WARN, arangodb::iresearch::TOPIC) << message;
    return { TRI_ERROR_BAD_PARAMETER, message };
  }
  return {};
}
1988+
1989+
// note: <value> can be either a string or an array of strings with offsets in between. Inside an array, a
1990+
// 0 offset may be omitted, e.g. [term1, term2, 2, term3] is equal to [term1, 0, term2, 2, term3]
19111991
// PHRASE(<attribute>, <value> [, <offset>, <value>, ...] [, <analyzer>])
1912-
// PHRASE(<attribute>, '[' <value> [, <offset>, <value>, ...] ']' [,
1913-
// <analyzer>])
1992+
// PHRASE(<attribute>, '[' <value> [, <offset>, <value>, ...] ']' [,<analyzer>])
19141993
arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const& ctx,
19151994
FilterContext const& filterCtx, arangodb::aql::AstNode const& args) {
19161995
if (!args.isDeterministic()) {
@@ -1940,7 +2019,7 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
19402019
ctx, argc, "PHRASE");
19412020

19422021
if (!analyzerPool._pool) {
1943-
return {TRI_ERROR_INTERNAL};
2022+
return {TRI_ERROR_BAD_PARAMETER};
19442023
}
19452024
}
19462025

@@ -1958,70 +2037,15 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
19582037
}
19592038

19602039
// ...........................................................................
1961-
// 2nd argument defines a value
2040+
// 2nd argument and later defines a values
19622041
// ...........................................................................
1963-
1964-
auto const* valueArg = args.getMemberUnchecked(1);
1965-
1966-
if (!valueArg) {
1967-
auto message = "'PHRASE' AQL function: 2nd argument is invalid";
1968-
LOG_TOPIC("c3aec", WARN, arangodb::iresearch::TOPIC) << message;
1969-
return {TRI_ERROR_BAD_PARAMETER, message};
1970-
}
1971-
19722042
auto* valueArgs = &args;
19732043
size_t valueArgsBegin = 1;
19742044
size_t valueArgsEnd = argc;
19752045

1976-
if (valueArg->isArray()) {
1977-
valueArgs = valueArg;
1978-
valueArgsBegin = 0;
1979-
valueArgsEnd = valueArg->numMembers();
1980-
1981-
if (0 == (valueArgsEnd & 1)) {
1982-
auto message = "'PHRASE' AQL function: 2nd argument has an invalid number of members (must be an odd number)";
1983-
LOG_TOPIC("05c0c", WARN, arangodb::iresearch::TOPIC) << message;
1984-
return {TRI_ERROR_BAD_PARAMETER, message};
1985-
}
1986-
1987-
valueArg = valueArgs->getMemberUnchecked(valueArgsBegin);
1988-
1989-
if (!valueArg) {
1990-
std::stringstream ss;;
1991-
ss << valueArg;
1992-
auto message = "'PHRASE' AQL function: 2nd argument has an invalid member at offset: "s + ss.str();
1993-
LOG_TOPIC("892bc", WARN, arangodb::iresearch::TOPIC) << message;
1994-
return {TRI_ERROR_BAD_PARAMETER, message};
1995-
}
1996-
}
1997-
1998-
irs::string_ref value;
1999-
ScopedAqlValue inputValue(*valueArg);
2000-
2001-
if (filter || inputValue.isConstant()) {
2002-
if (!inputValue.execute(ctx)) {
2003-
auto message = "'PHRASE' AQL function: Failed to evaluate 2nd argument";
2004-
LOG_TOPIC("14a81", WARN, arangodb::iresearch::TOPIC) << message;
2005-
return {TRI_ERROR_BAD_PARAMETER, message};
2006-
}
2007-
2008-
if (arangodb::iresearch::SCOPED_VALUE_TYPE_STRING != inputValue.type()) {
2009-
auto message = "'PHRASE' AQL function: 2nd argument has invalid type '"s +
2010-
ScopedAqlValue::typeString(inputValue.type()).c_str() + "' (string expected)";
2011-
LOG_TOPIC("a91b6", WARN, arangodb::iresearch::TOPIC) << message;
2012-
return {TRI_ERROR_BAD_PARAMETER, message};
2013-
}
2014-
2015-
if (!inputValue.getString(value)) {
2016-
auto message = "'PHRASE' AQL function: Unable to parse 2nd argument as string";
2017-
LOG_TOPIC("b546d", WARN, arangodb::iresearch::TOPIC) << message;
2018-
return {TRI_ERROR_BAD_PARAMETER, message};
2019-
}
2020-
}
2021-
20222046
irs::by_phrase* phrase = nullptr;
20232047
irs::analysis::analyzer::ptr analyzer;
2024-
2048+
// prepare filter if execution phase
20252049
if (filter) {
20262050
std::string name;
20272051

@@ -2032,7 +2056,7 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
20322056
}
20332057

20342058
TRI_ASSERT(analyzerPool._pool);
2035-
analyzer = analyzerPool._pool->get(); // get analyzer from pool
2059+
analyzer = analyzerPool._pool->get();
20362060

20372061
if (!analyzer) {
20382062
auto message = "'PHRASE' AQL function: Unable to instantiate analyzer '"s + analyzerPool._pool->name() + "'";
@@ -2045,63 +2069,10 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
20452069
phrase = &filter->add<irs::by_phrase>();
20462070
phrase->field(std::move(name));
20472071
phrase->boost(filterCtx.boost);
2048-
2049-
TRI_ASSERT(analyzer);
2050-
appendTerms(*phrase, value, *analyzer, 0);
2051-
}
2052-
2053-
decltype(fieldArg) offsetArg = nullptr;
2054-
size_t offset = 0;
2055-
2056-
for (size_t idx = valueArgsBegin + 1, end = valueArgsEnd; idx < end; idx += 2) {
2057-
offsetArg = valueArgs->getMemberUnchecked(idx);
2058-
2059-
if (!offsetArg) {
2060-
auto message = "'PHRASE' AQL function: Unable to parse argument on position "s + std::to_string(idx) + " as an offset"s;
2061-
LOG_TOPIC("44bed", WARN, arangodb::iresearch::TOPIC) << message;
2062-
return {TRI_ERROR_BAD_PARAMETER, message};
2063-
}
2064-
2065-
valueArg = valueArgs->getMemberUnchecked(idx + 1);
2066-
2067-
if (!valueArg) {
2068-
auto message = "'PHRASE' AQL function: Unable to parse argument on position " + std::to_string(idx + 1) + " as a value";
2069-
LOG_TOPIC("ac06b", WARN, arangodb::iresearch::TOPIC) << message;
2070-
return {TRI_ERROR_BAD_PARAMETER, message};
2071-
}
2072-
2073-
ScopedAqlValue offsetValue(*offsetArg);
2074-
2075-
if (filter || offsetValue.isConstant()) {
2076-
if (!offsetValue.execute(ctx) ||
2077-
arangodb::iresearch::SCOPED_VALUE_TYPE_DOUBLE != offsetValue.type()) {
2078-
auto message = "'PHRASE' AQL function: Unable to parse argument on position " + std::to_string(idx) + " as an offset";
2079-
LOG_TOPIC("d819d", WARN, arangodb::iresearch::TOPIC) << message;
2080-
return {TRI_ERROR_BAD_PARAMETER, message};
2081-
}
2082-
2083-
offset = static_cast<uint64_t>(offsetValue.getInt64());
2084-
}
2085-
2086-
ScopedAqlValue inputValue(*valueArg);
2087-
2088-
if (filter || inputValue.isConstant()) {
2089-
if (!inputValue.execute(ctx) ||
2090-
arangodb::iresearch::SCOPED_VALUE_TYPE_STRING != inputValue.type() ||
2091-
!inputValue.getString(value)) {
2092-
auto message = "'PHRASE' AQL function: Unable to parse argument on position " + std::to_string(idx + 1) + " as a value";
2093-
LOG_TOPIC("39e12", WARN, arangodb::iresearch::TOPIC) << message;
2094-
return {TRI_ERROR_BAD_PARAMETER, message};
2095-
}
2096-
}
2097-
2098-
if (phrase) {
2099-
TRI_ASSERT(analyzer);
2100-
appendTerms(*phrase, value, *analyzer, offset);
2101-
}
21022072
}
2103-
2104-
return { }; //ok;
2073+
// on top level we require explicit offsets - to be backward compatible and be able to distinguish last argument as analyzer or value
2074+
// Also we allow recursion inside array to support older syntax (one array arg) and add ability to pass several arrays as args
2075+
return processPhraseArgs(phrase, ctx, filterCtx, *valueArgs, valueArgsBegin, valueArgsEnd, analyzer, 0, false, true);
21052076
}
21062077

21072078
// STARTS_WITH(<attribute>, <prefix>, [<scoring-limit>])

0 commit comments

Comments
 (0)
0