@@ -1908,9 +1908,88 @@ arangodb::Result fromFuncMinMatch(irs::boolean_filter* filter, QueryContext cons
1908
1908
return {};
1909
1909
}
1910
1910
1911
+
1912
+ arangodb::Result processPhraseArgs ( // Walks members [valueArgsBegin, valueArgsEnd) of valueArgs as a value/offset sequence; returns {} on success or TRI_ERROR_BAD_PARAMETER with a message
1913
+ irs::by_phrase* phrase, QueryContext const & ctx, // phrase: filter that receives the terms; when null nothing is appended and only constant arguments are evaluated. ctx: passed to ScopedAqlValue::execute
1914
+ FilterContext const & filterCtx, arangodb::aql::AstNode const & valueArgs, // filterCtx: only forwarded to the nested call. valueArgs: node whose members are read
1915
+ size_t valueArgsBegin, size_t valueArgsEnd, irs::analysis::analyzer::ptr& analyzer, // analyzer: asserted non-null and handed to appendTerms whenever phrase is set
1916
+ size_t offset, bool allowDefaultOffset, bool allowRecursion) { // offset: offset used for the first term. allowDefaultOffset: a value may directly follow a value (offset 0 is used). allowRecursion: array members are handled by a nested call
1917
+ irs::string_ref value;
1918
+ bool expectingOffset = false ; // false until a term (or array) has been consumed; true means the next argument may be an offset
1919
+ for (size_t idx = valueArgsBegin; idx < valueArgsEnd; ++idx) {
1920
+ auto currentArg = valueArgs.getMemberUnchecked (idx);
1921
+ if (!currentArg) {
1922
+ auto message = " 'PHRASE' AQL function: Unable to parse argument on position " s + std::to_string (idx);
1923
+ LOG_TOPIC (" 44bed" , WARN, arangodb::iresearch::TOPIC) << message;
1924
+ return { TRI_ERROR_BAD_PARAMETER, message };
1925
+ }
1926
+ if (currentArg->isArray () && (!expectingOffset || allowDefaultOffset)) { // array found in a position where a value is acceptable
1927
+ // An array argument is processed with optional (default 0) offsets, to be easily compatible with the TOKENS function.
1928
+ // Arrays nested inside an array are not allowed. This could be supported, but it looks tangled.
1929
+ // Anyone interested could use FLATTEN to explicitly require processing all nested arrays as one array.
1930
+ if (allowRecursion) {
1931
+ auto subRes = processPhraseArgs (phrase, ctx, filterCtx, *currentArg, 0 , currentArg->numMembers (), analyzer, offset, true , false ); // nested call: whole array, default offsets allowed, no further nesting
1932
+ if (subRes.fail ()) {
1933
+ return subRes;
1934
+ }
1935
+ expectingOffset = true ; // an offset may follow the array
1936
+ offset = 0 ; // the pending offset was passed to the nested call; the next term defaults to 0
1937
+ continue ;
1938
+ } else {
1939
+ auto message = " 'PHRASE' AQL function: recursive arrays not allowed at position " s + std::to_string (idx);
1940
+ LOG_TOPIC (" 66c24" , WARN, arangodb::iresearch::TOPIC) << message;
1941
+ return { TRI_ERROR_BAD_PARAMETER, message };
1942
+ }
1943
+ }
1944
+ ScopedAqlValue currentValue (*currentArg);
1945
+ if (phrase || currentValue.isConstant ()) { // with no phrase to fill, non-constant arguments are not evaluated (see the else branch)
1946
+ if (!currentValue.execute (ctx)) {
1947
+ auto message = " 'PHRASE' AQL function: Unable to parse argument on position " + std::to_string (idx);
1948
+ LOG_TOPIC (" d819d" , WARN, arangodb::iresearch::TOPIC) << message;
1949
+ return { TRI_ERROR_BAD_PARAMETER, message };
1950
+ }
1951
+ if (arangodb::iresearch::SCOPED_VALUE_TYPE_DOUBLE == currentValue.type () && expectingOffset) { // numeric argument in a position where an offset is accepted
1952
+ offset = static_cast <uint64_t >(currentValue.getInt64 ()); // NOTE(review): a negative getInt64() result would wrap in this unsigned cast - confirm negative offsets are rejected elsewhere
1953
+ expectingOffset = false ;
1954
+ continue ; // got an offset, now look for the value it applies to
1955
+ } else if ( (arangodb::iresearch::SCOPED_VALUE_TYPE_STRING != currentValue.type () || !currentValue.getString (value)) || // the value is not a string at all
1956
+ expectingOffset && !allowDefaultOffset) { // an offset is mandatory here but a value was given
1957
+ std::string expectedValue; // suffix of the error message describing what was expected at this position
1958
+ if (expectingOffset && allowDefaultOffset) {
1959
+ expectedValue = " as a value or offset" ;
1960
+ } else if (expectingOffset) {
1961
+ expectedValue = " as an offset" ;
1962
+ } else {
1963
+ expectedValue = " as a value" ;
1964
+ }
1965
+ auto message = " 'PHRASE' AQL function: Unable to parse argument on position " + std::to_string (idx) + expectedValue;
1966
+ LOG_TOPIC (" ac06b" , WARN, arangodb::iresearch::TOPIC) << message;
1967
+ return { TRI_ERROR_BAD_PARAMETER, message };
1968
+ }
1969
+ } else {
1970
+ // When a non-constant node is encountered while parsing, we cannot decide before execution whether this and the following arguments are correct,
1971
+ // so at this stage we report success.
1972
+ return {};
1973
+ }
1974
+ if (phrase) {
1975
+ TRI_ASSERT (analyzer);
1976
+ appendTerms (*phrase, value, *analyzer, offset);
1977
+ }
1978
+ offset = 0 ; // the offset has been consumed by this term; the next term defaults to 0
1979
+ expectingOffset = true ; // a term was just processed, so an offset may come next
1980
+ }
1981
+ if (!expectingOffset) { // the last argument was numeric, which is an error because there is no term to apply the offset to; also reached when the range is empty and the loop never ran
1982
+ auto message = " 'PHRASE' AQL function : Unable to parse argument on position " + std::to_string (valueArgsEnd - 1 ) + " as a value" s; // NOTE(review): valueArgsEnd - 1 wraps when valueArgsEnd is 0 (e.g. a nested call on an array with no members) - confirm and guard if needed
1983
+ LOG_TOPIC (" 5fafe" , WARN, arangodb::iresearch::TOPIC) << message;
1984
+ return { TRI_ERROR_BAD_PARAMETER, message };
1985
+ }
1986
+ return {};
1987
+ }
1988
+
1989
+ // note: <value> could be either a string or an array of strings with offsets in between. Inside an array
1990
+ // a 0 offset may be omitted, e.g. [term1, term2, 2, term3] is equal to: [term1, 0, term2, 2, term3]
1911
1991
// PHRASE(<attribute>, <value> [, <offset>, <value>, ...] [, <analyzer>])
1912
- // PHRASE(<attribute>, '[' <value> [, <offset>, <value>, ...] ']' [,
1913
- // <analyzer>])
1992
+ // PHRASE(<attribute>, '[' <value> [, <offset>, <value>, ...] ']' [,<analyzer>])
1914
1993
arangodb::Result fromFuncPhrase (irs::boolean_filter* filter, QueryContext const & ctx,
1915
1994
FilterContext const & filterCtx, arangodb::aql::AstNode const & args) {
1916
1995
if (!args.isDeterministic ()) {
@@ -1940,7 +2019,7 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
1940
2019
ctx, argc, " PHRASE" );
1941
2020
1942
2021
if (!analyzerPool._pool ) {
1943
- return {TRI_ERROR_INTERNAL };
2022
+ return {TRI_ERROR_BAD_PARAMETER };
1944
2023
}
1945
2024
}
1946
2025
@@ -1958,70 +2037,15 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
1958
2037
}
1959
2038
1960
2039
// ...........................................................................
1961
- // 2nd argument defines a value
2040
+ // 2nd and subsequent arguments define the values
1962
2041
// ...........................................................................
1963
-
1964
- auto const * valueArg = args.getMemberUnchecked (1 );
1965
-
1966
- if (!valueArg) {
1967
- auto message = " 'PHRASE' AQL function: 2nd argument is invalid" ;
1968
- LOG_TOPIC (" c3aec" , WARN, arangodb::iresearch::TOPIC) << message;
1969
- return {TRI_ERROR_BAD_PARAMETER, message};
1970
- }
1971
-
1972
2042
auto * valueArgs = &args;
1973
2043
size_t valueArgsBegin = 1 ;
1974
2044
size_t valueArgsEnd = argc;
1975
2045
1976
- if (valueArg->isArray ()) {
1977
- valueArgs = valueArg;
1978
- valueArgsBegin = 0 ;
1979
- valueArgsEnd = valueArg->numMembers ();
1980
-
1981
- if (0 == (valueArgsEnd & 1 )) {
1982
- auto message = " 'PHRASE' AQL function: 2nd argument has an invalid number of members (must be an odd number)" ;
1983
- LOG_TOPIC (" 05c0c" , WARN, arangodb::iresearch::TOPIC) << message;
1984
- return {TRI_ERROR_BAD_PARAMETER, message};
1985
- }
1986
-
1987
- valueArg = valueArgs->getMemberUnchecked (valueArgsBegin);
1988
-
1989
- if (!valueArg) {
1990
- std::stringstream ss;;
1991
- ss << valueArg;
1992
- auto message = " 'PHRASE' AQL function: 2nd argument has an invalid member at offset: " s + ss.str ();
1993
- LOG_TOPIC (" 892bc" , WARN, arangodb::iresearch::TOPIC) << message;
1994
- return {TRI_ERROR_BAD_PARAMETER, message};
1995
- }
1996
- }
1997
-
1998
- irs::string_ref value;
1999
- ScopedAqlValue inputValue (*valueArg);
2000
-
2001
- if (filter || inputValue.isConstant ()) {
2002
- if (!inputValue.execute (ctx)) {
2003
- auto message = " 'PHRASE' AQL function: Failed to evaluate 2nd argument" ;
2004
- LOG_TOPIC (" 14a81" , WARN, arangodb::iresearch::TOPIC) << message;
2005
- return {TRI_ERROR_BAD_PARAMETER, message};
2006
- }
2007
-
2008
- if (arangodb::iresearch::SCOPED_VALUE_TYPE_STRING != inputValue.type ()) {
2009
- auto message = " 'PHRASE' AQL function: 2nd argument has invalid type '" s +
2010
- ScopedAqlValue::typeString (inputValue.type ()).c_str () + " ' (string expected)" ;
2011
- LOG_TOPIC (" a91b6" , WARN, arangodb::iresearch::TOPIC) << message;
2012
- return {TRI_ERROR_BAD_PARAMETER, message};
2013
- }
2014
-
2015
- if (!inputValue.getString (value)) {
2016
- auto message = " 'PHRASE' AQL function: Unable to parse 2nd argument as string" ;
2017
- LOG_TOPIC (" b546d" , WARN, arangodb::iresearch::TOPIC) << message;
2018
- return {TRI_ERROR_BAD_PARAMETER, message};
2019
- }
2020
- }
2021
-
2022
2046
irs::by_phrase* phrase = nullptr ;
2023
2047
irs::analysis::analyzer::ptr analyzer;
2024
-
2048
+ // prepare filter if execution phase
2025
2049
if (filter) {
2026
2050
std::string name;
2027
2051
@@ -2032,7 +2056,7 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
2032
2056
}
2033
2057
2034
2058
TRI_ASSERT (analyzerPool._pool );
2035
- analyzer = analyzerPool._pool ->get (); // get analyzer from pool
2059
+ analyzer = analyzerPool._pool ->get ();
2036
2060
2037
2061
if (!analyzer) {
2038
2062
auto message = " 'PHRASE' AQL function: Unable to instantiate analyzer '" s + analyzerPool._pool ->name () + " '" ;
@@ -2045,63 +2069,10 @@ arangodb::Result fromFuncPhrase(irs::boolean_filter* filter, QueryContext const&
2045
2069
phrase = &filter->add <irs::by_phrase>();
2046
2070
phrase->field (std::move (name));
2047
2071
phrase->boost (filterCtx.boost );
2048
-
2049
- TRI_ASSERT (analyzer);
2050
- appendTerms (*phrase, value, *analyzer, 0 );
2051
- }
2052
-
2053
- decltype (fieldArg) offsetArg = nullptr ;
2054
- size_t offset = 0 ;
2055
-
2056
- for (size_t idx = valueArgsBegin + 1 , end = valueArgsEnd; idx < end; idx += 2 ) {
2057
- offsetArg = valueArgs->getMemberUnchecked (idx);
2058
-
2059
- if (!offsetArg) {
2060
- auto message = " 'PHRASE' AQL function: Unable to parse argument on position " s + std::to_string (idx) + " as an offset" s;
2061
- LOG_TOPIC (" 44bed" , WARN, arangodb::iresearch::TOPIC) << message;
2062
- return {TRI_ERROR_BAD_PARAMETER, message};
2063
- }
2064
-
2065
- valueArg = valueArgs->getMemberUnchecked (idx + 1 );
2066
-
2067
- if (!valueArg) {
2068
- auto message = " 'PHRASE' AQL function: Unable to parse argument on position " + std::to_string (idx + 1 ) + " as a value" ;
2069
- LOG_TOPIC (" ac06b" , WARN, arangodb::iresearch::TOPIC) << message;
2070
- return {TRI_ERROR_BAD_PARAMETER, message};
2071
- }
2072
-
2073
- ScopedAqlValue offsetValue (*offsetArg);
2074
-
2075
- if (filter || offsetValue.isConstant ()) {
2076
- if (!offsetValue.execute (ctx) ||
2077
- arangodb::iresearch::SCOPED_VALUE_TYPE_DOUBLE != offsetValue.type ()) {
2078
- auto message = " 'PHRASE' AQL function: Unable to parse argument on position " + std::to_string (idx) + " as an offset" ;
2079
- LOG_TOPIC (" d819d" , WARN, arangodb::iresearch::TOPIC) << message;
2080
- return {TRI_ERROR_BAD_PARAMETER, message};
2081
- }
2082
-
2083
- offset = static_cast <uint64_t >(offsetValue.getInt64 ());
2084
- }
2085
-
2086
- ScopedAqlValue inputValue (*valueArg);
2087
-
2088
- if (filter || inputValue.isConstant ()) {
2089
- if (!inputValue.execute (ctx) ||
2090
- arangodb::iresearch::SCOPED_VALUE_TYPE_STRING != inputValue.type () ||
2091
- !inputValue.getString (value)) {
2092
- auto message = " 'PHRASE' AQL function: Unable to parse argument on position " + std::to_string (idx + 1 ) + " as a value" ;
2093
- LOG_TOPIC (" 39e12" , WARN, arangodb::iresearch::TOPIC) << message;
2094
- return {TRI_ERROR_BAD_PARAMETER, message};
2095
- }
2096
- }
2097
-
2098
- if (phrase) {
2099
- TRI_ASSERT (analyzer);
2100
- appendTerms (*phrase, value, *analyzer, offset);
2101
- }
2102
2072
}
2103
-
2104
- return { }; // ok;
2073
+ // At the top level explicit offsets are required: this keeps backward compatibility and lets us tell whether the last argument is an analyzer or a value.
2074
+ // Arrays are also accepted at the top level, both to support the older syntax (a single array argument) and to allow passing several arrays as arguments.
2075
+ return processPhraseArgs (phrase, ctx, filterCtx, *valueArgs, valueArgsBegin, valueArgsEnd, analyzer, 0 , false , true );
2105
2076
}
2106
2077
2107
2078
// STARTS_WITH(<attribute>, <prefix>, [<scoring-limit>])
0 commit comments