FT Analyzers: Experimental Hint & Static Examples (#1004) · arangodb/docs@2998c23 · GitHub
[go: up one dir, main page]

Skip to content
This repository was archived by the owner on Dec 13, 2023. It is now read-only.

Commit 2998c23

Browse files
authored
FT Analyzers: Experimental Hint & Static Examples (#1004)
Change the `classification` and `nearest_neighbors` Analyzer examples to static examples, as these Analyzers are experimental. Also add a hint/warning to each of these entries.
1 parent 5ce764a commit 2998c23

File tree

1 file changed

+63
-69
lines changed

1 file changed

+63
-69
lines changed

3.10/analyzers.md

Lines changed: 63 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,12 @@ Create different `segmentation` Analyzers to show the behavior of the different
980980

981981
<small>Introduced in: v3.10.0</small>
982982

983+
{% hint 'warning' %}
984+
This feature is experimental and under active development.
985+
The naming and interfaces may change at any time.
986+
Execution times are not representative of the final product.
987+
{% endhint %}
988+
983989
An Analyzer capable of classifying tokens in the input text.
984990

985991
It applies a user-provided [fastText](https://fasttext.cc/) word embedding model
@@ -1001,47 +1007,42 @@ The *properties* allowed for this Analyzer are an object with the following attr
10011007

10021008
Create and use a `classification` Analyzer with a stored "cooking" classifier to classify items.
10031009

1004-
{% arangoshexample examplevar="examplevar" script="script" result="result" %}
1005-
@startDocuBlockInline analyzerClassification
1006-
@EXAMPLE_ARANGOSH_RUN{ClassificationAnalyzerModelSetup}
1007-
var fs = require("fs");
1008-
var internal = require("internal");
1009-
try {
1010-
fs.makeDirectory("/tmp/embeddingsModels");
1011-
} catch (e) {
1012-
}
1013-
1014-
var destModelPath = "/tmp/embeddingsModels/model_cooking.bin";
1015-
if (!fs.exists(destModelPath)) {
1016-
var sourceModelPath = fs.join(internal.pathForTesting("common"), "aql", "iresearch", "model_cooking.bin");
1017-
try {
1018-
fs.copyFile(sourceModelPath, destModelPath);
1019-
} catch (e) {}
1020-
}
1021-
@END_EXAMPLE_ARANGOSH_RUN
1022-
1023-
@EXAMPLE_ARANGOSH_OUTPUT{analyzerClassification}
1024-
var analyzers = require("@arangodb/analyzers");
1025-
var classifier_single = analyzers.save("classifier_single", "classification", { "model_location": "/tmp/embeddingsModels/model_cooking.bin" }, ["frequency", "norm", "position"]);
1026-
var classifier_top_two = analyzers.save("classifier_double", "classification", { "model_location": "/tmp/embeddingsModels/model_cooking.bin", "top_k": 2 }, ["frequency", "norm", "position"]);
1027-
| db._query(`LET str = 'Which baking dish is best to bake a banana bread ?'
1028-
| RETURN {
1029-
| "all": TOKENS(str, 'classifier_single'),
1030-
| "double": TOKENS(str, 'classifier_double')
1031-
| }
1032-
`);
1033-
~ analyzers.remove(classifier_single.name);
1034-
~ analyzers.remove(classifier_top_two.name);
1035-
@END_EXAMPLE_ARANGOSH_OUTPUT
1036-
@endDocuBlock analyzerClassification
1037-
{% endarangoshexample %}
1038-
{% include arangoshexample.html id=examplevar script=script result=result %}
1010+
```
1011+
arangosh> var analyzers = require("@arangodb/analyzers");
1012+
arangosh> var classifier_single = analyzers.save("classifier_single", "classification", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin" }, ["frequency", "norm", "position"]);
1013+
arangosh> var classifier_top_two = analyzers.save("classifier_double", "classification", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin", "top_k": 2 }, ["frequency", "norm", "position"]);
1014+
arangosh> db._query(`LET str = 'Which baking dish is best to bake a banana bread ?'
1015+
RETURN {
1016+
"all": TOKENS(str, 'classifier_single'),
1017+
"double": TOKENS(str, 'classifier_double')
1018+
}
1019+
`);
1020+
```
10391021

1022+
```
1023+
[
1024+
{
1025+
"all" : [
1026+
"__label__baking"
1027+
],
1028+
"double" : [
1029+
"__label__baking",
1030+
"__label__bananas"
1031+
]
1032+
}
1033+
]
1034+
```
10401035

10411036
### `nearest_neighbors`
10421037

10431038
<small>Introduced in: v3.10.0</small>
10441039

1040+
{% hint 'warning' %}
1041+
This feature is experimental and under active development.
1042+
The naming and interfaces may change at any time.
1043+
Execution times are not representative of the final product.
1044+
{% endhint %}
1045+
10451046
An Analyzer capable of finding nearest neighbors of tokens in the input.
10461047

10471048
It applies a user-provided [fastText](https://fasttext.cc/) word embedding model to retrieve nearest neighbor tokens in
@@ -1060,41 +1061,34 @@ The *properties* allowed for this Analyzer are an object with the following attr
10601061

10611062
Create and use a `nearest_neighbors` Analyzer with a stored "cooking" classifier to find similar terms.
10621063

1063-
{% arangoshexample examplevar="examplevar" script="script" result="result" %}
1064-
@startDocuBlockInline analyzerNearestNeighbors
1065-
@EXAMPLE_ARANGOSH_RUN{NNAnalyzerModelSetup}
1066-
var fs = require("fs");
1067-
var internal = require("internal");
1068-
try {
1069-
fs.makeDirectory("/tmp/embeddingsModels");
1070-
} catch (e) {
1071-
}
1064+
```
1065+
arangosh> var analyzers = require("@arangodb/analyzers");
1066+
arangosh> var nn_single = analyzers.save("nn_single", "nearest_neighbors", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin" }, ["frequency", "norm", "position"]);
1067+
arangosh> var nn_top_two = analyzers.save("nn_double", "nearest_neighbors", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin", "top_k": 2 }, ["frequency", "norm", "position"]);
1068+
arangosh> db._query(`LET str = 'salt, oil'
1069+
RETURN {
1070+
"all": TOKENS(str, 'nn_single'),
1071+
"double": TOKENS(str, 'nn_double')
1072+
}
1073+
`);
1074+
```
10721075

1073-
var destModelPath = "/tmp/embeddingsModels/model_cooking.bin";
1074-
if (!fs.exists(destModelPath)) {
1075-
var sourceModelPath = fs.join(internal.pathForTesting("common"), "aql", "iresearch", "model_cooking.bin");
1076-
try {
1077-
fs.copyFile(sourceModelPath, destModelPath);
1078-
} catch (e) {}
1079-
}
1080-
@END_EXAMPLE_ARANGOSH_RUN
1081-
1082-
@EXAMPLE_ARANGOSH_OUTPUT{analyzerNearestNeighbors}
1083-
var analyzers = require("@arangodb/analyzers");
1084-
var nn_single = analyzers.save("nn_single", "nearest_neighbors", { "model_location": "/tmp/embeddingsModels/model_cooking.bin" }, ["frequency", "norm", "position"]);
1085-
var nn_top_two = analyzers.save("nn_double", "nearest_neighbors", { "model_location": "/tmp/embeddingsModels/model_cooking.bin", "top_k": 2 }, ["frequency", "norm", "position"]);
1086-
| db._query(`LET str = 'salt and oil'
1087-
| RETURN {
1088-
| "all": TOKENS(str, 'nn_single'),
1089-
| "double": TOKENS(str, 'nn_double')
1090-
| }
1091-
`);
1092-
~ analyzers.remove(nn_single.name);
1093-
~ analyzers.remove(nn_top_two.name);
1094-
@END_EXAMPLE_ARANGOSH_OUTPUT
1095-
@endDocuBlock analyzerNearestNeighbors
1096-
{% endarangoshexample %}
1097-
{% include arangoshexample.html id=examplevar script=script result=result %}
1076+
```
1077+
[
1078+
{
1079+
"all" : [
1080+
"pepper",
1081+
"olive"
1082+
],
1083+
"double" : [
1084+
"pepper",
1085+
"table",
1086+
"olive",
1087+
"avocado"
1088+
]
1089+
}
1090+
]
1091+
```
10981092

10991093

11001094
### `geojson`

0 commit comments

Comments
 (0)
0