@@ -980,6 +980,12 @@ Create different `segmentation` Analyzers to show the behavior of the different
980
980
981
981
<small>Introduced in: v3.10.0</small>
982
982
983
+ {% hint 'warning' %}
984
+ This feature is experimental and under active development.
985
+ The naming and interfaces may change at any time.
986
+ Execution times are not representative of the final product.
987
+ {% endhint %}
988
+
983
989
An Analyzer capable of classifying tokens in the input text.
984
990
985
991
It applies a user-provided [fastText](https://fasttext.cc/) word embedding model
@@ -1001,47 +1007,42 @@ The *properties* allowed for this Analyzer are an object with the following attr
1001
1007
1002
1008
Create and use a `classification` Analyzer with a stored "cooking" classifier to classify items.
1003
1009
1004
- {% arangoshexample examplevar= " examplevar" script= " script" result= " result" % }
1005
- @startDocuBlockInline analyzerClassification
1006
- @EXAMPLE_ARANGOSH_RUN {ClassificationAnalyzerModelSetup}
1007
- var fs = require (" fs" );
1008
- var internal = require (" internal" );
1009
- try {
1010
- fs .makeDirectory (" /tmp/embeddingsModels" );
1011
- } catch (e) {
1012
- }
1013
-
1014
- var destModelPath = " /tmp/embeddingsModels/model_cooking.bin" ;
1015
- if (! fs .exists (destModelPath)) {
1016
- var sourceModelPath = fs .join (internal .pathForTesting (" common" ), " aql" , " iresearch" , " model_cooking.bin" );
1017
- try {
1018
- fs .copyFile (sourceModelPath, destModelPath);
1019
- } catch (e) {}
1020
- }
1021
- @END_EXAMPLE_ARANGOSH_RUN
1022
-
1023
- @EXAMPLE_ARANGOSH_OUTPUT {analyzerClassification}
1024
- var analyzers = require (" @arangodb/analyzers" );
1025
- var classifier_single = analyzers .save (" classifier_single" , " classification" , { " model_location" : " /tmp/embeddingsModels/model_cooking.bin" }, [" frequency" , " norm" , " position" ]);
1026
- var classifier_top_two = analyzers .save (" classifier_double" , " classification" , { " model_location" : " /tmp/embeddingsModels/model_cooking.bin" , " top_k" : 2 }, [" frequency" , " norm" , " position" ]);
1027
- | db ._query (` LET str = 'Which baking dish is best to bake a banana bread ?'
1028
- | RETURN {
1029
- | "all": TOKENS(str, 'classifier_single'),
1030
- | "double": TOKENS(str, 'classifier_double')
1031
- | }
1032
- ` );
1033
- ~ analyzers .remove (classifier_single .name );
1034
- ~ analyzers .remove (classifier_top_two .name );
1035
- @END_EXAMPLE_ARANGOSH_OUTPUT
1036
- @endDocuBlock analyzerClassification
1037
- {% endarangoshexample % }
1038
- {% include arangoshexample .html id= examplevar script= script result= result % }
1010
+ ```
1011
+ arangosh> var analyzers = require("@arangodb/analyzers");
1012
+ arangosh> var classifier_single = analyzers.save("classifier_single", "classification", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin" }, ["frequency", "norm", "position"]);
1013
+ arangosh> var classifier_top_two = analyzers.save("classifier_double", "classification", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin", "top_k": 2 }, ["frequency", "norm", "position"]);
1014
+ arangosh> db._query(`LET str = 'Which baking dish is best to bake a banana bread ?'
1015
+ RETURN {
1016
+ "all": TOKENS(str, 'classifier_single'),
1017
+ "double": TOKENS(str, 'classifier_double')
1018
+ }
1019
+ `);
1020
+ ```
1039
1021
1022
+ ```
1023
+ [
1024
+ {
1025
+ "all" : [
1026
+ "__label__baking"
1027
+ ],
1028
+ "double" : [
1029
+ "__label__baking",
1030
+ "__label__bananas"
1031
+ ]
1032
+ }
1033
+ ]
1034
+ ```
1040
1035
1041
1036
### `nearest_neighbors`
1042
1037
1043
1038
<small>Introduced in: v3.10.0</small>
1044
1039
1040
+ {% hint 'warning' %}
1041
+ This feature is experimental and under active development.
1042
+ The naming and interfaces may change at any time.
1043
+ Execution times are not representative of the final product.
1044
+ {% endhint %}
1045
+
1045
1046
An Analyzer capable of finding nearest neighbors of tokens in the input.
1046
1047
1047
1048
It applies a user-provided [fastText](https://fasttext.cc/) word embedding model to retrieve nearest neighbor tokens in
@@ -1060,41 +1061,34 @@ The *properties* allowed for this Analyzer are an object with the following attr
1060
1061
1061
1062
Create and use a `nearest_neighbors` Analyzer with a stored "cooking" classifier to find similar terms.
1062
1063
1063
- {% arangoshexample examplevar= " examplevar" script= " script" result= " result" % }
1064
- @startDocuBlockInline analyzerNearestNeighbors
1065
- @EXAMPLE_ARANGOSH_RUN {NNAnalyzerModelSetup}
1066
- var fs = require (" fs" );
1067
- var internal = require (" internal" );
1068
- try {
1069
- fs .makeDirectory (" /tmp/embeddingsModels" );
1070
- } catch (e) {
1071
- }
1064
+ ```
1065
+ arangosh> var analyzers = require("@arangodb/analyzers");
1066
+ arangosh> var nn_single = analyzers.save("nn_single", "nearest_neighbors", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin" }, ["frequency", "norm", "position"]);
1067
+ arangosh> var nn_top_two = analyzers.save("nn_double", "nearest_neighbors", { "model_location": "/path_to_local_fasttext_model_directory/model_cooking.bin", "top_k": 2 }, ["frequency", "norm", "position"]);
1068
+ arangosh> db._query(`LET str = 'salt, oil'
1069
+ RETURN {
1070
+ "all": TOKENS(str, 'nn_single'),
1071
+ "double": TOKENS(str, 'nn_double')
1072
+ }
1073
+ `);
1074
+ ```
1072
1075
1073
- var destModelPath = " /tmp/embeddingsModels/model_cooking.bin" ;
1074
- if (! fs .exists (destModelPath)) {
1075
- var sourceModelPath = fs .join (internal .pathForTesting (" common" ), " aql" , " iresearch" , " model_cooking.bin" );
1076
- try {
1077
- fs .copyFile (sourceModelPath, destModelPath);
1078
- } catch (e) {}
1079
- }
1080
- @END_EXAMPLE_ARANGOSH_RUN
1081
-
1082
- @EXAMPLE_ARANGOSH_OUTPUT {analyzerNearestNeighbors}
1083
- var analyzers = require (" @arangodb/analyzers" );
1084
- var nn_single = analyzers .save (" nn_single" , " nearest_neighbors" , { " model_location" : " /tmp/embeddingsModels/model_cooking.bin" }, [" frequency" , " norm" , " position" ]);
1085
- var nn_top_two = analyzers .save (" nn_double" , " nearest_neighbors" , { " model_location" : " /tmp/embeddingsModels/model_cooking.bin" , " top_k" : 2 }, [" frequency" , " norm" , " position" ]);
1086
- | db ._query (` LET str = 'salt and oil'
1087
- | RETURN {
1088
- | "all": TOKENS(str, 'nn_single'),
1089
- | "double": TOKENS(str, 'nn_double')
1090
- | }
1091
- ` );
1092
- ~ analyzers .remove (nn_single .name );
1093
- ~ analyzers .remove (nn_top_two .name );
1094
- @END_EXAMPLE_ARANGOSH_OUTPUT
1095
- @endDocuBlock analyzerNearestNeighbors
1096
- {% endarangoshexample % }
1097
- {% include arangoshexample .html id= examplevar script= script result= result % }
1076
+ ```
1077
+ [
1078
+ {
1079
+ "all" : [
1080
+ "pepper",
1081
+ "olive"
1082
+ ],
1083
+ "double" : [
1084
+ "pepper",
1085
+ "table",
1086
+ "olive",
1087
+ "avocado"
1088
+ ]
1089
+ }
1090
+ ]
1091
+ ```
1098
1092
1099
1093
1100
1094
### `geojson`
0 commit comments