ArangoSearch: Upgrade to Snowball 2 (additional stemmer languages) by Simran-B · Pull Request #10973 · arangodb/arangodb · GitHub
[go: up one dir, main page]

Skip to content

ArangoSearch: Upgrade to Snowball 2 (additional stemmer languages) #10973

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 62 additions & 6 deletions 3rdParty/iresearch.build/external/snowball/libstemmer/modules.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
/* /home/user/git-root/arangodb-iresearch/build/qtcreator/Debug/3rdParty/iresearch/external/snowball/libstemmer/modules.h: List of stemming modules.
/* /work/ArangoDB/build/3rdParty/iresearch/external/snowball/libstemmer/modules.h: List of stemming modules.
*
* This file is generated by mkmodules.pl from a list of module names.
* Do not edit manually.
*
* Modules included by this file are: arabic, danish, dutch, english, finnish,
* french, german, hungarian, irish, italian, norwegian, porter, portuguese,
* romanian, russian, spanish, swedish, tamil, turkish
* Modules included by this file are: arabic, basque, catalan, danish, dutch,
* english, finnish, french, german, greek, hindi, hungarian, indonesian,
* irish, italian, lithuanian, nepali, norwegian, porter, portuguese,
* romanian, russian, serbian, spanish, swedish, tamil, turkish
*/

#include "../libstemmer/stem_UTF_8_arabic.h"
#include "../libstemmer/stem_ISO_8859_1_basque.h"
#include "../libstemmer/stem_UTF_8_basque.h"
#include "../libstemmer/stem_ISO_8859_1_catalan.h"
#include "../libstemmer/stem_UTF_8_catalan.h"
#include "../libstemmer/stem_ISO_8859_1_danish.h"
#include "../libstemmer/stem_UTF_8_danish.h"
#include "../libstemmer/stem_ISO_8859_1_dutch.h"
Expand All @@ -21,12 +26,18 @@
#include "../libstemmer/stem_UTF_8_french.h"
#include "../libstemmer/stem_ISO_8859_1_german.h"
#include "../libstemmer/stem_UTF_8_german.h"
#include "../libstemmer/stem_UTF_8_greek.h"
#include "../libstemmer/stem_UTF_8_hindi.h"
#include "../libstemmer/stem_ISO_8859_2_hungarian.h"
#include "../libstemmer/stem_UTF_8_hungarian.h"
#include "../libstemmer/stem_ISO_8859_1_indonesian.h"
#include "../libstemmer/stem_UTF_8_indonesian.h"
#include "../libstemmer/stem_ISO_8859_1_irish.h"
#include "../libstemmer/stem_UTF_8_irish.h"
#include "../libstemmer/stem_ISO_8859_1_italian.h"
#include "../libstemmer/stem_UTF_8_italian.h"
#include "../libstemmer/stem_UTF_8_lithuanian.h"
#include "../libstemmer/stem_UTF_8_nepali.h"
#include "../libstemmer/stem_ISO_8859_1_norwegian.h"
#include "../libstemmer/stem_UTF_8_norwegian.h"
#include "../libstemmer/stem_ISO_8859_1_porter.h"
Expand All @@ -37,6 +48,7 @@
#include "../libstemmer/stem_UTF_8_romanian.h"
#include "../libstemmer/stem_KOI8_R_russian.h"
#include "../libstemmer/stem_UTF_8_russian.h"
#include "../libstemmer/stem_UTF_8_serbian.h"
#include "../libstemmer/stem_ISO_8859_1_spanish.h"
#include "../libstemmer/stem_UTF_8_spanish.h"
#include "../libstemmer/stem_ISO_8859_1_swedish.h"
Expand All @@ -56,7 +68,7 @@ struct stemmer_encoding {
const char * name;
stemmer_encoding_t enc;
};
static struct stemmer_encoding encodings[] = {
static const struct stemmer_encoding encodings[] = {
{"ISO_8859_1", ENC_ISO_8859_1},
{"ISO_8859_2", ENC_ISO_8859_2},
{"KOI8_R", ENC_KOI8_R},
Expand All @@ -71,10 +83,20 @@ struct stemmer_modules {
void (*close)(struct SN_env *);
int (*stem)(struct SN_env *);
};
static struct stemmer_modules modules[] = {
static const struct stemmer_modules modules[] = {
{"ar", ENC_UTF_8, arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem},
{"ara", ENC_UTF_8, arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem},
{"arabic", ENC_UTF_8, arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem},
{"baq", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
{"baq", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
{"basque", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
{"basque", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
{"ca", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
{"ca", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
{"cat", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
{"cat", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
{"catalan", ENC_ISO_8859_1, catalan_ISO_8859_1_create_env, catalan_ISO_8859_1_close_env, catalan_ISO_8859_1_stem},
{"catalan", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
{"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
{"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
{"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
Expand All @@ -89,6 +111,8 @@ static struct stemmer_modules modules[] = {
{"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
{"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
{"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
{"el", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
{"ell", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
{"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
{"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
{"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
Expand All @@ -99,6 +123,10 @@ static struct stemmer_modules modules[] = {
{"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
{"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"eu", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
{"eu", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
{"eus", ENC_ISO_8859_1, basque_ISO_8859_1_create_env, basque_ISO_8859_1_close_env, basque_ISO_8859_1_stem},
{"eus", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem},
{"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
{"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
{"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
Expand All @@ -121,12 +149,23 @@ static struct stemmer_modules modules[] = {
{"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
{"gle", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
{"gle", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
{"gre", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
{"greek", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem},
{"hi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
{"hin", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
{"hindi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem},
{"hu", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
{"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
{"hun", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
{"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
{"hungarian", ENC_ISO_8859_2, hungarian_ISO_8859_2_create_env, hungarian_ISO_8859_2_close_env, hungarian_ISO_8859_2_stem},
{"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
{"id", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
{"id", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
{"ind", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
{"ind", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
{"indonesian", ENC_ISO_8859_1, indonesian_ISO_8859_1_create_env, indonesian_ISO_8859_1_close_env, indonesian_ISO_8859_1_stem},
{"indonesian", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem},
{"irish", ENC_ISO_8859_1, irish_ISO_8859_1_create_env, irish_ISO_8859_1_close_env, irish_ISO_8859_1_stem},
{"irish", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem},
{"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
Expand All @@ -135,6 +174,12 @@ static struct stemmer_modules modules[] = {
{"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
{"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
{"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
{"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
{"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
{"lt", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem},
{"ne", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
{"nep", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
{"nepali", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem},
{"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
{"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
{"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
Expand Down Expand Up @@ -167,10 +212,13 @@ static struct stemmer_modules modules[] = {
{"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
{"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
{"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
{"serbian", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
{"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
{"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
{"sr", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"srp", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem},
{"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
{"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
{"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
Expand All @@ -187,20 +235,28 @@ static struct stemmer_modules modules[] = {
};
static const char * algorithm_names[] = {
"arabic",
"basque",
"catalan",
"danish",
"dutch",
"english",
"finnish",
"french",
"german",
"greek",
"hindi",
"hungarian",
"indonesian",
"irish",
"italian",
"lithuanian",
"nepali",
"norwegian",
"porter",
"portuguese",
"romanian",
"russian",
"serbian",
"spanish",
"swedish",
"tamil",
Expand Down
65 changes: 26 additions & 39 deletions 3rdParty/iresearch/external/snowball/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ PROJECT(snowball C)

cmake_minimum_required(VERSION 2.8)

SET(SNOWBALL_VERSION_MAJOR 1)
SET(SNOWBALL_VERSION_MAJOR 2)
SET(SNOWBALL_VERSION_MINOR 0)
SET(SNOWBALL_VERSION_PATCH 0)

Expand Down Expand Up @@ -42,7 +42,7 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
ENDIF(ENABLE_OPTIMIZATION MATCHES "ON")
ELSE()
set(SUN_BUILD32 "-m32")
set(SUN_BUILD64 "-m64")
set(SUN_BUILD64 "-m64")
ENDIF()
IF (BUILD_CPU_MODE STREQUAL "32")
message (STATUS "Building 32-bit mode on Solaris")
Expand Down Expand Up @@ -141,19 +141,24 @@ ENDIF(ENABLE_STATIC MATCHES "ON")

# End of configuration
SET(LIBSTEM_ALGORITHMS
arabic
danish dutch english finnish french german hungarian
irish italian
norwegian porter portuguese romanian
russian spanish swedish tamil turkish
arabic basque catalan danish dutch english
finnish french german hungarian indonesian
irish italian norwegian porter portuguese
romanian russian spanish swedish tamil turkish
)
SET(KOI8_ALGORITHMS russian)
SET(ISO_8859_1_ALGORITHMS
danish dutch english finnish french german irish
italian norwegian porter portuguese spanish swedish
basque catalan danish dutch english finnish
french german indonesian irish italian
norwegian porter portuguese spanish swedish
)
SET(ISO_8859_2_ALGORITHMS
hungarian romanian
)
SET(OTHER_ALGORITHMS
german2 greek hindi kraaij_pohlmann
lithuanian lovins nepali serbian
)
SET(ISO_8859_2_ALGORITHMS hungarian romanian)
SET(OTHER_ALGORITHMS german2 kraaij_pohlmann lovins)
SET(ALL_ALGORITHMS ${LIBSTEM_ALGORITHMS} ${OTHER_ALGORITHMS})

SET(COMPILER_SOURCES
Expand All @@ -162,9 +167,11 @@ SET(COMPILER_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/compiler/analyser.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/driver.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_csharp.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_go.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_java.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_jsx.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_js.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_pascal.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_python.c
${CMAKE_CURRENT_SOURCE_DIR}/compiler/generator_rust.c
)
Expand All @@ -175,11 +182,6 @@ SET(SNOWBALL_RUNTIME
${CMAKE_CURRENT_SOURCE_DIR}/runtime/api.c
${CMAKE_CURRENT_SOURCE_DIR}/runtime/utilities.c
)
SET(LIBSTEMMER_SOURCES libstemmer/libstemmer.c)
SET(LIBSTEMMER_UTF8_SOURCES libstemmer/libstemmer_utf8.c)
#LIBSTEMMER_UTF8_SOURCES = libstemmer/libstemmer_utf8.c
#LIBSTEMMER_HEADERS = include/libstemmer.h libstemmer/modules.h libstemmer/modules_utf8.h
#LIBSTEMMER_EXTRA = libstemmer/modules.txt libstemmer/modules_utf8.txt libstemmer/libstemmer_c.in

SET(STEMWORDS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/examples/stemwords.c)
SET(MODULES_H "modules.h")
Expand All @@ -188,40 +190,24 @@ CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/libstemmer_c.in ${CMAKE_CU
MACRO(gen_stem IN ENCODING)
FOREACH(_it ${IN})
SET(_base "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/stem_${ENCODING}_${_it}")
SET(_header "${_base}.h")
SET(_source "${_base}.c")
STRING(REPLACE "UTF_8" "Unicode" _in_enc "${ENCODING}")
SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_${_in_enc}.sbl")
IF(${_in_enc} STREQUAL "Unicode" AND NOT EXISTS ${_input})
ADD_CUSTOM_COMMAND(OUTPUT ${_source}
COMMAND $<TARGET_FILE:snowball> "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}/stem_ISO_8859_1.sbl" -o ${_base} -eprefix ${_it}_${ENCODING}_ -r ${CMAKE_CURRENT_SOURCE_DIR}/runtime -u
DEPENDS snowball)
LIST(APPEND STEMMER_SOURCES ${_source})

ELSE()
IF(EXISTS "${_input}")
ADD_CUSTOM_COMMAND(OUTPUT ${_source}
COMMAND $<TARGET_FILE:snowball> ${_input} -o ${_base} -eprefix ${_it}_${ENCODING}_ -r ${CMAKE_CURRENT_SOURCE_DIR}/runtime -u
DEPENDS snowball)
LIST(APPEND STEMMER_SOURCES ${_source})
ENDIF()
ENDIF()
SET(_input "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/${_it}.sbl")
ADD_CUSTOM_COMMAND(OUTPUT ${_source}
COMMAND $<TARGET_FILE:snowball> ${_input} -o ${_base} -eprefix ${_it}_${ENCODING}_ -r ${CMAKE_CURRENT_SOURCE_DIR}/runtime -u
DEPENDS snowball)
LIST(APPEND STEMMER_SOURCES ${_source})
ENDFOREACH()
ENDMACRO()

INCLUDE_DIRECTORIES("include")
INCLUDE_DIRECTORIES("${CMAKE_CURRENT_BINARY_DIR}/libstemmer")

# NOTE: modules.h gets overwritten by the static file from the iresearch.build folder
ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/libstemmer
COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h "libstemmer" ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules.txt ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/mkinc.mak
)

ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules_utf8.h
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/libstemmer
COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/mkmodules.pl ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h "libstemmer" ${CMAKE_CURRENT_SOURCE_DIR}/libstemmer/modules_utf8.txt ${CMAKE_CURRENT_BINARY_DIR}/libstemmer/mkinc_utf8.mak utf8
)

ADD_CUSTOM_TARGET(modules DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/modules.h")

SET(STEMMER_SOURCES "${CMAKE_CURRENT_BINARY_DIR}/libstemmer/libstemmer.c")
Expand All @@ -233,6 +219,7 @@ gen_stem("${LIBSTEM_ALGORITHMS}" "UTF_8")
gen_stem("${KOI8_ALGORITHMS}" "KOI8_R")
gen_stem("${ISO_8859_1_ALGORITHMS}" "ISO_8859_1")
gen_stem("${ISO_8859_2_ALGORITHMS}" "ISO_8859_2")
gen_stem("${OTHER_ALGORITHMS}" "UTF_8")

INCLUDE_DIRECTORIES(
${CMAKE_CURRENT_SOURCE_DIR}/libstemmer
Expand Down
22 changes: 22 additions & 0 deletions 3rdParty/snowball/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
*.o
/algorithms.mk
/libstemmer/libstemmer.c
/libstemmer/libstemmer_utf8.c
/libstemmer/mkinc.mak
/libstemmer/mkinc_utf8.mak
/libstemmer/modules.h
/libstemmer/modules_utf8.h
/snowball
/src_c
/stemwords
/dist
/java/org/tartarus/snowball/ext/
/js_out
/python_check
/python_out
*.generated.cs
/rust/Cargo.lock
/rust/src/snowball/algorithms/*.rs
/rust/target/
/go/algorithms/
/go/stemwords/algorithms.go
Loading
0