Fix various issues with ALTER TEXT SEARCH CONFIGURATION · postgrespro/postgres@f33e832 · GitHub
[go: up one dir, main page]

Skip to content
  • Commit f33e832

    Browse files
    committed
    Fix various issues with ALTER TEXT SEARCH CONFIGURATION
    This commit addresses a set of issues when changing token type mappings in a text search configuration when using duplicated token names:
    - ADD MAPPING would fail on insertion because of a constraint failure after inserting the same mapping.
    - ALTER MAPPING with an "overridden" configuration failed with "tuple already updated by self" when the token mappings are removed.
    - DROP MAPPING failed with "tuple already updated by self", as in the previous case, but in a different code path.

    The code is refactored so the token names (with their numbers) are handled as a List with unique members rather than an array with numbers, ensuring that no duplicates interfere with the catalog inserts, updates and deletes. The list is generated by getTokenTypes(), with the same error handling as previously while duplicated tokens are discarded from the list used to work on the catalogs.

    Regression tests are expanded to cover much more ground for the cases fixed by this commit, as there was no coverage for the code touched in this commit. A bit more is done regarding the fact that a token name not supported by a configuration's parser should result in an error even if IF EXISTS is used in a DROP MAPPING clause. This is implied in the code but there was no coverage for that, and it was very easy to miss.

    These issues exist since at least their introduction in core with 140d4eb, so backpatch all the way down.

    Reported-by: Alexander Lakhin
    Author: Tender Wang, Michael Paquier
    Discussion: https://postgr.es/m/18310-1eb233c5908189c8@postgresql.org
    Backpatch-through: 12
    1 parent 52a9af3 commit f33e832

    File tree

    4 files changed

    +132
    -30
    lines changed

    4 files changed

    +132
    -30
    lines changed

    src/backend/commands/tsearchcmds.c

    Lines changed: 67 additions & 30 deletions
    Original file line numberDiff line numberDiff line change
    @@ -48,6 +48,12 @@
    4848
    #include "utils/rel.h"
    4949
    #include "utils/syscache.h"
    5050

    51+
    /* Single entry of List returned by getTokenTypes() */
    52+
    typedef struct
    53+
    {
    54+
    int num; /* token type number */
    55+
    char *name; /* token type name */
    56+
    } TSTokenTypeItem;
    5157

    5258
    static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
    5359
    HeapTuple tup, Relation relMap);
    @@ -1192,22 +1198,45 @@ AlterTSConfiguration(AlterTSConfigurationStmt *stmt)
    11921198
    }
    11931199

    11941200
    /*
    1195-
    * Translate a list of token type names to an array of token type numbers
    1201+
    * Check whether a token type name is a member of a TSTokenTypeItem list.
    11961202
    */
    1197-
    static int *
    1203+
    static bool
    1204+
    tstoken_list_member(char *token_name, List *tokens)
    1205+
    {
    1206+
    ListCell *c;
    1207+
    bool found = false;
    1208+
    1209+
    foreach(c, tokens)
    1210+
    {
    1211+
    TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
    1212+
    1213+
    if (strcmp(token_name, ts->name) == 0)
    1214+
    {
    1215+
    found = true;
    1216+
    break;
    1217+
    }
    1218+
    }
    1219+
    1220+
    return found;
    1221+
    }
    1222+
    1223+
    /*
    1224+
    * Translate a list of token type names to a list of unique TSTokenTypeItem.
    1225+
    *
    1226+
    * Duplicated entries in tokennames are discarded from the returned list.
    1227+
    */
    1228+
    static List *
    11981229
    getTokenTypes(Oid prsId, List *tokennames)
    11991230
    {
    12001231
    TSParserCacheEntry *prs = lookup_ts_parser_cache(prsId);
    12011232
    LexDescr *list;
    1202-
    int *res,
    1203-
    i,
    1204-
    ntoken;
    1233+
    List *result = NIL;
    1234+
    int ntoken;
    12051235
    ListCell *tn;
    12061236

    12071237
    ntoken = list_length(tokennames);
    12081238
    if (ntoken == 0)
    1209-
    return NULL;
    1210-
    res = (int *) palloc(sizeof(int) * ntoken);
    1239+
    return NIL;
    12111240

    12121241
    if (!OidIsValid(prs->lextypeOid))
    12131242
    elog(ERROR, "method lextype isn't defined for text search parser %u",
    @@ -1217,19 +1246,26 @@ getTokenTypes(Oid prsId, List *tokennames)
    12171246
    list = (LexDescr *) DatumGetPointer(OidFunctionCall1(prs->lextypeOid,
    12181247
    (Datum) 0));
    12191248

    1220-
    i = 0;
    12211249
    foreach(tn, tokennames)
    12221250
    {
    12231251
    String *val = lfirst_node(String, tn);
    12241252
    bool found = false;
    12251253
    int j;
    12261254

    1255+
    /* Skip if this token is already in the result */
    1256+
    if (tstoken_list_member(strVal(val), result))
    1257+
    continue;
    1258+
    12271259
    j = 0;
    12281260
    while (list && list[j].lexid)
    12291261
    {
    12301262
    if (strcmp(strVal(val), list[j].alias) == 0)
    12311263
    {
    1232-
    res[i] = list[j].lexid;
    1264+
    TSTokenTypeItem *ts = (TSTokenTypeItem *) palloc0(sizeof(TSTokenTypeItem));
    1265+
    1266+
    ts->num = list[j].lexid;
    1267+
    ts->name = pstrdup(strVal(val));
    1268+
    result = lappend(result, ts);
    12331269
    found = true;
    12341270
    break;
    12351271
    }
    @@ -1240,10 +1276,9 @@ getTokenTypes(Oid prsId, List *tokennames)
    12401276
    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    12411277
    errmsg("token type \"%s\" does not exist",
    12421278
    strVal(val))));
    1243-
    i++;
    12441279
    }
    12451280

    1246-
    return res;
    1281+
    return result;
    12471282
    }
    12481283

    12491284
    /*
    @@ -1261,8 +1296,8 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
    12611296
    int i;
    12621297
    int j;
    12631298
    Oid prsId;
    1264-
    int *tokens,
    1265-
    ntoken;
    1299+
    List *tokens = NIL;
    1300+
    int ntoken;
    12661301
    Oid *dictIds;
    12671302
    int ndict;
    12681303
    ListCell *c;
    @@ -1273,23 +1308,25 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
    12731308
    prsId = tsform->cfgparser;
    12741309

    12751310
    tokens = getTokenTypes(prsId, stmt->tokentype);
    1276-
    ntoken = list_length(stmt->tokentype);
    1311+
    ntoken = list_length(tokens);
    12771312

    12781313
    if (stmt->override)
    12791314
    {
    12801315
    /*
    12811316
    * delete maps for tokens if they exist and command was ALTER
    12821317
    */
    1283-
    for (i = 0; i < ntoken; i++)
    1318+
    foreach(c, tokens)
    12841319
    {
    1320+
    TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
    1321+
    12851322
    ScanKeyInit(&skey[0],
    12861323
    Anum_pg_ts_config_map_mapcfg,
    12871324
    BTEqualStrategyNumber, F_OIDEQ,
    12881325
    ObjectIdGetDatum(cfgId));
    12891326
    ScanKeyInit(&skey[1],
    12901327
    Anum_pg_ts_config_map_maptokentype,
    12911328
    BTEqualStrategyNumber, F_INT4EQ,
    1292-
    Int32GetDatum(tokens[i]));
    1329+
    Int32GetDatum(ts->num));
    12931330

    12941331
    scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
    12951332
    NULL, 2, skey);
    @@ -1346,9 +1383,11 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
    13461383
    {
    13471384
    bool tokmatch = false;
    13481385

    1349-
    for (j = 0; j < ntoken; j++)
    1386+
    foreach(c, tokens)
    13501387
    {
    1351-
    if (cfgmap->maptokentype == tokens[j])
    1388+
    TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
    1389+
    1390+
    if (cfgmap->maptokentype == ts->num)
    13521391
    {
    13531392
    tokmatch = true;
    13541393
    break;
    @@ -1401,8 +1440,10 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
    14011440
    /*
    14021441
    * Insertion of new entries
    14031442
    */
    1404-
    for (i = 0; i < ntoken; i++)
    1443+
    foreach(c, tokens)
    14051444
    {
    1445+
    TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
    1446+
    14061447
    for (j = 0; j < ndict; j++)
    14071448
    {
    14081449
    ExecClearTuple(slot[slotCount]);
    @@ -1411,7 +1452,7 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
    14111452
    slot[slotCount]->tts_tupleDescriptor->natts * sizeof(bool));
    14121453

    14131454
    slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapcfg - 1] = ObjectIdGetDatum(cfgId);
    1414-
    slot[slotCount]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(tokens[i]);
    1455+
    slot[slotCount]->tts_values[Anum_pg_ts_config_map_maptokentype - 1] = Int32GetDatum(ts->num);
    14151456
    slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapseqno - 1] = Int32GetDatum(j + 1);
    14161457
    slot[slotCount]->tts_values[Anum_pg_ts_config_map_mapdict - 1] = ObjectIdGetDatum(dictIds[j]);
    14171458

    @@ -1455,9 +1496,8 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
    14551496
    ScanKeyData skey[2];
    14561497
    SysScanDesc scan;
    14571498
    HeapTuple maptup;
    1458-
    int i;
    14591499
    Oid prsId;
    1460-
    int *tokens;
    1500+
    List *tokens = NIL;
    14611501
    ListCell *c;
    14621502

    14631503
    tsform = (Form_pg_ts_config) GETSTRUCT(tup);
    @@ -1466,10 +1506,9 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
    14661506

    14671507
    tokens = getTokenTypes(prsId, stmt->tokentype);
    14681508

    1469-
    i = 0;
    1470-
    foreach(c, stmt->tokentype)
    1509+
    foreach(c, tokens)
    14711510
    {
    1472-
    String *val = lfirst_node(String, c);
    1511+
    TSTokenTypeItem *ts = (TSTokenTypeItem *) lfirst(c);
    14731512
    bool found = false;
    14741513

    14751514
    ScanKeyInit(&skey[0],
    @@ -1479,7 +1518,7 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
    14791518
    ScanKeyInit(&skey[1],
    14801519
    Anum_pg_ts_config_map_maptokentype,
    14811520
    BTEqualStrategyNumber, F_INT4EQ,
    1482-
    Int32GetDatum(tokens[i]));
    1521+
    Int32GetDatum(ts->num));
    14831522

    14841523
    scan = systable_beginscan(relMap, TSConfigMapIndexId, true,
    14851524
    NULL, 2, skey);
    @@ -1499,17 +1538,15 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
    14991538
    ereport(ERROR,
    15001539
    (errcode(ERRCODE_UNDEFINED_OBJECT),
    15011540
    errmsg("mapping for token type \"%s\" does not exist",
    1502-
    strVal(val))));
    1541+
    ts->name)));
    15031542
    }
    15041543
    else
    15051544
    {
    15061545
    ereport(NOTICE,
    15071546
    (errmsg("mapping for token type \"%s\" does not exist, skipping",
    1508-
    strVal(val))));
    1547+
    ts->name)));
    15091548
    }
    15101549
    }
    1511-
    1512-
    i++;
    15131550
    }
    15141551

    15151552
    EventTriggerCollectAlterTSConfig(stmt, cfgId, NULL, 0);

    src/test/regress/expected/tsdicts.out

    Lines changed: 34 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -687,3 +687,37 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case
    687687
    "AffFile" = ispell_sample
    688688
    );
    689689
    ERROR: unrecognized Ispell parameter: "DictFile"
    690+
    -- Test grammar for configurations
    691+
    CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
    692+
    -- Overriden mapping change with duplicated tokens.
    693+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    694+
    ALTER MAPPING FOR word, word WITH ispell;
    695+
    -- Not a token supported by the configuration's parser, fails.
    696+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    697+
    DROP MAPPING FOR not_a_token, not_a_token;
    698+
    ERROR: token type "not_a_token" does not exist
    699+
    -- Not a token supported by the configuration's parser, fails even
    700+
    -- with IF EXISTS.
    701+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    702+
    DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
    703+
    ERROR: token type "not_a_token" does not exist
    704+
    -- Token supported by the configuration's parser, succeeds.
    705+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    706+
    DROP MAPPING FOR word, word;
    707+
    -- No mapping for token supported by the configuration's parser, fails.
    708+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    709+
    DROP MAPPING FOR word;
    710+
    ERROR: mapping for token type "word" does not exist
    711+
    -- Token supported by the configuration's parser, cannot be found,
    712+
    -- succeeds with IF EXISTS.
    713+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    714+
    DROP MAPPING IF EXISTS FOR word, word;
    715+
    NOTICE: mapping for token type "word" does not exist, skipping
    716+
    -- Re-add mapping, with duplicated tokens supported by the parser.
    717+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    718+
    ADD MAPPING FOR word, word WITH ispell;
    719+
    -- Not a token supported by the configuration's parser, fails.
    720+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    721+
    ADD MAPPING FOR not_a_token WITH ispell;
    722+
    ERROR: token type "not_a_token" does not exist
    723+
    DROP TEXT SEARCH CONFIGURATION dummy_tst;

    src/test/regress/sql/tsdicts.sql

    Lines changed: 30 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -251,3 +251,33 @@ CREATE TEXT SEARCH DICTIONARY tsdict_case
    251251
    "DictFile" = ispell_sample,
    252252
    "AffFile" = ispell_sample
    253253
    );
    254+
    255+
    -- Test grammar for configurations
    256+
    CREATE TEXT SEARCH CONFIGURATION dummy_tst (COPY=english);
    257+
    -- Overriden mapping change with duplicated tokens.
    258+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    259+
    ALTER MAPPING FOR word, word WITH ispell;
    260+
    -- Not a token supported by the configuration's parser, fails.
    261+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    262+
    DROP MAPPING FOR not_a_token, not_a_token;
    263+
    -- Not a token supported by the configuration's parser, fails even
    264+
    -- with IF EXISTS.
    265+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    266+
    DROP MAPPING IF EXISTS FOR not_a_token, not_a_token;
    267+
    -- Token supported by the configuration's parser, succeeds.
    268+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    269+
    DROP MAPPING FOR word, word;
    270+
    -- No mapping for token supported by the configuration's parser, fails.
    271+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    272+
    DROP MAPPING FOR word;
    273+
    -- Token supported by the configuration's parser, cannot be found,
    274+
    -- succeeds with IF EXISTS.
    275+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    276+
    DROP MAPPING IF EXISTS FOR word, word;
    277+
    -- Re-add mapping, with duplicated tokens supported by the parser.
    278+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    279+
    ADD MAPPING FOR word, word WITH ispell;
    280+
    -- Not a token supported by the configuration's parser, fails.
    281+
    ALTER TEXT SEARCH CONFIGURATION dummy_tst
    282+
    ADD MAPPING FOR not_a_token WITH ispell;
    283+
    DROP TEXT SEARCH CONFIGURATION dummy_tst;

    src/tools/pgindent/typedefs.list

    Lines changed: 1 addition & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -2731,6 +2731,7 @@ TSQuerySign
    27312731
    TSReadPointer
    27322732
    TSTemplateInfo
    27332733
    TSTernaryValue
    2734+
    TSTokenTypeItem
    27342735
    TSTokenTypeStorage
    27352736
    TSVector
    27362737
    TSVectorBuildState

    0 commit comments

    Comments
     (0)
    0