8000 Add json(b)_to_tsvector function · postgrespro/postgres_cluster@1c1791e · GitHub
[go: up one dir, main page]

Skip to content

Commit 1c1791e

Browse files
committed
Add json(b)_to_tsvector function
Jsonb has a complex nature so there isn't best-for-everything way to convert it to tsvector for full text search. Current to_tsvector(json(b)) suggests to convert only string values, but it's possible to index keys, numerics and even booleans value. To solve that json(b)_to_tsvector has a second required argument contained a list of desired types of json fields. Second argument is a jsonb scalar or array right now with possibility to add new options in a future. Bump catalog version Author: Dmitry Dolgov with some editorization by me Reviewed by: Teodor Sigaev Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
1 parent 529ab7b commit 1c1791e

File tree

10 files changed

+629
-49
lines changed
  • utils
  • test/regress
  • 10 files changed

    +629
    -49
    lines changed

    doc/src/sgml/func.sgml

    Lines changed: 20 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -9727,6 +9727,26 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
    97279727
    <entry><literal>to_tsvector('english', '{"a": "The Fat Rats"}'::json)</literal></entry>
    97289728
    <entry><literal>'fat':2 'rat':3</literal></entry>
    97299729
    </row>
    9730+
    <row>
    9731+
    <entry>
    9732+
    <literal><function>json(b)_to_tsvector(<optional> <replaceable class="parameter">config</replaceable> <type>regconfig</type>,
    9733+
    </optional> <replaceable class="parameter">document</replaceable> <type>json(b)</type>,
    9734+
    <replaceable class="parameter">filter</replaceable> <type>json(b)</type>)</function></literal>
    9735+
    </entry>
    9736+
    <entry><type>tsvector</type></entry>
    9737+
    <entry>
    9738+
    reduce each value in the document, specified by <replaceable class="parameter">filter</replaceable> to a <type>tsvector</type>,
    9739+
    and then concatenate those in document order to produce a single <type>tsvector</type>.
    9740+
    <replaceable class="parameter">filter</replaceable> is a jsonb array, that enumerates what kind of elements need to be included
    9741+
    into the resulting <type>tsvector</type>. Possible values for <replaceable class="parameter">filter</replaceable> are
    9742+
    <literal>"string"</literal> (to include all string values), <literal>"numeric"</literal> (to include all numeric values in the string format),
    9743+
    <literal>"boolean"</literal> (to include all boolean values in the string format "true"/"false"),
    9744+
    <literal>"key"</literal> (to include all keys) or <literal>"all"</literal> (to include all above). These values
    9745+
    can be combined together to include, e.g. all string and numeric values.
    9746+
    </entry>
    9747+
    <entry><literal>json_to_tsvector('english', '{"a": "The Fat Rats", "b": 123}'::json, '["string", "numeric"]')</literal></entry>
    9748+
    <entry><literal>'123':5 'fat':2 'rat':3</literal></entry>
    9749+
    </row>
    97309750
    <row>
    97319751
    <entry>
    97329752
    <indexterm>

    src/backend/tsearch/to_tsany.c

    Lines changed: 108 additions & 20 deletions
    Original file line numberDiff line numberDiff line change
    @@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
    267267
    PointerGetDatum(in)));
    268268
    }
    269269

    270-
    Datum
    271-
    jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
    270+
    /*
    271+
    * Worker function for jsonb(_string)_to_tsvector(_byid)
    272+
    */
    273+
    static TSVector
    274+
    jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
    272275
    {
    273-
    Oid cfgId = PG_GETARG_OID(0);
    274-
    Jsonb *jb = PG_GETARG_JSONB_P(1);
    275-
    TSVector result;
    276276
    TSVectorBuildState state;
    277277
    ParsedText prs;
    278278

    @@ -281,33 +281,77 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
    281281
    state.prs = &prs;
    282282
    state.cfgId = cfgId;
    283283

    284-
    iterate_jsonb_string_values(jb, &state, add_to_tsvector);
    284+
    iterate_jsonb_values(jb, flags, &state, add_to_tsvector);
    285285

    286-
    PG_FREE_IF_COPY(jb, 1);
    286+
    return make_tsvector(&prs);
    287+
    }
    288+
    289+
    Datum
    290+
    jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
    291+
    {
    292+
    Oid cfgId = PG_GETARG_OID(0);
    293+
    Jsonb *jb = PG_GETARG_JSONB_P(1);
    294+
    TSVector result;
    287295

    288-
    result = make_tsvector(&prs);
    296+
    result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
    297+
    PG_FREE_IF_COPY(jb, 1);
    289298

    290299
    PG_RETURN_TSVECTOR(result);
    291300
    }
    292301

    293302
    Datum
    294-
    jsonb_to_tsvector(PG_FUNCTION_ARGS)
    303+
    jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
    295304
    {
    296305
    Jsonb *jb = PG_GETARG_JSONB_P(0);
    297306
    Oid cfgId;
    307+
    TSVector result;
    298308

    299309
    cfgId = getTSCurrentConfig(true);
    300-
    PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid< A3E2 /span>,
    301-
    ObjectIdGetDatum(cfgId),
    302-
    JsonbPGetDatum(jb)));
    310+
    result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
    311+
    PG_FREE_IF_COPY(jb, 0);
    312+
    313+
    PG_RETURN_TSVECTOR(result);
    303314
    }
    304315

    305316
    Datum
    306-
    json_to_tsvector_byid(PG_FUNCTION_ARGS)
    317+
    jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
    307318
    {
    308319
    Oid cfgId = PG_GETARG_OID(0);
    309-
    text *json = PG_GETARG_TEXT_P(1);
    320+
    Jsonb *jb = PG_GETARG_JSONB_P(1);
    321+
    Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
    322+
    TSVector result;
    323+
    uint32 flags = parse_jsonb_index_flags(jbFlags);
    324+
    325+
    result = jsonb_to_tsvector_worker(cfgId, jb, flags);
    326+
    PG_FREE_IF_COPY(jb, 1);
    327+
    PG_FREE_IF_COPY(jbFlags, 2);
    328+
    329+
    PG_RETURN_TSVECTOR(result);
    330+
    }
    331+
    332+
    Datum
    333+
    jsonb_to_tsvector(PG_FUNCTION_ARGS)
    334+
    {< F438 /div>
    335+
    Jsonb *jb = PG_GETARG_JSONB_P(0);
    336+
    Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
    337+
    Oid cfgId;
    310338
    TSVector result;
    339+
    uint32 flags = parse_jsonb_index_flags(jbFlags);
    340+
    341+
    cfgId = getTSCurrentConfig(true);
    342+
    result = jsonb_to_tsvector_worker(cfgId, jb, flags);
    343+
    PG_FREE_IF_COPY(jb, 0);
    344+
    PG_FREE_IF_COPY(jbFlags, 1);
    345+
    346+
    PG_RETURN_TSVECTOR(result);
    347+
    }
    348+
    349+
    /*
    350+
    * Worker function for json(_string)_to_tsvector(_byid)
    351+
    */
    352+
    static TSVector
    353+
    json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags)
    354+
    {
    311355
    TSVectorBuildState state;
    312356
    ParsedText prs;
    313357

    @@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
    316360
    state.prs = &prs;
    317361
    state.cfgId = cfgId;
    318362

    319-
    iterate_json_string_values(json, &state, add_to_tsvector);
    363+
    iterate_json_values(json, flags, &state, add_to_tsvector);
    364+
    365+
    return make_tsvector(&prs);
    366+
    }
    367+
    368+
    Datum
    369+
    json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
    370+
    {
    371+
    Oid cfgId = PG_GETARG_OID(0);
    372+
    text *json = PG_GETARG_TEXT_P(1);
    373+
    TSVector result;
    320374

    375+
    result = json_to_tsvector_worker(cfgId, json, jtiString);
    321376
    PG_FREE_IF_COPY(json, 1);
    322377

    323-
    result = make_tsvector(&prs);
    378+
    PG_RETURN_TSVECTOR(result);
    379+
    }
    380+
    381+
    Datum
    382+
    json_string_to_tsvector(PG_FUNCTION_ARGS)
    383+
    {
    384+
    text *json = PG_GETARG_TEXT_P(0);
    385+
    Oid cfgId;
    386+
    TSVector result;
    387+
    388+
    cfgId = getTSCurrentConfig(true);
    389+
    result = json_to_tsvector_worker(cfgId, json, jtiString);
    390+
    PG_FREE_IF_COPY(json, 0);
    391+
    392+
    PG_RETURN_TSVECTOR(result);
    393+
    }
    394+
    395+
    Datum
    396+
    json_to_tsvector_byid(PG_FUNCTION_ARGS)
    397+
    {
    398+
    Oid cfgId = PG_GETARG_OID(0);
    399+
    text *json = PG_GETARG_TEXT_P(1);
    400+
    Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
    401+
    TSVector result;
    402+
    uint32 flags = parse_jsonb_index_flags(jbFlags);
    403+
    404+
    result = json_to_tsvector_worker(cfgId, json, flags);
    405+
    PG_FREE_IF_COPY(json, 1);
    406+
    PG_FREE_IF_COPY(jbFlags, 2);
    324407

    325408
    PG_RETURN_TSVECTOR(result);
    326409
    }
    @@ -329,12 +412,17 @@ Datum
    329412
    json_to_tsvector(PG_FUNCTION_ARGS)
    330413
    {
    331414
    text *json = PG_GETARG_TEXT_P(0);
    415+
    Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
    332416
    Oid cfgId;
    417+
    TSVector result;
    418+
    uint32 flags = parse_jsonb_index_flags(jbFlags);
    333419

    334420
    cfgId = getTSCurrentConfig(true);
    335-
    PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
    336-
    ObjectIdGetDatum(cfgId),
    337-
    PointerGetDatum(json)));
    421+
    result = json_to_tsvector_worker(cfgId, json, flags);
    422+
    PG_FREE_IF_COPY(json, 0);
    423+
    PG_FREE_IF_COPY(jbFlags, 1);
    424+
    425+
    PG_RETURN_TSVECTOR(result);
    338426
    }
    339427

    340428
    /*
    @@ -353,7 +441,7 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
    353441
    * First time through: initialize words array to a reasonable size.
    354442
    * (parsetext() will realloc it bigger as needed.)
    355443
    */
    356-
    prs->lenwords = Max(elem_len / 6, 64);
    444+
    prs->lenwords = 16;
    357445
    prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
    358446
    prs->curwords = 0;
    359447
    prs->pos = 0;

    0 commit comments

    Comments
     (0)
    0