use sorted + unique arrays, fix rum_anyarray_consistent() · postgrespro/rum@e4247ef · GitHub
[go: up one dir, main page]

Skip to content

Commit e4247ef

Browse files
committed
use sorted + unique arrays, fix rum_anyarray_consistent()
1 parent f77cbce commit e4247ef

File tree

1 file changed

+67
-52
lines changed

1 file changed

+67
-52
lines changed

src/rum_arr_utils.c

Lines changed: 67 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ static void sortSimpleArray(SimpleArray *s, int32 direction);
136136
static void uniqSimpleArray(SimpleArray *s, bool onlyDuplicate);
137137

138138
static int32 getNumOfIntersect(SimpleArray *sa, SimpleArray *sb);
139-
static float8 getSimilarity(SimpleArray *sa, SimpleArray *sb, int32 intersect);
139+
static float8 getSimilarity(SimpleArray *sa, SimpleArray *sb, int32 intersection);
140140

141141

142142

@@ -164,30 +164,26 @@ Datum
164164
rum_extract_anyarray(PG_FUNCTION_ARGS)
165165
{
166166
/* Make copy of array input to ensure it doesn't disappear while in use */
167-
ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0);
167+
ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0);
168+
SimpleArray *sa;
169+
AnyArrayTypeInfo *info;
168170

169-
Datum *entries;
170-
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
171-
bool **entries_isnull = (bool **) PG_GETARG_POINTER(2);
171+
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
172172

173-
Datum **addInfo = (Datum **) PG_GETARG_POINTER(3);
174-
bool **addInfoIsNull = (bool **) PG_GETARG_POINTER(4);
173+
Datum **addInfo = (Datum **) PG_GETARG_POINTER(3);
174+
bool **addInfoIsNull = (bool **) PG_GETARG_POINTER(4);
175175

176-
int16 elmlen;
177-
bool elmbyval;
178-
char elmalign;
179-
int i;
176+
int i;
180177

181178
CHECKARRVALID(array);
182179

183-
get_typlenbyvalalign(ARR_ELEMTYPE(array),
184-
&elmlen, &elmbyval, &elmalign);
180+
info = getAnyArrayTypeInfoCached(fcinfo, ARR_ELEMTYPE(array));
185181

186-
deconstruct_array(array,
187-
ARR_ELEMTYPE(array),
188-
elmlen, elmbyval, elmalign,
189-
&entries, entries_isnull, nentries);
182+
sa = Array2SimpleArray(info, array);
183+
sortSimpleArray(sa, 1);
184+
uniqSimpleArray(sa, false);
190185

186+
*nentries = sa->nelems;
191187
*addInfo = (Datum *) palloc(*nentries * sizeof(Datum));
192188
*addInfoIsNull = (bool *) palloc(*nentries * sizeof(bool));
193189

@@ -199,40 +195,32 @@ rum_extract_anyarray(PG_FUNCTION_ARGS)
199195
}
200196

201197
/* we should not free array, entries[i] points into it */
202-
PG_RETURN_POINTER(entries);
198+
PG_RETURN_POINTER(sa->elems);
203199
}
204200

205201
/* Enhanced version of ginqueryarrayextract() */
206202
Datum
207203
rum_extract_anyarray_query(PG_FUNCTION_ARGS)
208204
{
209205
/* Make copy of array input to ensure it doesn't disappear while in use */
210-
ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0);
211-
212-
Datum *entries;
213-
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
214-
bool **entries_isnull = (bool **) PG_GETARG_POINTER(5);
215-
216-
StrategyNumber strategy = PG_GETARG_UINT16(2);
217-
218-
/* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
219-
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
206+
ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0);
207+
SimpleArray *sa;
208+
AnyArrayTypeInfo *info;
220209

221-
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
210+
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
222211

223-
int16 elmlen;
224-
bool elmbyval;
225-
char elmalign;
212+
StrategyNumber strategy = PG_GETARG_UINT16(2);
213+
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
226214

227215
CHECKARRVALID(array);
228216

229-
get_typlenbyvalalign(ARR_ELEMTYPE(array),
230-
&elmlen, &elmbyval, &elmalign);
217+
info = getAnyArrayTypeInfoCached(fcinfo, ARR_ELEMTYPE(array));
218+
219+
sa = Array2SimpleArray(info, array);
220+
sortSimpleArray(sa, 1);
221+
uniqSimpleArray(sa, false);
231222

232-
deconstruct_array(array,
233-
ARR_ELEMTYPE(array),
234-
elmlen, elmbyval, elmalign,
235-
&entries, entries_isnull, nentries);
223+
*nentries = sa->nelems;
236224

237225
switch (strategy)
238226
{
@@ -268,7 +256,7 @@ rum_extract_anyarray_query(PG_FUNCTION_ARGS)
268256
}
269257

270258
/* we should not free array, elems[i] points into it */
271-
PG_RETURN_POINTER(entries);
259+
PG_RETURN_POINTER(sa->elems);
272260
}
273261

274262

@@ -373,11 +361,34 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
373361
}
374362
break;
375363
case RUM_SIMILAR_STRATEGY:
376-
/* we will need recheck */
377-
*recheck = true;
364+
/* we won't need recheck */
365+
*recheck = false;
378366

379-
/* can't do anything else useful here */
380-
res = true;
367+
{
368+
int32 intersection = 0,
369+
nentries = -1;
370+
SimpleArray sa, sb;
371+
372+
for (i = 0; i < nkeys; i++)
373+
if (check[i])
374+
intersection++;
375+
376+
for (i = 0; i < nkeys; i++)
377+
if (!addInfoIsNull[0])
378+
{
379+
nentries = DatumGetInt32(addInfo[i]);
380+
break;
381+
}
382+
383+
if (nentries >= 0)
384+
{
385+
InitDummySimpleArray(&sa, nentries);
386+
InitDummySimpleArray(&sb, nkeys);
387+
res = getSimilarity(&sa, &sb, intersection) >= SmlLimit;
388+
}
389+
else
390+
res = false;
391+
}
381392
break;
382393
default:
383394
elog(ERROR, "rum_anyarray_consistent: unknown strategy number: %d",
@@ -403,17 +414,17 @@ rum_anyarray_ordering(PG_FUNCTION_ARGS)
403414

404415
float8 dist,
405416
sml;
406-
int32 intersect = 0,
417+
int32 intersection = 0,
407418
nentries = -1;
408419
int i;
409420

410421
SimpleArray sa, sb;
411422

412423
for (i = 0; i < nkeys; i++)
413424
if (check[i])
414-
intersect++;
425+
intersection++;
415426

416-
if (intersect == 0)
427+
if (intersection == 0)
417428
PG_RETURN_FLOAT8(get_float8_infinity());
418429

419430
for (i = 0; i < nkeys; i++)
@@ -425,7 +436,7 @@ rum_anyarray_ordering(PG_FUNCTION_ARGS)
425436

426437
InitDummySimpleArray(&sa, nentries);
427438
InitDummySimpleArray(&sb, nkeys);
428-
sml = getSimilarity(&sa, &sb, intersect);
439+
sml = getSimilarity(&sa, &sb, intersection);
429440

430441
if (sml == 0.0)
431442
dist = get_float8_infinity();
@@ -842,20 +853,24 @@ getNumOfIntersect(SimpleArray *sa, SimpleArray *sb)
842853
}
843854

844855
static float8
845-
getSimilarity(SimpleArray *sa, SimpleArray *sb, int32 intersect)
856+
getSimilarity(SimpleArray *sa, SimpleArray *sb, int32 intersection)
846857
{
847-
float8 result = 0.0;
858+
float8 result = 0.0;
848859

849860
switch (SmlType)
850861
{
851862
case AA_Cosine:
852-
result = ((float8)intersect) / sqrt(((float8)sa->nelems) * ((float8)sb->nelems));
863+
result = ((float8) intersection) /
864+
sqrt(((float8) sa->nelems) * ((float8) sb->nelems));
853865
break;
854866
case AA_Jaccard:
855-
result = ((float8)intersect) / (((float8)sa->nelems) + ((float8)sb->nelems) - ((double)intersect));
867+
result = ((float8) intersection) /
868+
(((float8) sa->nelems) +
869+
((float8) sb->nelems) -
870+
((float8) intersection));
856871
break;
857872
case AA_Overlap:
858-
result = intersect;
873+
result = intersection;
859874
break;
860875
default:
861876
elog(ERROR, "unknown similarity type");

0 commit comments

Comments
 (0)
0