8000 [SEARCH-442, BTS-1169] Make GeoJson parsing faster (#17862) · hicder/arangodb@79a4679 · GitHub
[go: up one dir, main page]

Skip to content

Commit 79a4679

Browse files
authored
[SEARCH-442, BTS-1169] Make GeoJson parsing faster (arangodb#17862)
* Make Geo faster: * Make GeoJson parsing faster * Make ShapeContainer intersects/contains faster * Remove unnecessary functions * Use FlatHashSet for deduplication uint64_t in query * Use GeoJson parsing without validation for IResearch GeoFilter * Fix centroid compute for S2LatLngRect in ShapeContainer * Fix GEO_CONTAINS(S2Polygon, S2LatLngRect) * Fix S2LatLng -> S2Point works only for valid S2LatLng * Something else? * Fix tests * Fix for empty polygons and multi polygons Maybe we want to allow empty loops in general? We can just skip such loops * If and return/throw error if S2LatLng not normalized * Allow to specify not normalized lat lon: Before this commit it was really strange. In some cases we produce error to user. In other ignore and works incorrect. In third makes normalize and works correct. I think always produce error is better, but in such case we need way to update old user data. It's impossible now, so I think better to always normalize So we change behaviour: In some queries user couldn't specify not normalized points. It was error/invalid behavior, now they can and it will be valid * Commit to satisfy stupid cppcheck which cannot understand it's all cases * Fixes * Fix compilation with libc++ * Fix macOS * Apply review suggestions Rename to old names Minimize diff * Update iresearch (arangodb#17863) * Fix format * Fix by review suggestions * Fix by review suggestions
1 parent 2dbdd04 commit 79a4679

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+2217
-2504
lines changed

3rdParty/s2geometry/master/src/s2/s2polygon.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ class S2Polygon final : public S2Region {
701701
S2Polygon* Clone() const override;
702702
S2Cap GetCapBound() const override; // Cap surrounding rect bound.
703703
S2LatLngRect GetRectBound() const override { return bound_; }
704+
S2LatLngRect GetSubRegionBound() const { return subregion_bound_; }
704705
void GetCellUnionBound(std::vector<S2CellId> *cell_ids) const override;
705706

706707
bool Contains(const S2Cell& cell) const override;

arangod/Aql/Functions.cpp

Lines changed: 38 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
#include <date/iso_week.h>
9898
#include <date/tz.h>
9999
#include <s2/s2loop.h>
100+
#include <s2/s2polygon.h>
100101

101102
#include <unicode/schriter.h>
102103
#include <unicode/stsearch.h>
@@ -1200,7 +1201,8 @@ AqlValue geoContainsIntersect(ExpressionContext* expressionContext,
12001201

12011202
AqlValueMaterializer mat1(vopts);
12021203
geo::ShapeContainer outer, inner;
1203-
Result res = geo::geojson::parseRegion(mat1.slice(p1, true), outer, false);
1204+
auto res = geo::json::parseRegion(mat1.slice(p1, true), outer,
1205+
/*legacy=*/false);
12041206
if (res.fail()) {
12051207
registerWarning(expressionContext, func, res);
12061208
return AqlValue(AqlValueHintNull());
@@ -1215,13 +1217,12 @@ AqlValue geoContainsIntersect(ExpressionContext* expressionContext,
12151217
}
12161218

12171219
AqlValueMaterializer mat2(vopts);
1218-
if (p2.isArray() && p2.length() >= 2) {
1219-
res = inner.parseCoordinates(mat2.slice(p2, true), /*geoJson*/ true);
1220-
} else if (p2.isObject()) {
1221-
res = geo::geojson::parseRegion(mat2.slice(p2, true), inner, false);
1220+
if (p2.isArray()) {
1221+
res = geo::json::parseCoordinates<true>(mat2.slice(p2, true), inner,
1222+
/*geoJson=*/true);
12221223
} else {
1223-
res.reset(TRI_ERROR_BAD_PARAMETER,
1224-
"Second arg requires coordinate pair or GeoJSON");
1224+
res = geo::json::parseRegion(mat2.slice(p2, true), inner,
1225+
/*legacy=*/false);
12251226
}
12261227
if (res.fail()) {
12271228
registerWarning(expressionContext, func, res);
@@ -1230,7 +1231,7 @@ AqlValue geoContainsIntersect(ExpressionContext* expressionContext,
12301231

12311232
bool result;
12321233
try {
1233-
result = contains ? outer.contains(&inner) : outer.intersects(&inner);
1234+
result = contains ? outer.contains(inner) : outer.intersects(inner);
12341235
return AqlValue(AqlValueHintBool(result));
12351236
} catch (basics::Exception const& ex) {
12361237
res.reset(ex.code(), ex.what());
@@ -1339,13 +1340,12 @@ Result parseShape(ExpressionContext* exprCtx, AqlValue const& value,
13391340
auto* vopts = &exprCtx->trx().vpackOptions();
13401341
AqlValueMaterializer mat(vopts);
13411342

1342-
if (value.isArray() && value.length() >= 2) {
1343-
return shape.parseCoordinates(mat.slice(value, true), /*geoJson*/ true);
1344-
} else if (value.isObject()) {
1345-
return geo::geojson::parseRegion(mat.slice(value, true), shape, false);
1346-
} else {
1347-
return {TRI_ERROR_BAD_PARAMETER, "Requires coordinate pair or GeoJSON"};
1343+
if (value.isArray()) {
1344+
return geo::json::parseCoordinates<true>(mat.slice(value, true), shape,
1345+
/*geoJson=*/true);
13481346
}
1347+
return geo::json::parseRegion(mat.slice(value, true), shape,
1348+
/*legacy=*/false);
13491349
}
13501350

13511351
} // namespace
@@ -6016,12 +6016,12 @@ AqlValue functions::GeoDistance(ExpressionContext* exprCtx, AstNode const&,
60166016
return AqlValue(AqlValueHintNull());
60176017
}
60186018

6019-
if (parameters.size() > 2 && parameters[2].isString()) {
6020-
VPackValueLength len;
6021-
const char* ptr = parameters[2].slice().getStringUnchecked(len);
6022-
geo::Ellipsoid const& e = geo::utils::ellipsoidFromString(ptr, len);
6023-
return ::numberValue(shape1.distanceFromCentroid(shape2.centroid(), e),
6024-
true);
6019+
if (parameters.size() > 2) {
6020+
if (auto slice = parameters[2].slice(); slice.isString()) {
6021+
auto const& e = geo::utils::ellipsoidFromString(slice.stringView());
6022+
return ::numberValue(shape1.distanceFromCentroid(shape2.centroid(), e),
6023+
true);
6024+
}
60256025
}
60266026
return ::numberValue(shape1.distanceFromCentroid(shape2.centroid()), true);
60276027
}
@@ -6107,10 +6107,8 @@ AqlValue functions::GeoInRange(ExpressionContext* ctx, AstNode const& node,
61076107

61086108
if (argc > 6) {
61096109
auto const& value = extractFunctionParameterValue(args, 6);
6110-
if (value.isString()) {
6111-
VPackValueLength len;
6112-
char const* ptr = value.slice().getStringUnchecked(len);
6113-
ellipsoid = &geo::utils::ellipsoidFromString(ptr, len);
6110+
if (auto slice = value.slice(); slice.isString()) {
6111+
ellipsoid = &geo::utils::ellipsoidFromString(slice.stringView());
61146112
}
61156113
}
61166114
}
@@ -6163,8 +6161,10 @@ AqlValue functions::GeoEquals(ExpressionContext* expressionContext,
61636161
AqlValueMaterializer mat2(vopts);
61646162

61656163
geo::ShapeContainer first, second;
6166-
Result res1 = geo::geojson::parseRegion(mat1.slice(p1, true), first, false);
6167-
Result res2 = geo::geojson::parseRegion(mat2.slice(p2, true), second, false);
6164+
auto res1 = geo::json::parseRegion(mat1.slice(p1, true), first,
6165+
/*legacy=*/false);
6166+
auto res2 = geo::json::parseRegion(mat2.slice(p2, true), second,
6167+
/*legacy=*/false);
61686168

61696169
if (res1.fail()) {
61706170
registerWarning(expressionContext, "GEO_EQUALS", res1);
@@ -6175,7 +6175,7 @@ AqlValue functions::GeoEquals(ExpressionContext* expressionContext,
61756175
return AqlValue(AqlValueHintNull());
61766176
}
61776177

6178-
bool result = first.equals(&second);
6178+
bool result = first.equals(second);
61796179
return AqlValue(AqlValueHintBool(result));
61806180
}
61816181

@@ -6191,18 +6191,17 @@ AqlValue functions::GeoArea(ExpressionContext* expressionContext,
61916191
AqlValueMaterializer mat(vopts);
61926192

61936193
geo::ShapeContainer shape;
6194-
Result res = geo::geojson::parseRegion(mat.slice(p1, true), shape, false);
6194+
auto res = geo::json::parseRegion(mat.slice(p1, true), shape,
6195+
/*legacy=*/false);
61956196

61966197
if (res.fail()) {
61976198
registerWarning(expressionContext, "GEO_AREA", res);
61986199
return AqlValue(AqlValueHintNull());
61996200
}
62006201

62016202
auto detEllipsoid = [](AqlValue const& p) {
6202-
if (p.isString()) {
6203-
VPackValueLength len;
6204-
const char* ptr = p.slice().getStringUnchecked(len);
6205-
return geo::utils::ellipsoidFromString(ptr, len);
6203+
if (auto slice = p.slice(); slice.isString()) {
6204+
return geo::utils::ellipsoidFromString(slice.stringView());
62066205
}
62076206
return geo::SPHERE;
62086207
};
@@ -6259,13 +6258,13 @@ AqlValue functions::IsInPolygon(ExpressionContext* expressionContext,
62596258

62606259
S2Loop loop;
62616260
loop.set_s2debug_override(S2Debug::DISABLE);
6262-
Result res = geo::geojson::parseLoop(coords.slice(), geoJson, loop);
6261+
auto res = geo::json::parseLoop(coords.slice(), loop, geoJson);
62636262
if (res.fail() || !loop.IsValid()) {
62646263
registerWarning(expressionContext, "IS_IN_POLYGON", res);
62656264
return AqlValue(AqlValueHintNull());
62666265
}
62676266

6268-
S2LatLng latLng = S2LatLng::FromDegrees(latitude, longitude);
6267+
S2LatLng latLng = S2LatLng::FromDegrees(latitude, longitude).Normalized();
62696268
return AqlValue(AqlValueHintBool(loop.Contains(latLng.ToPoint())));
62706269
}
62716270

@@ -6421,8 +6420,8 @@ AqlValue functions::GeoPolygon(ExpressionContext* expressionContext,
64216420
builder->close(); // object
64226421

64236422
// Now actually parse the result with S2:
6424-
geo::ShapeContainer container;
6425-
res = geo::geojson::parsePolygon(builder->slice(), container, false);
6423+
S2Polygon polygon;
6424+
res = geo::json::parsePolygon(builder->slice(), polygon);
64266425
if (res.fail()) {
64276426
registerWarning(expressionContext, "GEO_POLYGON", res);
64286427
return AqlValue(AqlValueHintNull());
@@ -6504,9 +6503,8 @@ AqlValue functions::GeoMultiPolygon(ExpressionContext* expressionContext,
65046503
builder->close();
65056504

65066505
// Now actually parse the result with S2:
6507-
geo::ShapeContainer container;
6508-
Result res =
6509-
geo::geojson::parseMultiPolygon(builder->slice(), container, false);
6506+
S2Polygon polygon;
6507+
auto res = geo::json::parseMultiPolygon(builder->slice(), polygon);
65106508
if (res.fail()) {
65116509
registerWarning(expressionContext, "GEO_MULTIPOLYGON", res);
65126510
return AqlValue(AqlValueHintNull());
@@ -7374,7 +7372,7 @@ std::optional<T> bitOperationValue(VPackSlice input) {
73747372
return {};
73757373
}
73767374

7377-
AqlValue handleBitOperation(
7375+
static AqlValue handleBitOperation(
73787376
ExpressionContext* expressionContext, AstNode const& node,
73797377
VPackFunctionParametersView parameters,
73807378
std::function<uint64_t(uint64_t, uint64_t)> const& cb) {

arangod/GeoIndex/Covering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ std::vector<geo::Interval> CoveringUtils::intervals() {
6262
TRI_ASSERT(!isDone());
6363

6464
std::vector<geo::Interval> intervals;
65-
std::vector<S2CellId> cover = _params.filterShape.covering(&_coverer);
65+
auto cover = _params.filterShape.covering(_coverer);
6666
geo::utils::scanIntervals(_params, cover, intervals);
6767
_allIntervalsCovered = true;
6868
return intervals;

arangod/GeoIndex/Covering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
#include <deque>
2727
#include <vector>
28-
#include <unordered_set>
2928

3029
#include <s2/s2cap.h>
3130
#include <s2/s2cell_id.h>
@@ -35,6 +34,7 @@
3534
#include "Geo/GeoParams.h"
3635
#include "Geo/Utils.h"
3736
#include "VocBase/Identifiers/LocalDocumentId.h"
37+
#include "Containers/FlatHashSet.h"
3838

3939
namespace arangodb::geo_index {
4040

@@ -112,7 +112,7 @@ class CoveringUtils {
112112
GeoDocumentsQueue _buffer;
113113

114114
// deduplication filter
115-
std::unordered_set<uint64_t> _seenDocs;
115+
containers::FlatHashSet<uint64_t> _seenDocs;
116116

117117
/// Coverer instance to use
118118
S2RegionCoverer _coverer;

arangod/GeoIndex/Index.cpp

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -85,19 +85,19 @@ Index::Index(VPackSlice const& info,
8585
}
8686

8787
/// @brief Parse document and return cells for indexing
88-
Result Index::indexCells(VPackSlice const& doc, std::vector<S2CellId>& cells,
88+
Result Index::indexCells(velocypack::Slice doc, std::vector<S2CellId>& cells,
8989
S2Point& centroid) const {
9090
if (_variant == Variant::GEOJSON) {
9191
VPackSlice loc = doc.get(_location);
9292
if (loc.isArray()) {
93-
return geo::utils::indexCellsLatLng(loc, /*geojson*/ true, cells,
93+
return geo::utils::indexCellsLatLng(loc, /*geoJson=*/true, cells,
9494
centroid);
9595
}
9696
geo::ShapeContainer shape;
97-
Result r = geo::geojson::parseRegion(loc, shape, _legacyPolygons);
97+
Result r = geo::json::parseRegion(loc, shape, _legacyPolygons);
9898
if (r.ok()) {
9999
S2RegionCoverer coverer(_coverParams.regionCovererOpts());
100-
cells = shape.covering(&coverer);
100+
cells = shape.covering(coverer);
101101
centroid = shape.centroid();
102102
if (!S2LatLng(centroid).is_valid()) {
103103
return TRI_ERROR_BAD_PARAMETER;
@@ -115,50 +115,45 @@ Result Index::indexCells(VPackSlice const& doc, std::vector<S2CellId>& cells,
115115
}
116116
return r;
117117
} else if (_variant == Variant::COMBINED_LAT_LON) {
118-
VPackSlice loc = doc.get(_location);
119-
return geo::utils::indexCellsLatLng(loc, /*geojson*/ false, cells,
118+
auto loc = doc.get(_location);
119+
return geo::utils::indexCellsLatLng(loc, /*geoJson=*/false, cells,
120120
centroid);
121121
} else if (_variant == Variant::INDIVIDUAL_LAT_LON) {
122-
VPackSlice lat = doc.get(_latitude);
123-
VPackSlice lon = doc.get(_longitude);
122+
auto lat = doc.get(_latitude);
123+
auto lon = doc.get(_longitude);
124124
if (!lat.isNumber() || !lon.isNumber()) {
125125
return TRI_ERROR_BAD_PARAMETER;
126126
}
127-
S2LatLng ll = S2LatLng::FromDegrees(lat.getNumericValue<double>(),
128-
lon.getNumericValue<double>());
129-
if (!ll.is_valid()) {
130-
LOG_TOPIC("8173c", DEBUG, arangodb::Logger::FIXME)
131-
<< "illegal geo-coordinates, ignoring entry";
132-
return TRI_ERROR_BAD_PARAMETER;
133-
}
127+
S2LatLng ll =
128+
S2LatLng::FromDegrees(lat.getNumber<double>(), lon.getNumber<double>())
129+
.Normalized();
134130
centroid = ll.ToPoint();
135131
cells.emplace_back(centroid);
136132
return TRI_ERROR_NO_ERROR;
137133
}
138134
return TRI_ERROR_INTERNAL;
139135
}
140136

141-
Result Index::shape(velocypack::Slice const& doc,
142-
geo::ShapeContainer& shape) const {
137+
Result Index::shape(velocypack::Slice doc, geo::ShapeContainer& shape) const {
143138
if (_variant == Variant::GEOJSON) {
144-
VPackSlice loc = doc.get(_location);
145-
if (loc.isArray() && loc.length() >= 2) {
146-
return shape.parseCoordinates(loc, /*geoJson*/ true);
147-
} else if (loc.isObject()) {
148-
return geo::geojson::parseRegion(loc, shape, _legacyPolygons);
139+
auto loc = doc.get(_location);
140+
if (loc.isArray()) {
141+
return geo::json::parseCoordinates<true>(loc, shape, /*geoJson=*/true);
149142
}
150-
return TRI_ERROR_BAD_PARAMETER;
143+
return geo::json::parseRegion(loc, shape, _legacyPolygons);
151144
} else if (_variant == Variant::COMBINED_LAT_LON) {
152-
VPackSlice loc = doc.get(_location);
153-
return shape.parseCoordinates(loc, /*geoJson*/ false);
145+
auto loc = doc.get(_location);
146+
return geo::json::parseCoordinates<true>(loc, shape, /*geoJson=*/false);
154147
} else if (_variant == Variant::INDIVIDUAL_LAT_LON) {
155-
VPackSlice lon = doc.get(_longitude);
156-
VPackSlice lat = doc.get(_latitude);
157-
if (!lon.isNumber() || !lat.isNumber()) {
148+
auto lon = doc.get(_longitude);
149+
auto lat = doc.get(_latitude);
150+
if (!lon.isNumber<double>() || !lat.isNumber<double>()) {
158151
return TRI_ERROR_BAD_PARAMETER;
159152
}
160-
shape.resetCoordinates(lat.getNumericValue<double>(),
161-
lon.getNumericValue<double>());
153+
shape.reset(
154+
S2LatLng::FromDegrees(lat.getNumber<double>(), lon.getNumber<double>())
155+
.Normalized()
156+
.ToPoint());
162157
return TRI_ERROR_NO_ERROR;
163158
}
164159
return TRI_ERROR_INTERNAL;
@@ -186,18 +181,20 @@ S2LatLng Index::parseGeoDistance(aql::AstNode const* args,
186181

187182
if (cc->type == aql::NODE_TYPE_ARRAY) { // [lng, lat] is valid input
188183
TRI_ASSERT(cc->numMembers() == 2);
189-
return S2LatLng::FromDegrees(/*lat*/ cc->getMember(1)->getDoubleValue(),
190-
/*lon*/ cc->getMember(0)->getDoubleValue());
184+
return S2LatLng::FromDegrees(
185+
/*lat_degrees=*/cc->getMember(1)->getDoubleValue(),
186+
/*lng_degrees=*/cc->getMember(0)->getDoubleValue())
187+
.Normalized();
191188
} else {
192189
VPackBuilder jsonB;
193190
cc->toVelocyPackValue(jsonB);
194191
VPackSlice json = jsonB.slice();
195192
geo::ShapeContainer shape;
196193
Result res;
197-
if (json.isArray() && json.length() >= 2) {
198-
res = shape.parseCoordinates(json, /*GeoJson*/ true);
194+
if (json.isArray()) {
195+
res = geo::json::parseCoordinates<true>(json, shape, /*geoJson=*/true);
199196
} else {
200-
res = geo::geojson::parseRegion(json, shape, legacy);
197+
res = geo::json::parseRegion(json, shape, legacy);
201198
}
202199
if (res.fail()) {
203200
THROW_ARANGO_EXCEPTION(res);
@@ -252,8 +249,7 @@ void Index::handleNode(aql::AstNode const* node, aql::Variable const* ref,
252249
// arrays can't occur only handle real GeoJSON
253250
VPackBuilder bb;
254251
geoJson->toVelocyPackValue(bb);
255-
Result res =
256-
geo::geojson::parseRegion(bb.slice(), qp.filterShape, legacy);
252+
auto res = geo::json::parseRegion(bb.slice(), qp.filterShape, legacy);
257253
if (res.fail()) {
258254
THROW_ARANGO_EXCEPTION(res);
259255
}

arangod/GeoIndex/Index.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ class Slice;
4242
}
4343

4444
namespace geo {
45-
struct Coordinate;
4645
struct QueryParams;
4746
class ShapeContainer;
4847
} // namespace geo
@@ -71,10 +70,10 @@ struct Index {
7170

7271
public:
7372
/// @brief Parse document and return cells for indexing
74-
Result indexCells(velocypack::Slice const& doc, std::vector<S2CellId>& cells,
73+
Result indexCells(velocypack::Slice doc, std::vector<S2CellId>& cells,
7574
S2Point& centroid) const;
7675

77-
Result shape(velocypack::Slice const& doc, geo::ShapeContainer& shape) const;
76+
Result shape(velocypack::Slice doc, geo::ShapeContainer& shape) const;
7877

7978
/// @brief Parse AQL condition into query parameters
8079
/// Public to allow usage by legacy geo indexes

0 commit comments

Comments
 (0)
0