8000 add sqlglot benchmarks (#221) · python/pyperformance@b31928f · GitHub
[go: up one dir, main page]

Skip to content
8000

Commit b31928f

Browse files
authored
add sqlglot benchmarks (#221)
sqlglot is a pure python sql parser, transpiler, and optimizer
1 parent 2bc2727 commit b31928f

File tree

4 files changed

+187
-0
lines changed

4 files changed

+187
-0
lines changed

pyperformance/data-files/benchmarks/MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ scimark <local>
5353
spectral_norm <local>
5454
sqlalchemy_declarative <local>
5555
sqlalchemy_imperative <local>
56+
sqlglot <local>
5657
sqlite_synth <local>
5758
sympy <local>
5859
telco <local>
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[project]
2+
name = "pyperformance_bm_sqlglot"
3+
requires-python = ">=3.7"
4+
dependencies = [
5+
"pyperf",
6+
"sqlglot",
7+
]
8+
urls = {repository = "https://github.com/python/pyperformance"}
9+
dynamic = ["version"]
10+
11+
[tool.pyperformance]
12+
name = "sqlglot"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
sqlglot==4.6.0
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
import pyperf
2+
3+
from sqlglot import parse_one, transpile
4+
from sqlglot.optimizer import optimize, normalize
5+
6+
7+
SQL = """
8+
select
9+
supp_nation,
10+
cust_nation,
11+
l_year,
12+
sum(volume) as revenue
13+
from
14+
(
15+
select
16+
n1.n_name as supp_nation,
17+
n2.n_name as cust_nation,
18+
extract(year from l_shipdate) as l_year,
19+
l_extendedprice * (1 - l_discount) as volume
20+
from
21+
supplier,
22+
lineitem,
23+
orders,
24+
customer,
25+
nation n1,
26+
nation n2
27+
where
28+
s_suppkey = l_suppkey
29+
and o_orderkey = l_orderkey
30+
and c_custkey = o_custkey
31+
and s_nationkey = n1.n_nationkey
32+
and c_nationkey = n2.n_nationkey
33+
and (
34+
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
35+
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
36+
)
37+
and l_shipdate between date '1995-01-01' and date '1996-12-31'
38+
) as shipping
39+
group by
40+
supp_nation,
41+
cust_nation,
42+
l_year
43+
order by
44+
supp_nation,
45+
cust_nation,
46+
l_year;
47+
"""
48+
49+
TPCH_SCHEMA = {
50+
"lineitem": {
51+
"l_orderkey": "uint64",
52+
"l_partkey": "uint64",
53+
"l_suppkey": "uint64",
54+
"l_linenumber": "uint64",
55+
"l_quantity": "float64",
56+
"l_extendedprice": "float64",
57+
"l_discount": "float64",
58+
"l_tax": "float64",
59+
"l_returnflag": "string",
60+
"l_linestatus": "string",
61+
"l_shipdate": "date32",
62+
"l_commitdate": "date32",
63+
"l_receiptdate": "date32",
64+
"l_shipinstruct": "string",
65+
"l_shipmode": "string",
66+
"l_comment": "string",
67+
},
68+
"orders": {
69+
"o_orderkey": "uint64",
70+
"o_custkey": "uint64",
71+
"o_orderstatus": "string",
72+
"o_totalprice": "float64",
73+
"o_orderdate": "date32",
74+
"o_orderpriority": "string",
75+
"o_clerk": "string",
76+
"o_shippriority": "int32",
77+
"o_comment": "string",
78+
},
79+
"customer": {
80+
"c_custkey": "uint64",
81+
"c_name": "string",
82+
"c_address": "string",
83+
"c_nationkey": "uint64",
84+
"c_phone": "string",
85+
"c_acctbal": "float64",
86+
"c_mktsegment": "string",
87+
"c_comment": "string",
88+
},
89+
"part": {
90+
"p_partkey": "uint64",
91+
"p_name": "string",
92+
"p_mfgr": "string",
93+
"p_brand": "string",
94+
"p_type": "string",
95+
"p_size": "int32",
96+
"p_container": "string",
97+
"p_retailprice": "float64",
98+
"p_comment": "string",
99+
},
100+
"supplier": {
101+
"s_suppkey": "uint64",
102+
"s_name": "string",
103+
"s_address": "string",
104+
"s_nationkey": "uint64",
105+
"s_phone": "string",
106+
"s_acctbal": "float64",
107+
"s_comment": "string",
108+
},
109+
"partsupp": {
110+
"ps_partkey": "uint64",
111+
"ps_suppkey": "uint64",
112+
"ps_availqty": "int32",
113+
"ps_supplycost": "float64",
114+
"ps_comment": "string",
115+
},
116+
"nation": {
117+
"n_nationkey": "uint64",
118+
"n_name": "string",
119+
"n_regionkey": "uint64",
120+
"n_comment": "string",
121+
},
122+
"region": {
123+
"r_regionkey": "uint64",
124+
"r_name": "string",
125+
"r_comment": "string",
126+
},
127+
}
128+
129+
130+
def bench_parse(loops):
131+
elapsed = 0
132+
for _ in range(loops):
133+
t0 = pyperf.perf_counter()
134+
parse_one(SQL)
135+
elapsed += pyperf.perf_counter() - t0
136+
return elapsed
137+
138+
139+
def bench_transpile(loops):
140+
elapsed = 0
141+
for _ in range(loops):
142+
t0 = pyperf.perf_counter()
143+
transpile(SQL, write="spark")
144+
elapsed += pyperf.perf_counter() - t0
145+
return elapsed
146+
147+
148+
def bench_optimize(loops):
149+
elapsed = 0
150+
for _ in range(loops):
151+
t0 = pyperf.perf_counter()
152+
optimize(parse_one(SQL), TPCH_SCHEMA)
153+
elapsed += pyperf.perf_counter() - t0
154+
return elapsed
155+
156+
157+
def bench_normalize(loops):
158+
elapsed = 0
159+
conjunction = parse_one("(A AND B) OR (C AND D) OR (E AND F) OR (G AND H)")
160+
for _ in range(loops):
161+
t0 = pyperf.perf_counter()
162+
normalize.normalize(conjunction)
163+
elapsed += pyperf.perf_counter() - t0
164+
return elapsed
165+
166+
167+
if __name__ == "__main__":
168+
runner = pyperf.Runner()
169+
runner.metadata['description'] = "SQLGlot benchmark"
170+
runner.bench_time_func("sqlglot_parse", bench_parse)
171+
runner.bench_time_func("sqlglot_transpile", bench_transpile)
172+
runner.bench_time_func("sqlglot_optimize", bench_optimize)
173+
runner.bench_time_func("sqlglot_normalize", bench_normalize)

0 commit comments

Comments
 (0)
0