feat: add example of reading parquet from s3 (#460) · llama90/arrow-datafusion-python@9c643bf · GitHub
[go: up one dir, main page]

Skip to content

Commit 9c643bf

Browse files
authored
feat: add example of reading parquet from s3 (apache#460)
1 parent 499f045 commit 9c643bf

File tree

1 file changed

+39
-0
lines changed

1 file changed

+39
-0
lines changed

examples/sql-parquet-s3.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
import os
19+
import datafusion
20+
from datafusion.object_store import AmazonS3
21+
22+
# Example settings: the AWS region and the S3 bucket that holds the Parquet data.
region = "us-east-1"
23+
bucket_name = "yellow-trips"
24+
25+
# Build the S3 object store handle. Credentials are read from the environment;
# os.getenv yields None for any variable that is not set.
s3 = AmazonS3(
26+
bucket_name=bucket_name,
27+
region=region,
28+
access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
29+
secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
30+
)
31+
32+
ctx = datafusion.SessionContext()
33+
# URL of the bucket root; used both to register the store and as the table location.
path = f"s3://{bucket_name}/"
34+
ctx.register_object_store(path, s3)
35+
36+
# Register the Parquet data under the bucket root as the SQL table "trips".
ctx.register_parquet("trips", path)
37+
38+
# Query the registered table and display the result.
df = ctx.sql("select count(passenger_count) from trips")
39+
df.show()

0 commit comments

Comments
 (0)
0