|
31 | 31 | all_schemas = {}
|
32 | 32 |
|
33 | 33 | all_schemas["customer"] = [
|
34 |
| - ("C_CUSTKEY", pyarrow.int32()), |
| 34 | + ("C_CUSTKEY", pyarrow.int64()), |
35 | 35 | ("C_NAME", pyarrow.string()),
|
36 | 36 | ("C_ADDRESS", pyarrow.string()),
|
37 |
| - ("C_NATIONKEY", pyarrow.int32()), |
| 37 | + ("C_NATIONKEY", pyarrow.int64()), |
38 | 38 | ("C_PHONE", pyarrow.string()),
|
39 | 39 | ("C_ACCTBAL", pyarrow.decimal128(15, 2)),
|
40 | 40 | ("C_MKTSEGMENT", pyarrow.string()),
|
41 | 41 | ("C_COMMENT", pyarrow.string()),
|
42 | 42 | ]
|
43 | 43 |
|
44 | 44 | all_schemas["lineitem"] = [
|
45 |
| - ("L_ORDERKEY", pyarrow.int32()), |
46 |
| - ("L_PARTKEY", pyarrow.int32()), |
47 |
| - ("L_SUPPKEY", pyarrow.int32()), |
| 45 | + ("L_ORDERKEY", pyarrow.int64()), |
| 46 | + ("L_PARTKEY", pyarrow.int64()), |
| 47 | + ("L_SUPPKEY", pyarrow.int64()), |
48 | 48 | ("L_LINENUMBER", pyarrow.int32()),
|
49 | 49 | ("L_QUANTITY", pyarrow.decimal128(15, 2)),
|
50 | 50 | ("L_EXTENDEDPRICE", pyarrow.decimal128(15, 2)),
|
|
61 | 61 | ]
|
62 | 62 |
|
63 | 63 | all_schemas["nation"] = [
|
64 |
| - ("N_NATIONKEY", pyarrow.int32()), |
| 64 | + ("N_NATIONKEY", pyarrow.int64()), |
65 | 65 | ("N_NAME", pyarrow.string()),
|
66 |
| - ("N_REGIONKEY", pyarrow.int32()), |
| 66 | + ("N_REGIONKEY", pyarrow.int64()), |
67 | 67 | ("N_COMMENT", pyarrow.string()),
|
68 | 68 | ]
|
69 | 69 |
|
70 | 70 | all_schemas["orders"] = [
|
71 |
| - ("O_ORDERKEY", pyarrow.int32()), |
72 |
| - ("O_CUSTKEY", pyarrow.int32()), |
| 71 | + ("O_ORDERKEY", pyarrow.int64()), |
| 72 | + ("O_CUSTKEY", pyarrow.int64()), |
73 | 73 | ("O_ORDERSTATUS", pyarrow.string()),
|
74 | 74 | ("O_TOTALPRICE", pyarrow.decimal128(15, 2)),
|
75 | 75 | ("O_ORDERDATE", pyarrow.date32()),
|
|
80 | 80 | ]
|
81 | 81 |
|
82 | 82 | all_schemas["part"] = [
|
83 |
| - ("P_PARTKEY", pyarrow.int32()), |
| 83 | + ("P_PARTKEY", pyarrow.int64()), |
84 | 84 | ("P_NAME", pyarrow.string()),
|
85 | 85 | ("P_MFGR", pyarrow.string()),
|
86 | 86 | ("P_BRAND", pyarrow.string()),
|
|
92 | 92 | ]
|
93 | 93 |
|
94 | 94 | all_schemas["partsupp"] = [
|
95 |
| - ("PS_PARTKEY", pyarrow.int32()), |
96 |
| - ("PS_SUPPKEY", pyarrow.int32()), |
| 95 | + ("PS_PARTKEY", pyarrow.int64()), |
| 96 | + ("PS_SUPPKEY", pyarrow.int64()), |
97 | 97 | ("PS_AVAILQTY", pyarrow.int32()),
|
98 | 98 | ("PS_SUPPLYCOST", pyarrow.decimal128(15, 2)),
|
99 | 99 | ("PS_COMMENT", pyarrow.string()),
|
100 | 100 | ]
|
101 | 101 |
|
102 | 102 | all_schemas["region"] = [
|
103 |
| - ("r_REGIONKEY", pyarrow.int32()), |
| 103 | + ("r_REGIONKEY", pyarrow.int64()), |
104 | 104 | ("r_NAME", pyarrow.string()),
|
105 | 105 | ("r_COMMENT", pyarrow.string()),
|
106 | 106 | ]
|
107 | 107 |
|
108 | 108 | all_schemas["supplier"] = [
|
109 |
| - ("S_SUPPKEY", pyarrow.int32()), |
| 109 | + ("S_SUPPKEY", pyarrow.int64()), |
110 | 110 | ("S_NAME", pyarrow.string()),
|
111 | 111 | ("S_ADDRESS", pyarrow.string()),
|
112 | 112 | ("S_NATIONKEY", pyarrow.int32()),
|
|
125 | 125 | # in to handle the trailing | in the file
|
126 | 126 | output_cols = [r[0] for r in curr_schema]
|
127 | 127 |
|
| 128 | + curr_schema = [ pyarrow.field(r[0], r[1], nullable=False) for r in curr_schema] |
| 129 | + |
128 | 130 | # Trailing | requires extra field for in processing
|
129 | 131 | curr_schema.append(("some_null", pyarrow.null()))
|
130 | 132 |
|
|
0 commit comments