|
2 | 2 | import random
|
3 | 3 |
|
4 | 4 | import pandas as pd
|
5 |
| - |
| 5 | +import os |
6 | 6 |
|
7 | 7 | def seriesDemo():
|
8 | 8 | """This function gives demo for Pandas Series"""
|
@@ -243,4 +243,50 @@ def dataframe_merge_demo():
|
243 | 243 | print(emp_df.merge(dept_df.rename(columns={'id': 'dept_id'}), how='left'))
|
244 | 244 |
|
245 | 245 |
|
246 |
| -dataframe_remove_rows() |
def datframe_export_file():
    """Merge demo employee/department data and export it as CSV and JSON.

    Builds two small in-memory DataFrames, inner-joins them on ``dept_id``
    and writes the result to ``<cwd>/data/employee_details/employee.csv``
    and ``<cwd>/data/employee_details/employee.json``. The target directory
    is created when it does not exist yet.
    """
    emp_data = {
        'emp_id': [10, 20, 30, 40, 50, 60],
        'emp_name': ["Rohit", "Pooja", "Rajani", "Rushi", "Rutu", "Prithvi"],
        'emp_sal': [5600, 6200, 7900, 7623.45, 5823.41, 5399.14],
        'dept_id': [1, 2, 3, 1, 3, 3]
    }

    dept_data = {
        'dept_id': [1, 2, 3],
        'dept_name': ["IT", "Civil", "Computer Science"]
    }

    emp_df = pd.DataFrame(emp_data)
    dept_df = pd.DataFrame(dept_data)

    # 'dept_id' is the only column the two frames share; naming it explicitly
    # keeps the join key fixed even if another common column is added later.
    merged_df = emp_df.merge(dept_df, how='inner', on='dept_id')

    path_to_save = os.path.join(os.getcwd(), "data", "employee_details")
    if not os.path.isdir(path_to_save):
        print("creating directory to store file")
    # exist_ok=True makes creation safe when the directory already exists or
    # is created by someone else between the check above and this call.
    os.makedirs(path_to_save, exist_ok=True)

    # The default index is written as the first CSV column, as before.
    merged_df.to_csv(os.path.join(path_to_save, "employee.csv"), header=True)
    merged_df.to_json(os.path.join(path_to_save, "employee.json"))
    print("Dataframe saved into csv and json format")
| 273 | + |
| 274 | + |
def dataframe_read_json_export_to_parquet():
    """Read the exported employee JSON file and store it as a parquet file.

    Loads ``<cwd>/data/employee_details/employee.json`` into a DataFrame,
    prints it, and writes it snappy-compressed to
    ``<cwd>/data/employee_details/parquet/employee.parquet`` using the
    ``fastparquet`` engine. The parquet directory is created when missing.
    """
    # Both the input file and the output directory live under the same base.
    base_dir = os.path.join(os.getcwd(), "data", "employee_details")
    json_file_path = os.path.join(base_dir, "employee.json")
    parquet_file_path = os.path.join(base_dir, "parquet")
    df = pd.read_json(json_file_path)

    print("Employee dataframe ...")
    print(df)
    # The output written below is parquet, so the message says parquet.
    print("Saving dataframe to parquet format")
    if not os.path.isdir(parquet_file_path):
        print(f"creating {parquet_file_path} to store parquet file")
    # exist_ok=True makes creation safe when the directory already exists or
    # is created by someone else between the check above and this call.
    os.makedirs(parquet_file_path, exist_ok=True)

    df.to_parquet(os.path.join(parquet_file_path, "employee.parquet"),
                  compression="snappy", engine='fastparquet')
    print("File stored as parquet format")
| 290 | + |
| 291 | + |
# Module-level call: runs the JSON -> parquet export demo whenever this file
# is executed (or imported).
dataframe_read_json_export_to_parquet()
0 commit comments