10000 csv splitter · goroyal/python-scripts@8b65a7c · GitHub
[go: up one dir, main page]

Skip to content

Commit 8b65a7c

Browse files
committed
csv splitter
1 parent 681685c commit 8b65a7c

File tree

2 files changed

+268
-0
lines changed

2 files changed

+268
-0
lines changed

12_csv_split.py

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
### WIP
2+
3+
import sys
4+
import os
5+
import getopt
6+
import csv
7+
8+
"""
9+
Splits a CSV file into multiple pieces based on command line arguments.
10+
11+
Arguments:
12+
`-h`: help file of usage of the script
13+
`-i`: input file name
14+
`-o`: base name for the output file(s); the `.csv` extension is added automatically
15+
`-r`: row limit to split
16+
`-c`: index of the column to split on (one output file per unique value in that column)
17+
18+
Default settings:
19+
`output_path` is the current directory
20+
`keep_headers` is on (headers will be kept)
21+
`delimiter` is ,
22+
23+
Example usage:
24+
# split by every 10000 rows
25+
>> python 12_csv_split.py -i input.csv -o rownumber -r 10000
26+
# split by unique items in column 0
27+
>> python 12_csv_split.py -i input.csv -o userid -c 0
28+
# access help
29+
>> python 12_csv_split.py -h for help
30+
31+
"""
32+
33+
def main(argv):
    """Run the splitter for one set of command line tokens.

    `argv` is the argument list without the program name. It is parsed into
    an argument dict, which is then handed straight to `parse_file`.
    """
    parse_file(grab_command_line_arguments(argv))
37+
38+
39+
def _is_int_at_least(text, minimum):
    """Return True when `text` parses as an integer that is >= `minimum`."""
    try:
        return int(text) >= minimum
    except ValueError:
        return False


def grab_command_line_arguments(argv):
    """Parse and validate the command line options of the CSV splitter.

    Args:
        argv: command line tokens without the program name
            (i.e. ``sys.argv[1:]``).

    Returns:
        A dict with the keys ``input_file``, ``output_file``, ``rowlimit``
        and ``columnindex``. Values are the raw option strings. Exactly one
        of ``rowlimit`` / ``columnindex`` is non-empty; the other is ``''``.
        Both keys are always present so callers can index them safely.

    Raises:
        SystemExit: after printing a message, when ``-h`` is given (status
            0) or when the options are unknown, missing, contradictory or
            not valid integers (status 2).
    """
    usage = 'csvsplit.py -i <inputfile> -r <row limit> -c <column index> -o <outputfile>'

    try:
        opts, _ = getopt.getopt(
            argv,
            "hi:o:r:c:",
            ["ifile=", "ofile=", "rowlimit=", "columnindex="],
        )
    except getopt.GetoptError as err:
        # Unknown option or missing option value: report it instead of
        # dying with a traceback.
        print("{}".format(err))
        print(usage)
        sys.exit(2)

    # end if no arguments provided
    if not opts:
        print("No options provided. Try again. Use `-h` for help.")
        sys.exit(2)

    inputfile = ''
    outputfile = ''
    rowlimit = ''
    columnindex = ''
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg
        elif opt in ("-r", "--rowlimit"):
            rowlimit = arg
        elif opt in ("-c", "--columnindex"):
            columnindex = arg

    # Echo what was understood and collect every problem before exiting, so
    # the user sees all mistakes in a single run.
    print("\nArguments:")
    invalid = False
    if inputfile:
        print("Input file is '{}'".format(inputfile))
    else:
        print("Please enter an input file.")
        invalid = True
    if outputfile:
        print("Output file is '{}'".format(outputfile))
    else:
        print("Please enter an output file.")
        invalid = True
    if rowlimit:
        print("Rowlimit is '{}'".format(rowlimit))
        if not _is_int_at_least(rowlimit, 1):
            print("Rowlimit must be a whole number greater than zero.")
            invalid = True
    if columnindex:
        print("Columnindex is '{}'".format(columnindex))
        if not _is_int_at_least(columnindex, 0):
            print("Columnindex must be a whole number, zero or greater.")
            invalid = True
    if rowlimit and columnindex:
        print("Please use either a rowlimit or columnlimit, not both.")
        invalid = True
    # Parenthesised on purpose: `not rowlimit or columnindex` would reject
    # every run that uses -c on its own.
    if not (rowlimit or columnindex):
        print("Please enter either a rowlimit or columnlimit.")
        invalid = True
    if invalid:
        sys.exit(2)

    # Existence of the input file is not checked here; opening it later
    # reports a missing file.
    argument_dict = {
        "input_file": inputfile,
        "output_file": outputfile,
        "rowlimit": rowlimit,
        "columnindex": columnindex,
    }
    print(argument_dict)
    return argument_dict
98+
99+
100+
def _split_by_row_limit(input_file, output_file, rowlimit,
                        output_path='.', keep_headers=True, delimiter=','):
    """Write every `rowlimit` data rows to a new `<output_file>_<n>.csv`.

    Pieces are numbered from 1. When `keep_headers` is true, the first line
    of the input is treated as a header and repeated at the top of every
    piece. An input without data rows produces no output files.

    Returns the number of pieces written.
    """
    if rowlimit < 1:
        raise ValueError("rowlimit must be a positive integer")

    piece = 0
    out_handle = None
    writer = None
    # NOTE: file modes match the original script ('r'/'w'); on Python 3
    # under Windows, passing newline='' would be the recommended form.
    with open(input_file, 'r') as in_handle:
        reader = csv.reader(in_handle, delimiter=delimiter)
        headers = next(reader, None) if keep_headers else None
        try:
            for i, row in enumerate(reader):
                if i % rowlimit == 0:
                    # Current piece is full (or this is the first row):
                    # close it and start the next numbered file.
                    if out_handle is not None:
                        out_handle.close()
                    piece += 1
                    out_path = os.path.join(
                        output_path,
                        "{}_{}.csv".format(output_file, piece),
                    )
                    out_handle = open(out_path, 'w')
                    writer = csv.writer(out_handle, delimiter=delimiter)
                    if headers is not None:
                        writer.writerow(headers)
                writer.writerow(row)
        finally:
            if out_handle is not None:
                out_handle.close()
    return piece


def _split_by_column(input_file, output_file, columnindex,
                     output_path='.', keep_headers=True, delimiter=','):
    """Write rows to `<output_file>_<value>.csv`, grouped by one column.

    `value` is the content of column `columnindex` of the row. Rows too
    short to have that column (for example blank lines) are skipped. Each
    output file is reopened in append mode per row, so the number of
    distinct values is not limited by the open-file limit.

    Returns the number of unique values found.
    """
    if columnindex < 0:
        raise ValueError("columnindex must be zero or greater")

    seen = set()
    with open(input_file, 'r') as in_handle:
        reader = csv.reader(in_handle, delimiter=delimiter)
        headers = next(reader, None) if keep_headers else None
        for row in reader:
            if columnindex >= len(row):
                continue
            value = row[columnindex]
            out_path = os.path.join(
                output_path,
                "{}_{}.csv".format(output_file, value),
            )
            first_time = value not in seen
            # 'w' on first sight truncates leftovers from an earlier run.
            with open(out_path, 'w' if first_time else 'a') as out_handle:
                writer = csv.writer(out_handle, delimiter=delimiter)
                if first_time:
                    seen.add(value)
                    if headers is not None:
                        writer.writerow(headers)
                writer.writerow(row)
    print('totally %i unique items in column %i \n' % (len(seen), columnindex))
    return len(seen)


def parse_file(argument_dict):
    """Split the input CSV as described by the parsed arguments.

    Args:
        argument_dict: dict with ``input_file`` and ``output_file`` plus
            either ``rowlimit`` (rows per piece) or ``columnindex`` (column
            whose unique values select the output file). The split values
            may be strings or ints; a missing or empty entry means "not
            requested". ``rowlimit`` wins if both are given.

    Output files are written to the current directory, unless
    ``output_file`` itself contains a directory part. Headers are kept and
    the delimiter is a comma.

    Raises:
        ValueError: if the requested split value is not a valid integer.
        KeyError: if ``input_file`` or ``output_file`` is missing.
    """
    rowlimit = argument_dict.get("rowlimit")
    columnindex = argument_dict.get("columnindex")

    if rowlimit:
        # split csv file by certain rownumber
        _split_by_row_limit(
            argument_dict["input_file"],
            argument_dict["output_file"],
            int(rowlimit),
        )
    elif columnindex not in (None, ''):
        # split csv file according to the unique values of one column
        # (compared against None/'' so that an integer 0 is accepted)
        _split_by_column(
            argument_dict["input_file"],
            argument_dict["output_file"],
            int(columnindex),
        )
    else:
        print("oops, please check instruction of script by >>./csvsplit.py -h")
164+
165+
166+
# Run the splitter only when this file is executed as a script.
if __name__ == "__main__":
    cli_args = sys.argv[1:]  # everything after the program name
    main(cli_args)

12_sample_csv.csv

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
First Name,Last Name,Email Address,Phone Number,Company,Date Hired
2+
Abigail,Branch,volutpat.ornare.facilisis@Phasellusvitaemauris.co.uk,(412) 540-6276,Sem Eget PC,07/02/2013
3+
Roanna,Lambert,tristique.pharetra@arcuvelquam.ca,(747) 536-6748,Eget Laoreet Foundation,11/23/2013
4+
Amanda,England,semper.rutrum@blandit.com,(669) 164-6411,Magna Nec Quam Limited,08/11/2012
5+
Hilel,Chapman,ultrices@tempor.ca,(683) 531-0279,Sed Molestie PC,06/25/2012
6+
Basia,Bowers,Quisque.ornare@tinciduntnibh.com,(135) 986-6437,Tincidunt Nunc Ac Associates,05/11/2013
7+
Dylan,Dunlap,est.Mauris@etnetuset.org,(877) 604-4603,Eu Ultrices Institute,07/02/2012
8+
Regan,Cardenas,vitae.semper@ultriciesornareelit.org,(693) 378-7235,Neque Morbi Corporation,10/30/2012
9+
Sade,Green,tortor@sagittis.co.uk,(816) 255-5508,Eleifend Ltd,09/03/2012
10+
Marshall,Richardson,sed.facilisis@eu.com,(460) 132-4621,Purus Maecenas Libero LLC,12/21/2012
11+
Regina,Brown,semper.auctor@sem.co.uk,(185) 963-9365,Vulputate Consulting,06/16/2013
12+
Irma,Rivers,vitae@luctusvulputate.net,(701) 393-3679,Nec Leo Morbi Incorporated,05/07/2013
13+
Rudyard,Cline,fringilla@risusatfringilla.org,(971) 228-3147,Risus Quis Consulting,04/25/2013
14+
Justina,Richmond,sapien.Nunc.pulvinar@vitaeerat.co.uk,(755) 103-3125,Ullamcorper Associates,02/12/2013
15+
Reece,Blackburn,felis@Aliquamauctor.com,(239) 528-2742,Suspendisse Associates,04/03/2014
16+
Lillith,Holden,ut.dolor.dapibus@porttitor.net,(305) 797-1579,Dapibus Id Blandit LLP,09/11/2013
17+
Taylor,Vinson,ac@vellectusCum.net,(355) 993-1099,Egestas Institute,05/16/2012
18+
Colton,Barker,volutpat@necluctus.ca,(705) 978-5992,Ornare Consulting,04/24/2013
19+
Vladimir,Walls,mollis.lectus@imperdietullamcorperDuis.edu,(311) 406-4856,Faucibus Ut Nulla LLP,08/12/2012
20+
Freya,Rowland,sagittis@elementumduiquis.co.uk,(284) 850-7506,Turpis PC,05/31/2013
21+
Cullen,Phelps,Nam.ligula@orciluctus.ca,(425) 280-1763,Rhoncus Id Mollis Consulting,09/10/2013
22+
Boris,Lopez,posuere@adipiscingligula.edu,(769) 701-0055,Nunc Sed Orci Industries,07/26/2013
23+
Alvin,Meyer,Etiam@felis.ca,(783) 312-0821,Dignissim Pharetra Ltd,03/02/2013
24+
Nicole,Boyle,tortor.Integer@imperdiet.edu,(675) 678-1160,Dictum Eleifend Nunc LLC,05/05/2012
25+
Flynn,Petersen,dui@lectusrutrum.com,(787) 543-7411,Penatibus Et Associates,03/11/2013
26+
Troy,Herman,a.felis.ullamcorper@sem.ca,(932) 900-7922,Dolor Donec Associates,11/16/2012
27+
Constance,Shields,nec.leo.Morbi@eunulla.com,(221) 761-2368,Vel Quam Company,02/14/2014
28+
Ocean,Green,vulputate.dui@bibendumDonecfelis.net,(481) 832-0298,Nunc Associates,03/03/2013
29+
Steven,Lopez,Suspendisse.ac@sedpedeCum.net,(294) 415-0435,Ipsum Company,07/25/2013
30+
Adara,Lee,magna.Duis@erat.org,(760) 291-7826,Eu Ultrices PC,10/05/2013
31+
Noble,Hancock,Donec.tincidunt.Donec@dictumcursusNunc.edu,(333) 272-8234,Vitae Risus Duis LLC,09/13/2012
32+
Kendall,Wilcox,quis.pede@Pellentesqueut.ca,(173) 982-4381,Ultrices Industries,01/26/2013
33+
Sebastian,Barton,orci.Ut@ametfaucibus.ca,(951) 817-9217,In Mi Pede Corporation,05/11/2014
34+
Gavin,Clark,metus.facilisis.lorem@Sedetlibero.ca,(671) 714-8378,Vestibulum Neque Limited,06/06/2012
35+
Charles,Woods,Maecenas.mi.felis@lacusvarius.org,(559) 935-9739,Amet Ante Company,09/02/2013
36+
Elvis,Roberts,tempor.diam@risus.co.uk,(184) 182-5324,Facilisis Vitae Inc.,01/07/2014
37+
Caldwell,Carey,Suspendisse@Proin.edu,(125) 243-9354,Egestas Lacinia Sed Inc.,10/24/2012
38+
Jesse,Leblanc,sit@tellussemmollis.com,(726) 216-8000,Lectus Ltd,11/22/2013
39+
Hu,Adkins,purus.in.molestie@acmattisvelit.co.uk,(370) 317-7556,Aliquam Vulputate Company,10/19/2013
40+
Hamilton,Tyler,taciti.sociosqu.ad@Sedmalesuadaaugue.com,(234) 744-3868,Nunc Sed LLC,10/19/2012
41+
Cade,Osborn,at.iaculis.quis@doloregestas.org,(501) 753-9793,Consectetuer Industries,08/14/2013
42+
Ashely,Kent,Cum.sociis.natoque@odioPhasellusat.edu,(789) 869-6558,Imperdiet Ornare Corporation,02/04/2013
43+
Veda,Cameron,tristique.pharetra@necenimNunc.co.uk,(522) 127-0654,Egestas Incorporated,12/29/2012
44+
Burke,Ferrell,orci.sem@semPellentesque.co.uk,(975) 891-3694,Purus Accumsan Institute,07/26/2013
45+
Fuller,Lamb,orci.Donec@vulputatedui.edu,(523) 614-5785,Pede Cum Sociis Limited,12/02/2013
46+
Natalie,Taylor,In@lorem.ca,(117) 594-2685,A Facilisis Non LLP,12/06/2013
47+
Astra,Morton,nec@scelerisquenequeNullam.com,(390) 867-2558,Non Ante Bibendum Foundation,05/07/2012
48+
David,Espinoza,gravida@a.co.uk,(287) 945-5239,Lobortis Nisi Nibh Industries,05/11/2014
49+
Sybil,Todd,risus@sitametrisus.edu,(611) 848-4765,Massa Mauris Vestibulum Incorporated,01/19/2013
50+
Lee,Barron,cursus.non@Praesentinterdumligula.ca,(765) 654-9167,In Ornare Inc.,01/01/2013
51+
Zachery,Reed,nulla.Integer.urna@amet.edu,(667) 465-1222,Ac Corp.,10/07/2012
52+
Marshall,Brady,lobortis.nisi.nibh@molestiearcu.edu,(391) 336-5310,Ac Sem Ut Incorporated,07/12/2012
53+
Selma,Floyd,eros.turpis.non@lectusconvallis.net,(398) 920-1076,Non Foundation,07/21/2012
54+
Ivy,Garrison,posuere@euodio.net,(428) 321-5542,Semper Erat Foundation,12/19/2013
55+
Wyatt,Gibbs,Sed@nequeNullamut.ca,(973) 141-9840,Pellentesque Corp.,11/21/2013
56+
Vaughan,Moss,adipiscing@Phasellusfermentum.net,(597) 730-0228,Tempor Institute,10/27/2013
57+
Elijah,Mcgowan,Aliquam@Quisqueornaretortor.ca,(127) 171-1859,Tempor Bibendum Donec LLC,08/26/2012
58+
Miranda,Ingram,fermentum@velitSedmalesuada.net,(864) 873-7359,Feugiat Non Lobortis Institute,08/20/2012
59+
Anastasia,Lawrence,Mauris.eu@pedeultrices.net,(106) 260-8688,Sit Amet Consulting,05/31/2012
60+
Samson,Patton,non.arcu@enimnislelementum.ca,(302) 330-4251,Hendrerit Associates,12/27/2013
61+
Erasmus,Sexton,lectus.justo@aliquam.org,(972) 793-9187,Feugiat Industries,10/15/2013
62+
Emery,Gardner,erat@lorem.org,(848) 534-1656,Nunc Sit Amet Industries,08/24/2012
63+
Nomlanga,Hensley,Fusce@leoVivamus.org,(644) 169-6243,Consectetuer Company,08/29/2012
64+
Jason,Craft,nunc.nulla@sapien.ca,(691) 770-9143,Blandit LLC,03/23/2013
65+
Kathleen,Haley,sed.dolor.Fusce@imperdietornare.edu,(891) 454-8400,Lorem Company,07/02/2012
66+
Aline,Flynn,a@Nunclaoreet.edu,(563) 400-6803,Et Netus LLP,01/28/2013
67+
Ursa,Dickson,Integer.sem@ullamcorpervelit.com,(371) 615-7750,Nullam Company,12/22/2012
68+
Wesley,Lopez,enim.non.nisi@vulputateduinec.edu,(287) 777-3724,Lobortis Ultrices Vivamus Corp.,06/17/2013
69+
Victoria,Mcleod,lectus.justo.eu@ut.ca,(583) 108-1294,Justo Faucibus Lectus Corporation,10/17/2012
70+
Shana,Roach,scelerisque.sed.sapien@afelisullamcorper.edu,(921) 385-2342,Quis Turpis Vitae Incorporated,05/26/2014
71+
Maxine,Ruiz,Donec.porttitor@hymenaeosMaurisut.edu,(520) 801-0808,Luctus Foundation,12/05/2013
72+
Harriet,Bishop,Quisque@Crasdictum.com,(758) 716-9401,Dictum Phasellus In Inc.,09/08/2013
73+
Serina,Williams,tincidunt.vehicula.risus@sedliberoProin.ca,(270) 288-0136,At Egestas A Corporation,03/17/2014
74+
Rhea,Copeland,laoreet.ipsum@Aliquam.co.uk,(775) 493-9118,Ipsum Incorporated,05/22/2013
75+
Evan,Holcomb,neque.sed@ullamcorperDuis.ca,(695) 656-8621,Sem Institute,02/16/2013
76+
Basil,Mccall,arcu.Vestibulum.ante@luctuslobortis.co.uk,(144) 989-4125,Feugiat Tellus Lorem Institute,02/25/2013
77+
Florence,Riley,sit.amet@Proinvel.org,(663) 529-4829,Enim Sit PC,01/14/2014
78+
Heather,Peck,mauris@scelerisqueneque.edu,(850) 444-0917,Curabitur Limited,01/16/2014
79+
Dara,Robinson,egestas@utnisi.net,(106) 576-1355,Urna Incorporated,12/15/2012
80+
Kylan,Maxwell,conubia.nostra@accumsan.com,(973) 206-2558,Aliquam Eros Turpis Company,08/21/2012
81+
Petra,Blake,faucibus.orci.luctus@dapibusrutrum.ca,(901) 207-9872,Ac Metus Institute,06/17/2013
82+
Fiona,Goff,tincidunt@enim.net,(265) 255-7749,Odio Phasellus Corp.,12/03/2012
83+
Kameko,Diaz,ac@turpisNulla.edu,(731) 354-4848,Montes Nascetur Corporation,08/16/2013
84+
Craig,Valentine,tristique@urnaVivamus.net,(437) 229-8198,Etiam Gravida Molestie Consulting,05/06/2014
85+
Samson,Cunningham,semper.pretium@auctor.edu,(335) 666-7758,Nec Ante Associates,07/02/2013
86+
Yoko,Rogers,nunc@Vivamus.net,(893) 405-6889,Fermentum Vel Mauris Corp.,03/29/2014
87+
Walter,Burnett,nisi.Mauris.nulla@felis.co.uk,(336) 411-9222,Suscipit Est Institute,06/26/2012
88+
Gisela,Nash,euismod@lectusrutrum.ca,(917) 249-0166,Non Magna LLP,11/23/2012
89+
Wanda,Pierce,Nulla@dolorsit.com,(480) 872-3389,Cum Sociis Natoque Limited,11/02/2013
90+
Jane,Dixon,eu.odio@Infaucibus.com,(112) 139-8563,Id Ante Dictum LLC,03/14/2014
91+
Octavius,Shannon,iaculis.aliquet@ante.ca,(541) 652-3295,Libero Est Institute,05/28/2014
92+
Rigel,Hunt,metus.Aenean.sed@inhendrerit.org,(792) 358-7505,Enim PC,09/05/2013
93+
Rachel,Gray,erat.in.consectetuer@Fuscealiquetmagna.org,(165) 973-1366,Suscipit Nonummy Fusce LLC,05/08/2013
94+
Madeline,Bradley,dignissim.Maecenas@egetmassaSuspendisse.co.uk,(436) 223-3135,Posuere PC,01/24/2014
95+
Emma,Conner,dictum@magnaDuisdignissim.com,(304) 429-2622,Nulla Incorporated,11/05/2013
96+
Halee,Mclean,amet.faucibus@Phasellus.net,(669) 364-0148,Ligula Consulting,03/05/2014
97+
Conan,Williams,massa@felisNulla.net,(999) 649-4433,Velit Eu Limited,05/15/2014
98+
Martena,Fowler,mi.lacinia@maurisa.ca,(405) 661-1762,Blandit Nam Institute,02/27/2013
99+
Robin,Buckley,cursus.Nunc.mauris@nislQuisque.net,(376) 771-9862,Sed Corp.,10/30/2012
100+
Isadora,Adams,arcu.Vestibulum@urna.co.uk,(138) 774-6058,Blandit Viverra Donec Institute,08/07/2012
101+
Bernard,Price,ultrices@Praesent.ca,(368) 882-6146,Egestas Blandit LLP,11/03/2013

0 commit comments

Comments
 (0)
0