 # python3
+import os
 import webbrowser
 import jpype
 import sys
+
+def get_file_character_count(file_path):
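+    # Count the characters in a single file; files that cannot be read as
+    # text are reported and counted as zero.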
+    character_count = 0
+    with open(file_path, "r") as file:
+        try:
+            content = file.read()
+            character_count = len(content)
+        except (UnicodeDecodeError, OSError):
+            print(file_path + " is not a text file.")
+    return character_count
+
+def get_all_files(folder_path):
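+    # Recursively collect the paths of all files under folder_path.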
+    all_files = []
+    for root, dirs, files in os.walk(folder_path):
+        for file in files:
+            file_path = os.path.join(root, file)
+            all_files.append(file_path)
+    return all_files
+
+def get_text_files_character_count(folder_path):
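+    # Total character count across every file under folder_path, combining
+    # get_all_files and get_file_character_count.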
+    text_files = get_all_files(folder_path)
+    total_character_count = 0
+    for file_path in text_files:
+        character_count = get_file_character_count(file_path)
+        total_character_count += character_count
+    return total_character_count
+
 def indexOf(args, arg):
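+    # Python counterpart of Java's List.indexOf; callers below treat -1 as
+    # "argument not present".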
     try:
         return args.index(arg)
@@ -12,6 +39,7 @@ def save_to_file(file_name, contents):
     fh = open(file_name, 'w')
     fh.write(contents)
     fh.close()
+
 def call_dataFlowAnalyzer(args):
     # Start the Java Virtual Machine (JVM)
     widget_server_url = "http://localhost:8000"
@@ -27,30 +55,43 @@ def call_dataFlowAnalyzer(args):
     XML2Model = jpype.JClass("gudusoft.gsqlparser.dlineage.util.XML2Model")
     RemoveDataflowFunction = jpype.JClass("gudusoft.gsqlparser.dlineage.util.RemoveDataflowFunction")
     File = jpype.JClass("java.io.File")
-    sqlFiles = None
     EDbVendor = jpype.JClass("gudusoft.gsqlparser.EDbVendor")
     vendor = EDbVendor.dbvoracle
     index = indexOf(args, "/t")
     if index != -1 and len(args) > index + 1:
-        vendor = TGSqlParser.getDBVendorByName(args[index + 1])
+        vendor = TGSqlParser.getDBVendorByName(args[index + 1])
     if indexOf(args, "/version") != -1:
-        print("Version: " + DataFlowAnalyzer.getVersion())
-        print("Release Date: " + DataFlowAnalyzer.getReleaseDate())
-        return
+        print("Version: " + DataFlowAnalyzer.getVersion())
+        print("Release Date: " + DataFlowAnalyzer.getReleaseDate())
+        return
 
     if indexOf(args, "/f") != -1 and len(args) > indexOf(args, "/f") + 1:
-        sqlFiles = File(args[indexOf(args, "/f") + 1])
-        if not sqlFiles.exists() or not sqlFiles.isFile():
-            print(args[indexOf(args, "/f") + 1] + " is not a valid file.")
-            return
+        sqlFiles = File(args[indexOf(args, "/f") + 1])
+        if not sqlFiles.exists() or not sqlFiles.isFile():
+            print(args[indexOf(args, "/f") + 1] + " is not a valid file.")
+            return
+
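+        # Lite edition guard: refuse single files longer than 10,000 characters.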
+        character_count = get_file_character_count(args[indexOf(args, "/f") + 1])
+        if character_count > 10000:
+            print("SQLFlow lite version only supports processing SQL statements with a maximum of 10,"
+                  "000 characters. If you need to process SQL statements without length restrictions, "
+                  "please contact support@gudusoft.com for more information.")
+            return
+
     elif indexOf(args, "/d") != -1 and len(args) > indexOf(args, "/d") + 1:
-        sqlFiles = File(args[indexOf(args, "/d") + 1])
-        if not sqlFiles.exists() or not sqlFiles.isDirectory():
-            print(args[indexOf(args, "/d") + 1] + " is not a valid directory.")
-            return
+        sqlFiles = File(args[indexOf(args, "/d") + 1])
+        if not sqlFiles.exists() or not sqlFiles.isDirectory():
+            print(args[indexOf(args, "/d") + 1] + " is not a valid directory.")
+            return
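+        # Same lite edition guard, summed over every file in the directory.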
+        character_count = get_text_files_character_count(args[indexOf(args, "/d") + 1])
+        if character_count > 10000:
+            print("SQLFlow lite version only supports processing SQL statements with a maximum of 10,"
+                  "000 characters. If you need to process SQL statements without length restrictions, "
+                  "please contact support@gudusoft.com for more information.")
+            return
     else:
-        print("Please specify a sql file path or directory path to analyze dlineage.")
-        return
+        print("Please specify a sql file path or directory path to analyze dlineage.")
+        return
     simple = indexOf(args, "/s") != -1
     ignoreTemporaryTable = indexOf(args, "/withTemporaryTable") == -1
     ignoreResultSets = indexOf(args, "/i") != -1
@@ -77,7 +118,8 @@ def call_dataFlowAnalyzer(args):
         topselectlist = True
     tableLineage = indexOf(args, "/tableLineage") != -1
     csv = indexOf(args, "/csv") != -1
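+    # Optional /delimiter flag overrides the default "," used for CSV output.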
-    delimiter = args.get(indexOf(args, "/delimiter") + 1) if indexOf(args, "/delimiter") != -1 and len(args) > indexOf(args, "/delimiter") + 1 else ","
+    delimiter = args[indexOf(args, "/delimiter") + 1] if indexOf(args, "/delimiter") != -1 and len(
+        args) > indexOf(args, "/delimiter") + 1 else ","
     if tableLineage:
         simple = False
         ignoreResultSets = False
@@ -91,7 +133,7 @@ def call_dataFlowAnalyzer(args):
         SQLUtil = jpype.JClass("gudusoft.gsqlparser.util.SQLUtil")
         envs = jsonSQLEnvParser.parseSQLEnv(vendor, SQLUtil.getFileContent(metadataFile))
         if envs != None and envs.length > 0:
-            sqlenv = envs[0]
+            sqlenv = envs[0]
     dlineage = DataFlowAnalyzer(sqlFiles, vendor, simple)
     if sqlenv != None:
         dlineage.setSqlEnv(sqlenv)
@@ -100,7 +142,7 @@ def call_dataFlowAnalyzer(args):
     dlineage.setShowJoin(showJoin)
     dlineage.setIgnoreRecordSet(ignoreResultSets)
     if ignoreResultSets and not ignoreFunction:
-        dlineage.setSimpleShowFunction(True)
+        dlineage.setSimpleShowFunction(True)
     dlineage.setLinkOrphanColumnToFirstTable(linkOrphanColumnToFirstTable)
     dlineage.setIgnoreCoordinate(ignoreCoordinate)
     dlineage.setSimpleShowTopSelectResultSet(topselectlist)
@@ -117,16 +159,14 @@ def call_dataFlowAnalyzer(args):
         dlineage.getOption().setDefaultSchema(args[indexOf(args, "/defaultSchema") + 1])
     if indexOf(args, "/showResultSetTypes") != -1:
         resultSetTypes = args[indexOf(args, "/showResultSetTypes") + 1]
-        if resultSetTypes != None:
+        if resultSetTypes is not None:
             dlineage.getOption().showResultSetTypes(resultSetTypes.split(","))
 
     if indexOf(args, "/filterRelationTypes") != -1:
         dlineage.getOption().filterRelationTypes(args[indexOf(args, "/filterRelationTypes") + 1])
     if simple and not jsonFormat:
         dlineage.setTextFormat(textFormat)
 
-    result = None
-    dataflow = None
     if indexOf(args, "/er") != -1:
         dlineage.getOption().setShowERDiagram(True)
         dlineage.generateDataFlow()
@@ -135,7 +175,7 @@ def call_dataFlowAnalyzer(args):
         generator = DataFlowGraphGenerator()
         result = generator.genERGraph(vendor, dataflow)
         save_to_file("widget/json/erGraph.json", str(result))
-        webbrowser.open_new(widget_server_url + "/er.html")
+        webbrowser.open_new(widget_server_url + "/er.html")
         return
     elif tableLineage:
         dlineage.generateDataFlow()
@@ -192,40 +232,52 @@ def call_dataFlowAnalyzer(args):
 if __name__ == "__main__":
     args = sys.argv
     if len(args) < 2:
-        print("Usage: java DataFlowAnalyzer [/f <path_to_sql_file>] [/d <path_to_directory_includes_sql_files>] [/stat] [/s [/topselectlist] [/text] [/withTemporaryTable]] [/i] [/showResultSetTypes <resultset_types>] [/ic] [/lof] [/j] [/json] [/traceView] [/t <database type>] [/o <output file path>] [/version] [/env <path_to_metadata.json>] [/tableLineage [/csv [/delimiter <delimiter>]]] [/transform [/coor]] [/showConstant] [/treatArgumentsInCountFunctionAsDirectDataflow] [/filterRelationTypes <relationTypes>]")
-        print("/f: Optional, the full path to the SQL file.")
-        print("/d: Optional, the full path to the directory that includes the SQL files.")
-        print("/j: Optional, return the result including the join relation.")
-        print("/s: Optional, simple output, ignore the intermediate results.")
-        print("/topselectlist: Optional, simple output with top select results.")
-        print("/withTemporaryTable: Optional, simple output with the temporary tables.")
-        print("/i: Optional, the same as the /s option but keeps the resultsets generated by SQL functions; equivalent to /s /topselectlist plus keeping the resultsets generated by SQL functions.")
-        print("/showResultSetTypes: Optional, simple output with the specified resultset types, separated by commas; resultset types include array, struct, result_of, cte, insert_select, update_select, merge_update, merge_insert, output, update_set,\r\n"
-              + "pivot_table, unpivot_table, alias, rs, function, case_when")
-        print("/if: Optional, keep all the intermediate resultsets, but remove the resultsets generated by SQL functions.")
-        print("/ic: Optional, ignore the coordinates in the output.")
-        print("/lof: Optional, link orphan columns to the first table.")
-        print("/traceView: Optional, only output the names of source tables and views, ignore all intermediate data.")
-        print("/text: Optional, valid only when /s is used, output the column dependency in text mode.")
-        print("/json: Optional, print the output in json format.")
-        print("/tableLineage [/csv /delimiter]: Optional, output table level lineage.")
-        print("/csv: Optional, output column level lineage in csv format.")
-        print("/delimiter: Optional, the delimiter of the column level lineage output in csv format.")
-        print("/t: Optional, set the database type. "
-              + "Supports access,bigquery,couchbase,dax,db2,greenplum,hana,hive,impala,informix,mdx,mssql,\n"
-              + "sqlserver,mysql,netezza,odbc,openedge,oracle,postgresql,postgres,redshift,snowflake,\n"
-              + "sybase,teradata,soql,vertica.\n" + "The default value is oracle.")
-        print("/env: Optional, specify a metadata.json to get the database metadata information.")
-        print("/transform: Optional, output the relation transform code.")
-        print("/coor: Optional, output the relation transform coordinates, but not the code.")
-        print("/defaultDatabase: Optional, specify the default database.")
-        print("/defaultSchema: Optional, specify the default schema.")
-        print("/showImplicitSchema: Optional, show the implicit schema.")
-        print("/showConstant: Optional, show constant tables.")
-        print("/treatArgumentsInCountFunctionAsDirectDataflow: Optional, treat arguments in the count function as direct dataflow.")
-        print("/filterRelationTypes: Optional, supports fdd, fdr, join, call, er; multiple relation types are separated by commas.")
-        print("/graph: Optional, open a browser page to graphically display the results.")
-        print("/er: Optional, open a browser page and display the ER diagram graphically.")
-        sys.exit(0)
+        print("Usage: java DataFlowAnalyzer [/f <path_to_sql_file>] [/d <path_to_directory_includes_sql_files>] ["
+              "/stat] [/s [/topselectlist] [/text] [/withTemporaryTable]] [/i] [/showResultSetTypes "
+              "<resultset_types>] [/ic] [/lof] [/j] [/json] [/traceView] [/t <database type>] [/o <output file path>] "
+              "[/version] [/env <path_to_metadata.json>] [/tableLineage [/csv [/delimiter <delimiter>]]] [/transform "
+              "[/coor]] [/showConstant] [/treatArgumentsInCountFunctionAsDirectDataflow] [/filterRelationTypes "
+              "<relationTypes>]")
+        print("/f: Optional, the full path to the SQL file.")
+        print("/d: Optional, the full path to the directory that includes the SQL files.")
+        print("/j: Optional, return the result including the join relation.")
+        print("/s: Optional, simple output, ignore the intermediate results.")
+        print("/topselectlist: Optional, simple output with top select results.")
+        print("/withTemporaryTable: Optional, simple output with the temporary tables.")
+        print("/i: Optional, the same as the /s option but keeps the resultsets generated by SQL functions; "
+              "equivalent to /s /topselectlist plus keeping the resultsets generated by SQL functions.")
+        print("/showResultSetTypes: Optional, simple output with the specified resultset types, separated by commas; "
+              "resultset types include array, struct, result_of, cte, insert_select, update_select, merge_update, "
+              "merge_insert, output, update_set,\r\n"
+              + "pivot_table, unpivot_table, alias, rs, function, case_when")
+        print("/if: Optional, keep all the intermediate resultsets, but remove the resultsets generated by SQL "
+              "functions.")
+        print("/ic: Optional, ignore the coordinates in the output.")
+        print("/lof: Optional, link orphan columns to the first table.")
+        print("/traceView: Optional, only output the names of source tables and views, ignore all intermediate data.")
+        print("/text: Optional, valid only when /s is used, output the column dependency in text mode.")
+        print("/json: Optional, print the output in json format.")
+        print("/tableLineage [/csv /delimiter]: Optional, output table level lineage.")
+        print("/csv: Optional, output column level lineage in csv format.")
+        print("/delimiter: Optional, the delimiter of the column level lineage output in csv format.")
+        print("/t: Optional, set the database type. "
+              + "Supports access,bigquery,couchbase,dax,db2,greenplum,hana,hive,impala,informix,mdx,mssql,\n"
+              + "sqlserver,mysql,netezza,odbc,openedge,oracle,postgresql,postgres,redshift,snowflake,\n"
+              + "sybase,teradata,soql,vertica.\n" + "The default value is oracle.")
+        print("/env: Optional, specify a metadata.json to get the database metadata information.")
+        print("/transform: Optional, output the relation transform code.")
+        print("/coor: Optional, output the relation transform coordinates, but not the code.")
+        print("/defaultDatabase: Optional, specify the default database.")
+        print("/defaultSchema: Optional, specify the default schema.")
+        print("/showImplicitSchema: Optional, show the implicit schema.")
+        print("/showConstant: Optional, show constant tables.")
+        print("/treatArgumentsInCountFunctionAsDirectDataflow: Optional, treat arguments in the count function as "
+              "direct dataflow.")
+        print("/filterRelationTypes: Optional, supports fdd, fdr, join, call, er; multiple relation types are "
+              "separated by commas.")
+        print("/graph: Optional, open a browser page to graphically display the results.")
+        print("/er: Optional, open a browser page and display the ER diagram graphically.")
+        sys.exit(0)
 
     call_dataFlowAnalyzer(args)