@@ -55,11 +55,17 @@ def parse_parser_results(text):
55
55
av = re .split ("=| " , s )
56
56
# make [ignore,ignore,a,b,c,d] into [[a,b],[c,d]]
57
57
# and save as attr-value dict, convert numbers into ints
58
- tmp ['words' ].append ((av [1 ], dict (zip (* [av [2 :][x ::2 ] for x in (0 , 1 )]))))
58
+ # tmp['words'].append((av[1], dict(zip(*[av[2:][x::2] for x in (0, 1)]))))
59
59
# tried to convert digits to ints instead of strings, but
60
60
# it seems the results of this can't be serialized into JSON?
61
- # av = zip(*[av[2:][x::2] for x in (0, 1)])
62
- # tmp['words'][av[1]] = dict(map(lambda x: (x[0], x[1].isdigit() and int(x[1]) or x[1]), av))
61
+ word = av [1 ]
62
+ attributes = {}
63
+ for a ,v in zip (* [av [2 :][x ::2 ] for x in (0 , 1 )]):
64
+ if v .isdigit ():
65
+ attributes [a ] = int (v )
66
+ else :
67
+ attributes [a ] = v
68
+ tmp ['words' ].append ((word , attributes ))
63
69
state = 3
64
70
elif state == 3 :
65
71
# skip over parse tree
@@ -72,12 +78,22 @@ def parse_parser_results(text):
72
78
if not line .startswith (" " ) and line .endswith (")" ):
73
79
split_entry = re .split ("\(|, " , line [:- 1 ])
74
80
if len (split_entry ) == 3 :
75
- rel , left , right = map (lambda x : remove_id ( x ) , split_entry )
81
+ rel , left , right = map (lambda x : x , split_entry )
76
82
tmp ['tuples' ].append (tuple ([rel ,left ,right ]))
77
83
elif "Coreference links" in line :
78
84
state = 5
79
85
elif state == 5 :
80
- # coreference links. Not yet implemented
86
+ crexp = re .compile ('\s(\d*)\s(\d*)\s\-\>\s(\d*)\s(\d*), that is' )
87
+ matches = crexp .findall (line )
88
+ for src_i , src_pos , sink_i , sink_pos in matches :
89
+ print "COREF MATCH" , src_i , sink_i
90
+ src = tmp ['words' ][int (src_pos )- 1 ][0 ]
91
+ sink = tmp ['words' ][int (sink_pos )- 1 ][0 ]
92
+ if tmp .has_key ('coref' ):
93
+ tmp ['coref' ].append ((src , sink ))
94
+ else :
95
+ tmp ['coref' ] = [(src , sink )]
96
+
81
97
print "CR" , line
82
98
if len (tmp .keys ()) != 0 :
83
99
results .append (tmp )
@@ -191,8 +207,9 @@ def _parse(self, text, verbose=True):
191
207
def _debug_parse(self, text, verbose=True):
    """Return parsed results from a canned parser dump instead of a live parse.

    Debugging aid: ``text`` and ``verbose`` are accepted only so the
    signature matches ``_parse``; neither is used here.  The raw contents
    of ``test.out`` (resolved against the current working directory) are
    passed through ``parse_parser_results`` and that result is returned.

    Raises IOError (from ``open``) if ``test.out`` cannot be opened.
    """
    print("DEBUG PARSE -- ")
    # Use a context manager so the handle is closed even if the read fails.
    with open("test.out", 'r') as rf:
        incoming = rf.read()
    results = parse_parser_results(incoming)
    return results
197
214
198
215
def parse (self , text , verbose = True ):
@@ -220,7 +237,12 @@ def parse_imperative(self, text, verbose=True):
220
237
used_pronoun = None
221
238
pronouns = ["you" ,"he" , "she" ,"i" ]
222
239
for p in pronouns :
240
+ if text .startswith (p + " " ):
241
+ # it's already an imperative!
242
+ used_pronoun = None
243
+ break
223
244
if p not in text :
245
+ # found one not in there already
224
246
used_pronoun = p
225
247
break
226
248
# if you can't find one, regress to original parse
@@ -229,19 +251,31 @@ def parse_imperative(self, text, verbose=True):
229
251
230
252
# create text with pronoun and parse it
231
253
new_text = used_pronoun + " " + text .lstrip ()
232
- result = self ._parse (new_text , verbose )
254
+ result = self ._debug_parse (new_text , verbose )
255
+
256
+ if len (result ) != 1 :
257
+ print "Non-imperative sentence? Multiple sentences found."
233
258
234
259
# remove the dummy pronoun
260
+ used_pronoun_offset = len (used_pronoun )+ 1
235
261
if result [0 ].has_key ('text' ):
236
262
result [0 ]['text' ] = text
237
263
result [0 ]['tuples' ] = filter (lambda x : not (x [1 ] == used_pronoun or x [2 ]
238
264
== used_pronoun ), result [0 ]['tuples' ])
239
265
result [0 ]['words' ] = result [0 ]['words' ][1 :]
266
+ # account for offset
267
+ ct = 0
268
+ for word , av in result [0 ]['words' ]:
269
+ for a ,v in av .items ():
270
+ if a .startswith ("CharacterOffset" ):
271
+ result [0 ]['words' ][ct ][1 ][a ] = v - used_pronoun_offset
272
+ ct += 1
240
273
return dumps (result )
241
274
else :
242
275
# if there's a timeout error, just return it.
243
276
return dumps (result )
244
277
278
+
245
279
if __name__ == '__main__' :
246
280
parser = optparse .OptionParser (usage = "%prog [OPTIONS]" )
247
281
parser .add_option (
0 commit comments