From 9c99ab0298c25cff6c0663b0f47631ab532b942e Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sat, 10 May 2014 17:50:16 -0700
Subject: [PATCH 01/27] Added setup.py script

---
 setup.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..93a0261
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,31 @@
+from setuptools import setup, find_packages
+
+PACKAGE = "corenlp"
+NAME = "wordseer-stanford-corenlp-python"
+DESCRIPTION = "A Stanford Core NLP wrapper (wordseer fork)"
+AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
+AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
+URL = "https://github.com/silverasm/stanford-corenlp-python"
+VERSION = "3.3.0-0"
+
+setup(
+    name=NAME,
+    version=VERSION,
+    description=DESCRIPTION,
+    author=AUTHOR,
+    author_email=AUTHOR_EMAIL,
+    url=URL,
+    packages=find_packages(),
+    package_data = {"": ["*.properties"],
+        "corenlp": ["*.properties"]},
+    install_requires=[
+        "pexpect >= 2.4",
+        "unidecode >= 0.04.12",
+        "xmltodict >= 0.4.6",
+    ],
+    classifiers=[
+        ("License :: OSI Approved :: GNU General Public License v2 or later "
+            "(GPLv2+)"),
+        "Programming Language :: Python",
+    ],
+)

From c17d656bb398bffa6030dd0d34bb8fe90b0a78d8 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Tue, 15 Jul 2014 18:45:31 -0700
Subject: [PATCH 02/27] Updated README, updated setup.py with newer info

---
 README.md | 5 +++--
 setup.py  | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 0f7a2c6..f0fb897 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,4 @@
 # A Python wrapper for the Java Stanford Core NLP tools
----------------------------
 
 This is a fork of Dustin Smith's [stanford-corenlp-python](https://github.com/dasmith/stanford-corenlp-python), a Python interface to [Stanford CoreNLP](http://nlp.stanford.edu/software/corenlp.shtml). It can either use as python package, or run as a JSON-RPC server.
 
@@ -159,8 +158,10 @@ The function uses XML output feature of Stanford CoreNLP, and you can take all i
 
 (note: The function requires xmltodict now, you should install it by `sudo pip install xmltodict`)
 
-## Developer
+## Developers
    * Hiroyoshi Komatsu [hiroyoshi.komat@gmail.com]
    * Johannes Castner [jac2130@columbia.edu]
    * Robert Elwell [robert@wikia-inc.com]
    * Tristan Chong [tristan@wikia-inc.com]
+   * Aditi Muralidharan [aditi.shrikumar@gmail.com]
+
diff --git a/setup.py b/setup.py
index 93a0261..396bdda 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,11 @@
 from setuptools import setup, find_packages
 
 PACKAGE = "corenlp"
-NAME = "wordseer-stanford-corenlp-python"
+NAME = "stanford-corenlp-python"
 DESCRIPTION = "A Stanford Core NLP wrapper (wordseer fork)"
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
-URL = "https://github.com/silverasm/stanford-corenlp-python"
+URL = "https://github.com/Wordseer/stanford-corenlp-python"
 VERSION = "3.3.0-0"
 
 setup(
@@ -29,3 +29,4 @@
         "Programming Language :: Python",
     ],
 )
+

From e9f5973278f08f2e0667619d16b0193341e7cc17 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sun, 27 Jul 2014 13:33:59 +0000
Subject: [PATCH 03/27] Removed the remove_id method, let's see what happens

---
 corenlp/corenlp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index 6e21ad9..1017735 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -18,7 +18,6 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 
-
 import json
 import optparse
 import os
@@ -193,7 +192,7 @@ def parse_parser_results(text):
             else:
                 split_entry = re.split("\(|, |-", line[:-1])
                 if len(split_entry) == 5:
-                    rel, left, leftindex, right, rightindex = map(lambda x: remove_id(x), split_entry)
+                    rel, left, leftindex, right, rightindex = split_entry
                     sentence['dependencies'].append(tuple([rel, left, leftindex, right, rightindex]))
 
         elif state == STATE_COREFERENCE:
@@ -500,3 +499,4 @@ def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g", raw_output=Fa
     except KeyboardInterrupt:
         print >>sys.stderr, "Bye."
         exit()
+

From 8593c0aaa50296a049c833125a9b546ce65c423f Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Mon, 28 Jul 2014 12:42:46 +0000
Subject: [PATCH 04/27] Removed remove_id method altogether.

---
 corenlp/corenlp.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index 1017735..23418fa 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -124,12 +124,6 @@ def init_corenlp_command(corenlp_path, memory, properties):
 
     return "%s %s -cp %s %s %s" % (java_path, limit, ':'.join(jars), classname, props)
 
-
-def remove_id(word):
-    """Removes the numeric suffix from the parsed recognized words: e.g. 'word-2' > 'word' """
-    return word.replace("'", "")
-
-
 def parse_bracketed(s):
     '''Parse word features [abc=... def = ...]
     Also manages to parse out features that have XML within them

From b664532367ce77fc576cadccbbba7e7fc35af2fb Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Wed, 30 Jul 2014 16:59:09 -0700
Subject: [PATCH 05/27] Fixed bug which caused weird issues with quotes in
 numeric values.

---
 corenlp/corenlp.py | 3 +++
 setup.py           | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index 23418fa..a818728 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -27,6 +27,7 @@
 import pexpect
 import tempfile
 import shutil
+import re
 from progressbar import ProgressBar, Fraction
 from unidecode import unidecode
 from subprocess import call
@@ -187,6 +188,8 @@ def parse_parser_results(text):
                 split_entry = re.split("\(|, |-", line[:-1])
                 if len(split_entry) == 5:
                     rel, left, leftindex, right, rightindex = split_entry
+                    leftindex = re.sub("[^0-9]", "", leftindex)
+                    rightindex = re.sub("[^0-9]", "", rightindex)
                     sentence['dependencies'].append(tuple([rel, left, leftindex, right, rightindex]))
 
         elif state == STATE_COREFERENCE:
diff --git a/setup.py b/setup.py
index 396bdda..80ae7a7 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.0-0"
+VERSION = "3.3.2-0"
 
 setup(
     name=NAME,

From b4b9348ce2c7ce052ad3ebbaa300beb1dbce100c Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sat, 2 Aug 2014 13:32:05 -0700
Subject: [PATCH 06/27] Parser shouldn't eat equals signs anymore

---
 corenlp/corenlp.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index a818728..e53eca8 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-
+import pdb
 import json
 import optparse
 import os
@@ -137,7 +137,7 @@ def parse_bracketed(s):
         temp["^^^%d^^^" % i] = tag
         s = s.replace(tag, "^^^%d^^^" % i)
     # Load key-value pairs, substituting as necessary
-    for attr, val in re.findall(r"([^=\s]*)=([^=\s]*)", s):
+    for attr, val in re.findall(r"([^=\s]*)=([^\s]*)", s):
         if val in temp:
             val = temp[val]
         if attr == 'Text':
@@ -171,6 +171,7 @@ def parse_parser_results(text):
             if not line.startswith("[Text="):
                 raise ParserError('Parse error. Could not find "[Text=" in: %s' % line)
             for s in WORD_PATTERN.findall(line):
+                pdb.set_trace()
                 sentence['words'].append(parse_bracketed(s))
             state = STATE_TREE
 

From aec4b351c3e7d5ddbbedc2f72c43bb38a32fb708 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sat, 2 Aug 2014 13:33:16 -0700
Subject: [PATCH 07/27] Removed debug code

---
 corenlp/corenlp.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index e53eca8..625d75d 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-import pdb
+
 import json
 import optparse
 import os
@@ -171,7 +171,6 @@ def parse_parser_results(text):
             if not line.startswith("[Text="):
                 raise ParserError('Parse error. Could not find "[Text=" in: %s' % line)
             for s in WORD_PATTERN.findall(line):
-                pdb.set_trace()
                 sentence['words'].append(parse_bracketed(s))
             state = STATE_TREE
 

From 3e47fbc0a2d1913dd71ee5201260d0fa0fe1fdf4 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Wed, 6 Aug 2014 11:42:38 -0700
Subject: [PATCH 08/27] Added support for winpexpect, hopefully not too hacky

---
 corenlp/corenlp.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index 625d75d..9d127d1 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -32,6 +32,13 @@
 from unidecode import unidecode
 from subprocess import call
 
+use_winpexpect = True
+
+try:
+    import winpexpect
+except ImportError:
+    use_winpexpect = False
+
 VERBOSE = False
 STATE_START, STATE_TEXT, STATE_WORDS, STATE_TREE, STATE_DEPENDENCY, STATE_COREFERENCE = 0, 1, 2, 3, 4, 5
 WORD_PATTERN = re.compile('\[([^\]]+)\]')
@@ -310,7 +317,12 @@ class StanfordCoreNLP:
     def _spawn_corenlp(self):
         if VERBOSE:
             print self.start_corenlp
-        self.corenlp = pexpect.spawn(self.start_corenlp, maxread=8192, searchwindowsize=80)
+        if use_winpexpect:
+            self.corenlp = winpexpect.winspawn(self.start_corenlp, maxread=8192,
+                searchwindowsize=80)
+        else:
+            self.corenlp = pexpect.spawn(self.start_corenlp, maxread=8192,
+                searchwindowsize=80)
 
         # show progress bar while loading the models
         if VERBOSE:

From deb7db65d392d3fe6e44542aaff618056211ab1e Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Fri, 8 Aug 2014 12:20:46 -0700
Subject: [PATCH 09/27] Updated version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 80ae7a7..553fb52 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.2-0"
+VERSION = "3.3.3-0"
 
 setup(
     name=NAME,

From 7746e822f7ee9459a2de2a8f42eddd18e4c2be1c Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Fri, 8 Aug 2014 20:30:44 -0700
Subject: [PATCH 10/27] Disabled PBT3 escaping.

---
 corenlp/default.properties | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/corenlp/default.properties b/corenlp/default.properties
index c475c92..3f6c32c 100644
--- a/corenlp/default.properties
+++ b/corenlp/default.properties
@@ -49,7 +49,7 @@ annotators = tokenize, ssplit, pos, lemma, parse
 #dcoref.female = /scr/nlp/data/Bergsma-Gender/female.unigrams.txt
 #dcoref.plural = /scr/nlp/data/Bergsma-Gender/plural.unigrams.txt
 #dcoref.singular = /scr/nlp/data/Bergsma-Gender/singular.unigrams.txt
-
+tokenize.options = ptb3Escaping=false
 
 # This is the regular expression that describes which xml tags to keep
 # the text from.  In order to on off the xml removal, add cleanxml
@@ -63,3 +63,4 @@ annotators = tokenize, ssplit, pos, lemma, parse
 # Whether or not to allow malformed xml
 # StanfordCoreNLP.properties
 #wordnet.dir=models/wordnet-3.0-prolog
+

From 0d6394a6e12e2d05eacca9df8bb80e47f5678088 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Fri, 8 Aug 2014 20:46:15 -0700
Subject: [PATCH 11/27] Pushed version 3.3.4 to pypi.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 553fb52..8e72630 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.3-0"
+VERSION = "3.3.4-0"
 
 setup(
     name=NAME,

From 0f2f99b6a02095356f1d083cca91ce6a79be6879 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sat, 9 Aug 2014 21:20:16 -0700
Subject: [PATCH 12/27] Revert "Disabled PBT3 escaping."

This reverts commit 7746e822f7ee9459a2de2a8f42eddd18e4c2be1c.
---
 corenlp/default.properties | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/corenlp/default.properties b/corenlp/default.properties
index 3f6c32c..c475c92 100644
--- a/corenlp/default.properties
+++ b/corenlp/default.properties
@@ -49,7 +49,7 @@ annotators = tokenize, ssplit, pos, lemma, parse
 #dcoref.female = /scr/nlp/data/Bergsma-Gender/female.unigrams.txt
 #dcoref.plural = /scr/nlp/data/Bergsma-Gender/plural.unigrams.txt
 #dcoref.singular = /scr/nlp/data/Bergsma-Gender/singular.unigrams.txt
-tokenize.options = ptb3Escaping=false
+
 
 # This is the regular expression that describes which xml tags to keep
 # the text from.  In order to on off the xml removal, add cleanxml
@@ -63,4 +63,3 @@ tokenize.options = ptb3Escaping=false
 # Whether or not to allow malformed xml
 # StanfordCoreNLP.properties
 #wordnet.dir=models/wordnet-3.0-prolog
-

From beb06d185f4c5a28d28e7e3c765f7dd79e894c1f Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sat, 9 Aug 2014 21:21:50 -0700
Subject: [PATCH 13/27] Reverted and uploaded.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 8e72630..d8966e7 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.4-0"
+VERSION = "3.3.5-0"
 
 setup(
     name=NAME,

From 8f0450fe4e12e7e6292369156f1c9002ba777b2a Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sun, 10 Aug 2014 08:58:20 -0700
Subject: [PATCH 14/27] Replace PTB3 escapes with characters in the parsing.

---
 corenlp/corenlp.py | 32 ++++++++++++++++++++++++++------
 setup.py           |  2 +-
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index 9d127d1..0cae5c7 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -146,11 +146,11 @@ def parse_bracketed(s):
     # Load key-value pairs, substituting as necessary
     for attr, val in re.findall(r"([^=\s]*)=([^\s]*)", s):
         if val in temp:
-            val = temp[val]
+            val = remove_escapes(temp[val])
         if attr == 'Text':
-            word = val
+            word = remove_escapes(val)
         else:
-            attrs[attr] = val
+            attrs[attr] = remove_escapes(val)
     return (word, attrs)
 
 
@@ -171,7 +171,7 @@ def parse_parser_results(text):
             state = STATE_TEXT
 
         elif state == STATE_TEXT:
-            sentence['text'] = line
+            sentence['text'] = remove_escapes(line)
             state = STATE_WORDS
 
         elif state == STATE_WORDS:
@@ -186,7 +186,7 @@ def parse_parser_results(text):
                 state = STATE_DEPENDENCY
                 sentence['parsetree'] = " ".join(sentence['parsetree'])
             else:
-                sentence['parsetree'].append(line)
+                sentence['parsetree'].append(remove_escapes(line))
 
         elif state == STATE_DEPENDENCY:
             if len(line) == 0:
@@ -197,7 +197,9 @@ def parse_parser_results(text):
                     rel, left, leftindex, right, rightindex = split_entry
                     leftindex = re.sub("[^0-9]", "", leftindex)
                     rightindex = re.sub("[^0-9]", "", rightindex)
-                    sentence['dependencies'].append(tuple([rel, left, leftindex, right, rightindex]))
+                    sentence['dependencies'].append(tuple([rel,
+                        remove_escapes(left), leftindex, remove_escapes(right),
+                        rightindex]))
 
         elif state == STATE_COREFERENCE:
             if "Coreference set" in line:
@@ -474,6 +476,24 @@ def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g", raw_output=Fa
 
     return parse_xml_output(input_folder, corenlp_path, memory, raw_output=raw_output)
 
+def remove_escapes(text):
+    """Replace PTB3 bracket escapes (e.g. -LRB-) with the characters they
+    stand for. Empty or None input is returned unchanged."""
+    escapes = {"-lrb-": "(",
+        "-rrb-": ")",
+        "-lsb-": "[",
+        "-rsb-": "]",
+        "-lcb-": "{",
+        "-rcb-": "}",
+        "-LRB-": "(",
+        "-RRB-": ")",
+        "-LSB-": "[",
+        "-RSB-": "]",
+        "-LCB-": "{",
+        "-RCB-": "}"}
+    if not text:
+        return text  # nothing to unescape; do not fall through to None
+    return re.sub('|'.join(map(re.escape, escapes)), lambda m: escapes[m.group()], text)
 
 if __name__ == '__main__':
     """
diff --git a/setup.py b/setup.py
index d8966e7..124cf91 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.5-0"
+VERSION = "3.3.6-0"
 
 setup(
     name=NAME,

From 1ec5bb7dafae0cdb9a60ff52980497113b6c8895 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Mon, 11 Aug 2014 19:29:39 -0700
Subject: [PATCH 15/27] Script should now use winpexpect if necessary.

---
 setup.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index 124cf91..ab439a8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,6 @@
+import sys
 from setuptools import setup, find_packages
+import pdb
 
 PACKAGE = "corenlp"
 NAME = "stanford-corenlp-python"
@@ -7,7 +9,18 @@
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
 VERSION = "3.3.6-0"
+INSTALLATION_REQS = ["unidecode >= 0.04.12", "xmltodict >= 0.4.6"]
 
+PEXPECT = "pexpect >= 2.4"
+WINPEXPECT = "winpexpect >= 1.5"
+
+pdb.set_trace()
+	
+if "win" in sys.platform:
+	INSTALLATION_REQS.append(WINPEXPECT)
+else:
+	INSTALLATION_REQS.append(PEXPECT)
+	
 setup(
     name=NAME,
     version=VERSION,
@@ -18,11 +31,7 @@
     packages=find_packages(),
     package_data = {"": ["*.properties"],
         "corenlp": ["*.properties"]},
-    install_requires=[
-        "pexpect >= 2.4",
-        "unidecode >= 0.04.12",
-        "xmltodict >= 0.4.6",
-    ],
+    install_requires=INSTALLATION_REQS,
     classifiers=[
         ("License :: OSI Approved :: GNU General Public License v2 or later "
             "(GPLv2+)"),

From 0dfbf2c81437f602667dc04fde25c95642ea8054 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Tue, 12 Aug 2014 18:58:57 -0700
Subject: [PATCH 16/27] Incremented version.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 124cf91..b51f392 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.6-0"
+VERSION = "3.3.7-0"
 
 setup(
     name=NAME,

From b73ddaf926258849e6bbeef2332e22a99640f43a Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Tue, 12 Aug 2014 19:07:38 -0700
Subject: [PATCH 17/27] Removed debug code

---
 setup.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/setup.py b/setup.py
index f26d008..1f230e0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,5 @@
 import sys
 from setuptools import setup, find_packages
-import pdb
 
 PACKAGE = "corenlp"
 NAME = "stanford-corenlp-python"
@@ -14,8 +13,6 @@
 PEXPECT = "pexpect >= 2.4"
 WINPEXPECT = "winpexpect >= 1.5"
 
-pdb.set_trace()
-
 if "win" in sys.platform:
 	INSTALLATION_REQS.append(WINPEXPECT)
 else:

From c68fd4fe814ab3ff2fb7f47a92428e375f61dcae Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Tue, 12 Aug 2014 19:56:38 -0700
Subject: [PATCH 18/27] Fixed commit which made the script think that mac
 was windows.

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 1f230e0..6dedbdd 100644
--- a/setup.py
+++ b/setup.py
@@ -7,13 +7,13 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.7-0"
+VERSION = "3.3.8-0"
 INSTALLATION_REQS = ["unidecode >= 0.04.12", "xmltodict >= 0.4.6"]
 
 PEXPECT = "pexpect >= 2.4"
 WINPEXPECT = "winpexpect >= 1.5"
 
-if "win" in sys.platform:
+if "win32" in sys.platform or "cygwin" in sys.platform:
 	INSTALLATION_REQS.append(WINPEXPECT)
 else:
 	INSTALLATION_REQS.append(PEXPECT)

From e1d7c5619bfc2ab09fdd6589c1bc50530dd50f52 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Wed, 13 Aug 2014 11:04:58 -0700
Subject: [PATCH 19/27] Now with pep 396!

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 6dedbdd..35c75eb 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.8-0"
+VERSION = "3.3.8"
 INSTALLATION_REQS = ["unidecode >= 0.04.12", "xmltodict >= 0.4.6"]
 
 PEXPECT = "pexpect >= 2.4"

From 84eed35eb0d705fb98766641f531db89bdfd7fdd Mon Sep 17 00:00:00 2001
From: jannah <hassan.jannah@gmail.com>
Date: Sat, 13 Sep 2014 15:05:33 -0700
Subject: [PATCH 20/27] Added Windows support

made file paths OS neutral and changed jars to *
---
 corenlp/corenlp.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index 0cae5c7..cb0a7de 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -105,12 +105,14 @@ def init_corenlp_command(corenlp_path, memory, properties):
             "joda-time.jar",
             "jollyday.jar"
             ]
+    
+    jars = ["*"]
 
     java_path = "java"
     classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
     # include the properties file, so you can change defaults
     # but any changes in output format will break parse_parser_results()
-    current_dir_pr =  os.path.dirname(os.path.abspath(__file__)) + "/" + properties
+    current_dir_pr =  os.path.join(os.path.dirname(os.path.abspath(__file__)), properties)
     if os.path.exists(properties):
         props = "-props %s" % (properties.replace(" ", "\\ "))
     elif os.path.exists(current_dir_pr):
@@ -119,9 +121,9 @@ def init_corenlp_command(corenlp_path, memory, properties):
         raise Exception("Error! Cannot locate: %s" % properties)
 
     # add and check classpaths
-    jars = [corenlp_path + "/" + jar for jar in jars]
+    jars = [os.path.join(corenlp_path,jar) for jar in jars]
     for jar in jars:
-        if not os.path.exists(jar):
+        if not os.path.exists(jar) and not "*" in jar:
             raise Exception("Error! Cannot locate: %s" % jar)
 
     # add memory limit on JVM
@@ -278,7 +280,7 @@ def parse_xml_output(input_dir, corenlp_path=DIRECTORY, memory="3g", raw_output=
 
     #we get a list of the cleaned files that we want to parse:
 
-    files = [input_dir + '/' + f for f in os.listdir(input_dir) if f.endswith(".txt")]
+    files = [os.path.join(input_dir , f) for f in os.listdir(input_dir) if f.endswith(".txt")]
 
     #creating the file list of files to parse
 
@@ -296,7 +298,7 @@ def parse_xml_output(input_dir, corenlp_path=DIRECTORY, memory="3g", raw_output=
     # result = []
     try:
         for output_file in os.listdir(xml_dir):
-            with open(xml_dir + '/' + output_file, 'r') as xml:
+            with open(os.path.join(xml_dir, output_file), 'r') as xml:
                 # parsed = xml.read()
                 file_name = re.sub('.xml$', '', os.path.basename(output_file))
                 # result.append(parse_parser_xml_results(xml.read(), file_name,
@@ -358,7 +360,12 @@ def __init__(self, corenlp_path=DIRECTORY, memory="3g", properties='default.prop
         self._spawn_corenlp()
 
     def close(self, force=True):
-        self.corenlp.terminate(force)
+        global use_winpexpect
+        if use_winpexpect:
+            self.corenlp.terminate()
+        else:
+            self.corenlp.terminate(force)
+        
 
     def isalive(self):
         return self.corenlp.isalive()

From e4126f5b711ef7f3e7a663a3c52dba5dc088531a Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Sat, 13 Sep 2014 16:26:02 -0700
Subject: [PATCH 21/27] Generalized jar loading

---
 corenlp/corenlp.py | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index cb0a7de..ce419d4 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -31,6 +31,7 @@
 from progressbar import ProgressBar, Fraction
 from unidecode import unidecode
 from subprocess import call
+import glob
 
 use_winpexpect = True
 
@@ -99,14 +100,8 @@ def init_corenlp_command(corenlp_path, memory, properties):
     """
 
     # TODO: Can edit jar constants
-    jars = ["stanford-corenlp-3.2.0.jar",
-            "stanford-corenlp-3.2.0-models.jar",
-            "xom.jar",
-            "joda-time.jar",
-            "jollyday.jar"
-            ]
-    
-    jars = ["*"]
+    jar_mask = ["*.jar"]
+    jars = glob.glob(os.path.join(corenlp_path, jar))
 
     java_path = "java"
     classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
@@ -120,12 +115,6 @@ def init_corenlp_command(corenlp_path, memory, properties):
     else:
         raise Exception("Error! Cannot locate: %s" % properties)
 
-    # add and check classpaths
-    jars = [os.path.join(corenlp_path,jar) for jar in jars]
-    for jar in jars:
-        if not os.path.exists(jar) and not "*" in jar:
-            raise Exception("Error! Cannot locate: %s" % jar)
-
     # add memory limit on JVM
     if memory:
         limit = "-Xmx%s" % memory

From 847367f0b30908851184c7bd0f60b580f867263b Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Tue, 16 Sep 2014 11:22:10 -0700
Subject: [PATCH 22/27] Incremented version counter

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 35c75eb..badbb0a 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.8"
+VERSION = "3.3.9"
 INSTALLATION_REQS = ["unidecode >= 0.04.12", "xmltodict >= 0.4.6"]
 
 PEXPECT = "pexpect >= 2.4"

From 754c06292cab727c8b0fb8316b27e14cfdd51502 Mon Sep 17 00:00:00 2001
From: PlasmaSheep <plasmasheep@gmail.com>
Date: Tue, 16 Sep 2014 11:38:04 -0700
Subject: [PATCH 23/27] Fixed errors causing masks to fail.

---
 corenlp/corenlp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
index ce419d4..4592acc 100755
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -100,8 +100,8 @@ def init_corenlp_command(corenlp_path, memory, properties):
     """
 
     # TODO: Can edit jar constants
-    jar_mask = ["*.jar"]
-    jars = glob.glob(os.path.join(corenlp_path, jar))
+    jar_mask = "*.jar"
+    jars = glob.glob(os.path.join(corenlp_path, jar_mask))
 
     java_path = "java"
     classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
@@ -354,7 +354,7 @@ def close(self, force=True):
             self.corenlp.terminate()
         else:
             self.corenlp.terminate(force)
-        
+
 
     def isalive(self):
         return self.corenlp.isalive()

From e72ce4074c67f05de9d320d785bdf06481fbc2da Mon Sep 17 00:00:00 2001
From: Ian MacFarland <ianmacfarland@ischool.berkeley.edu>
Date: Fri, 12 Jun 2015 17:40:01 -0700
Subject: [PATCH 24/27] handle different text output from depparse annotator

---
 corenlp/corenlp.py         | 13 +++++--------
 corenlp/default.properties |  6 +++++-
 2 files changed, 10 insertions(+), 9 deletions(-)
 mode change 100755 => 100644 corenlp/corenlp.py

diff --git a/corenlp/corenlp.py b/corenlp/corenlp.py
old mode 100755
new mode 100644
index 4592acc..f2c9a16
--- a/corenlp/corenlp.py
+++ b/corenlp/corenlp.py
@@ -153,7 +153,8 @@ def parse_parser_results(text):
     """
     results = {"sentences": []}
     state = STATE_START
-    for line in unidecode(text.decode('utf-8')).split("\n"):
+    lines = unidecode(text.decode('utf-8')).split("\n")
+    for index, line in enumerate(lines):
         line = line.strip()
 
         if line.startswith("Sentence #"):
@@ -170,15 +171,11 @@ def parse_parser_results(text):
                 raise ParserError('Parse error. Could not find "[Text=" in: %s' % line)
             for s in WORD_PATTERN.findall(line):
                 sentence['words'].append(parse_bracketed(s))
-            state = STATE_TREE
-
-        elif state == STATE_TREE:
-            if len(line) == 0:
+            if not lines[index + 1].startswith("[Text="):
                 state = STATE_DEPENDENCY
-                sentence['parsetree'] = " ".join(sentence['parsetree'])
-            else:
-                sentence['parsetree'].append(remove_escapes(line))
+                # skipping TREE because the new depparse annotator doesn't make a parse tree
 
+        
         elif state == STATE_DEPENDENCY:
             if len(line) == 0:
                 state = STATE_COREFERENCE
diff --git a/corenlp/default.properties b/corenlp/default.properties
index c475c92..70ac093 100644
--- a/corenlp/default.properties
+++ b/corenlp/default.properties
@@ -1,4 +1,8 @@
-annotators = tokenize, ssplit, pos, lemma, parse
+annotators = tokenize, ssplit, pos, lemma, depparse
+
+# specify Stanford Dependencies format for backwards compatibility
+# (new default is Universal Dependencies in 3.5.2)
+depparse.model = edu/stanford/nlp/models/parser/nndep/english_SD.gz
 
 # A true-casing annotator is also available (see below)
 #annotators = tokenize, ssplit, pos, lemma, truecase

From 9bd284ec168aec7624c4916fd5e1afec55f587e9 Mon Sep 17 00:00:00 2001
From: Ian MacFarland <ianmacfarland@ischool.berkeley.edu>
Date: Fri, 12 Jun 2015 17:49:06 -0700
Subject: [PATCH 25/27] update README

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f0fb897..4771019 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,11 @@
 # A Python wrapper for the Java Stanford Core NLP tools
 
-This is a fork of Dustin Smith's [stanford-corenlp-python](https://github.com/dasmith/stanford-corenlp-python), a Python interface to [Stanford CoreNLP](http://nlp.stanford.edu/software/corenlp.shtml). It can either use as python package, or run as a JSON-RPC server.
+This is a Wordseer-specific fork of Dustin Smith's [stanford-corenlp-python](https://github.com/dasmith/stanford-corenlp-python), a Python interface to [Stanford CoreNLP](http://nlp.stanford.edu/software/corenlp.shtml). It can be used either as a Python package or run as a JSON-RPC server.
 
 ## Edited
+   * Tested only with the current annotator configuration: not a general-purpose wrapper
+   * Update to Stanford CoreNLP v3.5.2
    * Added multi-threaded load balancing
-   * Update to Stanford CoreNLP v3.2.0
    * Fix many bugs & improve performance
    * Using jsonrpclib for stability and performance
    * Can edit the constants as argument such as Stanford Core NLP directory
@@ -164,4 +165,5 @@ The function uses XML output feature of Stanford CoreNLP, and you can take all i
    * Robert Elwell [robert@wikia-inc.com]
    * Tristan Chong [tristan@wikia-inc.com]
    * Aditi Muralidharan [aditi.shrikumar@gmail.com]
+   * Ian MacFarland [ianmacfarland@ischool.berkeley.edu]
 

From 34ed4b6ff74e2da7892749b3c05c1de115fe282e Mon Sep 17 00:00:00 2001
From: Ian MacFarland <ianmacfarland@ischool.berkeley.edu>
Date: Fri, 12 Jun 2015 17:51:37 -0700
Subject: [PATCH 26/27] increment version

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index badbb0a..869427c 100644
--- a/setup.py
+++ b/setup.py
@@ -4,10 +4,10 @@
 PACKAGE = "corenlp"
 NAME = "stanford-corenlp-python"
 DESCRIPTION = "A Stanford Core NLP wrapper (wordseer fork)"
-AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan"
+AUTHOR = "Hiroyoshi Komatsu, Dustin Smith, Aditi Muralidharan, Ian MacFarland"
 AUTHOR_EMAIL = "aditi.shrikumar@gmail.com"
 URL = "https://github.com/Wordseer/stanford-corenlp-python"
-VERSION = "3.3.9"
+VERSION = "3.3.10"
 INSTALLATION_REQS = ["unidecode >= 0.04.12", "xmltodict >= 0.4.6"]
 
 PEXPECT = "pexpect >= 2.4"

From 6030814dc624b63ce2eef4b2fe0c88e12e002df8 Mon Sep 17 00:00:00 2001
From: Ian MacFarland <ianmacfarland@ischool.berkeley.edu>
Date: Sat, 13 Jun 2015 15:28:14 -0700
Subject: [PATCH 27/27] readme update

---
 README.md | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/README.md b/README.md
index 4771019..e3a1400 100644
--- a/README.md
+++ b/README.md
@@ -22,15 +22,6 @@ This is a Wordseer-specific fork of Dustin Smith's [stanford-corenlp-python](htt
 
 To use this program you must [download](http://nlp.stanford.edu/software/corenlp.shtml#Download) and unpack the zip file containing Stanford's CoreNLP package.  By default, `corenlp.py` looks for the Stanford Core NLP folder as a subdirectory of where the script is being run.
 
-
-In other words:
-
-    sudo pip install pexpect unidecode jsonrpclib   # jsonrpclib is optional
-    git clone https://bitbucket.org/torotoki/corenlp-python.git
-	  cd corenlp-python
-    wget http://nlp.stanford.edu/software/stanford-corenlp-full-2013-06-20.zip
-    unzip stanford-corenlp-full-2013-06-20.zip
-
 Then, to launch a server:
 
     python corenlp/corenlp.py