potatochip
diff --git a/‎corenlp/corenlp.py
Lines changed: 20 additions & 7 deletions b/‎corenlp/corenlp.py
Lines changed: 20 additions & 7 deletions
diff --git a/‎corenlp/loadbalancer.py
Lines changed: 74 additions & 0 deletions b/‎corenlp/loadbalancer.py
Lines changed: 74 additions & 0 deletions
diff --git a/‎corenlp/subserver.py
Lines changed: 19 additions & 0 deletions b/‎corenlp/subserver.py
Lines changed: 19 additions & 0 deletions
@@ -28,6 +28,7 @@
 import pexpect
 import tempfile
 import shutil
+from loadbalancer import CoreNLPLoadBalancer
 from progressbar import ProgressBar, Fraction
 from unidecode import unidecode
 from subprocess import call
@@ -74,7 +75,7 @@ def __init__(self, value):
 
     def __str__(self):
         return repr(self.value)
-
+            
 
 def init_corenlp_command(corenlp_path, memory, properties):
     """
@@ -482,6 +483,8 @@ def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g", raw_output=Fa
     parser = optparse.OptionParser(usage="%prog [OPTIONS]")
     parser.add_option('-p', '--port', default='8080',
                       help='Port to serve on (default 8080)')
+    parser.add_option('-o', '--ports', default=None,
+                      help='Multiple ports, separated by commas')
     parser.add_option('-H', '--host', default='127.0.0.1',
                       help='Host to serve on (default localhost; 0.0.0.0 to make public)')
     parser.add_option('-q', '--quiet', action='store_false', default=True, dest='verbose',
@@ -495,14 +498,24 @@ def batch_parse(input_folder, corenlp_path=DIRECTORY, memory="3g", raw_output=Fa
     # server = jsonrpc.Server(jsonrpc.JsonRpc20(),
     #                         jsonrpc.TransportTcpIp(addr=(options.host, int(options.port))))
     try:
-        server = SimpleJSONRPCServer((options.host, int(options.port)))
+        if not options.ports:
+            server = SimpleJSONRPCServer((options.host, int(options.port)))
 
-        nlp = StanfordCoreNLP(options.corenlp, properties=options.properties)
-        server.register_function(nlp.parse)
+            nlp = StanfordCoreNLP(options.corenlp, properties=options.properties)
+            server.register_function(nlp.parse)
 
-        print 'Serving on http://%s:%s' % (options.host, options.port)
-        # server.serve()
-        server.serve_forever()
+            print 'Serving on http://%s:%s' % (options.host, options.port)
+            
+            server.serve_forever()
+        else:
+            server = SimpleJSONRPCServer((options.host, int(options.port)))
+            lb = CoreNLPLoadBalancer(options)
+            server.register_function(lb.send)
+            server.register_function(lb.receive)
+            
+            print 'Serving on http://%s:%s, with servers on ports %s' % (options.host, options.port, options.ports)
+
+            server.serve_forever()
     except KeyboardInterrupt:
         print >>sys.stderr, "Bye."
         exit()
@@ -0,0 +1,74 @@
+"""
+A load balancing platform for the CoreNLP python server.
+This allows us to keep multiple instances of the server open
+on different ports, and allows the same script to handle
+load balancing so client scripts need not worry about such logic.
+"""
+
+import json
+import os
+import sys
+import tempfile
+import time
+from hashlib import sha1
+from subprocess import Popen
+
+import jsonrpclib
+import requests
+
+class CoreNLPLoadBalancer(object):
+    """Round-robin dispatcher over a pool of CoreNLP server subprocesses.
+
+    One ``corenlp.py`` server is launched per configured port by
+    ``startup()``; the constructor itself starts nothing, so callers must
+    invoke ``startup()`` before the first ``send()``.  Each ``send()``
+    spawns a short-lived ``subserver.py`` helper that forwards one document
+    to the next port in rotation, and ``receive()`` collects the output of
+    every helper that has finished.
+    """
+
+    def __init__(self, options):
+        """Record the pool configuration taken from the parsed options.
+
+        ``options`` must expose ``ports`` (comma-separated string), ``host``,
+        ``properties``, ``corenlp`` and ``verbose``.
+
+        Raises ValueError when ``options.ports`` contains no usable port.
+        """
+        self.tempdir = "/tmp/"
+        # Tolerate "9001, 9002" and trailing commas.
+        self.ports = [port.strip() for port in options.ports.split(',')
+                      if port.strip()]
+        if not self.ports:
+            raise ValueError("no ports given: %r" % (options.ports,))
+        self.host = options.host
+        self.serverPool = {}    # port -> Popen of the backing corenlp.py server
+        self.processPool = {}   # key -> (Popen of subserver.py, captured stdout file)
+        self.args = ["python", os.path.join(os.getcwd(), "corenlp.py"),
+                     '--host=%s' % (options.host)]
+        # An unset option would otherwise be passed on as the literal "None".
+        if options.properties is not None:
+            self.args.append('--properties=%s' % (options.properties))
+        if options.corenlp is not None:
+            self.args.append('--corenlp=%s' % (options.corenlp))
+        if not options.verbose:
+            self.args.append('--quiet')
+        self.portCounter = 0
+
+    def _requestPath(self, key):
+        """Return the temp-file path that carries the text for ``key``."""
+        return os.path.join(self.tempdir, key + ".tmp")
+
+    def startup(self):
+        """ Open a traditional server subprocess on each configured port """
+        for port in self.ports:
+            self.serverPool[port] = Popen(self.args + ['--port=%s' % port])
+
+    def shutdown(self):
+        """ Terminate every server subprocess that is still running """
+        for process in self.serverPool.values():
+            # terminate() on an already-reaped child can raise OSError.
+            if process.poll() is None:
+                process.terminate()
+        self.serverPool.clear()
+
+    def sendThreadedRequest(self, key, port):
+        """ Spawn a subserver.py child that talks to the server on `port`, to avoid blocking """
+        host = 'http://%s:%s' % (self.host.replace('http://', ''), port)
+        # Capture stdout in a file rather than a pipe: a parse larger than
+        # the OS pipe buffer would block the child on write, and poll()
+        # would then never report it as finished.
+        output = tempfile.TemporaryFile()
+        try:
+            process = Popen(['python', 'subserver.py', host,
+                             self._requestPath(key)], stdout=output)
+        except Exception:
+            output.close()
+            raise
+        self.processPool[key] = (process, output)
+
+    def send(self, text):
+        """
+        Writes a temp file with the current text. The subserver script deletes this file for us.
+        The response sent provides a sha1 key that corresponds to your requested document so we
+        can correlate requests to responses.
+
+        Returns {'status': 'OK', 'key': <40-char sha1 hex digest of the text>}.
+        """
+        # sha1 and the temp file both need bytes; JSON-RPC may hand us unicode.
+        data = text if isinstance(text, bytes) else text.encode('utf-8')
+        key = sha1(data).hexdigest()
+        if key in self.processPool:
+            # Identical text is already in flight: its temp file and result
+            # slot share this key, so a second dispatch would only clobber
+            # the first.  The pending result answers both requests.
+            return {'status': 'OK', 'key': key}
+        currentPort = self.ports[self.portCounter]
+        self.portCounter = (self.portCounter + 1) % len(self.ports)
+        with open(self._requestPath(key), 'wb') as f:
+            f.write(data)
+        self.sendThreadedRequest(key, currentPort)
+        return {'status': 'OK', 'key': key}
+
+    def receive(self, blocking=False):
+        """
+        Returns all completed parses as {key: [parsed_json]}.
+        Set blocking to True on your last iteration!
+
+        With blocking=False only helpers that have already exited are
+        collected; with blocking=True the call waits until none are pending.
+        Helpers that produced no output or invalid JSON are dropped.
+        """
+        response = {}
+        while True:
+            # Iterate over a snapshot: entries are removed while looping.
+            for key in list(self.processPool):
+                process, output = self.processPool[key]
+                if process.poll() is None:
+                    continue
+                del self.processPool[key]
+                try:
+                    output.seek(0)
+                    out = output.read()
+                finally:
+                    output.close()
+                if not out:
+                    continue
+                try:
+                    response[key] = [json.loads(out)]
+                except ValueError:
+                    # Best effort, as before: skip the bad result, but say so.
+                    sys.stderr.write("Discarding unparsable result for %s\n" % key)
+            if not (blocking and self.processPool):
+                break
+            time.sleep(0.05)    # yield the CPU instead of spinning
+        return response
@@ -0,0 +1,19 @@
+"""
+This subserver script maintains a connection with child
+processes so that our requests are not blocking
+
+arg 1: server
+arg 2: filename of tmp file
+
+"""
+
+import sys, jsonrpclib, os
+
+# Forward the text stored in a temp file to one CoreNLP server and print the
+# parse on stdout for the parent process to collect.
+if len(sys.argv) < 3:
+    sys.exit("usage: subserver.py SERVER_URL TMP_FILENAME")
+
+server = jsonrpclib.Server(sys.argv[1])
+filename = sys.argv[2]
+
+try:
+    # Send the file contents verbatim; the handle is closed before the
+    # (potentially slow) parse call.
+    with open(filename, 'r') as handle:
+        text = handle.read()
+
+    print(server.parse(text))
+finally:
+    # The sender relies on this script to delete the temp file, so remove
+    # it even when the parse request fails.
+    os.remove(filename)
+