#! /usr/bin/python
# Python Batch Client
#
# Example client to submit a batch request to the TextServer
# SERVICENAME service, and wait for the job to be finished.
# Input must be a ZIP file containing one or more text files to analyze.
# Output will be a ZIP file containing results for each input file in
# the requested format (xml, json, conll, naf).
#
# import required libraries
import sys
import time
import urllib2
import xml.etree.ElementTree

# You may need to install the "poster" python module for these two:
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
# Register the streaming http handlers with urllib2
register_openers()

# Set query elements: ask the user for every parameter of the request.
fname = raw_input('Input ZIP file: ')
outfname = raw_input('Output ZIP file: ')
lang = raw_input('Language: ')
out = raw_input('Output format (xml,json,conll,naf): ')
user = raw_input('TextServer Username: ')
# NOTE(review): raw_input echoes what is typed, so the password is visible
# on the terminal while it is entered.
pwd = raw_input('TextServer Password: ')
- Encode query in a form-data.
- 'headers' contains the necessary Content-Type and Content-Length.
- 'datagen' is a generator object that yields the encoded parameters.
datagen, headers = multipart_encode({'username' : user,
'password' : pwd, 'file' : open(fname,"rb"), 'language' : lang, 'output' : out, 'interactive':'0' } )
- service URL
TextServer_URL = "http://frodo.lsi.upc.edu:8080/TextWS/textservlet/ws" service = "SERVICENAME"
- Create the Request object
request = urllib2.Request(TextServer_URL + "/processQuery/" + service,
datagen, headers)
try:
# Actually do the request, and get the response resp = urllib2.urlopen(request).read()
except urllib2.HTTPError, e:
# handle connection errors print e, "-", e.read() exit()
- Server response should include a job tokenID, retrieve it
dom = xml.etree.ElementTree.XML(resp) tkid = dom.text print "Job sumbitted. Token id=",tkid
# Prepare the request used to poll for completion and retrieve the results.
# It carries the user name and the token id of the submitted job.
poll_fields = {'username': user, 'tokenID': tkid}
datagen, headers = multipart_encode(poll_fields)
poll_url = TextServer_URL + "/resultRetrieve"
request = urllib2.Request(poll_url, datagen, headers)
- Periodically poll server until batch job is ended.
nseconds=20 while (1) :
print "Sleeping for",nseconds,"seconds" time.sleep(nseconds) try: # send request to check for results print "Polling server for job completion..." resp = urllib2.urlopen(request).read() except urllib2.HTTPError, e: if (e.code == 503 and e.read()[0:8] == "[TS-125]" ) : # http status 503, textserver code TS-125 means the job is not finished yet. keep waiting print "Job not finished yet" continue else : # some actual error happened. abort print e, "-", e.read() exit()
# if status=200, we a got ZIP response, the job is done, exit loop. break;
print "Job finished. Saving results to",outfname
- print results to output file
outf=open(outfname, 'w+') print >>outf, resp outf.close()