Difference between revisions of "Python Batch Client"

From Textserver wiki
Jump to: navigation, search
(Created page with "#! /usr/bin/python ######################################################################## # # Example client to submit a batch request to TextServer # SERVICENAME service...")
 
 
(One intermediate revision by the same user not shown)
Line 1: Line 1:
 +
<syntaxhighlight lang="python" line="1" >
 
#! /usr/bin/python
 
#! /usr/bin/python
  
Line 25: Line 26:
 
# set query elements
 
# set query elements
 
fname = raw_input('Input ZIP file: ')
 
fname = raw_input('Input ZIP file: ')
outfname = raw_input('Output ZIP file: ')
 
 
lang = raw_input('Language: ')
 
lang = raw_input('Language: ')
 
out = raw_input('Output format (xml,json,conll,naf): ')
 
out = raw_input('Output format (xml,json,conll,naf): ')
Line 58: Line 58:
 
   print e, "-", e.read()
 
   print e, "-", e.read()
 
   exit()
 
   exit()
 
  
 
# Server response should include a job tokenID, retrieve it
 
# Server response should include a job tokenID, retrieve it
Line 64: Line 63:
 
tkid = dom.text  
 
tkid = dom.text  
 
print "Job sumbitted. Token id=",tkid
 
print "Job sumbitted. Token id=",tkid
 
+
</syntaxhighlight>
# prepare request to poll for completion and retrieve results
+
datagen, headers = multipart_encode({'username' : user,
+
                                    'tokenID' : tkid
+
                                    })
+
request = urllib2.Request(TextServer_URL + "/resultRetrieve",
+
                          datagen,
+
                          headers)
+
 
+
# Periodically poll server until batch job is ended.
+
nseconds=20
+
while (1) :
+
 
+
  print "Sleeping for",nseconds,"seconds"
+
  time.sleep(nseconds)
+
  try:
+
    # send request to check for results
+
    print "Polling server for job completion..."
+
    resp =  urllib2.urlopen(request).read()
+
   
+
  except urllib2.HTTPError, e:
+
    if (e.code == 503 and e.read()[0:8] == "[TS-125]" ) :
+
      # http status 503, textserver code TS-125 means the job is not finished yet. keep waiting
+
      print "Job not finished yet"
+
      continue
+
    else :     
+
      # some actual error happened. abort
+
      print e, "-", e.read()
+
      exit()
+
 
+
  # if status=200, we got a ZIP response, the job is done, exit loop.
+
  break;
+
     
+
print "Job finished. Saving results to",outfname
+
# print results to output file
+
outf=open(outfname, 'w+')
+
print >>outf, resp
+
outf.close()
+

Latest revision as of 12:01, 21 January 2016

 1 #! /usr/bin/python
 2 
 3 ########################################################################
 4 #
 5 #  Example client to submit a batch request to TextServer 
 6 # SERVICENAME service, and wait for the job to be finished.
 7 #
 8 #  Input must be a ZIP file containing one or more text files to analyze
 9 #  Output will be a ZIP file containing results for each input file in
10 # the requested format (XML, json, conll)
11 #
12 ########################################################################
13 
14 # import required libraries
15 import urllib2
16 # You may need to install "poster" python module for these two:
17 from poster.encode import multipart_encode
18 from poster.streaminghttp import register_openers
19 import xml.etree.ElementTree
20 import time
21 
22 # Register the streaming http handlers with urllib2
23 register_openers()
24 
25 # set query elements
26 fname = raw_input('Input ZIP file: ')
27 lang = raw_input('Language: ')
28 out = raw_input('Output format (xml,json,conll,naf): ')
29 user = raw_input('TextServer Username: ')
30 pwd = raw_input('TextServer Password: ')
31 
32 # Encode query in a form-data.
33 # 'headers' contains the necessary Content-Type and Content-Length.
34 # 'datagen' is a generator object that yields the encoded parameters.
35 datagen, headers = multipart_encode({'username' : user,
36                                      'password' : pwd,
37                                      'file' : open(fname,"rb"),
38                                      'language' : lang,
39                                      'output' : out,
40                                      'interactive':'0'
41                                      } )
42 # service URL
43 TextServer_URL = "http://frodo.lsi.upc.edu:8080/TextWS/textservlet/ws"
44 service = "SERVICENAME"
45 
46 # Create the Request object
47 request = urllib2.Request(TextServer_URL + "/processQuery/" + service,
48                           datagen, 
49                           headers)
50 
51 try:
52   # Actually do the request, and get the response
53   resp =  urllib2.urlopen(request).read()
54 
55 except urllib2.HTTPError, e:
56   # handle connection errors
57   print e, "-", e.read()
58   exit()
59 
60 # Server response should include a job tokenID, retrieve it
61 dom = xml.etree.ElementTree.XML(resp)
62 tkid = dom.text 
63 print "Job sumbitted. Token id=",tkid