Python Batch Client

From Textserver wiki
Revision as of 13:08, 20 January 2016 by Padro (Talk | contribs)

Jump to: navigation, search
  1 #! /usr/bin/python
  2 
  3 ########################################################################
  4 #
  5 #  Example client to submit a batch request to TextServer 
  6 # SERVICENAME service, and wait for the job to be finished.
  7 #
  8 #  Input must be a ZIP file containing one or more text files to analyze
  9 #  Output will be a ZIP file containing results for each input file in
 10 # the requested format (XML, json, conll)
 11 #
 12 ########################################################################
 13 
 14 # import required libraries
 15 import urllib2
 16 # You may need to install "poster" python module for these two:
 17 from poster.encode import multipart_encode
 18 from poster.streaminghttp import register_openers
 19 import xml.etree.ElementTree
 20 import time
 21 
 22 # Register the streaming http handlers with urllib2
 23 register_openers()
 24 
 25 # set query elements
 26 fname = raw_input('Input ZIP file: ')
 27 outfname = raw_input('Output ZIP file: ')
 28 lang = raw_input('Language: ')
 29 out = raw_input('Output format (xml,json,conll,naf): ')
 30 user = raw_input('TextServer Username: ')
 31 pwd = raw_input('TextServer Password: ')
 32 
 33 # Encode query in a form-data.
 34 # 'headers' contains the necessary Content-Type and Content-Length.
 35 # 'datagen' is a generator object that yields the encoded parameters.
 36 datagen, headers = multipart_encode({'username' : user,
 37                                      'password' : pwd,
 38                                      'file' : open(fname,"rb"),
 39                                      'language' : lang,
 40                                      'output' : out,
 41                                      'interactive':'0'
 42                                      } )
 43 # service URL
 44 TextServer_URL = "http://frodo.lsi.upc.edu:8080/TextWS/textservlet/ws"
 45 service = "SERVICENAME"
 46 
 47 # Create the Request object
 48 request = urllib2.Request(TextServer_URL + "/processQuery/" + service,
 49                           datagen, 
 50                           headers)
 51 
 52 try:
 53   # Actually do the request, and get the response
 54   resp =  urllib2.urlopen(request).read()
 55 
 56 except urllib2.HTTPError, e:
 57   # handle connection errors
 58   print e, "-", e.read()
 59   exit()
 60 
 61 
 62 # Server response should include a job tokenID, retrieve it
 63 dom = xml.etree.ElementTree.XML(resp)
 64 tkid = dom.text 
 65 print "Job sumbitted. Token id=",tkid
 66 
 67 # prepare request to poll for completion and retrieve results
 68 datagen, headers = multipart_encode({'username' : user,
 69                                      'tokenID' : tkid
 70                                      })
 71 request = urllib2.Request(TextServer_URL + "/resultRetrieve", 
 72                           datagen, 
 73                           headers)
 74 
 75 # Periodically poll server until batch job is ended.
 76 nseconds=20
 77 while (1) :
 78 
 79   print "Sleeping for",nseconds,"seconds"
 80   time.sleep(nseconds)
 81   try:
 82     # send request to check for results
 83     print "Polling server for job completion..."
 84     resp =  urllib2.urlopen(request).read()
 85     
 86   except urllib2.HTTPError, e:
 87     if (e.code == 503 and e.read()[0:8] == "[TS-125]" ) :
 88       # http status 503, textserver code TS-125 means the job is not finished yet. keep waiting
 89       print "Job not finished yet" 
 90       continue
 91     else :      
 92       # some actual error happened. abort
 93       print e, "-", e.read()
 94       exit()
 95 
 96   # if status=200, we a got ZIP response, the job is done, exit loop.
 97   break;
 98       
 99 print "Job finished. Saving results to",outfname
100 # print results to output file
101 outf=open(outfname, 'w+')
102 print >>outf, resp
103 outf.close()