Python Batch Client
From Textserver wiki
#! /usr/bin/python

########################################################################
#
#  Example client to submit a batch request to TextServer
#  SERVICENAME service, and wait for the job to be finished.
#
#  Input must be a ZIP file containing one or more text files to analyze
#  Output will be a ZIP file containing results for each input file in
#  the requested format (xml, json, conll, naf)
#
#  This is a Python 2 script (urllib2, raw_input); it needs Python 2.6
#  or later because of the 'with' and 'except ... as' statements.
#
#  Exit status is 0 when the results were saved, 1 when the server
#  reported an error or did not return a job token.
#
########################################################################

# import required libraries
import sys
import time
import urllib2
import xml.etree.ElementTree

# You may need to install "poster" python module for these two:
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers

# Register the streaming http handlers with urllib2
register_openers()

# set query elements
fname = raw_input('Input ZIP file: ')
outfname = raw_input('Output ZIP file: ')
lang = raw_input('Language: ')
out = raw_input('Output format (xml,json,conll,naf): ')
user = raw_input('TextServer Username: ')
pwd = raw_input('TextServer Password: ')

# service URL
TextServer_URL = "http://frodo.lsi.upc.edu:8080/TextWS/textservlet/ws"
service = "SERVICENAME"

# Keep the input ZIP open for the whole submission and close it right
# after: 'datagen' is a generator, so the file is presumably only read
# while urlopen() is sending the request body.
with open(fname, "rb") as infile:
    # Encode query in a form-data.
    # 'headers' contains the necessary Content-Type and Content-Length.
    # 'datagen' is a generator object that yields the encoded parameters.
    datagen, headers = multipart_encode({'username': user,
                                         'password': pwd,
                                         'file': infile,
                                         'language': lang,
                                         'output': out,
                                         'interactive': '0'})

    # Create the Request object
    request = urllib2.Request(TextServer_URL + "/processQuery/" + service,
                              datagen,
                              headers)

    try:
        # Actually do the request, and get the response
        resp = urllib2.urlopen(request).read()
    except urllib2.HTTPError as e:
        # the server answered with an HTTP error status. abort
        print("%s - %s" % (e, e.read()))
        sys.exit(1)

# Server response should include a job tokenID, retrieve it
dom = xml.etree.ElementTree.XML(resp)
tkid = dom.text
if not tkid:
    # without a token there is nothing to poll for. abort
    print("No job token found in server response: %s" % resp)
    sys.exit(1)
print("Job sumbitted. Token id= %s" % tkid)

# prepare request to poll for completion and retrieve results
datagen, headers = multipart_encode({'username': user,
                                     'tokenID': tkid})
request = urllib2.Request(TextServer_URL + "/resultRetrieve",
                          datagen,
                          headers)

# Periodically poll server until batch job is ended.
nseconds = 20
while True:
    print("Sleeping for %s seconds" % nseconds)
    time.sleep(nseconds)
    try:
        # send request to check for results
        print("Polling server for job completion...")
        resp = urllib2.urlopen(request).read()
    except urllib2.HTTPError as e:
        # Read the error body exactly once: a second read() on the same
        # error returns nothing, which would hide the server's message
        # in the abort branch below.
        body = e.read()
        if e.code == 503 and body.startswith("[TS-125]"):
            # http status 503, textserver code TS-125 means the job is
            # not finished yet. keep waiting
            print("Job not finished yet")
            continue
        # some actual error happened. abort
        print("%s - %s" % (e, body))
        sys.exit(1)

    # if status=200, we got a ZIP response, the job is done, exit loop.
    break

print("Job finished. Saving results to %s" % outfname)
# The response is a binary ZIP archive: write the bytes verbatim in
# binary mode (no newline translation, no trailing newline appended).
with open(outfname, 'wb') as outf:
    outf.write(resp)