hi... the following sample is an attempt to fetch two consecutive pages from a sample site (it's public). The script tries to make the requests using the POST method as well as cookies. Testing with LiveHttpHeaders in Firefox indicates that the app uses POST and cookies, and it doesn't work if cookies are disabled in the browser.
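For what it's worth, the cookie handling I'm trying to reproduce is (as far as I understand it) the standard cookielib pattern below; first_query and next_query are just placeholders for the two captured query strings that appear in the full script further down:

import urllib2
import cookielib

# one jar shared by both requests, so whatever session cookie the site
# sets on the first response is sent back with the second request
cj = cookielib.LWPCookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
url = 'https://pisa.ucsc.edu/class_search/index.php'

# both POSTs go through the installed opener, so cookies ride along
page1 = urllib2.urlopen(urllib2.Request(url, first_query, headers)).read()
page2 = urllib2.urlopen(urllib2.Request(url, next_query, headers)).read()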
The query string for the 1st POST was captured with LiveHttpHeaders. I can get the 1st page, but not the 2nd, and I'm assuming I'm somehow getting the use of the cookies wrong; searching the net isn't shedding any light so far. After the 1st page is shown, the 2nd page is reached in the browser by selecting a 'next' link, which triggers a JavaScript form submit on the DOM. That POST data was also captured with LiveHttpHeaders, and it's what forms the data for the 2nd POST attempt in the test. Any thoughts/comments/pointers would be helpful... (and yeah, the test is ugly..!)

thanks
-tom

#!/usr/bin/python
# test python script

import re
import urllib
import urllib2
import sys, string, os
from mechanize import Browser
import mechanize
import cookielib

########################
#
# Parsing App Information
########################

COOKIEFILE = 'cookies.lwp'

urlopen = urllib2.urlopen
Request = urllib2.Request
cj = cookielib.LWPCookieJar()
br = Browser()

if cj is not None:
    # install the CookieJar on the default opener so every
    # urllib2.urlopen() call sends/receives cookies through it
    if os.path.isfile(COOKIEFILE):
        cj.load(COOKIEFILE)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}

if __name__ == "__main__":
    baseurl = "https://pisa.ucsc.edu/class_search/index.php"
    print "baseurl = ", baseurl
    print "headers = ", headers

    # 1st POST: the search form, data captured via LiveHttpHeaders
    query = ("action=results&binds%5B%3Aterm%5D=2100&binds%5B%3Areg_status%5D=O"
             "&binds%5B%3Asubject%5D=&binds%5B%3Acatalog_nbr_op%5D=%3D"
             "&binds%5B%3Acatalog_nbr%5D=&binds%5B%3Atitle%5D="
             "&binds%5B%3Ainstr_name_op%5D=%3D&binds%5B%3Ainstructor%5D="
             "&binds%5B%3Age%5D=&binds%5B%3Acrse_units_op%5D=%3D"
             "&binds%5B%3Acrse_units_from%5D=&binds%5B%3Acrse_units_to%5D="
             "&binds%5B%3Acrse_units_exact%5D=&binds%5B%3Adays%5D="
             "&binds%5B%3Atimes%5D=&binds%5B%3Aacad_career%5D=")

    request = urllib2.Request(baseurl, query, headers)
    response = urllib2.urlopen(request)
    print "response (page 1) = ", response
    print response.read()

    cj.save(COOKIEFILE)    # resave cookies

    print 'These are the cookies we have received so far :'
    for index, cookie in enumerate(cj):
        print index, ' : ', cookie

    # 2nd POST: the 'next' action, data captured via LiveHttpHeaders
    query = ("action=next&Rec_Dur=100&sel_col%5Bclass_nbr%5D=1"
             "&sel_col%5Bclass_id%5D=1&sel_col%5Bclass_title%5D=1"
             "&sel_col%5Btype%5D=1&sel_col%5Bdays%5D=1&sel_col%5Btimes%5D=1"
             "&sel_col%5Binstr_name%5D=1&sel_col%5Bstatus%5D=1"
             "&sel_col%5Benrl_cap%5D=1&sel_col%5Benrl_tot%5D=1"
             "&sel_col%5Bseats_avail%5D=1&sel_col%5Blocation%5D=1")

    request = urllib2.Request(baseurl, query, headers)
    response = urllib2.urlopen(request)
    print "response (page 2) = ", response
    print response.read()

    sys.exit()
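One thing I haven't actually tried yet is driving both POSTs through mechanize's Browser (it's imported above, and a Browser is even created, but never really used). As I understand it the Browser keeps its own cookie jar, so a sketch along these lines might sidestep the manual cookie handling entirely; again, first_query and next_query stand for the two captured query strings:

import mechanize

br = mechanize.Browser()
br.set_handle_robots(False)   # don't bother with robots.txt for this test
br.addheaders = [('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')]

url = 'https://pisa.ucsc.edu/class_search/index.php'

# passing data= makes open() issue a POST; the Browser's own CookieJar
# carries the session cookie from the first response into the second request
page1 = br.open(url, data=first_query).read()
page2 = br.open(url, data=next_query).read()

If that behaves the same way (1st page ok, 2nd page wrong), then presumably the problem isn't the cookie plumbing at all but something about the 'next' request itself.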