Hi. I'm using mechanize to parse a page/site that uses a meta http-equiv "Refresh" tag to perform a refresh/redirect of the page. I've tried a number of settings and read different posts on various threads, but I seem to be missing something.
The test.html page shown below is what the URL returns; however, I was expecting the test.py app to go ahead and perform the redirect/refresh automatically. Does the page (test.html) need to be completely valid HTML? Any thoughts on what's going wrong here? Thanks. ---------------------------------------------------- test.py -------- import re import libxml2dom import urllib import urllib2 import sys, string from mechanize import Browser import mechanize #import tidy import os.path import cookielib from libxml2dom import Node from libxml2dom import NodeList import subprocess import time ######################## # # Parse pricegrabber.com ######################## cj = "p" COOKIEFILE = 'cookies.lwp' #cookielib = 1 urlopen = urllib2.urlopen #cj = urllib2.cookielib.LWPCookieJar() cj = cookielib.LWPCookieJar() Request = urllib2.Request br = Browser() br2 = Browser() if cj != None: print "sss" #install the CookieJar for the default CookieProcessor if os.path.isfile(COOKIEFILE): cj.load(COOKIEFILE) print "foo\n" if cookielib: opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) print "foo2\n" user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' values1 = {'name' : 'Michael Foord', 'location' : 'Northampton', 'language' : 'Python' } headers = { 'User-Agent' : user_agent } url="http://schedule.psu.edu/" #======================================= if __name__ == "__main__": # main app txdata = None #---------------------------- ##br.set_cookiejar(cj) br.set_handle_redirect(True) br.set_handle_referer(True) br.set_handle_robots(False) br.set_handle_refresh(True) br.addheaders = [('User-Agent', 'Firefox')] #url=str(url)+str("act_main_search.cfm")+"?" 
#url=url+"Semester=FALL%202008%20%20%20&" #url=url+"CrseLoc=OZ%3A%3AAbington%20Campus&" #url=url+"CECrseLoc=AllOZ%3A%3AAbington%20Campus&" #url=url+"CourseAbbrev=ACCTG&CourseNum=&CrseAlpha=&Search=View+schedule" #url="http://schedule.psu.edu/act_main_search.cfm?Semester=FALL%202008%20%20 %20%20&CrseLoc=OZ%3A%3AAbington%20Campus&CECrseLoc=AllOZ%3A%3AAbington%20Cam pus&CourseAbbrev=ACCTG&CourseNum=&CrseAlpha=" url="http://schedule.psu.edu/act_main_search.cfm?Semester=FALL%202008%20%20% 20%20&CrseLoc=OZ%3A%3AAbington%20Campus&CECrseLoc=AllOZ%3A%3AAbington%20Camp us&CourseAbbrev=ACCTG&CourseNum=&CrseAlpha=&CFID=543143&CFTOKEN=71842529" print "url =",url br.open(url) #cj.save(COOKIEFILE) # resave cookies res = br.response() # this is a copy of response s = res.read() print "slen=",len(s) print s ========================================= test.html <html> <head> <TITLE></TITLE> </head> <BODY BGCOLOR="#FFFFFF"> <TD NOWRAP WIDTH="45" VALIGN="top"><A HREF="javascript:openAWindow('http://www.registrar.psu.edu/faculty_staff/enr oll_services/clsrooms.html#C','Intent',625,425,1)"><FONT FACE="Arial, Helvetica, sans-serif" SIZE="2"><strong>Tech Type</strong></FONT></A></TD> <META HTTP-EQUIV="Refresh" CONTENT="0;url=/soc/fall/Alloz/a-c/acctg.html#"> --------------------------------------------------------- sys.exit() -- http://mail.python.org/mailman/listinfo/python-list