refresh question

bruce Fri, 29 Aug 2008 08:22:06 -0700

Hi.

I'm using mechanize to parse a page/site that uses the meta http-equiv tag
in order to perform a refresh/redirect of the page. I've tried a number of
settings, and read different posts on various threads, but seem to be
missing something.


The test.html page below is what the URL returns; however, I was
expecting the test.py script to go ahead and follow the redirect/refresh
automatically.

Does the page (test.html) need to be completely valid HTML?

Any thoughts on what's going wrong here?


thanks

----------------------------------------------------

test.py
--------
import re
import libxml2dom
import urllib
import urllib2
import sys, string
from  mechanize import Browser
import mechanize
#import tidy
import os.path
import cookielib
from libxml2dom import Node
from libxml2dom import NodeList
import subprocess
import time

########################
#
# Parse pricegrabber.com
########################
cj = "p"
COOKIEFILE = 'cookies.lwp'
#cookielib = 1


urlopen = urllib2.urlopen
#cj = urllib2.cookielib.LWPCookieJar()
cj = cookielib.LWPCookieJar()
Request = urllib2.Request
br = Browser()
br2 = Browser()

if cj != None:
  print "sss"
#install the CookieJar for the default CookieProcessor
  if os.path.isfile(COOKIEFILE):
      cj.load(COOKIEFILE)
      print "foo\n"
  if cookielib:
      opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
      urllib2.install_opener(opener)
      print "foo2\n"

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values1 = {'name' : 'Michael Foord',
          'location' : 'Northampton',
          'language' : 'Python' }
headers = { 'User-Agent' : user_agent }

url="http://schedule.psu.edu/";
#=======================================


if __name__ == "__main__":
# main app

        txdata = None

#----------------------------

        ##br.set_cookiejar(cj)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        br.set_handle_robots(False)
        br.set_handle_refresh(True)
        br.addheaders = [('User-Agent', 'Firefox')]

        #url=str(url)+str("act_main_search.cfm")+"?"
        #url=url+"Semester=FALL%202008%20%20%20&"
        #url=url+"CrseLoc=OZ%3A%3AAbington%20Campus&"
        #url=url+"CECrseLoc=AllOZ%3A%3AAbington%20Campus&"
        #url=url+"CourseAbbrev=ACCTG&CourseNum=&CrseAlpha=&Search=View+schedule"

#url="http://schedule.psu.edu/act_main_search.cfm?Semester=FALL%202008%20%20
%20%20&CrseLoc=OZ%3A%3AAbington%20Campus&CECrseLoc=AllOZ%3A%3AAbington%20Cam
pus&CourseAbbrev=ACCTG&CourseNum=&CrseAlpha="



url="http://schedule.psu.edu/act_main_search.cfm?Semester=FALL%202008%20%20%
20%20&CrseLoc=OZ%3A%3AAbington%20Campus&CECrseLoc=AllOZ%3A%3AAbington%20Camp
us&CourseAbbrev=ACCTG&CourseNum=&CrseAlpha=&CFID=543143&CFTOKEN=71842529"


        print "url =",url
        br.open(url)
        #cj.save(COOKIEFILE)    # resave cookies

        res = br.response()  # this is a copy of response
        s = res.read()
        print "slen=",len(s)
        print s

=========================================
test.html
<html>
<head>
<TITLE></TITLE>
</head>

<BODY BGCOLOR="#FFFFFF">

                        <TD NOWRAP WIDTH="45" VALIGN="top"><A
HREF="javascript:openAWindow('http://www.registrar.psu.edu/faculty_staff/enr
oll_services/clsrooms.html#C','Intent',625,425,1)"><FONT FACE="Arial,
Helvetica, sans-serif" SIZE="2"><strong>Tech Type</strong></FONT></A></TD>


<META HTTP-EQUIV="Refresh" CONTENT="0;url=/soc/fall/Alloz/a-c/acctg.html#">

---------------------------------------------------------




        # NOTE(review): presumably the last statement of test.py, displaced
        # below the test.html listing when the message was assembled -- confirm.
        sys.exit()




--
http://mail.python.org/mailman/listinfo/python-list

python/mechanize - redirect/refresh question

Reply via email to