> Graphical representation of links or pages that don't get linked to.
I tried to test the links (with two algorithms, code below) in a generated
webpage, but the results I get are very weird.
Perhaps one of you knows a better way?

cheers,
Stef


from BeautifulSoup import BeautifulSoup
from urllib        import urlopen
from httplib       import HTTP
from urlparse      import urlparse

def Check_URL_1 ( URL ) :
  """Return True if a GET request to URL answers with HTTP status 200.

  Any I/O failure (bad host, refused connection, malformed URL, ...)
  is treated as a broken link and yields False.
  """
  try:
    fh = urlopen ( URL )
    try:
      return fh.code == 200
    finally:
      fh.close()           # the original leaked the connection
  except IOError:          # urllib raises IOError; the bare except hid real bugs
    return False

def Check_URL_2 ( URL ) :
  """Return True if a HEAD request to URL answers with HTTP status 200.

  A HEAD request avoids downloading the body, so this is cheaper than
  Check_URL_1.  Uses the legacy httplib.HTTP interface already imported
  at the top of the file.

  NOTE(review): unlike Check_URL_1, a connection failure here raises
  (socket.error) instead of returning False — callers should be aware.
  """
  p = urlparse ( URL )
  h = HTTP ( p[1] )               # p[1] is the 'host[:port]' component
  h.putrequest ( 'HEAD', p[2] )   # p[2] is the path component
  h.endheaders()
  # 'if ...: return True / else: return False' collapsed to the comparison
  return h.getreply()[0] == 200

def Verify_Links ( URL ) :
  Parts   = URL.split('/')
  Site    = '/'.join ( Parts [:3] )
  Current = '/'.join ( Parts [:-1] )

  fh = urlopen ( URL )
  lines = fh.read ()
  fh.close()

  Soup = BeautifulSoup ( lines )
  hrefs = lines = Soup.findAll ( 'a' )

  for href in hrefs :
    href = href [ 'href' ] #[:-1]     ## <== remove "#" to generate all errors

    if href.startswith ( '/' ) :
      href = Site + href
    elif href.startswith ('#' ) :
      href = URL + href
    elif href.startswith ( 'http' ) :
      pass
    else :
      href = Current + href

    try:
      fh = urllib.urlopen ( href )
    except :
      pass
    print Check_URL_1 ( href ), Check_URL_2 ( href ), href

URL = 'http://127.0.0.1:8000/welcome/default/index'

if __name__ == '__main__' :
  # Verify_Links prints its results and returns None, so the original
  # 'fh = Verify_Links(URL)' binding was misleading and is dropped.
  Verify_Links ( URL )

Reply via email to