> Graphical representation of links or pages that don't get linked to. I tried to test the links (with two algorithms, code below) in a generated web page, but the results I get are very weird. Perhaps one of you knows a better way?
cheers, Stef from BeautifulSoup import BeautifulSoup from urllib import urlopen from httplib import HTTP from urlparse import urlparse def Check_URL_1 ( URL ) : try: fh = urlopen ( URL ) return fh.code == 200 except : return False def Check_URL_2 ( URL ) : p = urlparse ( URL ) h = HTTP ( p[1] ) h.putrequest ( 'HEAD', p[2] ) h.endheaders() if h.getreply()[0] == 200: return True else: return False def Verify_Links ( URL ) : Parts = URL.split('/') Site = '/'.join ( Parts [:3] ) Current = '/'.join ( Parts [:-1] ) fh = urlopen ( URL ) lines = fh.read () fh.close() Soup = BeautifulSoup ( lines ) hrefs = lines = Soup.findAll ( 'a' ) for href in hrefs : href = href [ 'href' ] #[:-1] ## <== remove "#" to generate all errors if href.startswith ( '/' ) : href = Site + href elif href.startswith ('#' ) : href = URL + href elif href.startswith ( 'http' ) : pass else : href = Current + href try: fh = urllib.urlopen ( href ) except : pass print Check_URL_1 ( href ), Check_URL_2 ( href ), href URL = 'http://127.0.0.1:8000/welcome/default/index' fh = Verify_Links ( URL )