John Nagle added the comment: I suggest adding an optional user_agent parameter, as shown here:
def __init__(self, url='', user_agent=None):
    """Robots.txt parser that can send a custom User-Agent header.

    url: location of the robots.txt file (passed through to the parent
        RobotFileParser unchanged).
    user_agent: optional User-Agent string; when None, urllib's default
        agent is used, preserving the parent class's behavior.
    """
    urllib.robotparser.RobotFileParser.__init__(self, url)  # init parent
    self.user_agent = user_agent  # saved for use by read()

def read(self):
    """Read the robots.txt URL and feed it to the parser.

    Overrides the parent read() so that the HTTP request can carry the
    custom User-Agent header supplied at construction time.
    """
    try:
        req = urllib.request.Request(self.url, data=None)
        if self.user_agent is not None:  # only override when requested
            req.add_header("User-Agent", self.user_agent)
        f = urllib.request.urlopen(req)  # open connection
    except urllib.error.HTTPError as err:
        if err.code in (401, 403):
            # Access denied: treat everything as disallowed.
            self.disallow_all = True
        elif 400 <= err.code < 500:
            # Other 4xx (e.g. 404): no robots.txt, everything is allowed.
            self.allow_all = True
        # NOTE(review): 5xx errors fall through with neither flag set,
        # matching the stdlib RobotFileParser behavior being patched.
    else:
        # This 'else' belongs to the try, not the if/elif chain above:
        # parse only on a successful fetch, where f is guaranteed bound.
        raw = f.read()
        self.parse(raw.decode("utf-8").splitlines())

# ----------
# _______________________________________
# Python tracker <rep...@bugs.python.org>
# <http://bugs.python.org/issue28756>
# _______________________________________
# _______________________________________________
# Python-bugs-list mailing list
# Unsubscribe:
# https://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com