I have a regular expression (RE) that should work -- it matches in Kodos [1] -- but in my own code it keeps failing to match the characters after a newline.
I'm writing a little program that scans the webpage of an arbitrary application and gets the newest version advertised on the page. test3.py: > # -*- coding: utf-8 -*- > > import configparser > import re > import urllib.request > import os > import sys > import logging > import collections > > > class CouldNotFindVersion(Exception): > def __init__(self, app_name, reason, exc_value): > self.value = 'The latest version of ' + app_name + ' could not > be determined because ' + reason > self.cause = exc_value > def __str__(self): > return repr(self.value) > > class AppUpdateItem(): > def __init__(self, config_file_name, config_file_section): > self.section = config_file_section > self.name = self.section['Name'] > self.url = self.section['URL'] > self.filename = self.section['Filename'] > self.file_re = re.compile(self.section['FileURLRegex']) > self.ver_re = re.compile(self.section['VersionRegex']) > self.prev_ver = self.section['CurrentVersion'] > try: > self.page = str(urllib.request.urlopen(self.url).read(), > encoding='utf-8') > self.file_URL = self.file_re.findall(self.page)[0] #here > is where it fails > self.last_ver = self.ver_re.findall(self.file_URL)[0] > except urllib.error.URLError: > self.error = str(sys.exc_info()[1]) > logging.info('[' + self.name + ']' + ' Could not load URL: > ' + self.url + ' : ' + self.error) > self.success = False > raise CouldNotFindVersion(self.name, self.error, > sys.exc_info()[0]) > except IndexError: > logging.warning('Regex did not return a match.') > def update_ini(self): > self.section['CurrentVersion'] = self.last_ver > with open(config_file_name, 'w') as configfile: > config.write(configfile) > def rollback_ini(self): > self.section['CurrentVersion'] = self.prev_ver > with open(config_file_name, 'w') as configfile: > config.write(configfile) > def download_file(self): > self.__filename = self.section['Filename'] > with open(self.__filename, 'wb') as file: > self.__file_req = urllib.request.urlopen(self.file_URL).read() > 
file.write(self.__file_req) > > > if __name__ == '__main__': > config = configparser.ConfigParser() > config_file = 'checklist.ini' > config.read(config_file) > queue = collections.deque() > for section in config.sections(): > try: > queue.append(AppUpdateItem(config_file, config[section])) > except CouldNotFindVersion as exc: > logging.warning(exc.value) > for elem in queue: > if elem.last_ver != elem.prev_ver: > elem.update_ini() > try: > elem.download_file() > except IOError: > logging.warning('[' + elem.name + '] Download failed.') > except: > elem.rollback_ini() > print(elem.name + ' succeeded.') checklist.ini: > [x264_64] > name = x264 (64-bit) > filename = x264.exe > url = http://x264.nl/x264_main.php > fileurlregex = > http://x264.nl/x264/64bit/8bit_depth/revision\n{0,3}[0-9]{4}\n{0,3}/x264\n{0,3}.exe > versionregex = [0-9]{4} > currentversion = 1995 The part it's supposed to match in http://x264.nl/x264_main.php: > <a href="http://x264.nl/x264/64bit/8bit_depth/revision > 1995 > /x264 > > .exe > <view-source-tab:http://x264.nl/x264/64bit/8bit_depth/revision%0A1995%0A/x264%0A%0A.exe>" > I was able to make a regex that matches in my code, but it shouldn't: http://x264.nl/x264/64bit/8bit_depth/revision.\n{1,3}[0-9]{4}.\n{1,3}/x264.\n{1,3}.\n{1,3}.exe I have to add a dot before each "\n". There is no character not accounted for before those newlines, but I don't get a match without the dots. I also need both those ".\n{1,3}" sequences before the ".exe". I'm really confused. Using Python 3.2 on Windows, in case it matters. [1] http://kodos.sourceforge.net/ (using the compiled Win32 version since it doesn't work with Python 3) -- http://mail.python.org/mailman/listinfo/python-list