Hi Diez, the strings come from ConfigParser at runtime rather than from string literals in my code, so I can't just mark them with an 'r'.
config = ConfigParser.RawConfigParser() config.read(filename) crazyfilepath = config.get(name, "ImagePath") normalfilepath = normalize_path(crazyfilepath) The ultimate origin of the strings is the _winreg function. Here I also can't mark them with an 'r'. regkey = OpenKey(HKEY_LOCAL_MACHINE, "SYSTEM\\CurrentControlSet\\Services\\" + name) crazyimagepath = QueryValueEx(regkey, "ImagePath")[0] CloseKey(key) -- Dan Guido On Wed, Oct 21, 2009 at 2:34 PM, Diez B. Roggisch <de...@nospam.web.de> wrote: > Dan Guido wrote: > >> I'm trying to write a few methods that normalize Windows file paths. >> I've gotten it to work in 99% of the cases, but it seems like my code >> still chokes on '\x'. I've pasted my code below, can someone help me >> figure out a better way to write this? This seems overly complicated >> for such a simple problem... >> >> >> # returns normalized filepath with arguments removed >> def remove_arguments(filepath): >> #print "removing args from: " + filepath >> (head, tail) = os.path.split(filepath) >> pathext = os.environ['PATHEXT'].split(";") >> >> while(tail != ''): >> #print "trying: " + os.path.join(head,tail) >> >> # does it just work? 
>> if os.path.isfile(os.path.join(head, tail)): >> #print "it just worked" >> return os.path.join(head, tail) >> >> # try every extension >> for ext in pathext: >> if os.path.isfile(os.path.join(head, tail) + ext): >> return os.path.join(head, tail) + ext >> >> # remove the last word, try again >> tail = tail.split()[:-1] >> tail = " ".join(tail) >> >> return None >> >> escape_dict={'\a':r'\a', >> '\b':r'\b', >> '\c':r'\c', >> '\f':r'\f', >> '\n':r'\n', >> '\r':r'\r', >> '\t':r'\t', >> '\v':r'\v', >> '\'':r'\'', >> #'\"':r'\"', >> '\0':r'\0', >> '\1':r'\1', >> '\2':r'\2', >> '\3':r'\3', >> '\4':r'\4', >> '\5':r'\5', >> '\6':r'\6', >> '\7':r'\a', #i have no idea >> '\8':r'\8', >> '\9':r'\9'} >> >> def raw(text): >> """Returns a raw string representation of text""" >> new_string='' >> for char in text: >> try: >> new_string+=escape_dict[char] >> #print "escaped" >> except KeyError: >> new_string+=char >> #print "keyerror" >> #print new_string >> return new_string >> >> # returns the normalized path to a file if it exists >> # returns None if it doesn't exist >> def normalize_path(path): >> #print "not normal: " + path >> >> # make sure it's not blank >> if(path == ""): >> return None >> >> # get rid of mistakenly escaped bytes >> path = raw(path) >> #print "step1: " + path >> >> # remove quotes >> path = path.replace('"', '') >> #print "step2: " + path >> >> #convert to lowercase >> lower = path.lower() >> #print "step3: " + lower >> >> # expand all the normally formed environ variables >> expanded = os.path.expandvars(lower) >> #print "step4: " + expanded >> >> # chop off \??\ >> if expanded[:4] == "\\??\\": >> expanded = expanded[4:] >> #print "step5: " + expanded >> >> # strip a leading '/' >> if expanded[:1] == "\\": >> expanded = expanded[1:] >> #print "step7: " + expanded >> >> systemroot = os.environ['SYSTEMROOT'] >> >> # sometimes systemroot won't have % >> r = re.compile('systemroot', re.IGNORECASE) >> expanded = r.sub(systemroot, expanded) >> #print "step8: 
" + expanded >> >> # prepend the %systemroot% if its missing >> if expanded[:8] == "system32" or "syswow64": >> expanded = os.path.join(systemroot, expanded) >> #print "step9: " + expanded >> >> stripped = remove_arguments(expanded.lower()) >> >> # just in case you're running as LUA >> # this is a race condition but you can suck it >> if(stripped): >> if os.access(stripped, os.R_OK): >> return stripped >> >> return None >> >> def test_normalize(): >> test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys" >> test2 = "C:\WINDOWS\system32\msdtc.exe" >> test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs" >> test4 = "\SystemRoot\System32\drivers\vga.sys" >> test5 = "system32\DRIVERS\compbatt.sys" >> test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe" >> test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe" >> test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch" >> test9 = "" >> test10 = "SysWow64\drivers\AsIO.sys" >> test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys" >> test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything > > If I'm getting this right, what you try to do is to convert characters that > come from string-literal escape-codes to their literal representation. Why? > > A simple > > test12 = r"C:\windows\system32\xeuwhatever.sys" > > is all you need - note the leading r. Then > > test12[2] == "\\" # need escape on the right because of backslashes at end > of raw-string-literals rule. > > holds. > > Diez > -- > http://mail.python.org/mailman/listinfo/python-list > -- http://mail.python.org/mailman/listinfo/python-list