Dan Guido wrote: > I'm trying to write a few methods that normalize Windows file paths. > I've gotten it to work in 99% of the cases, but it seems like my code > still chokes on '\x'. I've pasted my code below, can someone help me > figure out a better way to write this? This seems overly complicated > for such a simple problem... > > > # returns normalized filepath with arguments removed > def remove_arguments(filepath): > #print "removing args from: " + filepath > (head, tail) = os.path.split(filepath) > pathext = os.environ['PATHEXT'].split(";") > > while(tail != ''): > #print "trying: " + os.path.join(head,tail) > > # does it just work? > if os.path.isfile(os.path.join(head, tail)): > #print "it just worked" > return os.path.join(head, tail) > > # try every extension > for ext in pathext: > if os.path.isfile(os.path.join(head, tail) + ext): > return os.path.join(head, tail) + ext > > # remove the last word, try again > tail = tail.split()[:-1] > tail = " ".join(tail) > > return None > > escape_dict={'\a':r'\a', > '\b':r'\b', > '\c':r'\c', > '\f':r'\f', > '\n':r'\n', > '\r':r'\r', > '\t':r'\t', > '\v':r'\v', > '\'':r'\'', > #'\"':r'\"', > '\0':r'\0', > '\1':r'\1', > '\2':r'\2', > '\3':r'\3', > '\4':r'\4', > '\5':r'\5', > '\6':r'\6', > '\7':r'\a', #i have no idea > '\8':r'\8', > '\9':r'\9'} > > def raw(text): > """Returns a raw string representation of text""" > new_string='' > for char in text: > try: > new_string+=escape_dict[char] > #print "escaped" > except KeyError: > new_string+=char > #print "keyerror" > #print new_string > return new_string > > # returns the normalized path to a file if it exists > # returns None if it doesn't exist > def normalize_path(path): > #print "not normal: " + path > > # make sure it's not blank > if(path == ""): > return None > > # get rid of mistakenly escaped bytes > path = raw(path) > #print "step1: " + path > > # remove quotes > path = path.replace('"', '') > #print "step2: " + path > > #convert to lowercase > lower = 
path.lower() > #print "step3: " + lower > > # expand all the normally formed environ variables > expanded = os.path.expandvars(lower) > #print "step4: " + expanded > > # chop off \??\ > if expanded[:4] == "\\??\\": > expanded = expanded[4:] > #print "step5: " + expanded > > # strip a leading '/' > if expanded[:1] == "\\": > expanded = expanded[1:] > #print "step7: " + expanded > > systemroot = os.environ['SYSTEMROOT'] > > # sometimes systemroot won't have % > r = re.compile('systemroot', re.IGNORECASE) > expanded = r.sub(systemroot, expanded) > #print "step8: " + expanded > > # prepend the %systemroot% if its missing > if expanded[:8] == "system32" or "syswow64": > expanded = os.path.join(systemroot, expanded) > #print "step9: " + expanded > > stripped = remove_arguments(expanded.lower()) > > # just in case you're running as LUA > # this is a race condition but you can suck it > if(stripped): > if os.access(stripped, os.R_OK): > return stripped > > return None > > def test_normalize(): > test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys" > test2 = "C:\WINDOWS\system32\msdtc.exe" > test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs" > test4 = "\SystemRoot\System32\drivers\vga.sys" > test5 = "system32\DRIVERS\compbatt.sys" > test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe" > test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe" > test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch" > test9 = "" > test10 = "SysWow64\drivers\AsIO.sys" > test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys" > test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything
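If I'm understanding this correctly, you are trying to convert characters that were produced by string-literal escape codes back into their literal (backslash-plus-character) representation. Why? Escape sequences are interpreted when Python parses the string literal, so by the time your code runs the original backslashes are already gone - and an incomplete escape such as "\xeu" (\x must be followed by two hex digits) is rejected before your code ever gets a chance to run. It is simpler to avoid the problem than to undo it afterwards. A simple

    test12 = r"C:\windows\system32\xeuwhatever.sys"

is all you need - note the leading r, which makes it a raw string literal. Then

    test12[2] == "\\"

holds. (The right-hand side is written with an escaped backslash because a raw string literal cannot end in a single backslash.)

Diez
--
http://mail.python.org/mailman/listinfo/python-list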