I'm trying to write a few methods that normalize Windows file paths. I've gotten it to work in 99% of the cases, but it seems like my code still chokes on '\x'. I've pasted my code below, can someone help me figure out a better way to write this? This seems overly complicated for such a simple problem...
import os
import re

# Matches the bare token "systemroot" (any case) so that paths written as
# "\SystemRoot\..." -- i.e. without the %...% markers that expandvars
# understands -- can still be resolved.
_SYSTEMROOT_RE = re.compile('systemroot', re.IGNORECASE)


def remove_arguments(filepath):
    """Return the existing file named by *filepath*, ignoring trailing arguments.

    The final path component is tried as-is and then with every extension
    listed in the PATHEXT environment variable. If nothing matches, the last
    whitespace-separated word is dropped from that component and the search
    repeats. Only the final component is trimmed; the directory part is
    never modified.

    Returns the matching path, or None when no candidate is an existing file.
    """
    head, tail = os.path.split(filepath)
    # PATHEXT may be unset (e.g. off Windows); treat that as "no extensions"
    # instead of raising KeyError.
    pathext = [ext for ext in os.environ.get('PATHEXT', '').split(';') if ext]

    while tail:
        candidate = os.path.join(head, tail)
        # Does the name work exactly as given?
        if os.path.isfile(candidate):
            return candidate
        # Otherwise try each executable extension in PATHEXT order.
        for ext in pathext:
            if os.path.isfile(candidate + ext):
                return candidate + ext
        # Drop the last word (presumably an argument) and try again.
        tail = ' '.join(tail.split()[:-1])
    return None


# Maps a control character that a non-raw string literal produced from a
# backslash sequence back to the two characters the author typed.
# Notes:
#   * '\7' is the same character as '\a', so one entry covers both.
#   * '\c', '\8' and '\9' are not escape sequences; as dictionary keys they
#     were two-character strings that a per-character lookup could never
#     match, so they have been dropped.
#   * '\x..' escapes cannot be undone here: the original characters are
#     already gone (or the literal failed to compile) before this code runs.
escape_dict = {
    '\a': r'\a',
    '\b': r'\b',
    '\f': r'\f',
    '\n': r'\n',
    '\r': r'\r',
    '\t': r'\t',
    '\v': r'\v',
    # NOTE(review): this also prefixes every genuine apostrophe in a path
    # with a backslash -- kept for compatibility, confirm it is wanted.
    '\'': r'\'',
    '\0': r'\0',
    '\1': r'\1',
    '\2': r'\2',
    '\3': r'\3',
    '\4': r'\4',
    '\5': r'\5',
    '\6': r'\6',
}


def raw(text):
    """Return *text* with accidentally interpreted escape characters undone.

    Every character found in ``escape_dict`` is replaced by its backslash
    spelling; all other characters are copied unchanged.
    """
    return ''.join(escape_dict.get(char, char) for char in text)


def normalize_path(path):
    """Return the normalized path to an existing, readable file, or None.

    Steps, in order: undo mistaken escapes, drop double quotes, lowercase,
    expand environment variables, strip a leading ``\\??\\`` prefix and one
    leading backslash, substitute the SYSTEMROOT directory for a bare
    ``systemroot`` token, prefix paths beginning with ``system32`` or
    ``syswow64`` with SYSTEMROOT, and finally strip trailing arguments.

    The returned path is lowercased. None is returned for an empty path or
    when no readable file can be found. When SYSTEMROOT is not set, the two
    SYSTEMROOT-based steps are skipped.
    """
    # Make sure it's not blank.
    if not path:
        return None

    # Get rid of mistakenly escaped bytes, then remove quotes.
    path = raw(path).replace('"', '')

    # Lowercase, then expand all the normally formed environment variables.
    expanded = os.path.expandvars(path.lower())

    # Chop off the \??\ prefix.
    if expanded.startswith('\\??\\'):
        expanded = expanded[4:]

    # Strip a single leading backslash.
    if expanded.startswith('\\'):
        expanded = expanded[1:]

    systemroot = os.environ.get('SYSTEMROOT')
    if systemroot:
        # Sometimes systemroot appears without the % markers. A function is
        # used as the replacement so the backslashes in the Windows path are
        # inserted literally instead of being parsed as regex escapes.
        expanded = _SYSTEMROOT_RE.sub(lambda match: systemroot, expanded)

        # Prepend the system root only when the path really is relative to
        # it. (The old test `x == "system32" or "syswow64"` was always true.)
        if expanded.startswith(('system32', 'syswow64')):
            expanded = os.path.join(systemroot, expanded)

    stripped = remove_arguments(expanded.lower())

    # Just in case we are running with limited privileges: only report files
    # we can read. The file can still change between this check and its use.
    if stripped and os.access(stripped, os.R_OK):
        return stripped
    return None


def test_normalize():
    """Print the normalized form of a set of sample Windows paths."""
    samples = [
        r"\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys",
        r"C:\WINDOWS\system32\msdtc.exe",
        r"%SystemRoot%\system32\svchost.exe -k netsvcs",
        # Deliberately NOT raw: "\v" becomes a vertical tab that raw() must undo.
        "\\SystemRoot\\System32\\drivers\vga.sys",
        r"system32\DRIVERS\compbatt.sys",
        r"C:\Program Files\ABC\DEC Windows Services\Client Services.exe",
        r"c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe",
        r"C:\WINDOWS\system32\svchost -k dcomlaunch",
        "",
        r"SysWow64\drivers\AsIO.sys",
        # Deliberately NOT raw: "\a" becomes a BEL character that raw() must undo.
        "\\SystemRoot\\system32\\DRIVERS\amdsbs.sys",
        # Must be a raw literal: "\x" in a normal literal is an invalid hex
        # escape and the script would not even compile.
        r"C:\windows\system32\xeuwhatever.sys",
    ]
    for sample in samples:
        print(normalize_path(sample))

# -- Dan Guido
# -- http://mail.python.org/mailman/listinfo/python-list