I'm trying to write a few methods that normalize Windows file paths.
I've gotten it to work in 99% of the cases, but it seems like my code
still chokes on '\x'. I've pasted my code below, can someone help me
figure out a better way to write this? This seems overly complicated
for such a simple problem...


# returns normalized filepath with arguments removed
def remove_arguments(filepath):
        #print "removing args from: " + filepath
        (head, tail) = os.path.split(filepath)
        pathext = os.environ['PATHEXT'].split(";")
        
        while(tail != ''):
                #print "trying: " + os.path.join(head,tail)
                
                # does it just work?
                if os.path.isfile(os.path.join(head, tail)):
                        #print "it just worked"
                        return os.path.join(head, tail)
                
                # try every extension
                for ext in pathext:
                        if os.path.isfile(os.path.join(head, tail) + ext):
                                return os.path.join(head, tail) + ext
        
                # remove the last word, try again
                tail = tail.split()[:-1]
                tail = " ".join(tail)
        
        return None
        
escape_dict={'\a':r'\a',
           '\b':r'\b',
           '\c':r'\c',
           '\f':r'\f',
           '\n':r'\n',
           '\r':r'\r',
           '\t':r'\t',
           '\v':r'\v',
           '\'':r'\'',
           #'\"':r'\"',
           '\0':r'\0',
           '\1':r'\1',
           '\2':r'\2',
           '\3':r'\3',
           '\4':r'\4',
           '\5':r'\5',
           '\6':r'\6',
           '\7':r'\a', #i have no idea
           '\8':r'\8',
           '\9':r'\9'}

def raw(text):
        """Returns a raw string representation of text"""
        new_string=''
        for char in text:
                try:
                        new_string+=escape_dict[char]
                        #print "escaped"
                except KeyError:
                        new_string+=char
                        #print "keyerror"
                #print new_string
        return new_string

# returns the normalized path to a file if it exists
# returns None if it doesn't exist
def normalize_path(path):
        #print "not normal: " + path
        
        # make sure it's not blank
        if(path == ""):
                return None

        # get rid of mistakenly escaped bytes
        path = raw(path)
        #print "step1: " + path

        # remove quotes
        path = path.replace('"', '')
        #print "step2: " + path
        
        #convert to lowercase
        lower = path.lower()
        #print "step3: " + lower
        
        # expand all the normally formed environ variables
        expanded = os.path.expandvars(lower)
        #print "step4: " + expanded
        
        # chop off \??\
        if expanded[:4] == "\\??\\":
                expanded = expanded[4:]
        #print "step5: " + expanded
        
        # strip a leading '/'
        if expanded[:1] == "\\":
                expanded = expanded[1:]
        #print "step7: " + expanded
        
        systemroot = os.environ['SYSTEMROOT']
        
        # sometimes systemroot won't have %
        r = re.compile('systemroot', re.IGNORECASE)
        expanded = r.sub(systemroot, expanded)
        #print "step8: " + expanded
        
        # prepend the %systemroot% if its missing
        if expanded[:8] == "system32" or "syswow64":
                expanded = os.path.join(systemroot, expanded)
        #print "step9: " + expanded
        
        stripped = remove_arguments(expanded.lower())
        
        # just in case you're running as LUA
        # this is a race condition but you can suck it
        if(stripped):
                if os.access(stripped, os.R_OK):
                        return stripped
        
        return None
        
def test_normalize():
        test1 = "\??\C:\WINDOWS\system32\Drivers\CVPNDRVA.sys"
        test2 = "C:\WINDOWS\system32\msdtc.exe"
        test3 = "%SystemRoot%\system32\svchost.exe -k netsvcs"
        test4 = "\SystemRoot\System32\drivers\vga.sys"
        test5 = "system32\DRIVERS\compbatt.sys"
        test6 = "C:\Program Files\ABC\DEC Windows Services\Client Services.exe"
        test7 = "c:\Program Files\Common Files\Symantec Shared\SNDSrvc.exe"
        test8 = "C:\WINDOWS\system32\svchost -k dcomlaunch"
        test9 = ""
        test10 = "SysWow64\drivers\AsIO.sys"
        test11 = "\SystemRoot\system32\DRIVERS\amdsbs.sys"
        test12 = "C:\windows\system32\xeuwhatever.sys" #this breaks everything

        print normalize_path(test1)
        print normalize_path(test2)
        print normalize_path(test3)
        print normalize_path(test4)
        print normalize_path(test5)
        print normalize_path(test6)
        print normalize_path(test7)
        print normalize_path(test8)
        print normalize_path(test9)
        print normalize_path(test10)
        print normalize_path(test11)
        print normalize_path(test12)

--
Dan Guido
-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to