There are some commercial address standardization software packages, such as:

http://www.semaphorecorp.com/zp4/

(I do not work for this company :) but I have used their software.)
jim


On Nov 29, 7:31 am, Corey Oordt <[EMAIL PROTECTED]> wrote:
> I ported part of the Geo Street Address from the perl module:
>
>  >>>>>>>>address_regex.py
> from geocode.address_dicts import *
>
def _alternation(*token_groups):
    """Join every distinct token from ``token_groups`` into a regex alternation.

    Tokens are de-duplicated and ordered longest first (ties broken
    alphabetically) so that a short token such as "N" can never win over a
    longer one such as "NE" when nothing after the alternation forces the
    regex engine to backtrack.
    """
    unique = set()
    for group in token_groups:
        unique.update(group)
    return "|".join(sorted(unique, key=lambda token: (-len(token), token)))


# Alternations built from the lookup tables: long forms and abbreviations
# for street types and directions, abbreviations only for states.
STREET_TYPE_REGEX = _alternation(STREET_TYPES.keys(), STREET_TYPES.values())
STATE_REGEX = _alternation(STATE_CODES.values())
DIRECTIONS_REGEX = _alternation(DIRECTIONS.keys(), DIRECTIONS.values())

# Raw strings keep the regex escapes readable and avoid invalid string
# escapes such as "\/" and "\#".
ZIP_REGEX = r"\d{5}(?:-*\d{4})?"
ZIP_LOOSE_REGEX = r"[\d-]+"  # To catch malformed zipcodes
CORNER_REGEX = r"(?:\bAND\b|\bAT\b|&|\@)"
# "#" stays escaped because these patterns are compiled with re.VERBOSE,
# where a bare "#" would start a comment.
UNIT_REGEX = (
    r"(?:(?:su?i?te|p\W*[om]\W*b(?:ox)?|dept|apt|ro*m|fl|unit|box)\W+|\#\W*)"
    r"[\w-]+"
)
NUMBER_REGEX = r"\d+-?\d*"
FRACTION_REGEX = r"\d+/\d+"
>
# Possible street combinations.  Written for re.VERBOSE: whitespace and
# "#" comments inside the pattern are ignored by the regex engine.
STREET_REGEX = r"""
(?:
     # special cases like 100 South Street
     (?:
         (?P<street1>%(direction)s)\W+
         (?P<street_type1>%(street_type)s)\b
     )
     |
     (?:(?P<pre_dir>%(direction)s)\W+)?
     (?:
         (?P<street2>[^,]+)
         (?:[^\w,]+(?P<street_type2>%(street_type)s)\b)
         (?:[^\w,]+(?P<post_dir1>%(direction)s)\b)?
     |
         (?P<street3>[^,]*\d)
         (?P<post_dir2>%(direction)s)
     |
         (?P<street4>[^,]+?)
         (?:[^\w,]+(?P<street_type3>%(street_type)s)\b)?
         (?:[^\w,]+(?P<post_dir3>%(direction)s)\b)?
     )
)""" % {"direction": DIRECTIONS_REGEX, "street_type": STREET_TYPE_REGEX}
>
# Optional "city, STATE" pair (re.VERBOSE layout).
CITYSTATE_REGEX = r"""
(?:
     (?P<city>[^,]+?),\W+
     (?P<state>%s)\W*
)?""" % STATE_REGEX

# Optional city/state followed by an optional zipcode.
PLACE_REGEX = "%s(?:(?P<zip>%s))?" % (CITYSTATE_REGEX, ZIP_REGEX)
>
# Street-line part of an address (re.VERBOSE layout): optional house number,
# optional fraction, the street itself and an optional unit.  The place part
# (PLACE_REGEX plus a closing "\W*$" anchor) is intentionally not appended
# here; code that needs a full address concatenates it itself.
ADDRESS_REGEX = r"""^\W*
(?P<number>""" + NUMBER_REGEX + r""")?\W*
(?:""" + FRACTION_REGEX + r"""\W*)? # We don't need to keep the fractional part
""" + STREET_REGEX + r"""\W+
(?:""" + UNIT_REGEX + r"""\W+)? # We don't need to keep a unit part
"""
>
# Two streets joined by a corner word ("AND", "AT", "&", "@"), then a place.
# STREET_REGEX is embedded twice, and Python's re module rejects a pattern
# that defines the same group name twice, so the groups of the second street
# are renamed with a "cross_" prefix (cross_street1, cross_pre_dir, ...).
INTERSECTION_REGEX = (
    r"\W*" + STREET_REGEX
    + r"\W*?\s+" + CORNER_REGEX + r"\s+"
    + STREET_REGEX.replace("(?P<", "(?P<cross_") + r"\W+"
    + PLACE_REGEX
)
>
> <<<<<<<<<<<<<<<<<<<<<<<
>
>  >>>>>>>>>>>>address_dicts.py
> DIRECTIONS = {
>      "NORTH": "N",
>      "NORTHEAST": "NE",
>      "EAST": "E",
>      "SOUTHEAST": "SE",
>      "SOUTH": "S",
>      "SOUTHWEST": "SW",
>      "WEST": "W",
>      "NORTHWEST": "NW",
>
> }
>
> STATE_CODES = {
>      "ALABAMA": "AL",
>      "ALASKA": "AK",
>      "AMERICAN SAMOA": "AS",
>      "ARIZONA": "AZ",
>      "ARKANSAS": "AR",
>      "CALIFORNIA": "CA",
>      "COLORADO": "CO",
>      "CONNECTICUT": "CT",
>      "DELAWARE": "DE",
>      "DISTRICT OF COLUMBIA": "DC",
>      "FEDERATED STATES OF MICRONESIA": "FM",
>      "FLORIDA": "FL",
>      "GEORGIA": "GA",
>      "GUAM": "GU",
>      "HAWAII": "HI",
>      "IDAHO": "ID",
>      "ILLINOIS": "IL",
>      "INDIANA": "IN",
>      "IOWA": "IA",
>      "KANSAS": "KS",
>      "KENTUCKY": "KY",
>      "LOUISIANA": "LA",
>      "MAINE": "ME",
>      "MARSHALL ISLANDS": "MH",
>      "MARYLAND": "MD",
>      "MASSACHUSETTS": "MA",
>      "MICHIGAN": "MI",
>      "MINNESOTA": "MN",
>      "MISSISSIPPI": "MS",
>      "MISSOURI": "MO",
>      "MONTANA": "MT",
>      "NEBRASKA": "NE",
>      "NEVADA": "NV",
>      "NEW HAMPSHIRE": "NH",
>      "NEW JERSEY": "NJ",
>      "NEW MEXICO": "NM",
>      "NEW YORK": "NY",
>      "NORTH CAROLINA": "NC",
>      "NORTH DAKOTA": "ND",
>      "NORTHERN MARIANA ISLANDS": "MP",
>      "OHIO": "OH",
>      "OKLAHOMA": "OK",
>      "OREGON": "OR",
>      "PALAU": "PW",
>      "PENNSYLVANIA": "PA",
>      "PUERTO RICO": "PR",
>      "RHODE ISLAND": "RI",
>      "SOUTH CAROLINA": "SC",
>      "SOUTH DAKOTA": "SD",
>      "TENNESSEE": "TN",
>      "TEXAS": "TX",
>      "UTAH": "UT",
>      "VERMONT": "VT",
>      "VIRGIN ISLANDS": "VI",
>      "VIRGINIA": "VA",
>      "WASHINGTON": "WA",
>      "WEST VIRGINIA": "WV",
>      "WISCONSIN": "WI",
>      "WYOMING": "WY",
>
> }
>
> STREET_TYPES = {
>      "ALLEE": "ALY",
>      "ALLEY": "ALY",
>      "ALLY": "ALY",
>      "ANEX": "ANX",
>      "ANNEX": "ANX",
>      "ANNX": "ANX",
>      "ARCADE": "ARC",
>      "AV": "AVE",
>      "AVEN": "AVE",
>      "AVENU": "AVE",
>      "AVENUE": "AVE",
>      "AVN": "AVE",
>      "AVNUE": "AVE",
>      "BAYOO": "BYU",
>      "BAYOU": "BYU",
>      "BEACH": "BCH",
>      "BEND": "BND",
>      "BLUF": "BLF",
>      "BLUFF": "BLF",
>      "BLUFFS": "BLFS",
>      "BOT": "BTM",
>      "BOTTM": "BTM",
>      "BOTTOM": "BTM",
>      "BOUL": "BLVD",
>      "BOULEVARD": "BLVD",
>      "BOULV": "BLVD",
>      "BRANCH": "BR",
>      "BRDGE": "BRG",
>      "BRIDGE": "BRG",
>      "BRNCH": "BR",
>      "BROOK": "BRK",
>      "BROOKS": "BRKS",
>      "BURG": "BG",
>      "BURGS": "BGS",
>      "BYPA": "BYP",
>      "BYPAS": "BYP",
>      "BYPASS": "BYP",
>      "BYPS": "BYP",
>      "CAMP": "CP",
>      "CANYN": "CYN",
>      "CANYON": "CYN",
>      "CAPE": "CPE",
>      "CAUSEWAY": "CSWY",
>      "CAUSWAY": "CSWY",
>      "CEN": "CTR",
>      "CENT": "CTR",
>      "CENTER": "CTR",
>      "CENTERS": "CTRS",
>      "CENTR": "CTR",
>      "CENTRE": "CTR",
>      "CIRC": "CIR",
>      "CIRCL": "CIR",
>      "CIRCLE": "CIR",
>      "CIRCLES": "CIRS",
>      "CK": "CRK",
>      "CLIFF": "CLF",
>      "CLIFFS": "CLFS",
>      "CLUB": "CLB",
>      "CMP": "CP",
>      "CNTER": "CTR",
>      "CNTR": "CTR",
>      "CNYN": "CYN",
>      "COMMON": "CMN",
>      "CORNER": "COR",
>      "CORNERS": "CORS",
>      "COURSE": "CRSE",
>      "COURT": "CT",
>      "COURTS": "CTS",
>      "COVE": "CV",
>      "COVES": "CVS",
>      "CR": "CRK",
>      "CRCL": "CIR",
>      "CRCLE": "CIR",
>      "CRECENT": "CRES",
>      "CREEK": "CRK",
>      "CRESCENT": "CRES",
>      "CRESENT": "CRES",
>      "CREST": "CRST",
>      "CROSSING": "XING",
>      "CROSSROAD": "XRD",
>      "CRSCNT": "CRES",
>      "CRSENT": "CRES",
>      "CRSNT": "CRES",
>      "CRSSING": "XING",
>      "CRSSNG": "XING",
>      "CRT": "CT",
>      "CURVE": "CURV",
>      "DALE": "DL",
>      "DAM": "DM",
>      "DIV": "DV",
>      "DIVIDE": "DV",
>      "DRIV": "DR",
>      "DRIVE": "DR",
>      "DRIVES": "DRS",
>      "DRV": "DR",
>      "DVD": "DV",
>      "ESTATE": "EST",
>      "ESTATES": "ESTS",
>      "EXP": "EXPY",
>      "EXPR": "EXPY",
>      "EXPRESS": "EXPY",
>      "EXPRESSWAY": "EXPY",
>      "EXPW": "EXPY",
>      "EXTENSION": "EXT",
>      "EXTENSIONS": "EXTS",
>      "EXTN": "EXT",
>      "EXTNSN": "EXT",
>      "FALLS": "FLS",
>      "FERRY": "FRY",
>      "FIELD": "FLD",
>      "FIELDS": "FLDS",
>      "FLAT": "FLT",
>      "FLATS": "FLTS",
>      "FORD": "FRD",
>      "FORDS": "FRDS",
>      "FOREST": "FRST",
>      "FORESTS": "FRST",
>      "FORG": "FRG",
>      "FORGE": "FRG",
>      "FORGES": "FRGS",
>      "FORK": "FRK",
>      "FORKS": "FRKS",
>      "FORT": "FT",
>      "FREEWAY": "FWY",
>      "FREEWY": "FWY",
>      "FRRY": "FRY",
>      "FRT": "FT",
>      "FRWAY": "FWY",
>      "FRWY": "FWY",
>      "GARDEN": "GDN",
>      "GARDENS": "GDNS",
>      "GARDN": "GDN",
>      "GATEWAY": "GTWY",
>      "GATEWY": "GTWY",
>      "GATWAY": "GTWY",
>      "GLEN": "GLN",
>      "GLENS": "GLNS",
>      "GRDEN": "GDN",
>      "GRDN": "GDN",
>      "GRDNS": "GDNS",
>      "GREEN": "GRN",
>      "GREENS": "GRNS",
>      "GROV": "GRV",
>      "GROVE": "GRV",
>      "GROVES": "GRVS",
>      "GTWAY": "GTWY",
>      "HARB": "HBR",
>      "HARBOR": "HBR",
>      "HARBORS": "HBRS",
>      "HARBR": "HBR",
>      "HAVEN": "HVN",
>      "HAVN": "HVN",
>      "HEIGHT": "HTS",
>      "HEIGHTS": "HTS",
>      "HGTS": "HTS",
>      "HIGHWAY": "HWY",
>      "HIGHWY": "HWY",
>      "HILL": "HL",
>      "HILLS": "HLS",
>      "HIWAY": "HWY",
>      "HIWY": "HWY",
>      "HLLW": "HOLW",
>      "HOLLOW": "HOLW",
>      "HOLLOWS": "HOLW",
>      "HOLWS": "HOLW",
>      "HRBOR": "HBR",
>      "HT": "HTS",
>      "HWAY": "HWY",
>      "INLET": "INLT",
>      "ISLAND": "IS",
>      "ISLANDS": "ISS",
>      "ISLES": "ISLE",
>      "ISLND": "IS",
>      "ISLNDS": "ISS",
>      "JCTION": "JCT",
>      "JCTN": "JCT",
>      "JCTNS": "JCTS",
>      "JUNCTION": "JCT",
>      "JUNCTIONS": "JCTS",
>      "JUNCTN": "JCT",
>      "JUNCTON": "JCT",
>      "KEY": "KY",
>      "KEYS": "KYS",
>      "KNOL": "KNL",
>      "KNOLL": "KNL",
>      "KNOLLS": "KNLS",
>      "LA": "LN",
>      "LAKE": "LK",
>      "LAKES": "LKS",
>      "LANDING": "LNDG",
>      "LANE": "LN",
>      "LANES": "LN",
>      "LDGE": "LDG",
>      "LIGHT": "LGT",
>      "LIGHTS": "LGTS",
>      "LNDNG": "LNDG",
>      "LOAF": "LF",
>      "LOCK": "LCK",
>      "LOCKS": "LCKS",
>      "LODG": "LDG",
>      "LODGE": "LDG",
>      "LOOPS": "LOOP",
>      "MANOR": "MNR",
>      "MANORS": "MNRS",
>      "MEADOW": "MDW",
>      "MEADOWS": "MDWS",
>      "MEDOWS": "MDWS",
>      "MILL": "ML",
>      "MILLS": "MLS",
>      "MISSION": "MSN",
>      "MISSN": "MSN",
>      "MNT": "MT",
>      "MNTAIN": "MTN",
>      "MNTN": "MTN",
>      "MNTNS": "MTNS",
>      "MOTORWAY": "MTWY",
>      "MOUNT": "MT",
>      "MOUNTAIN": "MTN",
>      "MOUNTAINS": "MTNS",
>      "MOUNTIN": "MTN",
>      "MSSN": "MSN",
>      "MTIN": "MTN",
>      "NECK": "NCK",
>      "ORCHARD": "ORCH",
>      "ORCHRD": "ORCH",
>      "OVERPASS": "OPAS",
>      "OVL": "OVAL",
>      "PARKS": "PARK",
>      "PARKWAY": "PKWY",
>      "PARKWAYS": "PKWY",
>      "PARKWY": "PKWY",
>      "PASSAGE": "PSGE",
>      "PATHS": "PATH",
>      "PIKES": "PIKE",
>      "PINE": "PNE",
>      "PINES": "PNES",
>      "PK": "PARK",
>      "PKWAY": "PKWY",
>      "PKWYS": "PKWY",
>      "PKY": "PKWY",
>      "PLACE": "PL",
>      "PLAIN": "PLN",
>      "PLAINES": "PLNS",
>      "PLAINS": "PLNS",
>      "PLAZA": "PLZ",
>      "PLZA": "PLZ",
>      "POINT": "PT",
>      "POINTS": "PTS",
>      "PORT": "PRT",
>      "PORTS": "PRTS",
>      "PRAIRIE": "PR",
>      "PRARIE": "PR",
>      "PRK": "PARK",
>      "PRR": "PR",
>      "RAD": "RADL",
>      "RADIAL": "RADL",
>      "RADIEL": "RADL",
>      "RANCH": "RNCH",
>      "RANCHES": "RNCH",
>      "RAPID": "RPD",
>      "RAPIDS": "RPDS",
>      "RDGE": "RDG",
>      "REST": "RST",
>      "RIDGE": "RDG",
>      "RIDGES": "RDGS",
>      "RIVER": "RIV",
>      "RIVR": "RIV",
>      "RNCHS": "RNCH",
>      "ROAD": "RD",
>      "ROADS": "RDS",
>      "ROUTE": "RTE",
>      "RVR": "RIV",
>      "SHOAL": "SHL",
>      "SHOALS": "SHLS",
>      "SHOAR": "SHR",
>      "SHOARS": "SHRS",
>      "SHORE": "SHR",
>      "SHORES": "SHRS",
>      "SKYWAY": "SKWY",
>      "SPNG": "SPG",
>      "SPNGS": "SPGS",
>      "SPRING": "SPG",
>      "SPRINGS": "SPGS",
>      "SPRNG": "SPG",
>      "SPRNGS": "SPGS",
>      "SPURS": "SPUR",
>      "SQR": "SQ",
>      "SQRE": "SQ",
>      "SQRS": "SQS",
>      "SQU": "SQ",
>      "SQUARE": "SQ",
>      "SQUARES": "SQS",
>      "STATION": "STA",
>      "STATN": "STA",
>      "STN": "STA",
>      "STR": "ST",
>      "STRAV": "STRA",
>      "STRAVE": "STRA",
>      "STRAVEN": "STRA",
>      "STRAVENUE": "STRA",
>      "STRAVN": "STRA",
>      "STREAM": "STRM",
>      "STREET": "ST",
>      "STREETS": "STS",
>      "STREME": "STRM",
>      "STRT": "ST",
>      "STRVN": "STRA",
>      "STRVNUE": "STRA",
>      "SUMIT": "SMT",
>      "SUMITT": "SMT",
>      "SUMMIT": "SMT",
>      "TERR": "TER",
>      "TERRACE": "TER",
>      "THROUGHWAY": "TRWY",
>      "TPK": "TPKE",
>      "TR": "TRL",
>      "TRACE": "TRCE",
>      "TRACES": "TRCE",
>      "TRACK": "TRAK",
>      "TRACKS": "TRAK",
>      "TRAFFICWAY": "TRFY",
>      "TRAIL": "TRL",
>      "TRAILS": "TRL",
>      "TRK": "TRAK",
>      "TRKS": "TRAK",
>      "TRLS": "TRL",
>      "TRNPK": "TPKE",
>      "TRPK": "TPKE",
>      "TUNEL": "TUNL",
>      "TUNLS": "TUNL",
>      "TUNNEL": "TUNL",
>      "TUNNELS": "TUNL",
>      "TUNNL": "TUNL",
>      "TURNPIKE": "TPKE",
>      "TURNPK": "TPKE",
>      "UNDERPASS": "UPAS",
>      "UNION": "UN",
>      "UNIONS": "UNS",
>      "VALLEY": "VLY",
>      "VALLEYS": "VLYS",
>      "VALLY": "VLY",
>      "VDCT": "VIA",
>      "VIADCT": "VIA",
>      "VIADUCT": "VIA",
>      "VIEW": "VW",
>      "VIEWS": "VWS",
>      "VILL": "VLG",
>      "VILLAG": "VLG",
>      "VILLAGE": "VLG",
>      "VILLAGES": "VLGS",
>      "VILLE": "VL",
>      "VILLG": "VLG",
>      "VILLIAGE": "VLG",
>      "VIST": "VIS",
>      "VISTA": "VIS",
>      "VLLY": "VLY",
>      "VST": "VIS",
>      "VSTA": "VIS",
>      "WALKS": "WALK",
>      "WELL": "WL",
>      "WELLS": "WLS",
>      "WY": "WAY",}
>
> <<<<<<<<<<<<<<<<<<<<<<<
>
>  >>>>>>>>>>>>>>>>>> address.py
> import re
>
> from geocode.address_dicts import *
> from geocode.address_regex import *
> from geocode.models import GeocodeCache, GeocodeError
> from geocode import app_settings
> from geopy import geocoders
>
> # These are the possible pieces of an address, in order. We are going to
> # parse them in reverse order.
>
class Address(object):
    """A class to handle the pieces of a parsed address.

    Every piece is kept as a plain string attribute; an empty string means
    the piece is absent.
    """

    def __init__(self, zipcode='', state='', city='', unit='',
                 post_dir='', street_type='', street='', pre_dir='',
                 fraction='', number=''):
        """Store each address piece exactly as passed by the caller."""
        self.zipcode = zipcode
        self.state = state
        self.city = city
        self.unit = unit
        self.post_dir = post_dir
        self.street_type = street_type
        self.street = street
        self.pre_dir = pre_dir
        self.fraction = fraction
        self.number = number

    def __str__(self):
        """Render the address as "<street line>, <city>, <state> <zip5>".

        Missing pieces are skipped together with their separators, and only
        the first five characters of the zipcode are shown.
        """
        def text(value):
            # Attributes may have been reassigned to None after construction.
            return value or ''

        street_line = " ".join(text(part) for part in (
            self.number, self.fraction, self.pre_dir, self.street,
            self.street_type, self.post_dir, self.unit))
        # Collapse the runs of spaces left behind by empty pieces.
        street_line = re.sub("  +", " ", street_line.strip())

        place_line = "%s, %s %s" % (
            text(self.city), text(self.state), text(self.zipcode)[:5])
        place_line = re.sub("  +", " ", place_line)
        # Drop a trailing comma (city only) or a leading ", " (no city).
        place_line = re.sub(",$", "", place_line.strip())
        place_line = re.sub("^, ", "", place_line)

        if street_line and place_line:
            return street_line + ", " + place_line
        return street_line + place_line
>
def is_zipcode(piece, addr_parts):
    """Report whether ``piece`` starts with something zipcode-like.

    The loose zipcode pattern is used, so malformed zipcodes are accepted.
    On a match the matched text is stored in ``addr_parts.zipcode``;
    ``addr_parts`` is expected to be an Address object.
    """
    pattern = "(?P<zipcode>%s)" % ZIP_LOOSE_REGEX
    found = re.match(pattern, piece, re.IGNORECASE | re.VERBOSE)
    if found is None:
        return False
    addr_parts.zipcode = found.group('zipcode')
    return True
>
def is_state(piece, addr_parts):
    """Report whether ``piece`` starts with a state code.

    On a match (case is ignored) the matched text is stored under
    ``addr_parts["state"]``.
    """
    pattern = "(?P<state>%s)" % STATE_REGEX
    found = re.match(pattern, piece, re.IGNORECASE | re.VERBOSE)
    if found is None:
        return False
    addr_parts["state"] = found.group('state')
    return True
>
def is_city(piece, addr_parts):
    """Decide whether ``piece`` should be taken as the city.

    If a non-empty state has already been recorded in ``addr_parts``, the
    piece is assumed to be the city and stored under ``addr_parts['city']``.
    Otherwise nothing is stored.

    Returns True when the piece was stored, False otherwise.
    """
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    if 'state' in addr_parts and addr_parts['state'] != '':
        addr_parts['city'] = piece
        return True
    return False
>
def is_unit(piece, addr_parts):
    """Report whether ``piece`` starts with a unit description.

    On a match (case is ignored) the matched text is stored under
    ``addr_parts["unit"]``.
    """
    pattern = "(?P<unit>%s)" % UNIT_REGEX
    found = re.match(pattern, piece, re.IGNORECASE | re.VERBOSE)
    if found is None:
        return False
    addr_parts["unit"] = found.group('unit')
    return True
>
def is_post_dir(piece, addr_parts):
    """Report whether ``piece`` starts with a direction (post-directional).

    On a match the direction is stored under ``addr_parts["post_dir"]``.
    A spelled-out direction is replaced by its abbreviation from DIRECTIONS.

    Returns True on a match, False otherwise.
    """
    # NOTE(review): re.match only anchors at the start, so a piece that
    # merely begins with a direction also matches.
    found = re.match("(?P<post_dir>%s)" % DIRECTIONS_REGEX, piece,
                     re.IGNORECASE | re.VERBOSE)
    if not found:
        return False
    direction = found.group('post_dir')
    # The match ignores case, so the table lookup must too; unknown values
    # are stored unchanged.
    addr_parts["post_dir"] = DIRECTIONS.get(direction.upper(), direction)
    return True
>
def is_pre_dir(piece, addr_parts):
    """Report whether ``piece`` starts with a direction (pre-directional).

    On a match the direction is stored under ``addr_parts["pre_dir"]``.
    A spelled-out direction is replaced by its abbreviation from DIRECTIONS.

    Returns True on a match, False otherwise.
    """
    # NOTE(review): re.match only anchors at the start, so a piece that
    # merely begins with a direction also matches.
    found = re.match("(?P<pre_dir>%s)" % DIRECTIONS_REGEX, piece,
                     re.IGNORECASE | re.VERBOSE)
    if not found:
        return False
    direction = found.group('pre_dir')
    # The match ignores case, so the table lookup must too; unknown values
    # are stored unchanged.
    addr_parts["pre_dir"] = DIRECTIONS.get(direction.upper(), direction)
    return True
>
def is_street_type(piece, addr_parts):
    """Report whether ``piece`` starts with a street descriptor.

    On a match the descriptor is stored under ``addr_parts["street_type"]``.
    A long or variant spelling is replaced by its abbreviation from
    STREET_TYPES.

    Returns True on a match, False otherwise.
    """
    # NOTE(review): re.match only anchors at the start, so a piece that
    # merely begins with a street type also matches.
    found = re.match("(?P<street_type>%s)" % STREET_TYPE_REGEX, piece,
                     re.IGNORECASE | re.VERBOSE)
    if not found:
        return False
    street_type = found.group('street_type')
    # The match ignores case, so the table lookup must too; unknown values
    # are stored unchanged.
    addr_parts["street_type"] = STREET_TYPES.get(street_type.upper(),
                                                 street_type)
    return True
>
def is_street(piece, addr_parts):
    """Accept ``piece`` as the street name.

    No check is made: a piece that reaches this test is assumed to be the
    street.  It is stored under ``addr_parts['street']`` and True is
    always returned.
    """
    addr_parts['street'] = piece
    return True
>
def is_fraction(piece, addr_parts):
    """Report whether ``piece`` starts with a fraction such as "1/2".

    On a match the matched text is stored under ``addr_parts["fraction"]``.
    """
    pattern = "(?P<fraction>%s)" % FRACTION_REGEX
    found = re.match(pattern, piece, re.IGNORECASE | re.VERBOSE)
    if found is None:
        return False
    addr_parts["fraction"] = found.group('fraction')
    return True
>
def is_number(piece, addr_parts):
    """Accept ``piece`` as (part of) the house number.

    No check is made: a piece that reaches this test is assumed to be a
    number.  Because there are strange numbers such as "W333 S405", the
    piece is appended, space separated, to whatever is already stored under
    ``addr_parts['number']``.

    Always returns True.
    """
    # A missing or empty entry simply means no number has been seen yet.
    existing = addr_parts.get('number') or ''
    if existing:
        existing += " "
    addr_parts['number'] = existing + piece
    return True
>
def is_address(piece, addr_parts):
    """Report whether ``piece`` is a street address line.

    On a match every named group of ADDRESS_REGEX is copied into
    ``addr_parts`` (unmatched groups as ''), and the normalised
    ``pre_dir``, ``street_type`` and ``post_dir`` entries are filled in.

    Returns True on a match, False otherwise.
    """
    found = re.match(ADDRESS_REGEX, piece, re.IGNORECASE | re.VERBOSE)
    if not found:
        return False

    groups = found.groupdict()
    for key, value in groups.items():
        addr_parts[key] = value or ''

    def normalise(canonical, group_names, table):
        # The regex captures numbered variants (street_type1..3,
        # post_dir1..3); use the first one that matched, else keep whatever
        # addr_parts already held under the canonical key.
        value = ''
        for name in group_names:
            if groups.get(name):
                value = groups[name]
                break
        if not value:
            value = addr_parts.get(canonical) or ''
        # Look up case-insensitively; unknown values are stored unchanged.
        addr_parts[canonical] = table.get(value.upper(), value)

    normalise('pre_dir', ('pre_dir',), DIRECTIONS)
    # Street types are abbreviated through STREET_TYPES, not DIRECTIONS.
    normalise('street_type',
              ('street_type1', 'street_type2', 'street_type3'),
              STREET_TYPES)
    normalise('post_dir', ('post_dir1', 'post_dir2', 'post_dir3'),
              DIRECTIONS)
    return True
>
def is_citystatezip(piece, addr_parts):
    """Report whether ``piece`` is a "city, state zip" string.

    On a match ``addr_parts.city``, ``addr_parts.state`` and
    ``addr_parts.zipcode`` are set, using '' for any part that is missing.
    ``addr_parts`` is expected to be an Address object and is left
    untouched when there is no match.

    Returns True on a match, False otherwise.
    """
    found = re.match(PLACE_REGEX, piece, re.IGNORECASE | re.VERBOSE)
    # A zero-length match means nothing was actually recognised.
    if not found or found.end() == 0:
        return False

    groups = found.groupdict()
    addr_parts.city = groups.get('city') or ''
    addr_parts.state = groups.get('state') or ''
    # Keyed on 'zip' itself, not on the presence of 'state'.
    addr_parts.zipcode = groups.get('zip') or ''
    return True
>
def addr_piece_test_generator():
    """Yield the address piece tests one by one, then ``is_number`` forever.

    The tests are yielded in the order in which the pieces are expected
    when an address is read from its end towards its start.  The generator
    never terminates: once every test has been yielded it keeps yielding
    the last one.
    """
    addr_piece_tests = [
        is_zipcode,
        is_state,
        is_city,
        is_unit,
        is_post_dir,
        is_street_type,
        is_street,
        is_pre_dir,
        is_fraction,
        is_number,
    ]

    for test in addr_piece_tests:
        yield test

    # Always yield the number function when all done.  Index -1 is the last
    # entry; indexing with len() is one past the end and raises IndexError.
    while True:
        yield addr_piece_tests[-1]
>
def parse_location(address):
    """Parse a free-form location string into an Address.

    The input is upper-cased and stripped of periods, then matched against
    the full street address pattern.  If that fails it is tried as a
    "city, state zip" string and finally as a bare zipcode.

    Returns an Address on success, or None when nothing matches.
    """
    # Only periods are removed ("ST." -> "ST"); commas are left in place.
    address = address.upper().replace(".", "")
    addr = Address()

    addr_result = re.search(ADDRESS_REGEX + PLACE_REGEX + "\\W*$",
                            address, re.IGNORECASE | re.VERBOSE)

    if not addr_result:
        # Not a street address: the looser matchers fill in ``addr``
        # themselves when they succeed.
        if is_citystatezip(address, addr) or is_zipcode(address, addr):
            return addr
        return None

    result_dict = addr_result.groupdict()

    def first_of(*names):
        """Return the first non-empty captured group among ``names``."""
        for name in names:
            if result_dict[name]:
                return result_dict[name]
        return ""

    # Directions and street types are replaced by their abbreviations when
    # the captured text is a known long form.
    pre_dir = first_of("pre_dir")
    addr.pre_dir = DIRECTIONS.get(pre_dir, pre_dir)

    post_dir = first_of("post_dir1", "post_dir2", "post_dir3")
    addr.post_dir = DIRECTIONS.get(post_dir, post_dir)

    street_type = first_of("street_type1", "street_type2", "street_type3")
    addr.street_type = STREET_TYPES.get(street_type, street_type)

    addr.street = first_of("street1", "street2", "street3", "street4")
    addr.number = first_of("number")

    # Unmatched optional groups come back as None; store '' instead so the
    # Address can always be rendered as text.
    addr.city = first_of("city")
    addr.state = first_of("state")

    # Five blanks stand in for a missing zipcode; a ZIP+4 is cut down to
    # its first five characters.
    addr.zipcode = (result_dict["zip"] or "     ")[:5]

    return addr
>
def normalize_location(location):
    """Return ``location`` in a normalised textual form.

    A location that looks like a corner (two parts joined by "AND", "AT",
    "&" or "@") is only upper-cased.  Anything else is parsed as an address
    and rendered from the parsed pieces.  If the location cannot be parsed
    at all, the upper-cased input is returned.
    """
    if re.search(".+" + CORNER_REGEX + ".+", location,
                 re.IGNORECASE | re.VERBOSE):
        return location.upper()

    addr = parse_location(location)
    if addr is None:
        # str(None) would hand back the literal text "None".
        return location.upper()
    return str(addr)
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Django users" group.
To post to this group, send email to django-users@googlegroups.com
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at 
http://groups.google.com/group/django-users?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to