Module Name: src
Committed By: jmcneill
Date: Sat Jan 28 13:12:16 UTC 2023

Modified Files:
	src/share/misc: nanpa.awk nanpa.sed

Log Message:
Catch up to 20 years of HTML and URL changes.

To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/share/misc/nanpa.awk src/share/misc/nanpa.sed

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/share/misc/nanpa.awk diff -u src/share/misc/nanpa.awk:1.2 src/share/misc/nanpa.awk:1.3 --- src/share/misc/nanpa.awk:1.2 Thu Mar 13 02:55:01 2003 +++ src/share/misc/nanpa.awk Sat Jan 28 13:12:16 2023 @@ -1,11 +1,49 @@ -# $NetBSD: nanpa.awk,v 1.2 2003/03/13 02:55:01 jhawk Exp $ +# $NetBSD: nanpa.awk,v 1.3 2023/01/28 13:12:16 jmcneill Exp $ # # todo: -# parse "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa? -# function=list_npa_introduced" to produce parenthetical -# notes about what area codes are overlayed by others -# (or split from). +# parse "https://nationalnanpa.com/nanp1/npa_report.csv" +# instead of scraping HTML. # +function trim(s) +{ + gsub(/^[ \t]+|[ \t]+$/, "", s); + return s; +} +function mapinit(postdb) +{ + while ((getline < postdb) > 0) { + sub(/#.*/, ""); + if (length($0)==0) continue; + NF=split($0, f); + location[f[1]] = f[2]; + flocation[tolower(f[2])] = f[2]; + country[f[1]] = f[4]; + fcountry[tolower(f[2])] = f[4]; + } +} +function countrymap(s) +{ + if (s == "CA") return "Canada"; + if (s == "US") return "USA"; + return s; +} +function locationmap(s, t) +{ + if (s in location) { + t = location[s]; + if (s in country) { + t = t " (" countrymap(country[s]) ")"; + } + } else if (tolower(s) in flocation) { + t = flocation[tolower(s)]; + if (tolower(s) in fcountry) { + t = t " (" countrymap(fcountry[tolower(s)]) ")"; + } + } else { + t = s; + } + return t; +} function parse(file, ispipe, isplanning, i, planinit, t) { planinit = 0; @@ -13,30 +51,30 @@ function parse(file, ispipe, isplanning, sub(/#.*/, ""); if (length($0)==0) continue; if (isplanning) { - split($0, f); - if (!planinit && f[2]=="NEW NPA") { + NF=split($0, f); + if (!planinit && f[2]=="New NPA") { planinit=1; for (i=1; i<=NF; i++) - fnames[$i]=i-1; - } else if (planinit && length(f[fnames["NEW NPA"]])>1) { - t = f[fnames["LOCATION"]] FS; - if (f[fnames["OVERLAY?"]]=="Yes") - t = t "Overlay of " f[fnames["OLD NPA"]]; - else if (f[fnames["OLD NPA"]]) - t = t 
"Split of " f[fnames["OLD NPA"]]; - if (f[fnames["STATUS"]]) - t = t " (" f[fnames["STATUS"]] ")"; - if (length(f[fnames["IN SERVICE DATE"]]) > 1) + fnames[f[i]]=i-1; + } else if (planinit && length(f[fnames["New NPA"]])>1) { + t = locationmap(trim(f[fnames["Location"]])) FS; + if (trim(f[fnames["Overlay?"]])=="Yes") + t = t "Overlay of " trim(f[fnames["Old NPA"]]); + else if (f[fnames["Old NPA"]]) + t = t "Split of " trim(f[fnames["Old NPA"]]); + if (f[fnames["Status"]]) + t = t " (" trim(f[fnames["Status"]]) ")"; + if (length(f[fnames["In Service Date"]]) > 1) t = t " effective " \ - f[fnames["IN SERVICE DATE"]]; - data[f[fnames["NEW NPA"]] "*"] = t; + trim(f[fnames["In Service Date"]]); + data[trim(f[fnames["New NPA"]]) "*"] = t; } } else { # digits only match($0, /^[0-9]/); if (RSTART==0) continue; i=index($0, FS); - data[substr($0, 1, i-1)]=substr($0,i+1); + data[substr($0, 1, i-1)]=locationmap(trim(substr($0,i+1))); } } close(file); @@ -44,8 +82,9 @@ function parse(file, ispipe, isplanning, BEGIN{ FS=":" + mapinit("na.postal"); print "# $""NetBSD: $"; - print "# Generated from http://www.nanpa.com/area_codes/index.html"; + print "# Generated from https://nationalnanpa.com/area_codes/index.html"; print "# (with local exceptions)"; print "# "; print "# format:"; @@ -54,14 +93,14 @@ BEGIN{ print "# A * in the Area Code field indicates a future area code." print "# "; parse("ftp -o - " \ - "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \ - "function=list_npa_geo_number | sed -f nanpa.sed", 1, 0); + "https://nationalnanpa.com/enas/geoAreaCodeNumberReport.do" \ + " | sed -f nanpa.sed", 1, 0); parse("ftp -o - " \ - "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" \ - "function=list_npa_non_geo | sed -f nanpa.sed", 1, 0); + "https://nationalnanpa.com/enas/nonGeoNpaServiceReport.do" \ + " | sed -f nanpa.sed", 1, 0); parse("ftp -o - " \ - "http://docs.nanpa.com/cgi-bin/npa_reports/nanpa\\?" 
\ - "function=list_npa_not_in_service | sed -f nanpa.sed", 1, 1); + "https://nationalnanpa.com/enas/plannedNpasNotInServiceReport.do" \ + " | sed -f nanpa.sed", 1, 1); parse("na.phone.add", 0, 0); sort="sort -n"; for (i in data) Index: src/share/misc/nanpa.sed diff -u src/share/misc/nanpa.sed:1.2 src/share/misc/nanpa.sed:1.3 --- src/share/misc/nanpa.sed:1.2 Mon Dec 25 18:39:48 2006 +++ src/share/misc/nanpa.sed Sat Jan 28 13:12:16 2023 @@ -1,4 +1,4 @@ -# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $ +# $NetBSD: nanpa.sed,v 1.3 2023/01/28 13:12:16 jmcneill Exp $ # # Parse HTML tables output by # http://docs.nanpa.com/cgi-bin/npa_reports/nanpa @@ -34,7 +34,7 @@ s/\$$// # Remove lines not starting with <TR> /<[Tt][Rr][^>]*>/!d # Replace all <TD> with colon -s/[ ]*<TD[^>]*> */:/g +s/[ ]*<[Tt][Dd][^>]*> */:/g # Strip all HTML tags s/<[^>]*>//g # Handle HTML characters @@ -42,7 +42,9 @@ s/ / /g # Compress spaces/tabs s/[ ][ ]*/ /g # Strip leading colons -s/^:// +s/:// # Strip leading/trailing whitespace -s/^ // +s/ *// s/ $// +# Strip HTML comments +s/^--.*$//