#! /usr/bin/env python

"""world -- Print mappings between country names and DNS country codes.

Contact: Barry Warsaw
Email:   barry@python.org
Version: %(__version__)s

This script will take a list of Internet addresses and print out where in the
world those addresses originate from, based on the top-level domain country
code found in the address.  Addresses can be in any of the following forms:

    xx                -- just the country code or top-level domain identifier
    host.domain.xx    -- any Internet host or network name
    somebody@where.xx -- an Internet email address

The top-level domain is matched without regard to case, and a trailing root
dot (as in `host.domain.xx.') is ignored.

If no match is found, the address is interpreted as a regular expression and a
reverse lookup is attempted.  This script will search the country names and
print a list of matching entries.  You can force reverse mappings with the
`-r' flag (see below).

For example:

    %% world tz us
    tz originated from Tanzania, United Republic of
    us originated from United States

    %% world united
    united matches 6 countries:
     uk: United Kingdom (common practice)
     tz: Tanzania, United Republic of
     ae: United Arab Emirates
     gb: United Kingdom
     us: United States
     um: United States Minor Outlying Islands

Country codes are maintained by the RIPE Network Coordination Centre, in
coordination with the ISO 3166 Maintenance Agency at DIN Berlin.  The
authoritative source of country code mappings is the ISO 3166 country code
list published by those bodies.

The latest known change to this information was:

    Monday, 10 October 2006, 17:59:51 UTC 2006

This script also knows about non-geographic top-level domains, and the
additional ccTLDs reserved by IANA.

Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]

    --dump
    -d
        Print mapping of all top-level domains.

    --parse file
    -p file
        Parse an iso3166-countrycodes file extracting the two letter country
        code followed by the country name.  Note that the three letter country
        codes and numbers, which are also provided in the standard format
        file, are ignored.

    --outputdict
    -o
        When used in conjunction with the `-p' option, output is in the form
        of a Python dictionary, and country names are normalized
        w.r.t. capitalization.  This makes it appropriate for cutting and
        pasting back into this file.  Output is always to standard out.

    --reverse
    -r
        Force reverse lookup.  In this mode the address can be any Python
        regular expression; this is matched against all country names and a
        list of matching mappings is printed.  In normal mode (e.g. without
        this flag), reverse lookup is performed on addresses if no matching
        country code is found.

    -h
    --help
        Print this message.

"""
__version__ = '$Revision$'

import sys
import getopt
import re

PROGRAM = sys.argv[0]


def usage(code, msg=''):
    """Print the module docstring as a usage message and exit.

    code is the process exit status passed to sys.exit().  msg, if given, is
    printed after the usage text (typically a getopt error).
    """
    # The docstring contains %(__version__)s and %(PROGRAM)s placeholders
    # which are filled in from the module globals.
    print(__doc__ % globals())
    if msg:
        print(msg)
    sys.exit(code)


def resolve(rawaddr):
    """Look up the top-level domain of rawaddr and print where it is from.

    rawaddr may be a bare code (`us'), a host name (`host.domain.us') or an
    email address (`somebody@where.us'); only the text after the last dot is
    examined.  The comparison is case-insensitive and a trailing root dot is
    ignored, because DNS names are case-insensitive and may be written fully
    qualified.

    Returns None when the address was resolved (a message has been printed),
    otherwise returns rawaddr unchanged so the caller can try the next step.
    """
    # str.split() always yields at least one element, so [-1] is safe even
    # for an empty string.
    addr = rawaddr.rstrip('.').split('.')[-1].lower()
    if addr in nameorgs:
        print(rawaddr, 'is in the', nameorgs[addr], 'top level domain')
        return None
    if addr in countries:
        print(rawaddr, 'originated from', countries[addr])
        return None
    # Not resolved, bounce it to the next step
    return rawaddr


def reverse(regexp):
    """Print every top-level domain whose description matches regexp.

    regexp is compiled case-insensitively and searched for in each value of
    the combined `all' mapping.

    Returns None when at least one match was printed.  Returns regexp
    unchanged when nothing matched, or when regexp is not a valid regular
    expression (in which case a diagnostic is written to standard error
    instead of letting re.error propagate as a traceback).
    """
    try:
        cre = re.compile(regexp, re.IGNORECASE)
    except re.error as err:
        print('Invalid regular expression %r: %s' % (regexp, err),
              file=sys.stderr)
        return regexp
    matches = [code for code, country in all.items() if cre.search(country)]
    # print results
    if not matches:
        # not resolved, bounce it to the next step
        return regexp
    if len(matches) == 1:
        code = matches[0]
        print(regexp, "matches code `%s', %s" % (code, all[code]))
    else:
        print(regexp, 'matches %d countries:' % len(matches))
        for code in matches:
            print(" %s: %s" % (code, all[code]))
    return None


def _normalize_country(country):
    """Return the upper-case country name with conventional capitalization."""
    words = country.split()
    for i, w in enumerate(words):
        # XXX special cases
        if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
            words[i] = w.lower()
        elif w == 'THE' and i != 1:
            # A `THE' in second position (e.g. "CONGO, THE ...") falls
            # through to the default branch and is capitalized.
            words[i] = w.lower()
        elif len(w) > 3 and w[1] == "'":
            # e.g. D'IVOIRE -> D'Ivoire
            words[i] = w[0:3].upper() + w[3:].lower()
        elif w in ('(U.S.)', 'U.S.'):
            pass
        elif w[0] == '(' and w != '(local':
            words[i] = '(' + w[1:].capitalize()
        elif w.find('-') != -1:
            words[i] = '-'.join([s.capitalize() for s in w.split('-')])
        else:
            words[i] = w.capitalize()
    return ' '.join(words)


def parse(file, normalize):
    """Parse an iso3166-countrycodes file and print its code/name pairs.

    file is the path of the file to read.  Everything up to the first line
    starting with `-' is skipped; after that each line is expected to hold a
    country name, a two letter code, a three letter code and a three digit
    number.  Parsing stops at the next line starting with `-' or at end of
    file.

    When normalize is true the output is a Python dictionary literal named
    `countries' with lower-case codes and normalized country names; otherwise
    each line is printed as `CODE NAME'.

    If the file cannot be opened, the system error message is printed and the
    function returns without raising.
    """
    try:
        fp = open(file)
    except IOError as err:
        print(err.strerror or err, ':', file)
        return
    cre = re.compile(r'(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
    scanning = False
    if normalize:
        print('countries = {')
    # The with-block guarantees the file is closed on every exit path.
    with fp:
        for line in fp:
            if not scanning:
                # The table starts after the first separator line.
                if line[0] == '-':
                    scanning = True
                continue
            mo = cre.match(line)
            if not mo:
                line = line.strip()
                if not line:
                    continue
                if line[0] == '-':
                    # Closing separator: end of the table.
                    break
                print('Could not parse line:', line)
                continue
            country, code = mo.group(1, 2)
            if normalize:
                print(' "%s": "%s",' % (code.lower(),
                                        _normalize_country(country)))
            else:
                print(code, country)
    if normalize:
        print(' }')


def main():
    """Command line entry point: parse options and run the requested mode."""
    show_help = False
    status = 0
    dump = False
    parsefile = None
    normalize = False
    forcerev = False

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'p:rohd',
            ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            show_help = True
        elif opt in ('-d', '--dump'):
            dump = True
        elif opt in ('-p', '--parse'):
            parsefile = arg
        elif opt in ('-o', '--outputdict'):
            normalize = True
        elif opt in ('-r', '--reverse'):
            forcerev = True

    if show_help:
        usage(status)

    if dump:
        print('Official country coded domains:')
        for code in sorted(countries):
            print(' %2s:' % code, countries[code])

        print('\nOther top-level domains:')
        for code in sorted(nameorgs):
            print(' %6s:' % code, nameorgs[code])
    elif parsefile:
        parse(parsefile, normalize)
    else:
        # Each address goes through forward lookup (unless -r was given),
        # then reverse lookup; whatever is still unresolved is reported.
        for addr in args:
            if not forcerev:
                addr = resolve(addr)
                if not addr:
                    continue
            addr = reverse(addr)
            if addr:
                print('Where in the world is %s?' % addr)


# The mappings
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "asia": "from Asia/for Asia",
    "arpa": "Arpanet",
    "biz": "business",
    "cat": "Catalan community",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "jobs": "employment-related",
    "mil": "military",
    "mobi": "mobile specific",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    "tel": "business telecommunications",
    "travel": "travel and tourism",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166.  IANA has a decoding table listing all reserved ccTLDs:
    #
    # http://www.iso.org/iso/iso-3166-1_decoding_table
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom.  AFAICT, the official `gb' code is routinely ignored!
    #
    # Reportedly `uk' was long in use before ISO3166 was adopted for top-level
    # DNS zone names (although in the reverse order like uk.ac.qmw) and was
    # carried forward (with the reversal) to avoid a large-scale renaming
    # process as the UK switched from their old `Coloured Book' protocols over
    # X.25 to Internet protocols over IP.
    #
    # Also, `su', while obsolete is still in limited use.
    "ac": "Ascension Island",
    "cp": "Clipperton Island",
    "dg": "Diego Garcia",
    "ea": "Ceuta, Melilla",
    "eu": "European Union",
    "fx": "Metropolitan France",
    "ic": "Canary Islands",
    "ta": "Tristan da Cunha",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
    }

# NOTE: each code must appear only once.  Older spellings of `sh', `pm' and
# `va' used to be listed a second time further down, which silently overrode
# the entries below; those stale duplicates have been removed.
countries = {
    "af": "Afghanistan",
    "ax": "Aland Islands",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegovina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gg": "Guernsey",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and Mcdonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran (Islamic Republic of)",
    "iq": "Iraq",
    "ie": "Ireland",
    "im": "Isle of Man",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "je": "Jersey",
    "jo": "Jordan",
    "kz": "Kazakhstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macao",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "me": "Montenegro",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "rs": "Serbia",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tl": "Timor-Leste",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands (British)",
    "vi": "Virgin Islands (U.S.)",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
    }

# Combined lookup table used by reverse(): non-geographic domains first, then
# countries.  The name shadows the builtin all(); it is kept because it is a
# module-level name that existing users of this module may reference.
all = nameorgs.copy()
all.update(countries)


if __name__ == '__main__':
    main()