/Tools/world/world

http://unladen-swallow.googlecode.com/ · #! · 551 lines · 504 code · 47 blank · 0 comment · 0 complexity · 2fbdaab6506fd2765e721aeda96b1c58 MD5 · raw file

  1. #! /usr/bin/env python
  2. """world -- Print mappings between country names and DNS country codes.
  3. Contact: Barry Warsaw
  4. Email: barry@python.org
  5. Version: %(__version__)s
  6. This script will take a list of Internet addresses and print out where in the
  7. world those addresses originate from, based on the top-level domain country
  8. code found in the address. Addresses can be in any of the following forms:
  9. xx -- just the country code or top-level domain identifier
  10. host.domain.xx -- any Internet host or network name
  11. somebody@where.xx -- an Internet email address
  12. If no match is found, the address is interpreted as a regular expression and a
  13. reverse lookup is attempted. This script will search the country names and
  14. print a list of matching entries. You can force reverse mappings with the
  15. `-r' flag (see below).
  16. For example:
  17. %% world tz us
  18. tz originated from Tanzania, United Republic of
  19. us originated from United States
  20. %% world united
  21. united matches 6 countries:
  22. ae: United Arab Emirates
  23. uk: United Kingdom (common practice)
  24. um: United States Minor Outlying Islands
  25. us: United States
  26. tz: Tanzania, United Republic of
  27. gb: United Kingdom
  28. Country codes are maintained by the RIPE Network Coordination Centre,
  29. in coordination with the ISO 3166 Maintenance Agency at DIN Berlin. The
  30. authoritative source of country code mappings is:
  31. <url:ftp://ftp.ripe.net/iso3166-countrycodes.txt>
  32. The latest known change to this information was:
  33. Friday, 5 April 2002, 12.00 CET 2002
  34. This script also knows about non-geographic top-level domains, and the
  35. additional ccTLDs reserved by IANA.
  36. Usage: %(PROGRAM)s [-d] [-p file] [-o] [-r] [-h] addr [addr ...]
  37. --dump
  38. -d
  39. Print mapping of all top-level domains.
  40. --parse file
  41. -p file
  42. Parse an iso3166-countrycodes file extracting the two letter country
  43. code followed by the country name. Note that the three letter country
  44. codes and numbers, which are also provided in the standard format
  45. file, are ignored.
  46. --outputdict
  47. -o
  48. When used in conjunction with the `-p' option, output is in the form
  49. of a Python dictionary, and country names are normalized
  50. w.r.t. capitalization. This makes it appropriate for cutting and
  51. pasting back into this file. Output is always to standard out.
  52. --reverse
  53. -r
  54. Force reverse lookup. In this mode the address can be any Python
  55. regular expression; this is matched against all country names and a
  56. list of matching mappings is printed. In normal mode (e.g. without
  57. this flag), reverse lookup is performed on addresses if no matching
  58. country code is found.
  59. -h
  60. --help
  61. Print this message.
  62. """
# Revision string; substituted into the module docstring by usage() through
# the "%(__version__)s" placeholder.  (Looks like a version-control keyword
# expansion -- confirm before editing by hand.)
__version__ = '$Revision: 27624 $'
import sys
import getopt
import re
# Name the script was invoked as; substituted into the "Usage:" line of the
# module docstring through the "%(PROGRAM)s" placeholder.
PROGRAM = sys.argv[0]
  68. def usage(code, msg=''):
  69. print __doc__ % globals()
  70. if msg:
  71. print msg
  72. sys.exit(code)
  73. def resolve(rawaddr):
  74. parts = rawaddr.split('.')
  75. if not len(parts):
  76. # no top level domain found, bounce it to the next step
  77. return rawaddr
  78. addr = parts[-1]
  79. if nameorgs.has_key(addr):
  80. print rawaddr, 'is in the', nameorgs[addr], 'top level domain'
  81. return None
  82. elif countries.has_key(addr):
  83. print rawaddr, 'originated from', countries[addr]
  84. return None
  85. else:
  86. # Not resolved, bounce it to the next step
  87. return rawaddr
  88. def reverse(regexp):
  89. matches = []
  90. cre = re.compile(regexp, re.IGNORECASE)
  91. for code, country in all.items():
  92. mo = cre.search(country)
  93. if mo:
  94. matches.append(code)
  95. # print results
  96. if not matches:
  97. # not resolved, bounce it to the next step
  98. return regexp
  99. if len(matches) == 1:
  100. code = matches[0]
  101. print regexp, "matches code `%s', %s" % (code, all[code])
  102. else:
  103. print regexp, 'matches %d countries:' % len(matches)
  104. for code in matches:
  105. print " %s: %s" % (code, all[code])
  106. return None
  107. def parse(file, normalize):
  108. try:
  109. fp = open(file)
  110. except IOError, (err, msg):
  111. print msg, ':', file
  112. cre = re.compile('(.*?)[ \t]+([A-Z]{2})[ \t]+[A-Z]{3}[ \t]+[0-9]{3}')
  113. scanning = 0
  114. if normalize:
  115. print 'countries = {'
  116. while 1:
  117. line = fp.readline()
  118. if line == '':
  119. break # EOF
  120. if scanning:
  121. mo = cre.match(line)
  122. if not mo:
  123. line = line.strip()
  124. if not line:
  125. continue
  126. elif line[0] == '-':
  127. break
  128. else:
  129. print 'Could not parse line:', line
  130. continue
  131. country, code = mo.group(1, 2)
  132. if normalize:
  133. words = country.split()
  134. for i in range(len(words)):
  135. w = words[i]
  136. # XXX special cases
  137. if w in ('AND', 'OF', 'OF)', 'name:', 'METROPOLITAN'):
  138. words[i] = w.lower()
  139. elif w == 'THE' and i <> 1:
  140. words[i] = w.lower()
  141. elif len(w) > 3 and w[1] == "'":
  142. words[i] = w[0:3].upper() + w[3:].lower()
  143. elif w in ('(U.S.)', 'U.S.'):
  144. pass
  145. elif w[0] == '(' and w <> '(local':
  146. words[i] = '(' + w[1:].capitalize()
  147. elif w.find('-') <> -1:
  148. words[i] = '-'.join(
  149. [s.capitalize() for s in w.split('-')])
  150. else:
  151. words[i] = w.capitalize()
  152. code = code.lower()
  153. country = ' '.join(words)
  154. print ' "%s": "%s",' % (code, country)
  155. else:
  156. print code, country
  157. elif line[0] == '-':
  158. scanning = 1
  159. if normalize:
  160. print ' }'
  161. def main():
  162. help = 0
  163. status = 0
  164. dump = 0
  165. parsefile = None
  166. normalize = 0
  167. forcerev = 0
  168. try:
  169. opts, args = getopt.getopt(
  170. sys.argv[1:],
  171. 'p:rohd',
  172. ['parse=', 'reverse', 'outputdict', 'help', 'dump'])
  173. except getopt.error, msg:
  174. usage(1, msg)
  175. for opt, arg in opts:
  176. if opt in ('-h', '--help'):
  177. help = 1
  178. elif opt in ('-d', '--dump'):
  179. dump = 1
  180. elif opt in ('-p', '--parse'):
  181. parsefile = arg
  182. elif opt in ('-o', '--outputdict'):
  183. normalize = 1
  184. elif opt in ('-r', '--reverse'):
  185. forcerev = 1
  186. if help:
  187. usage(status)
  188. if dump:
  189. print 'Non-geographic domains:'
  190. codes = nameorgs.keys()
  191. codes.sort()
  192. for code in codes:
  193. print ' %4s:' % code, nameorgs[code]
  194. print '\nCountry coded domains:'
  195. codes = countries.keys()
  196. codes.sort()
  197. for code in codes:
  198. print ' %2s:' % code, countries[code]
  199. elif parsefile:
  200. parse(parsefile, normalize)
  201. else:
  202. if not forcerev:
  203. args = filter(None, map(resolve, args))
  204. args = filter(None, map(reverse, args))
  205. for arg in args:
  206. print 'Where in the world is %s?' % arg
# The mappings

# Top-level domains that are not ISO 3166 country codes, mapped to a short
# description.  resolve() consults this table before `countries', and its
# entries are printed under "Non-geographic domains:" by the --dump option.
nameorgs = {
    # New top level domains as described by ICANN
    # http://www.icann.org/tlds/
    "aero": "air-transport industry",
    "arpa": "Arpanet",
    "biz": "business",
    "com": "commercial",
    "coop": "cooperatives",
    "edu": "educational",
    "gov": "government",
    "info": "unrestricted `info'",
    "int": "international",
    "mil": "military",
    "museum": "museums",
    "name": "`name' (for registration by individuals)",
    "net": "networking",
    "org": "non-commercial",
    "pro": "professionals",
    # These additional ccTLDs are included here even though they are not part
    # of ISO 3166. IANA has 5 reserved ccTLDs as described here:
    #
    # http://www.iso.org/iso/en/prods-services/iso3166ma/04background-on-iso-3166/iso3166-1-and-ccTLDs.html
    #
    # but I can't find an official list anywhere.
    #
    # Note that `uk' is the common practice country code for the United
    # Kingdom. AFAICT, the official `gb' code is routinely ignored!
    #
    # <D.M.Pick@qmw.ac.uk> tells me that `uk' was long in use before ISO3166
    # was adopted for top-level DNS zone names (although in the reverse order
    # like uk.ac.qmw) and was carried forward (with the reversal) to avoid a
    # large-scale renaming process as the UK switched from their old `Coloured
    # Book' protocols over X.25 to Internet protocols over IP.
    #
    # See <url:ftp://ftp.ripe.net/ripe/docs/ripe-159.txt>
    #
    # Also, `su', while obsolete is still in limited use.
    "ac": "Ascension Island",
    "gg": "Guernsey",
    "im": "Isle of Man",
    "je": "Jersey",
    "uk": "United Kingdom (common practice)",
    "su": "Soviet Union (still in limited use)",
    }
# Two-letter country codes mapped to country names.  The entries are in the
# form printed by parse() with normalization enabled (`-p file -o'), which the
# module docstring describes as suitable for pasting back into this file.
# Ordered alphabetically by country name, not by code.
countries = {
    "af": "Afghanistan",
    "al": "Albania",
    "dz": "Algeria",
    "as": "American Samoa",
    "ad": "Andorra",
    "ao": "Angola",
    "ai": "Anguilla",
    "aq": "Antarctica",
    "ag": "Antigua and Barbuda",
    "ar": "Argentina",
    "am": "Armenia",
    "aw": "Aruba",
    "au": "Australia",
    "at": "Austria",
    "az": "Azerbaijan",
    "bs": "Bahamas",
    "bh": "Bahrain",
    "bd": "Bangladesh",
    "bb": "Barbados",
    "by": "Belarus",
    "be": "Belgium",
    "bz": "Belize",
    "bj": "Benin",
    "bm": "Bermuda",
    "bt": "Bhutan",
    "bo": "Bolivia",
    "ba": "Bosnia and Herzegowina",
    "bw": "Botswana",
    "bv": "Bouvet Island",
    "br": "Brazil",
    "io": "British Indian Ocean Territory",
    "bn": "Brunei Darussalam",
    "bg": "Bulgaria",
    "bf": "Burkina Faso",
    "bi": "Burundi",
    "kh": "Cambodia",
    "cm": "Cameroon",
    "ca": "Canada",
    "cv": "Cape Verde",
    "ky": "Cayman Islands",
    "cf": "Central African Republic",
    "td": "Chad",
    "cl": "Chile",
    "cn": "China",
    "cx": "Christmas Island",
    "cc": "Cocos (Keeling) Islands",
    "co": "Colombia",
    "km": "Comoros",
    "cg": "Congo",
    "cd": "Congo, The Democratic Republic of the",
    "ck": "Cook Islands",
    "cr": "Costa Rica",
    "ci": "Cote D'Ivoire",
    "hr": "Croatia",
    "cu": "Cuba",
    "cy": "Cyprus",
    "cz": "Czech Republic",
    "dk": "Denmark",
    "dj": "Djibouti",
    "dm": "Dominica",
    "do": "Dominican Republic",
    "tp": "East Timor",
    "ec": "Ecuador",
    "eg": "Egypt",
    "sv": "El Salvador",
    "gq": "Equatorial Guinea",
    "er": "Eritrea",
    "ee": "Estonia",
    "et": "Ethiopia",
    "fk": "Falkland Islands (Malvinas)",
    "fo": "Faroe Islands",
    "fj": "Fiji",
    "fi": "Finland",
    "fr": "France",
    "gf": "French Guiana",
    "pf": "French Polynesia",
    "tf": "French Southern Territories",
    "ga": "Gabon",
    "gm": "Gambia",
    "ge": "Georgia",
    "de": "Germany",
    "gh": "Ghana",
    "gi": "Gibraltar",
    "gr": "Greece",
    "gl": "Greenland",
    "gd": "Grenada",
    "gp": "Guadeloupe",
    "gu": "Guam",
    "gt": "Guatemala",
    "gn": "Guinea",
    "gw": "Guinea-Bissau",
    "gy": "Guyana",
    "ht": "Haiti",
    "hm": "Heard Island and Mcdonald Islands",
    "va": "Holy See (Vatican City State)",
    "hn": "Honduras",
    "hk": "Hong Kong",
    "hu": "Hungary",
    "is": "Iceland",
    "in": "India",
    "id": "Indonesia",
    "ir": "Iran, Islamic Republic of",
    "iq": "Iraq",
    "ie": "Ireland",
    "il": "Israel",
    "it": "Italy",
    "jm": "Jamaica",
    "jp": "Japan",
    "jo": "Jordan",
    "kz": "Kazakstan",
    "ke": "Kenya",
    "ki": "Kiribati",
    "kp": "Korea, Democratic People's Republic of",
    "kr": "Korea, Republic of",
    "kw": "Kuwait",
    "kg": "Kyrgyzstan",
    "la": "Lao People's Democratic Republic",
    "lv": "Latvia",
    "lb": "Lebanon",
    "ls": "Lesotho",
    "lr": "Liberia",
    "ly": "Libyan Arab Jamahiriya",
    "li": "Liechtenstein",
    "lt": "Lithuania",
    "lu": "Luxembourg",
    "mo": "Macau",
    "mk": "Macedonia, The Former Yugoslav Republic of",
    "mg": "Madagascar",
    "mw": "Malawi",
    "my": "Malaysia",
    "mv": "Maldives",
    "ml": "Mali",
    "mt": "Malta",
    "mh": "Marshall Islands",
    "mq": "Martinique",
    "mr": "Mauritania",
    "mu": "Mauritius",
    "yt": "Mayotte",
    "mx": "Mexico",
    "fm": "Micronesia, Federated States of",
    "md": "Moldova, Republic of",
    "mc": "Monaco",
    "mn": "Mongolia",
    "ms": "Montserrat",
    "ma": "Morocco",
    "mz": "Mozambique",
    "mm": "Myanmar",
    "na": "Namibia",
    "nr": "Nauru",
    "np": "Nepal",
    "nl": "Netherlands",
    "an": "Netherlands Antilles",
    "nc": "New Caledonia",
    "nz": "New Zealand",
    "ni": "Nicaragua",
    "ne": "Niger",
    "ng": "Nigeria",
    "nu": "Niue",
    "nf": "Norfolk Island",
    "mp": "Northern Mariana Islands",
    "no": "Norway",
    "om": "Oman",
    "pk": "Pakistan",
    "pw": "Palau",
    "ps": "Palestinian Territory, Occupied",
    "pa": "Panama",
    "pg": "Papua New Guinea",
    "py": "Paraguay",
    "pe": "Peru",
    "ph": "Philippines",
    "pn": "Pitcairn",
    "pl": "Poland",
    "pt": "Portugal",
    "pr": "Puerto Rico",
    "qa": "Qatar",
    "re": "Reunion",
    "ro": "Romania",
    "ru": "Russian Federation",
    "rw": "Rwanda",
    "sh": "Saint Helena",
    "kn": "Saint Kitts and Nevis",
    "lc": "Saint Lucia",
    "pm": "Saint Pierre and Miquelon",
    "vc": "Saint Vincent and the Grenadines",
    "ws": "Samoa",
    "sm": "San Marino",
    "st": "Sao Tome and Principe",
    "sa": "Saudi Arabia",
    "sn": "Senegal",
    "sc": "Seychelles",
    "sl": "Sierra Leone",
    "sg": "Singapore",
    "sk": "Slovakia",
    "si": "Slovenia",
    "sb": "Solomon Islands",
    "so": "Somalia",
    "za": "South Africa",
    "gs": "South Georgia and the South Sandwich Islands",
    "es": "Spain",
    "lk": "Sri Lanka",
    "sd": "Sudan",
    "sr": "Suriname",
    "sj": "Svalbard and Jan Mayen",
    "sz": "Swaziland",
    "se": "Sweden",
    "ch": "Switzerland",
    "sy": "Syrian Arab Republic",
    "tw": "Taiwan, Province of China",
    "tj": "Tajikistan",
    "tz": "Tanzania, United Republic of",
    "th": "Thailand",
    "tg": "Togo",
    "tk": "Tokelau",
    "to": "Tonga",
    "tt": "Trinidad and Tobago",
    "tn": "Tunisia",
    "tr": "Turkey",
    "tm": "Turkmenistan",
    "tc": "Turks and Caicos Islands",
    "tv": "Tuvalu",
    "ug": "Uganda",
    "ua": "Ukraine",
    "ae": "United Arab Emirates",
    "gb": "United Kingdom",
    "us": "United States",
    "um": "United States Minor Outlying Islands",
    "uy": "Uruguay",
    "uz": "Uzbekistan",
    "vu": "Vanuatu",
    "ve": "Venezuela",
    "vn": "Viet Nam",
    "vg": "Virgin Islands, British",
    "vi": "Virgin Islands, U.S.",
    "wf": "Wallis and Futuna",
    "eh": "Western Sahara",
    "ye": "Yemen",
    "yu": "Yugoslavia",
    "zm": "Zambia",
    "zw": "Zimbabwe",
    }
  493. all = nameorgs.copy()
  494. all.update(countries)
  495. if __name__ == '__main__':
  496. main()