PageRenderTime 69ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/DDG/Spice/Zipcode.pm

http://github.com/duckduckgo/zeroclickinfo-spice
Perl | 152 lines | 111 code | 21 blank | 20 comment | 9 complexity | cb54f92170cbd28e78e6a46ef1a42897 MD5 | raw file
Possible License(s): Apache-2.0
  1. package DDG::Spice::Zipcode;
  2. # ABSTRACT: return the location and approximate area of a postal code.
  3. use strict;
  4. use DDG::Spice;
  5. use URI::Escape;
  # Instant Answer metadata and attribution.
  # Each statement calls a keyword that is in scope after `use DDG::Spice`
  # (presumably exported by it -- verify against the framework).
  # NOTE(review): `source` names MapQuest while the `spice to` URL in this
  # file points at where.yahooapis.com -- confirm which one is current.
  primary_example_queries('19201', '19301 Turkey');
  secondary_example_queries('zipcode 19087', 'postal code L3P 1T4');
  description('zip and postal code maps');
  name('Zipcode');
  icon_url('/i/mapq.st.ico');
  source('MapQuest');
  code_url('https://github.com/duckduckgo/zeroclickinfo-spice/blob/master/lib/DDG/Spice/Zipcode.pm');
  category('geography');
  topics('everyday', 'geography', 'travel');
  attribution(
      github  => ['https://github.com/almanackist', 'Almanackist'],
      twitter => ['https://twitter.com/cajoyce',    'Almanackist'],
  );
  # Triggers
  #
  # $zip_string matches the trigger words "zip", "post" or "postal",
  # optionally followed by whitespace and "code". The handler reuses it to
  # strip those words from the query.
  my $zip_string = qr/(zip|post(al)?)\s*(code)?/;

  # Fire on the trigger words, or on any run of 2-15 lower-case letters,
  # digits, dashes or whitespace. The pattern is unanchored, so the handler
  # is responsible for rejecting queries that are not postal codes.
  triggers(query_lc => qr/(?:$zip_string|[a-z\d\-\s]{2,15})/);

  # `from` captures a code made of A-Z, 0-9 and dashes, optionally followed
  # by "/" and an all-caps country part; `to` refers to them as $1 and $2.
  # The {{...}} tokens are presumably expanded by the Spice framework --
  # verify against its documentation.
  spice(from => '([A-Z0-9\-]+)(?:/([A-Z]+)?)?');
  spice(to => '"http://where.yahooapis.com/v1/places{{dollar}}and(.q($1,$2),.type(11));count=0?appid={{ENV{DDG_SPICE_ZIPCODE_APIKEY}}}&format=json&callback={{callback}}"');
  # Definitions
  #
  # %countries maps lower-case country names (including common aliases and
  # spellings without punctuation) to two-letter lower-case country codes.
  #
  # Changes relative to the previous revision of this table:
  #   * keys that had been split by hard line wraps ("french guiana",
  #     "palau", "turks & caicos islands") are whole again;
  #   * the mis-encoded spelling of "curaçao" is repaired;
  #   * the shadowed duplicate "korea" => "kp" is gone ("korea" still maps
  #     to "kr", which was the effective value); "north korea" and
  #     "south korea" are added so both codes are reachable;
  #   * "namibia" and "montserrat" are added next to the historical
  #     misspellings "nambia" and "monserrat", which are kept;
  #   * "democratic republic of congo" maps to "cd" instead of "cg", which
  #     is the code already used for "congo".
  #
  # NOTE(review): the "curaçao" key only matches a decoded query if this
  # literal is read as characters (`use utf8`) -- confirm whether the
  # framework enables that for this file.
  my %countries = (
      "afghanistan" => "af", "albania" => "al", "algeria" => "dz", "american samoa" => "as",
      "andorra" => "ad", "angola" => "ao", "anguilla" => "ai", "antarctica" => "aq",
      "antigua & barbuda" => "ag", "antigua and barbuda" => "ag", "antigua barbuda" => "ag",
      "antigua" => "ag", "argentina" => "ar", "armenia" => "am", "aruba" => "aw",
      "australia" => "au", "austria" => "at", "azerbaijan" => "az",

      "bahama" => "bs", "bahamas" => "bs", "bahrain" => "bh", "bangladesh" => "bd",
      "barbados" => "bb", "belarus" => "by", "belgium" => "be", "belize" => "bz",
      "benin" => "bj", "bermuda" => "bm", "bhutan" => "bt", "bolivia" => "bo",
      "bosnia and herzegovina" => "ba", "bosnia herzegovina" => "ba", "bosnia" => "ba",
      "botswana" => "bw", "bouvet island" => "bv", "brazil" => "br",
      "british indian ocean territory" => "io", "british virgin islands" => "vg",
      "brunei darussalam" => "bn", "brunei" => "bn", "bulgaria" => "bg",
      "burkina faso" => "bf", "burma" => "bu", "burundi" => "bi",

      "cambodia" => "kh", "cameroon" => "cm", "canada" => "ca", "cape verde" => "cv",
      "cayman islands" => "ky", "central african republic" => "cf", "chad" => "td",
      "chile" => "cl", "china" => "cn", "christmas island" => "cx", "cocos islands" => "cc",
      "colombia" => "co", "comoros" => "km", "congo" => "cg", "cook islands" => "ck",
      "costa rica" => "cr", "cote d'ivoire" => "ci", "cote divoire" => "ci",
      "croatia" => "hr", "cuba" => "cu", "curacao" => "cw", "curaçao" => "cw",
      "cyprus" => "cy", "czech republic" => "cz", "czechoslovakia" => "cs",

      "democratic republic of congo" => "cd", "democratic yemen" => "yd", "denmark" => "dk",
      "djibouti" => "dj", "dominica" => "dm", "dominican republic" => "do",

      "east timor" => "tp", "ecuador" => "ec", "egypt" => "eg", "el salvador" => "sv",
      "equatorial guinea" => "gq", "eritrea" => "er", "estonia" => "ee", "ethiopia" => "et",

      "falkland islands" => "fk", "faroe islands" => "fo", "fiji" => "fj", "finland" => "fi",
      "france metropolitan" => "fx", "france" => "fr", "french guiana" => "gf",
      "french polynesia" => "pf", "french southern and antarctic territories" => "tf",

      "gabon" => "ga", "gambia" => "gm", "georgia" => "ge", "germany" => "de",
      "ghana" => "gh", "gibraltar" => "gi", "great britain" => "gb", "greece" => "gr",
      "greenland" => "gl", "grenada" => "gd", "guadeloupe" => "gp", "guam" => "gu",
      "guatemala" => "gt", "guinea bissau" => "gw", "guinea" => "gn",
      "guinea-bissau" => "gw", "guyana" => "gy",

      "haiti" => "ht", "heard & mcdonald islands" => "hm",
      "heard and mcdonald islands" => "hm", "heard mcdonald islands" => "hm",
      "honduras" => "hn", "hong kong" => "hk", "hungary" => "hu",

      "iceland" => "is", "india" => "in", "indonesia" => "id", "iran" => "ir",
      "iraq" => "iq", "ireland" => "ie", "israel" => "il", "italy" => "it",

      "jamaica" => "jm", "japan" => "jp", "jordan" => "jo",

      "kazakhstan" => "kz", "kenya" => "ke", "kiribati" => "ki", "korea" => "kr",
      "kuwait" => "kw", "kyrgyzstan" => "kg",

      "laos" => "la", "latvia" => "lv", "lebanon" => "lb", "lesotho" => "ls",
      "liberia" => "lr", "libya" => "ly", "libyan arab jamahiriya" => "ly",
      "liechtenstein" => "li", "lithuania" => "lt", "luxembourg" => "lu",

      "macedonia" => "mk", "macau" => "mo", "madagascar" => "mg", "malawi" => "mw",
      "malaysia" => "my", "maldives" => "mv", "mali" => "ml", "malta" => "mt",
      "marshall islands" => "mh", "martinique" => "mq", "mauritania" => "mr",
      "mauritius" => "mu", "mayotte" => "yt", "mexico" => "mx", "micronesia" => "fm",
      "moldova" => "md", "monaco" => "mc", "mongolia" => "mn", "monserrat" => "ms",
      "montserrat" => "ms", "morocco" => "ma", "mozambique" => "mz", "myanmar" => "mm",

      "nambia" => "na", "namibia" => "na", "nauru" => "nr", "nepal" => "np",
      "netherlands antilles" => "an", "netherlands" => "nl", "neutral zone" => "nt",
      "new caledonia" => "nc", "new zealand" => "nz", "newzealand" => "nz",
      "nicaragua" => "ni", "niger" => "ne", "nigeria" => "ng", "niue" => "nu",
      "norfolk island" => "nf", "north korea" => "kp", "northern mariana islands" => "mp",
      "norway" => "no",

      "oman" => "om",

      "pakistan" => "pk", "palau" => "pw", "panama" => "pa", "papua new guinea" => "pg",
      "paraguay" => "py", "peru" => "pe", "philippines" => "ph", "pitcairn" => "pn",
      "poland" => "pl", "portugal" => "pt", "puerto rico" => "pr",

      "qatar" => "qa",

      "reunion" => "re", "romania" => "ro", "russia" => "ru", "russian federation" => "ru",
      "rwanda" => "rw",

      "saint helena" => "sh", "saint kitts and nevis" => "kn", "saint kitts nevis" => "kn",
      "saint kitts" => "kn", "saint lucia" => "lc", "saint pierre miquelon" => "pm",
      "saint vincent grenadines" => "vc", "saint vincent" => "vc", "samoa" => "ws",
      "san marino" => "sm", "sao tome & principe" => "st", "sao tome and principe" => "st",
      "sao tome principe" => "st", "saudi arabia" => "sa", "senegal" => "sn",
      "seychelles" => "sc", "sierra leone" => "sl", "singapore" => "sg", "slovakia" => "sk",
      "slovenia" => "si", "solomon islands" => "sb", "somalia" => "so",
      "south africa" => "za", "south georgia and the south sandwich islands" => "gs",
      "south georgia" => "gs", "south korea" => "kr", "south sandwich islands" => "gs",
      "spain" => "es", "sri lanka" => "lk", "srilanka" => "lk", "st helena" => "sh",
      "st kitts and nevis" => "kn", "st kitts nevis" => "kn", "st kitts" => "kn",
      "st pierre & miquelon" => "pm", "st pierre and miquelon" => "pm",
      "st pierre miquelon" => "pm", "st vincent & the grenadines" => "vc",
      "st vincent and the grenadines" => "vc", "st vincent grenadines" => "vc",
      "st vincent" => "vc", "sudan" => "sd", "suriname" => "sr",
      "svalbard & jan mayen islands" => "sj", "svalbard and jan mayen islands" => "sj",
      "svalbard jan mayen islands" => "sj", "swaziland" => "sz", "sweden" => "se",
      "switzerland" => "ch", "syria" => "sy", "syrian arab republic" => "sy",

      "taiwan province of china" => "tw", "taiwan" => "tw", "tajikistan" => "tj",
      "tanzania" => "tz", "thailand" => "th", "togo" => "tg", "tokelau" => "tk",
      "tonga" => "to", "trinidad & tobago" => "tt", "trinidad and tobago" => "tt",
      "trinidad tobago" => "tt", "tunisia" => "tn", "turkey" => "tr",
      "turkmenistan" => "tm", "turks & caicos islands" => "tc",
      "turks and caicos islands" => "tc", "turks caicos islands" => "tc",
      "turks caicos" => "tc", "tuvalu" => "tv",

      "uganda" => "ug", "ukraine" => "ua", "united arab emirates" => "ae",
      "united kingdom" => "gb", "united republic of tanzania" => "tz",
      "united states minor outlying islands" => "um", "united states of america" => "us",
      "united states virgin islands" => "vi", "united states" => "us", "uruguay" => "uy",
      "us minor outlying islands" => "um", "us of a" => "us", "us virgin islands" => "vi",
      "us" => "us", "usa" => "us", "uzbekistan" => "uz",

      "vanuatu" => "vu", "vatican city" => "va", "vatican" => "va", "venezuela" => "ve",
      "viet nam" => "vn", "vietnam" => "vn", "virgin islands" => "vg",

      "wallis & futuna islands" => "wf", "wallis and futuna islands" => "wf",
      "wallis futuna islands" => "wf", "western sahara" => "eh",

      "yemen" => "ye", "yugoslavia" => "yu",

      "zaire" => "zr", "zambia" => "zm", "zimbabwe" => "zw",
  );

  # Country names in the order the handler tries them: longest first, so a
  # compound name ("guinea bissau", "united states of america") is always
  # tested before a shorter name it contains ("guinea", "united states").
  # Ties are broken alphabetically to keep the order deterministic.
  my @names = sort { length($b) <=> length($a) or $a cmp $b } keys %countries;
  # Handle statement
  #
  # Receives the lower-cased query in $_ and decides whether it looks like a
  # postal code, optionally accompanied by a country name from %countries.
  #
  # Returns an empty list when the query is rejected. Otherwise returns a
  # two-element list: the upper-cased postal code with whitespace removed,
  # and the upper-cased country code, or 'ZZ' when no country name was
  # found in the query.
  handle query_lc => sub {
      # Work on a copy so the caller's $_ is never modified.
      my $query = $_;

      # Remove the trigger words; give up if nothing else was typed.
      $query =~ s/\s*$zip_string\s*//g;
      return if $query eq '';

      # Look for a country name. Only the first name that matches (in @names
      # order) is used. It is matched and removed as a whole word, and
      # quoted so that names are always treated as literal text.
      my $country;
      for my $name (@names) {
          next unless $query =~ m/\b\Q$name\E\b/;
          $country = $name;
          $query =~ s/\s*\b\Q$name\E\b\s*//g;
          last;
      }

      # Disqualifiers ------------------------------------------------------

      # A digit is required, except for the four-letter edge case
      # (two letters, optional whitespace, two letters).
      return unless ($query =~ m/\d/ || $query =~ m/^\b[a-z]{2}\s*[a-z]{2}\b$/);

      # Too many digits at the end of the query.
      return if $query =~ m/\d{8,}$/;

      # Too many trailing letters, unless the query contains the
      # digits-text-digits shape tested here.
      return if $query =~ m/[a-z]{5,}$/
             && $query !~ m/(\d\d [a-z\d\-]+ \d\d)/x;

      # Known false triggers (more can be added...).
      return if $query =~ m/
          \b
          (?:
              4 \s* chan
            | \d+ \s* (?:st|nd|rd|th)
            | mp[34]
            | (?:19|20)\d{2} \s+ v(?:4|6|8|12)
            | calend[ae]r
          )
          \b
      /x;

      # Postal code shapes ---------------------------------------------------
      # Whatever remains must match one of the known shapes in full; the
      # whole match becomes the postal code.
      my $code;

      if ($query =~ m/^
          (
              [a-z]{2} \s* [a-z]{2} \b
            | \d{2} [a-z]+ [a-z\s]+ \d{2}
            | \d{3} \- \d{3} \- \d
            | \d{6} \s* \d{3} \- \d{3}
          )
      $/x) {
          $code = $1;
      }
      # Alphanumeric postal code patterns with dashes in them
      elsif ($query =~ m/^ ( (?: \d{1,4} | [a-z]{2} ) \d\-\d\d \d{1,4} ) $/x) {
          $code = $1;
      }
      # Alphanumeric postal code patterns with spaces in them,
      # plus alphanumeric edge cases
      elsif ($query =~ m/^
          (
              \d{4} \s* \d{3,4}
            | \d{4} \s* [a-z]{1,2}
            | [a-z] \d [a-z] \s* \d [a-z] \d
            | [a-z]{3} \s* \d{3}
            | [a-z]{2} \s* \d{2}
            | \d{3} \s* \d{2,3}
            | (?:
                  [a-z]{2} \d (?: [a-z] | \d )
                | [a-z] (?: \d{2} | \d[a-z] | [a-z]\d )
                | [a-z]\d
              ) \s* \d [a-z]{2}
          )
      $/x) {
          $code = $1;
      }
      # Alphanumeric postal code patterns without spaces in them
      elsif ($query =~ m/^
          (
              [a-z]{1,3} \d{3,6}
            | [a-z]{4} \d [a-z]{2}
            | [a-z]{5} \d{2}
          )
      $/x) {
          $code = $1;
      }
      # Numeric postal code patterns (optional dash for searches like "19301-")
      elsif ($query =~ m/^ ( \d{2,7} ) \-? $/x) {
          $code = $1;
      }
      # No postal code found
      else {
          return;
      }

      # The patterns above allow any whitespace inside a code, so strip all
      # of it (not only literal spaces) before building the result.
      $code =~ s/\s+//g;

      # No country given: default to ZZ for a global search.
      my $country_code = defined($country) ? uc($countries{$country}) : 'ZZ';

      # Escape the code the same way whether or not a country was given.
      return (uri_escape(uc $code), $country_code);
  };
  128. 1;