PageRenderTime 126ms CodeModel.GetById 20ms RepoModel.GetById 3ms app.codeStats 0ms

/lib/DDG/Spice/PackageTracking.pm

http://github.com/duckduckgo/zeroclickinfo-spice
Perl | 306 lines | 205 code | 52 blank | 49 comment | 28 complexity | ab7fbd0768fa5e5eb43dcc8fad03dc48 MD5 | raw file
Possible License(s): Apache-2.0
  1. package DDG::Spice::PackageTracking;
  2. use strict;
  3. use DDG::Spice;
  4. use Text::Trim;
  5. use List::Util qw(uniq);
  6. use YAML::XS 'LoadFile';
  # ---------------------------------------------------------------------
  # Spice endpoint configuration
  # ---------------------------------------------------------------------
  spice is_cached           => 1;
  spice proxy_cache_valid   => "200 1m";
  spice wrap_jsonp_callback => 1;

  spice from => '([^/]+)/?.+?$';
  spice to   => 'https://api.packagetrackr.com/ddg/v1/track/simple?n=$1&api_key={{ENV{DDG_SPICE_PACKAGETRACKR_API_KEY}}}';

  spice upstream_timeouts => +{
      connect => '100ms',
      send    => '100ms',
      read    => '500ms',
  };

  # ---------------------------------------------------------------------
  # Carrier names and tracking-related words
  # ---------------------------------------------------------------------

  # Carrier names come from the shared YAML file and are ordered longest
  # first (presumably so that longer names win inside the alternation
  # built below -- confirm against carriers.yml).
  my @carriers = sort { length $b <=> length $a } @{LoadFile(share('carriers.yml'))};

  # Words that indicate a package tracking query.
  my $triggers_re = qr/(package|parcel)|track(ing)?( num(ber)?)?|shipping status/i;

  # Alternation of every carrier name; a space inside a name becomes \s*
  # so the name also matches when typed without spaces
  # (e.g. "royal mail" OR "royalmail").
  (my $carriers_re = join "|", @carriers) =~ s/ /\\s*/g;

  # Matches any carrier name or tracking word; the query handler uses it
  # to strip those words out of the query.
  my $strip_re = qr/\b(?:$carriers_re|$triggers_re)\b/i;

  ### Regex triggers for queries containing carrier names
  ### or words related to package tracking

  # Carrier names anywhere in the query
  triggers query_lc => qr/\b(?:$carriers_re)\b/i;

  # Package words at the very start or very end of the query
  triggers query_lc => qr/^$triggers_re .+|.+ $triggers_re$/i;
  ### Regex triggers for queries only containing a tracking number
  #
  # Maps a carrier key to an anchored, case-insensitive pattern describing the
  # tracking number formats recognised for that carrier. Each pattern is
  # matched against a query with whitespace and dashes removed, so it has to
  # cover the whole string.
  my %patterns_re = (

      ## UPS
      # Source: https://www.ups.com/content/ca/en/tracking/help/tracking/tnh.html
      # To Do: Some additional formats exist
      ups => qr/^
              (?:
                  1Z[0-9A-Z]{16} |
                  \d{9} |
                  \d{12} |
                  T\d{10}
              )
              $/xi,

      ## Fedex
      # Source: https://www.trackingex.com/fedex-tracking.html
      #         https://www.trackingex.com/fedexuk-tracking.html
      #         https://www.trackingex.com/fedex-poland-domestic-tracking.html
      fedex => qr/^
              \d{12,22}
              $/xi,

      ## USPS
      # Source: https://tools.usps.com/go/TrackConfirmAction!input.action
      usps => qr/^
              (?:
                  (94001|92055|94073|93033|92701|92088|92021)\d{17} |
                  82\d{8} |
                  [A-Z]{2}\d{9}US
              )
              $/xi,

      ## Parcelforce
      # Source: http://www.parcelforce.com/help-and-advice/sending-worldwide/tracking-number-formats
      # Note: May need to restrict pattern #3 if overtriggering
      #       https://github.com/duckduckgo/zeroclickinfo-goodies/issues/3900
      parcelforce => qr/^
              (?:
                  [A-Z]{2}\d{7} |
                  [A-Z]{4}\d{10} |
                  [A-Z]{2}\d{9}[A-Z]{2} |
                  \d{12}
              )
              $/xi,

      ## CanadaPost
      # Source: https://www.canadapost.ca/web/en/kb/details.page?article=learn_about_tracking&cattype=kb&cat=receiving&subcat=tracking
      canadapost => qr/^
              (?:
                  [\d]{12} |
                  [\d]{16} |
                  [A-Z]{2}\d{9}CA
              )
              $/xi,

      ## DHL
      # The letter prefixes are real character classes. They were previously
      # written with an escaped "[" and therefore only matched a literal
      # "[a-zA-Z" followed by "]" characters, never actual letters.
      dhl => qr/^
              (?:
                  \d{10} |
                  [a-zA-Z]{5}\d{10} |
                  [a-zA-Z]{3}\d{20}
              )
              $/xi,

      ## HKDK
      hkdk => qr/^
              (?:
                  [a-z]{2}\d{9}(?:hk|dk)
              )
              $/xi,

      ## IPS
      ips => qr/^
              (?:
                  E[MA]\d{9}(?:IN|HR)
              )
              $/xi,

      ## LaserShip
      lasership => qr/^
              (?:
                  l[a-z]\d{8}
              )
              $/xi,

      ## OnTrac
      ontrac => qr/^
              (?:
                  [cd]\d{14}
              )
              $/xi
  );
  # Register every carrier pattern as a trigger on the query with its
  # whitespace and dashes removed, so a bare tracking number can trigger.
  triggers query_nowhitespace_nodash => $_ for values %patterns_re;
  # Query handler. Works on the query in $_:
  #   1. strips carrier names and tracking words,
  #   2. rejects remainders that do not look like a tracking number,
  #   3. returns the cleaned number plus a comma-separated list of carrier
  #      keys whose pattern matches it (and the carrier named in the query,
  #      if any).
  # Returns nothing when the query should not be answered.
  handle query => sub {
      # remove trigger words & carrier names
      s/\b$strip_re\b//ixg;
      trim($_);
      return unless $_;

      # remainder should be numeric or alphanumeric, not alpha
      return if /^[A-Z\-\s]+$/i;

      # ignore searches for carrier holiday dates
      # e.g. "ups holidays 2017"
      return if /\bholidays?\b/i;

      # ignore remainder with 2+ words
      return if /\b[A-Z]+\s+[A-Z]+\b/i;

      # ignore phone numbers
      return if /^(\d(-|\s))?\d{3}(-|\s)\d{3}(-|\s)\d{4}$/;
      return if /^\d{5} \d{7}$/;
      return if /^\d{4} \d{3} \d{3}$/;

      # ignore address lookup
      return if /^#\d+ [A-Z\s]+$/i;

      # ignore Microsoft knowledge base codes and Luhn Check queries
      # e.g. KB2553549
      return if /^(kb|luhn)\s?\d+/i;

      # ignore pattern: "word number word"
      # e.g. ups building 2 worldport
      return if /\b[A-Z]+ \d{1,8} [A-Z]+\b/i;

      # ignore numbers that start with 0
      return if /^0.+/i;

      # remove spaces/dashes
      s/(\s|-)//g;

      # ignore repeated strings of single digit (e.g. 0000 0000 0000)
      return if /^(\d)\1+$/;

      # remainder should be 6-30 characters long
      return unless /^[A-Z0-9]{6,30}$/i;

      # ignore if isbn is present
      return if /isbn/i;

      # let query through if a carrier is mentioned
      # this allows the fallback prompt in cases where an invalid code is given
      my @possible_carriers;
      if ($req->{query_lc} =~ /\b($carriers_re)\b/) {
          push @possible_carriers, $1;
      }
      else {
          # Validate likely UPS tracking numbers.
          # A bare 12-digit number is skipped because that format is shared
          # with several other carriers. The test is anchored so that a 1Z
          # number which merely contains 12 consecutive digits is still
          # checksum-validated.
          # NOTE(review): the 9-digit and T-prefixed UPS formats also go
          # through is_valid_ups, which always drops the first two
          # characters -- confirm that this is intended.
          if (/$patterns_re{ups}/ && !/^\d{12}$/) {
              return unless is_valid_ups($_);
          }
          # Validate DHL tracking numbers.
          # Only all-digit numbers carry the mod-7 check digit that
          # is_valid_dhl verifies. The 82\d{8} exclusion keeps numbers that
          # are also a USPS format from being rejected here.
          elsif (/$patterns_re{dhl}/ && /^\d+$/ && !/82\d{8}/) {
              return unless is_valid_dhl($_);
          }
      }

      # Collect every carrier whose pattern matches the cleaned number.
      # A plain loop over the keys is used instead of each(), which keeps
      # hidden iterator state on the hash between calls.
      for my $carrier (keys %patterns_re) {
          push @possible_carriers, $carrier if /$patterns_re{$carrier}/;
      }

      @possible_carriers = uniq sort @possible_carriers;
      return $_, (join ',', @possible_carriers);
  };
  # Validate the check digit of an all-numeric DHL tracking number.
  #
  # The number formed by every digit except the last, taken modulo 7, must
  # equal the last digit.
  #
  # Parameters:
  #   $package_number - the tracking number; when called without arguments
  #                     the current $_ is used, which keeps older call
  #                     sites that relied on $_ working.
  # Returns:
  #   1 when the check digit matches, 0 otherwise (including undefined,
  #   non-numeric or single-character input).
  sub is_valid_dhl {
      my $package_number = @_ ? $_[0] : $_;

      return 0 unless defined $package_number;
      return 0 unless $package_number =~ /\A[0-9]{2,}\z/;

      my $check_digit = substr $package_number, -1;
      my $payload     = substr $package_number, 0, -1;

      # Fold the remainder digit by digit so that arbitrarily long numbers
      # never depend on how Perl handles integers wider than a native word.
      my $remainder = 0;
      for my $digit ( split //, $payload ) {
          $remainder = ( $remainder * 10 + $digit ) % 7;
      }

      return $remainder == $check_digit ? 1 : 0;
  }
  # Numeric value of each letter when computing the UPS check digit.
  # Values start at A => 2 and cycle through 0-9.
  my %ups_checksum = (
      'A' => 2, 'B' => 3, 'C' => 4, 'D' => 5, 'E' => 6,
      'F' => 7, 'G' => 8, 'H' => 9, 'I' => 0, 'J' => 1,
      'K' => 2, 'L' => 3, 'M' => 4, 'N' => 5, 'O' => 6,
      'P' => 7, 'Q' => 8, 'R' => 9, 'S' => 0, 'T' => 1,
      'U' => 2, 'V' => 3, 'W' => 4, 'X' => 5, 'Y' => 6,
      'Z' => 7,
  );

  # Validate the check digit of a UPS tracking number.
  #
  # The first two characters (the "1Z" prefix) are dropped. The remaining
  # characters are converted to numbers (letters via %ups_checksum), the
  # values at odd positions are summed, the values at even positions are
  # summed and doubled, and the total modulo 10 must equal the final
  # character. The final character itself takes part in the sum.
  #
  # Parameters:
  #   $package_number - the tracking number, any letter case; when called
  #                     without arguments the current $_ is used, which keeps
  #                     older call sites that relied on $_ working.
  # Returns:
  #   1 when the check digit matches, 0 otherwise.
  sub is_valid_ups {
      my $package_number = @_ ? $_[0] : $_;
      return 0 unless defined $package_number;

      my @chars = split //, uc $package_number;

      # Skip 1Z
      splice @chars, 0, 2;
      return 0 unless @chars;

      my $odd_sum  = 0;
      my $even_sum = 0;
      for my $index ( 0 .. $#chars ) {
          my $char  = $chars[$index];
          my $value = exists $ups_checksum{$char} ? $ups_checksum{$char} : $char;

          # $index is zero-based, so an odd index is an even position.
          if ( $index % 2 ) {
              $even_sum += $value;
          }
          else {
              $odd_sum += $value;
          }
      }

      my $checksum = ( $odd_sum + 2 * $even_sum ) % 10;
      return $checksum eq $chars[-1] ? 1 : 0;
  }
  254. 1;