PageRenderTime 43ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/classified/probe/pan/__init__.py

https://github.com/tehmaze/classified
Python | 252 lines | 231 code | 9 blank | 12 comment | 2 complexity | 0b7f305389ae3dc44b4c8e931e3940ca MD5 | raw file
Possible License(s): JSON
  1. # Parts are courtesy of `Ben Hodgson <http://benhodgson.com/>`_.
  2. # Python imports
  3. import logging
  4. import re
  5. # Project imports
  6. from classified.probe.base import Probe
  7. decimal_decoder = lambda s: int(s, 10)
  8. decimal_encoder = lambda i: str(i)
  9. def luhn_sum_mod_base(string, base=10, decoder=decimal_decoder):
  10. # Adapted from http://en.wikipedia.org/wiki/Luhn_algorithm
  11. digits = map(decoder, string)
  12. return (sum(digits[::-2]) +
  13. sum(map(lambda d: sum(divmod(2*d, base)), digits[-2::-2]))) % base
  14. def generate(string, base=10, encoder=decimal_encoder,
  15. decoder=decimal_decoder):
  16. '''
  17. Calculates the Luhn mod N check character for the given input string. This
  18. character should be appended to the input string to produce a valid Luhn
  19. mod N string in the given base.
  20. >>> value = '4205092350249'
  21. >>> generate(value)
  22. '1'
  23. When operating in a base other than decimal, encoder and decoder callables
  24. should be supplied. The encoder should take a single argument, an integer,
  25. and return the character corresponding to that integer in the operating
  26. base. Conversely, the decoder should take a string containing a single
  27. character and return its integer value in the operating base. Note that
  28. the mapping between values and characters defined by the encoder and
  29. decoder should be one-to-one.
  30. For example, when working in hexadecimal:
  31. >>> hex_alphabet = '0123456789abcdef'
  32. >>> hex_encoder = lambda i: hex_alphabet[i]
  33. >>> hex_decoder = lambda s: hex_alphabet.index(s)
  34. >>> value = 'a8b56f'
  35. >>> generate(value, base=16, encoder=hex_encoder, decoder=hex_decoder)
  36. 'b'
  37. >>> verify('a8b56fb', base=16, decoder=hex_decoder)
  38. True
  39. >>> verify('a8b56fc', base=16, decoder=hex_decoder)
  40. False
  41. '''
  42. d = luhn_sum_mod_base(string+encoder(0), base=base, decoder=decoder)
  43. if d != 0:
  44. d = base - d
  45. return encoder(d)
  46. def verify(string, base=10, decoder=decimal_decoder):
  47. '''
  48. Verifies that the given string is a valid Luhn mod N string.
  49. >>> verify('5105105105105100') # MasterCard test number
  50. True
  51. When operating in a base other than decimal, encoder and decoder callables
  52. should be supplied. The encoder should take a single argument, an integer,
  53. and return the character corresponding to that integer in the operating
  54. base. Conversely, the decoder should take a string containing a single
  55. character and return its integer value in the operating base. Note that
  56. the mapping between values and characters defined by the encoder and
  57. decoder should be one-to-one.
  58. For example, 'b' is the correct check character for the hexadecimal string
  59. 'a8b56f':
  60. >>> hex_decoder = lambda s: '0123456789abcdef'.index(s)
  61. >>> verify('a8b56fb', base=16, decoder=hex_decoder)
  62. True
  63. Any other check digit (in this example: 'c'), will result in a failed
  64. verification:
  65. >>> verify('a8b56fc', base=16, decoder=hex_decoder)
  66. False
  67. '''
  68. return luhn_sum_mod_base(string, base=base, decoder=decoder) == 0
  69. def mask(card_number, keep=4):
  70. '''
  71. Mask a card number so it's suitable for printing.
  72. '''
  73. keep *= -1
  74. return '*' * len(card_number[:keep]) + card_number[keep:]
  75. class PAN(Probe):
  76. '''
  77. Scan for Primary Account Number (PAN) data in (text) files.
  78. '''
  79. target = ('text/*',)
  80. format = '{filename}[{line:d}]: {company} {card_number_masked}'
  81. ignore = list('\x00-:\r\n')
  82. _check = {
  83. 'American Express': dict(
  84. length = [15],
  85. prefix = re.compile(r'^3[47]'),
  86. ),
  87. 'Diners Club EnRoute': dict(
  88. length = [15],
  89. prefix = re.compile(r'^(?:2014|2149)'),
  90. ),
  91. 'Diners Club Carte Blanche': dict(
  92. length = [14],
  93. prefix = re.compile(r'^30[1-5]'),
  94. ),
  95. 'Diners Club International': dict(
  96. length = [14],
  97. prefix = re.compile(r'^36'),
  98. ),
  99. 'Diners Club America': dict(
  100. length = [14],
  101. prefix = re.compile(r'^5[45]'),
  102. ),
  103. 'Discover': dict(
  104. length = [16],
  105. prefix = re.compile(r'^6011'),
  106. ),
  107. 'InstaPayment': dict(
  108. length = [16],
  109. prefix = re.compile(r'^63[7-9]'),
  110. ),
  111. 'JCB': dict(
  112. length = [16],
  113. prefix = re.compile(r'^(?:3088|3096|3112|3158|3337|352[89]|35[3-7][0-9]|358[0-9])'),
  114. ),
  115. 'Laser': dict(
  116. length = range(12, 20),
  117. prefix = re.compile(r'^(?:6304|6706|6771|6709)'),
  118. ),
  119. 'Maestro': dict(
  120. length = range(12, 20),
  121. prefix = re.compile(r'^(?:5018|5020|5038|5893|6304|6759|676[1-3]|0604)'),
  122. ),
  123. 'MasterCard': dict(
  124. length = [16],
  125. prefix = re.compile(r'^5[1-5]'),
  126. ),
  127. 'VISA': dict(
  128. length = [13, 16],
  129. prefix = re.compile(r'^4'),
  130. ),
  131. }
  132. def __init__(self, config, *args, **kwargs):
  133. super(PAN, self).__init__(config, *args, **kwargs)
  134. # Also keep track of per prefix size checks
  135. self._check_size = {}
  136. for company, checks in self._check.iteritems():
  137. for length in checks['length']:
  138. if length not in self._check_size:
  139. self._check_size[length] = {}
  140. self._check_size[length][company] = checks['prefix']
  141. # Ignores, if configured
  142. if self.config.has_option('probe:pan', 'ignore'):
  143. self.ignore = map(
  144. lambda char: chr(int(char, 16)),
  145. self.config.getlist('probe:pan', 'ignore')
  146. )
  147. def luhn_check(self, card_number):
  148. # Do the Luhn check
  149. if verify(card_number):
  150. return self.process_prefix(card_number)
  151. def process_prefix(self, card_number):
  152. length = len(card_number)
  153. if length in self._check_size:
  154. for company, prefix in self._check_size[length].iteritems():
  155. if prefix.match(card_number):
  156. return company
  157. def probe(self, item):
  158. # Keep track of consecutive ranges of numbers, stripping out potential
  159. # padding characters
  160. digits = []
  161. digits_min = min(self._check_size)
  162. digits_max = max(self._check_size)
  163. line = 0
  164. hits = 0
  165. try:
  166. limit = self.config.getint('probe:pan', 'limit')
  167. except self.config.Error:
  168. limit = 0
  169. prev = chr(0)
  170. for text in item.open():
  171. line += 1
  172. for char in text:
  173. # If we have a digit, append it to the digits list
  174. if char.isdigit():
  175. digits.append(int(char))
  176. if len(digits) >= digits_max:
  177. digits = digits[1:]
  178. if len(digits) >= digits_min:
  179. for x in xrange(digits_min, digits_max + 1):
  180. card_number = ''.join(map(str, digits[:x]))
  181. card_company = self.luhn_check(card_number)
  182. if card_company is not None:
  183. self.record(item,
  184. raw=text,
  185. line=line,
  186. card_number=card_number,
  187. card_number_masked=mask(card_number),
  188. company=card_company,
  189. )
  190. # Rotate digits
  191. digits = digits[x:]
  192. # Keep track of hits
  193. hits += 1
  194. if limit and hits >= limit:
  195. logging.debug('pan probe hit limit '
  196. 'of %d' % limit)
  197. return
  198. break
  199. # We ignore dashes, new lines and carriage returns
  200. elif char in self.ignore:
  201. # .. if we have two successive ignored characters, reset
  202. # the digits array
  203. if prev in self.ignore:
  204. digits = []
  205. # Otherwise we'll reset the buffer
  206. else:
  207. digits = []
  208. # Keep track of the previous character
  209. prev = char