PageRenderTime 66ms CodeModel.GetById 33ms RepoModel.GetById 1ms app.codeStats 0ms

/edx/analytics/tasks/util/tests/test_obfuscate_util.py

https://github.com/edx/edx-analytics-pipeline
Python | 676 lines | 587 code | 38 blank | 51 comment | 0 complexity | b9c30a62d54b0cb823c74a21911bf807 MD5 | raw file
Possible License(s): AGPL-3.0
  1. # -*- coding: utf-8 -*-
  2. """Tests for obfuscation utilities."""
  3. import textwrap
  4. from unittest import TestCase
  5. from ddt import data, ddt, unpack
  6. from mock import MagicMock, patch
  7. import edx.analytics.tasks.util.obfuscate_util as obfuscate_util
  8. from edx.analytics.tasks.util.tests.target import FakeTask
  9. @ddt
  10. class BackslashHandlingTestCase(TestCase):
  11. """Test encoding and decoding of backslashed data."""
  12. @data(
  13. 'This is a test.\\nThis is another.',
  14. )
  15. def test_needs_backslash_decoding(self, text):
  16. self.assertTrue(obfuscate_util.needs_backslash_decoding(text))
  17. @data(
  18. 'This is a test.\\nThis is another.\n',
  19. 'This is a test.',
  20. )
  21. def test_needs_no_backslash_decoding(self, text):
  22. self.assertFalse(obfuscate_util.needs_backslash_decoding(text))
  23. @data(
  24. 'This is a test.\\nThis is another.',
  25. # 'This is a test.\\\nThis is another.',
  26. 'This is a test.\\\\\\nThis is another.',
  27. u'This is a test.\\nThis is another.',
  28. u'This is a test.\\\\\\nThis is another.',
  29. u'This is a \u00e9 test.\\\\nThis is another.',
  30. )
  31. def test_decoding_round_trip(self, text):
  32. self.assertEquals(text, obfuscate_util.backslash_encode_value(obfuscate_util.backslash_decode_value(text)))
  33. @data(
  34. 'This is a test.\\nThis is another.',
  35. 'This is a test.\\\nThis is another.',
  36. 'This is a test.\\\\\\nThis is another.',
  37. u'This is a test.\\nThis is another.',
  38. u'This is a test.\\\\\\nThis is another.',
  39. u'This is a \u00e9 test.\\\\nThis is another.',
  40. )
  41. def test_encoding_round_trip(self, text):
  42. self.assertEquals(text, obfuscate_util.backslash_decode_value(obfuscate_util.backslash_encode_value(text)))
  43. @data(
  44. ('Test1\nTest2', 'Test1\\nTest2'),
  45. ('Test1\\nTest2', 'Test1\\\\nTest2'),
  46. ('Test1\\\nTest2', 'Test1\\\\\\nTest2'),
  47. )
  48. @unpack
  49. def test_encoding(self, text, expected_result):
  50. self.assertEquals(obfuscate_util.backslash_encode_value(text), expected_result)
  51. @data(
  52. ('Test1\\nTest2', 'Test1\nTest2'),
  53. ('Test1\\\nTest2', 'Test1\\\nTest2'),
  54. ('Test1\\\\nTest2', 'Test1\\nTest2'),
  55. ('Test1\\\\\nTest2', 'Test1\\\nTest2'),
  56. ('Test1\\\\\\nTest2', 'Test1\\\nTest2'),
  57. )
  58. @unpack
  59. def test_decoding(self, text, expected_result):
  60. self.assertEquals(obfuscate_util.backslash_decode_value(text), expected_result)
  61. @ddt
  62. class FindMatchesTestCase(TestCase):
  63. """Test finding matches for regular expressions in strings."""
  64. SIMPLE_CONTEXT = u"This is left context: {} This is right context."
  65. #####################
  66. # phone
  67. #####################
  68. @data(
  69. '555-1212',
  70. '555 1212',
  71. '555 - 1212',
  72. '555 1212',
  73. '1-201-555-1212',
  74. '201-555-1212',
  75. '+91 12 1234 1234',
  76. '+91-123-123456',
  77. '+46-123 123 123',
  78. '+57 123 123-4567',
  79. '+919876543210',
  80. '(202) 123-4567',
  81. '(202)123-4567',
  82. '+98-123-1234567',
  83. '+91-8765432101',
  84. '+63 123 123 12 12',
  85. '+63 1 123 1234',
  86. '1 849 123 1234',
  87. # These are failing...
  88. # '818.123.1234',
  89. # '(04) 123 1234',
  90. # '0300 123 1234',
  91. # '0800 123 123',
  92. # '0800 123 1234',
  93. # '010-5432-1234',
  94. )
  95. def test_find_simple_phone_numbers(self, text):
  96. raw = self.SIMPLE_CONTEXT.format(text)
  97. expected = self.SIMPLE_CONTEXT.format("<<PHONE_NUMBER>>")
  98. result = obfuscate_util.find_phone_numbers(raw)
  99. self.assertEquals(expected, result)
  100. @data(
  101. # These overflags need to be fixed:
  102. # 'http://link.springer.com/article/10.1007%2Fs13524-014-0321-x',
  103. # u' Mindfulness, 1 \\u2013 8. DOI 10.1007/s12671-015-0408-5\nMarlatt, A. G., & Donavon, D. M. (2005)',
  104. # 'ISBN 0-813 2410-7',
  105. # 'maybe 1,000 - 1100 words.',
  106. # 'www.digikey.com/product-detail/en/AST-030C0MR-R/668-1138-ND/1464877',
  107. # '450-1650-ND',
  108. '123.4567',
  109. )
  110. def test_skip_non_phone_numbers(self, text):
  111. raw = self.SIMPLE_CONTEXT.format(text)
  112. result = obfuscate_util.find_phone_numbers(raw)
  113. self.assertEquals(raw, result)
  114. @data(
  115. ('Cell #:240-123-4567', 'Cell #:<<PHONE_NUMBER>>'),
  116. )
  117. @unpack
  118. def test_find_phone_numbers_in_context(self, text, result):
  119. raw = self.SIMPLE_CONTEXT.format(text)
  120. expected = self.SIMPLE_CONTEXT.format(result)
  121. actual = obfuscate_util.find_phone_numbers(raw)
  122. self.assertEquals(expected, actual)
  123. #####################
  124. # email
  125. #####################
  126. @data(
  127. 'testuser@gmail.com',
  128. 'test.user@gmail.com',
  129. 'test_user@gmail.com',
  130. 'Ttestuser@gmail.com',
  131. 'test.user+foo@gmail.com',
  132. 'email1234-yes@yahoo.de',
  133. 'info@edx.org',
  134. 'Testers@yahoo.com',
  135. 'a.tester@hotmail.co.uk',
  136. 'a-person@a-domain.de',
  137. 'aperson@yahoo.co.in',
  138. 'aperson21@hotmail.com',
  139. 'aperson.cs@abc-def.edu.pk',
  140. '123456789@example.com',
  141. 'this_is_a_test@example.com',
  142. 't0e1s2t3@test.abc.dk',
  143. 'First.Last@example.co.uk',
  144. )
  145. def test_find_simple_emails(self, text):
  146. raw = self.SIMPLE_CONTEXT.format(text)
  147. expected = self.SIMPLE_CONTEXT.format("<<EMAIL>>")
  148. result = obfuscate_util.find_emails(raw)
  149. self.assertEquals(expected, result)
  150. @data(
  151. 'Twitter at @username.',
  152. # These overflags need to be fixed:
  153. # edx.org/asset-v1:edX+DemoX+1T2015+type@asset+block@File-Name.pdf
  154. # https://www.google.ro/maps/place/Romania/@45.1234567,25.0123456,4z/
  155. )
  156. def test_skip_non_emails(self, text):
  157. raw = self.SIMPLE_CONTEXT.format(text)
  158. result = obfuscate_util.find_emails(raw)
  159. self.assertEquals(raw, result)
  160. @data(
  161. (
  162. 'https://example.com/stream?q=user:acct:person@example.com',
  163. 'https://example.com/stream?q=user:acct:<<EMAIL>>'
  164. ),
  165. (' (mailto:course@organization.edu), ', ' (mailto:<<EMAIL>>), '),
  166. # At some point, it would be good to broaden the target for email
  167. # matching to include mail headers that include a full name. I.e.:
  168. # 'From: "First Last" <firstlast@example.com.au>' =>
  169. # 'From: <<EMAIL>>' instead of 'From: "First Last" <<<EMAIL>>>'.
  170. )
  171. @unpack
  172. def test_find_emails_in_context(self, text, result):
  173. raw = self.SIMPLE_CONTEXT.format(text)
  174. expected = self.SIMPLE_CONTEXT.format(result)
  175. actual = obfuscate_util.find_emails(raw)
  176. self.assertEquals(expected, actual)
  177. #####################
  178. # username
  179. #####################
  180. @data(
  181. ('username', 'username'),
  182. ('Username', 'username'),
  183. ('UserName', 'username'),
  184. ('USERNAME', 'username'),
  185. ('Username1234', 'username1234'),
  186. ('User-name', 'user-name'),
  187. # We do not expect there to be usernames with non-ascii characters.
  188. # ('Øyaland'.decode('utf8'), 'Øyaland'.decode('utf8')),
  189. # However, there are usernames that contain leading and/or trailing dashes, and this
  190. # confuses the regex as well. That's because the \b boundaries don't match properly.
  191. # ('-User-name-', '-user-name-'),
  192. )
  193. @unpack
  194. def test_find_simple_usernames(self, text, username):
  195. raw = self.SIMPLE_CONTEXT.format(text)
  196. expected = self.SIMPLE_CONTEXT.format("<<USERNAME>>")
  197. result = obfuscate_util.find_username(raw, username)
  198. self.assertEquals(expected, result)
  199. @data(
  200. ('ausername', 'username'),
  201. )
  202. @unpack
  203. def test_skip_non_usernames(self, text, username):
  204. raw = self.SIMPLE_CONTEXT.format(text)
  205. result = obfuscate_util.find_username(raw, username)
  206. self.assertEquals(raw, result)
  207. @data(
  208. ("My name is Username, I'm from A.", "username", "My name is <<USERNAME>>, I'm from A."),
  209. ("My name is Username12345. I'm from A.", "username12345", "My name is <<USERNAME>>. I'm from A."),
  210. ("My name is John (username), I'm from A.", "username", "My name is John (<<USERNAME>>), I'm from A."),
  211. (
  212. "My name is John (name=username), I'm from A.", "username",
  213. "My name is John (name=<<USERNAME>>), I'm from A."
  214. ),
  215. (
  216. "Visit my website http://username.com/this/link.", "username",
  217. "Visit my website http://<<USERNAME>>.com/this/link."
  218. ),
  219. ("http://instagram.com/username", "username", "http://instagram.com/<<USERNAME>>"),
  220. ("[http://twitter.com/username]", "username", "[http://twitter.com/<<USERNAME>>]"),
  221. )
  222. @unpack
  223. def test_find_usernames_in_context(self, text, username, result):
  224. raw = self.SIMPLE_CONTEXT.format(text)
  225. expected = self.SIMPLE_CONTEXT.format(result)
  226. actual = obfuscate_util.find_username(raw, username)
  227. self.assertEquals(expected, actual)
  228. #####################
  229. # fullname
  230. #####################
  231. @data(
  232. ('first', 'First Last'),
  233. ('last', 'First Last'),
  234. ('first last', 'First Last'),
  235. ('first1234', 'First1234 Last'),
  236. ('FIRST', 'First Last'),
  237. ('LAST', 'First Last'),
  238. ('First', 'first last'),
  239. ('First-name', 'First-name Last-name'),
  240. ('First', ' first last '),
  241. ('First', ' Last, First '),
  242. ('Last', ' Last, First '),
  243. ('Last', '**First Last**'),
  244. ('Last', '"First Last"'),
  245. ('Last', '"First" "Last"'),
  246. ('Last', 'First M. Last'),
  247. ('Last', 'First (Last)'),
  248. ("O'Last", "First O'Last"),
  249. ('MacLast', 'First MacLast'),
  250. ('Björk'.decode('utf8'), 'Björn Björk'.decode('utf8')),
  251. ('Olav Øyaland'.decode('utf8'), 'Olav Øyaland'.decode('utf8')),
  252. ('Øyaland'.decode('utf8'), 'Øyaland'.decode('utf8')),
  253. (u'T\u00e9st', u'my t\u00e9st'),
  254. # Nicknames and alternate forms of names are not found.
  255. # ('My name is Rob', 'Robert Last'),
  256. )
  257. @unpack
  258. def test_find_simple_fullnames(self, text, fullname):
  259. raw = self.SIMPLE_CONTEXT.format(text)
  260. expected = self.SIMPLE_CONTEXT.format("<<FULLNAME>>")
  261. result = obfuscate_util.find_user_fullname(raw, fullname)
  262. self.assertEquals(expected, result)
  263. @data(
  264. ('the best is what I am', 'I am the Beast'),
  265. ('the best is what I am', 'I am The Beast'),
  266. # These are example overflags:
  267. # ('a mark on your paper', 'Mark Last'),
  268. # ('he said you would come', 'Said Last'),
  269. # ('he felt great joy', 'Joy Last'),
  270. )
  271. @unpack
  272. def test_skip_non_fullnames(self, text, fullname):
  273. raw = self.SIMPLE_CONTEXT.format(text)
  274. result = obfuscate_util.find_user_fullname(raw, fullname)
  275. self.assertEquals(raw, result)
  276. @data(
  277. ('My name is First Last.', 'First Last', 'My name is <<FULLNAME>>.'),
  278. ('My name is First,I am taking', 'First Last', 'My name is <<FULLNAME>>,I am taking'),
  279. ('My name is First Last.', 'First Last', 'My name is <<FULLNAME>>.'),
  280. ('www.linkedin.com/pub/first-last-last2/1/200/190/', 'First Last-Last2',
  281. 'www.linkedin.com/pub/<<FULLNAME>>-<<FULLNAME>>/1/200/190/'),
  282. ('My name is First Maiden Last.', 'First Last', 'My name is <<FULLNAME>> Maiden <<FULLNAME>>.'),
  283. (u'This is a (T\u00e9st)', u'my t\u00e9st', 'This is a (<<FULLNAME>>)'),
  284. )
  285. @unpack
  286. def test_find_fullnames_in_context(self, text, fullname, result):
  287. raw = self.SIMPLE_CONTEXT.format(text)
  288. expected = self.SIMPLE_CONTEXT.format(result)
  289. actual = obfuscate_util.find_user_fullname(raw, fullname)
  290. self.assertEquals(expected, actual)
  291. @data(
  292. u'a/l',
  293. u'???',
  294. u'???', # Test caching by calling twice
  295. u'First \u201cNickname\u201d Last',
  296. u'user@example.com',
  297. u'Jos\ufffd Last',
  298. u'FIRST LAST S/O FATHERFIRST FATHERLAST,DOB:01/01/1961',
  299. )
  300. def test_reject_bad_fullnames(self, fullname):
  301. raw = self.SIMPLE_CONTEXT.format('dummy')
  302. result = obfuscate_util.find_user_fullname(raw, fullname)
  303. self.assertEquals(raw, result)
  304. self.assertTrue(fullname in obfuscate_util.REJECTED_NAMES)
  305. @ddt
  306. class FindMatchLogContextTestCase(TestCase):
  307. """Test finding matches for regular expressions in strings."""
  308. def setUp(self):
  309. super(FindMatchLogContextTestCase, self).setUp()
  310. patcher = patch('edx.analytics.tasks.util.obfuscate_util.log')
  311. self.mock_log = patcher.start()
  312. self.addCleanup(patcher.stop)
  313. LEFT_CONTEXT = u" This is left context: "
  314. RIGHT_CONTEXT = u" This is right context. "
  315. TEXT = '240-123-4567'
  316. TYPE = 'PHONE_NUMBER'
  317. def test_no_logging(self):
  318. raw = "{}{}{}".format(self.LEFT_CONTEXT, self.TEXT, self.RIGHT_CONTEXT)
  319. obfuscate_util.find_phone_numbers(raw)
  320. self.assertEquals(self.mock_log.info.call_count, 0)
  321. def find_logged_contexts(self, raw, log_context):
  322. """Pull out strings from context-logging, to allow that to be tested."""
  323. obfuscate_util.find_phone_numbers(raw, log_context=log_context)
  324. self.assertEquals(self.mock_log.info.call_count, 1)
  325. args, _ = self.mock_log.info.call_args
  326. self.assertEquals(len(args), 5)
  327. self.assertEquals(args[1], self.TYPE)
  328. self.assertEquals(args[3], self.TEXT)
  329. return (args[2], args[4])
  330. def test_no_context(self):
  331. raw = "{}{}{}".format(self.LEFT_CONTEXT, self.TEXT, self.RIGHT_CONTEXT)
  332. left, right = self.find_logged_contexts(raw, log_context=0)
  333. self.assertEquals(left, '')
  334. self.assertEquals(right, '')
  335. def test_all_context(self):
  336. raw = "{}{}{}".format(self.LEFT_CONTEXT, self.TEXT, self.RIGHT_CONTEXT)
  337. left, right = self.find_logged_contexts(raw, log_context=50)
  338. self.assertEquals(left, self.LEFT_CONTEXT)
  339. self.assertEquals(right, self.RIGHT_CONTEXT)
  340. def test_some_context(self):
  341. raw = "{}{}{}".format(self.LEFT_CONTEXT, self.TEXT, self.RIGHT_CONTEXT)
  342. left, right = self.find_logged_contexts(raw, log_context=10)
  343. self.assertEquals(left, self.LEFT_CONTEXT[-10:])
  344. self.assertEquals(right, self.RIGHT_CONTEXT[:10])
  345. def test_no_left_context(self):
  346. raw = "{}{}".format(self.TEXT, self.RIGHT_CONTEXT)
  347. left, right = self.find_logged_contexts(raw, log_context=10)
  348. self.assertEquals(left, "")
  349. self.assertEquals(right, self.RIGHT_CONTEXT[:10])
  350. def test_no_right_context(self):
  351. raw = "{}{}".format(self.LEFT_CONTEXT, self.TEXT)
  352. left, right = self.find_logged_contexts(raw, log_context=10)
  353. self.assertEquals(left, self.LEFT_CONTEXT[-10:])
  354. self.assertEquals(right, "")
  355. def test_multiple_matches(self):
  356. raw = "{}{}{}{}{}{}{}".format(
  357. self.LEFT_CONTEXT, self.TEXT, self.RIGHT_CONTEXT, self.TEXT,
  358. self.RIGHT_CONTEXT, self.TEXT, self.LEFT_CONTEXT
  359. )
  360. obfuscate_util.find_phone_numbers(raw, log_context=10)
  361. self.assertEquals(self.mock_log.info.call_count, 3)
  362. args_list = self.mock_log.info.call_args_list
  363. # Create context lists in reverse so they can be popped.
  364. left_contexts = [self.RIGHT_CONTEXT, self.RIGHT_CONTEXT, self.LEFT_CONTEXT]
  365. right_contexts = list(reversed(left_contexts))
  366. for args, _ in args_list:
  367. self.assertEquals(len(args), 5)
  368. self.assertEquals(args[1], self.TYPE)
  369. self.assertEquals(args[3], self.TEXT)
  370. left = left_contexts.pop()
  371. right = right_contexts.pop()
  372. self.assertEquals(args[2], left[-10:])
  373. self.assertEquals(args[4], right[:10])
  374. # Create default tables that use arbitrary leading whitespace before the first field value,
  375. # but tabs as the between-field delimiter. That way, fields can contain spaces (as in full names).
  376. DEFAULT_AUTH_USER = """
  377. 1 honor
  378. 2 audit
  379. 3 verified
  380. 4 staff
  381. """
  382. DEFAULT_AUTH_USER_PROFILE = """
  383. 1 Honor Student
  384. 2 Audit John
  385. 3 Verified Vera
  386. 4 Static Staff
  387. """
  388. def get_mock_user_info_requirements(auth_user=DEFAULT_AUTH_USER, auth_user_profile=DEFAULT_AUTH_USER_PROFILE):
  389. """Replacement for get_user_info_requirements for testing purposes."""
  390. def reformat_as_sqoop_output(string):
  391. """Convert tab-delimited data to look like Sqoop output."""
  392. return textwrap.dedent(string).strip().replace('\t', '\x01')
  393. # These keys need to return a fake Task whose output() is a fake Target.
  394. user_info_setup = {
  395. 'auth_user': FakeTask(value=reformat_as_sqoop_output(auth_user)),
  396. 'auth_userprofile': FakeTask(value=reformat_as_sqoop_output(auth_user_profile)),
  397. }
  398. return MagicMock(return_value=user_info_setup)
  399. class UserInfoTestCase(TestCase):
  400. """Test encoding and decoding of backslashed data."""
  401. def setUp(self):
  402. super(UserInfoTestCase, self).setUp()
  403. obfuscate_util.reset_user_info_for_testing()
  404. def test_default(self):
  405. user_info = obfuscate_util.UserInfoMixin()
  406. user_info.user_info_requirements = get_mock_user_info_requirements()
  407. self.assertDictEqual(user_info.user_by_id, {
  408. 1: {'user_id': 1, 'username': 'honor', 'name': 'Honor Student'},
  409. 2: {'user_id': 2, 'username': 'audit', 'name': 'Audit John'},
  410. 3: {'user_id': 3, 'username': 'verified', 'name': 'Verified Vera'},
  411. 4: {'user_id': 4, 'username': 'staff', 'name': 'Static Staff'},
  412. })
  413. self.assertDictEqual(user_info.user_by_username, {
  414. 'honor': {'user_id': 1, 'username': 'honor', 'name': 'Honor Student'},
  415. 'audit': {'user_id': 2, 'username': 'audit', 'name': 'Audit John'},
  416. 'verified': {'user_id': 3, 'username': 'verified', 'name': 'Verified Vera'},
  417. 'staff': {'user_id': 4, 'username': 'staff', 'name': 'Static Staff'},
  418. })
  419. def test_with_metadata(self):
  420. metadata = '{"start_time": "2015-01-01T10:30:35.123456", "end_time": "2016-01-01T-09:15:13.654321"}'
  421. my_auth_user = '{}1\ttest'.format(metadata)
  422. my_auth_user_profile = '{}1\tTest User'.format(metadata)
  423. user_info = obfuscate_util.UserInfoMixin()
  424. user_info.user_info_requirements = get_mock_user_info_requirements(
  425. auth_user=my_auth_user, auth_user_profile=my_auth_user_profile
  426. )
  427. self.assertDictEqual(user_info.user_by_id, {
  428. 1: {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  429. })
  430. self.assertDictEqual(user_info.user_by_username, {
  431. 'test': {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  432. })
  433. def test_with_older_auth_user(self):
  434. my_auth_user = '1\ttest'
  435. user_info = obfuscate_util.UserInfoMixin()
  436. user_info.user_info_requirements = get_mock_user_info_requirements(
  437. auth_user=my_auth_user,
  438. )
  439. self.assertDictEqual(user_info.user_by_id, {
  440. 1: {'user_id': 1, 'username': 'test', 'name': 'Honor Student'},
  441. })
  442. self.assertDictEqual(user_info.user_by_username, {
  443. 'test': {'user_id': 1, 'username': 'test', 'name': 'Honor Student'},
  444. })
  445. def test_with_older_auth_user_profile(self):
  446. my_auth_user_profile = '1\tTest User'
  447. user_info = obfuscate_util.UserInfoMixin()
  448. user_info.user_info_requirements = get_mock_user_info_requirements(
  449. auth_user_profile=my_auth_user_profile,
  450. )
  451. self.assertDictEqual(user_info.user_by_id, {
  452. 1: {'user_id': 1, 'username': 'honor', 'name': 'Test User'},
  453. 2: {'user_id': 2, 'username': 'audit'},
  454. 3: {'user_id': 3, 'username': 'verified'},
  455. 4: {'user_id': 4, 'username': 'staff'},
  456. })
  457. self.assertDictEqual(user_info.user_by_username, {
  458. 'honor': {'user_id': 1, 'username': 'honor', 'name': 'Test User'},
  459. 'audit': {'user_id': 2, 'username': 'audit'},
  460. 'verified': {'user_id': 3, 'username': 'verified'},
  461. 'staff': {'user_id': 4, 'username': 'staff'},
  462. })
  463. def test_with_bad_profile_entry(self):
  464. my_auth_user = """
  465. 1 test
  466. 2 test2
  467. """
  468. my_auth_user_profile = """
  469. 1 Test User
  470. two Second User
  471. """
  472. user_info = obfuscate_util.UserInfoMixin()
  473. user_info.user_info_requirements = get_mock_user_info_requirements(
  474. auth_user=my_auth_user, auth_user_profile=my_auth_user_profile
  475. )
  476. self.assertDictEqual(user_info.user_by_id, {
  477. 1: {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  478. 2: {'user_id': 2, 'username': 'test2'},
  479. })
  480. self.assertDictEqual(user_info.user_by_username, {
  481. 'test': {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  482. 'test2': {'user_id': 2, 'username': 'test2'},
  483. })
  484. def test_with_bad_user_entry(self):
  485. my_auth_user = """
  486. 1 test
  487. two test2
  488. """
  489. my_auth_user_profile = """
  490. 1 Test User
  491. 2 Second User
  492. """
  493. user_info = obfuscate_util.UserInfoMixin()
  494. user_info.user_info_requirements = get_mock_user_info_requirements(
  495. auth_user=my_auth_user, auth_user_profile=my_auth_user_profile
  496. )
  497. self.assertDictEqual(user_info.user_by_id, {
  498. 1: {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  499. })
  500. self.assertDictEqual(user_info.user_by_username, {
  501. 'test': {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  502. })
  503. def test_with_empty_username(self):
  504. my_auth_user = """
  505. 1 test
  506. 2 {intentional_blank}
  507. 3 test3
  508. """.format(intentional_blank=' ')
  509. my_auth_user_profile = """
  510. 1 Test User
  511. 2 Second User
  512. 3 Third User
  513. """
  514. user_info = obfuscate_util.UserInfoMixin()
  515. user_info.user_info_requirements = get_mock_user_info_requirements(
  516. auth_user=my_auth_user, auth_user_profile=my_auth_user_profile
  517. )
  518. self.assertDictEqual(user_info.user_by_id, {
  519. 1: {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  520. 3: {'user_id': 3, 'username': 'test3', 'name': 'Third User'},
  521. })
  522. self.assertDictEqual(user_info.user_by_username, {
  523. 'test': {'user_id': 1, 'username': 'test', 'name': 'Test User'},
  524. 'test3': {'user_id': 3, 'username': 'test3', 'name': 'Third User'},
  525. })
  526. @ddt
  527. class ObfuscatorTestCase(TestCase):
  528. """Test Obfuscator methods."""
  529. def test_obfuscate_email_before_username(self):
  530. obfuscator = obfuscate_util.Obfuscator()
  531. input_text = "Email is testusername@example.com."
  532. result = obfuscator.obfuscate_text(
  533. input_text,
  534. user_info={'username': ['testusername']},
  535. )
  536. self.assertEquals(result, 'Email is <<EMAIL>>.')
  537. result = obfuscator.obfuscate_text(
  538. input_text,
  539. user_info={'username': ['testusername']},
  540. entities=['username'],
  541. )
  542. self.assertEquals(result, 'Email is <<USERNAME>>@example.com.')
  543. @data(
  544. ('testusername@email.com', ['email']),
  545. ('testusername', ['username']),
  546. ('Test User', ['fullname']),
  547. ('213-456-7890', ['phone']),
  548. ('12345', ['userid']),
  549. ('240-123-4567', ['possible_phone']),
  550. ('phone number', ['phone_context']),
  551. ('my name', ['name_context']),
  552. ('write me', ['email_context']),
  553. ('https://www.facebook.com/someusername', ['facebook']),
  554. )
  555. @unpack
  556. def test_entity_flags(self, text, entities):
  557. obfuscator = obfuscate_util.Obfuscator()
  558. result = obfuscator.obfuscate_text(
  559. text,
  560. user_info={
  561. 'username': ['testusername'],
  562. 'user_id': [12345],
  563. 'name': ['Test User'],
  564. },
  565. entities=entities,
  566. )
  567. self.assertTrue(result.startswith('<<'))
  568. self.assertTrue(result.endswith('>>'))
  569. def test_obfuscate_list(self):
  570. input_obj = {
  571. 'key': [
  572. {'email': 'test@example.com'},
  573. {'dummy': 'unchanging value'},
  574. {'phone': '213-456-7890'},
  575. {'list': ['unchanging list value']},
  576. ]
  577. }
  578. obfuscator = obfuscate_util.Obfuscator()
  579. result = obfuscator.obfuscate_structure(input_obj, 'root')
  580. expected = {'key': [
  581. {'email': '<<EMAIL>>'},
  582. {'dummy': 'unchanging value'},
  583. {'phone': '<<PHONE_NUMBER>>'},
  584. {'list': ['unchanging list value']},
  585. ]}
  586. self.assertEquals(result, expected)
  587. @data(
  588. ('n321-4567\\\\n', None),
  589. ('\n321-4567\\\\n', '\n<<PHONE_NUMBER>>\\\\n'),
  590. ('\\n321-4567\\\\n', '\\n<<PHONE_NUMBER>>\\\\n'),
  591. ('\\\n321-4567\\\\n', '\\\n<<PHONE_NUMBER>>\\\\n'),
  592. ('\\\\n321-4567\\\\n', '\\\\n<<PHONE_NUMBER>>\\\\n'),
  593. )
  594. @unpack
  595. def test_backslash_decoding(self, text, expected):
  596. obfuscator = obfuscate_util.Obfuscator()
  597. result = obfuscator.obfuscate_structure(text, 'root')
  598. self.assertEquals(result, expected)
  599. @data(
  600. (u'321-4567'.encode('utf8'), u'<<PHONE_NUMBER>>'.encode('utf8')),
  601. ('Test User', u'<<FULLNAME>>'.encode('utf8')),
  602. (u'Olav Øyaland'.encode('utf8'), u'<<FULLNAME>>'.encode('utf8')),
  603. )
  604. @unpack
  605. def test_unicode_decoding(self, text, expected):
  606. obfuscator = obfuscate_util.Obfuscator()
  607. user_info = {'name': ['Olav Øyaland'.decode('utf8'), 'Test User']}
  608. result = obfuscator.obfuscate_structure(text, 'root', user_info=user_info)
  609. self.assertEquals(result, expected)