/regexp/re_search_list.py

https://github.com/horvatha/linux · Python · 131 lines · 95 code · 13 blank · 23 comment · 10 complexity · bd17f9f2ae039007bc20d3f31de9d332 MD5 · raw file

  1. #!/usr/bin/env python
  2. # coding: utf-8
r"""Try a regular expression on lists of sample strings.

1. You can start it from the command line::

    python re_search_list.py

2. You can import it, e.g. in a Jupyter notebook::

    from re_search_list import *
    search_list(
        r"^ http s? :// ([a-z-]+\.)+ [a-z]{2,3} (/[a-z-_\.]+)* $",
        urls )
    search_list?
"""
  13. from __future__ import print_function
  14. import re
  15. urls = """http://bocs.hu
  16. http://arek.uni-obuda.hu
  17. https://elearning.uni-obuda.hu
  18. http://bocs.hu/ado/index.html
  19. https://github.com/horvatha/linux/blob/master/regexp/keres_ipy.py
  20. ftp://ftp.sztaki.hu/pub/tex
  21. file:///var/www/index.html
  22. http://.hu
  23. http://arek..hu
  24. http://arek.uni-obuda.hungary
  25. http://bocs.hu//vimrc
  26. hhttp://bocs.hu
  27. """.split()
  28. cars_number = """ABC-001
  29. AAA-000
  30. TRA-548
  31. FERI-12
  32. EWING-1
  33. -01
  34. AAA-
  35. KISAPA-01
  36. KISAPA-0122333
  37. """.split()
  38. IPs = """192.168.3.26
  39. 1.5.222.54
  40. 192.168.3.026
  41. 1992.168.3.26
  42. 184..233.45
  43. 011.12.022.03
  44. 560.370.13.25
  45. """.split()
  46. emails = """
  47. SuperPandas@WesMcKinney.com
  48. horvaarp@morganstanley.com
  49. joci@csillagasz.at
  50. x_ypsilon@gmail.com
  51. very.common@example.gov
  52. bobebaba13@futrinka.mtv.hu
  53. BobeBaba13@futrinka.MTV.hu
  54. bobe@baba13@futrinka.mtv.hu
  55. abc.example.com
  56. bobebaba13@futrinka.mtv.
  57. bobebaba13@futrinka..hu
  58. """.split()
  59. passwords = """AlMA99
  60. 7DonaldDuck
  61. Game4Of2Thrones7
  62. 7years
  63. 7YEARS
  64. Univers42Question
  65. UniversQuestion""".split()
  66. print("Examples: cars_number, IPs, urls, emails")
  67. def search_list(regexp, words):
  68. """It searches a regular expression in more strings.
  69. regexp:
  70. The regular expression we are searching in.
  71. You can split into more lines (see re.VERBOSE).
  72. words:
  73. The list a words it searches in.
  74. Példa::
  75. search_list(
  76. r"^ http s? :// ([a-z-]+\.)+ [a-z]{2,3} (/[a-z-_\.]+)* $",
  77. urls )
  78. There might be useful to start the string by r in the form of
  79. r"pattern" because you don't need to excape the backslash (\) characters.
  80. """
  81. print('00 does not match at all, -- match a part,'
  82. ' ++ match the whole')
  83. for word in words:
  84. print(' ', word)
  85. search = re.search(regexp, word, re.VERBOSE)
  86. if search:
  87. if search.start() == 0 and search.end() == len(word):
  88. print('++ ', end='')
  89. else:
  90. print('-- ', end='')
  91. print(' '*search.start(), search.group(), sep='', end='')
  92. groupdict = search.groupdict()
  93. if groupdict:
  94. print(" "*(len(word)-search.end()),
  95. ", ".join([
  96. "{}={}".format(k, groupdict[k]) for k in groupdict
  97. ])
  98. )
  99. else:
  100. print()
  101. else:
  102. print("00")
  103. if __name__ == '__main__':
  104. search_list(
  105. r"^ http s? :// ([a-z-]+\.)+ [a-z]{2,3} (/[a-z-_\.]+)* $",
  106. urls
  107. )