PageRenderTime 24ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/contrib/sanitize-ejabberd-apache-logs/sanitize-ejabberd-apache-logs.py

https://bitbucket.org/clarifiednetworks/vsroom/
Python | 87 lines | 65 code | 18 blank | 4 comment | 24 complexity | a06c6c32d75c118de6ee27c0b619f5c2 MD5 | raw file
  1. import re,sys,time
  # Matches a "YYYY-MM-DD HH:MM:SS ===" timestamp in an ejabberd log line;
  # group 1 is the timestamp itself.
  ej_time = re.compile(r'(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d) ===')

  # Patterns for the log events worth keeping. Each one has a single capture
  # group holding the text that is reported for a matching line. Order
  # matters: the first pattern that matches a line wins.
  matchers = [
      re.compile(r'(Accepted authentication for \S+)'),
      re.compile(r'(Failed authentication for \S+)'),
      re.compile(r'(Accepted connection {{.*?})'),
      re.compile(r'(Opened.*)'),
      re.compile(r'(Close session.*)'),
      re.compile(r'(\S+ MUC room.*)'),
      re.compile(r'(user_available.*)'),
      re.compile(r'(unset_presence.*)'),
  ]
  def default_match(rex, row, isotime):
      """Return the text captured by group 1 of *rex* in *row*, or False.

      rex     -- compiled pattern (or pattern string) with at least one
                 capture group.
      row     -- the log line to search.
      isotime -- not used here; kept so existing callers keep working.
      """
      match = re.search(rex, row)
      if match is None:
          return False
      return match.group(1)
  def sanitize_ejabberd(ej_fd):
      """Yield (isotime, text) pairs extracted from an ejabberd log.

      ej_fd -- an open, iterable text file object for the ejabberd log.

      A line matching ``ej_time`` only updates the current timestamp and
      yields nothing. Every later line is reported under that timestamp:
      as the captured text of the first pattern in ``matchers`` that hits,
      or, if none hits and the line is non-empty, as the stripped line
      itself. Lines seen before the first timestamp are dropped.
      """
      isotime = None
      for line in ej_fd:
          line = line.strip()

          stamp = re.search(ej_time, line)
          if stamp is not None:
              isotime = stamp.group(1)
              continue

          # Nothing can be attributed to a time until a header was seen.
          if isotime is None:
              continue

          for matcher in matchers:
              hit = default_match(matcher, line, isotime)
              if hit:
                  yield isotime, hit
                  break
          else:
              # Also report unparsable non-empty rows.
              if line:
                  yield isotime, line
  def sanitize_apache(apache_fd):
      """Yield (isotime, text) pairs extracted from an Apache access log.

      apache_fd -- an open, iterable text file object for the access log.

      Each non-blank line is split on single spaces. Field 3 is parsed as
      "[dd/Mon/YYYY:HH:MM:SS" and re-emitted as "YYYY-MM-DD HH:MM:SS".
      That field and the one after it are removed, and the remaining fields
      are re-joined with spaces to form the text. Blank lines are skipped.

      Raises IndexError for a non-blank line with fewer than five fields
      and ValueError if the timestamp field does not match the expected
      format.
      """
      for line in apache_fd:
          line = line.strip()
          if not line:
              # A blank line has no fields to index; skip it instead of
              # failing on the timestamp lookup below.
              continue

          fields = line.split(" ")
          timestr = fields.pop(3)
          # We assume that the log files are in the same timezone (there is
          # no timezone info in the ejabberd log), so the timezone field
          # that followed the timestamp is thrown away.
          fields.pop(3)

          isotime = time.strftime(
              "%Y-%m-%d %H:%M:%S",
              time.strptime(timestr, "[%d/%b/%Y:%H:%M:%S"),
          )
          yield isotime, " ".join(fields)
  if __name__ == "__main__":
      # Command line: <script> <ejabberd log> [apache log]
      if len(sys.argv) < 2 or len(sys.argv) > 3:
          sys.stderr.write("Usage: %s <ejabberd log> [apache log]\n" % sys.argv[0])
          sys.exit(1)

      # Pair every given log path with the generator that parses it; the
      # Apache log is optional.
      sources = [(sanitize_ejabberd, sys.argv[1])]
      if len(sys.argv) == 3:
          sources.append((sanitize_apache, sys.argv[2]))

      # Maps a timestamp to the set of distinct entries seen at that time,
      # so identical entries within the same second are printed once.
      hits = dict()
      for sanitize, path in sources:
          with open(path, 'r') as log_fd:
              for isotime, hit in sanitize(log_fd):
                  hits.setdefault(isotime, set()).add(hit)

      # Timestamps are "YYYY-MM-DD HH:MM:SS" strings, so sorting them as
      # text gives chronological order. The loop variable is deliberately
      # not called "time", which would shadow the time module.
      for stamp in sorted(hits):
          for hit in hits[stamp]:
              print("%s %s" % (stamp, hit))