PageRenderTime 42ms CodeModel.GetById 5ms RepoModel.GetById 0ms app.codeStats 0ms

/identifier.py

https://github.com/fabricioferracioli/mestrado_files
Python | 54 lines | 39 code | 10 blank | 5 comment | 9 complexity | 40e8e62d4b787a5f3a4412bfbad692ff MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. #!/usr/bin/pyhton
  3. #quarto programa a rodar identificador de usuarios, paginas e reconhecimento de acessos (sessoes)
  4. #recebe como parametro o arquivo com as urls a serem filtradas pois ele possui os nomes dos arquivos gerados com as urls ja filtradas
  5. from optparse import OptionParser
  6. import logutil, mining_database
  7. usage = 'usage: %prog -i input_file_path [options]'
  8. parser = OptionParser(usage)
  9. #optionparser sempre assume que a opcao sera do tipo string e que deve armazenar em dest
  10. parser.add_option('-i', '--input', dest='inputfile', help='read the INPUTFILE as parameters defined by user to access the correct filtered logfiles', metavar='INPUTFILE')
  11. parser.add_option('-f', '--force', dest='force_db_creation', help='force the database creation. if it exists, create again', default=False)
  12. (options, args) = parser.parse_args()
  13. if options.inputfile == None:
  14. parser.error('please, inform a path for the input file')
  15. if options.force_db_creation != False:
  16. options.force_db_creation = True
  17. db = mining_database.MiningDatabase(options.force_db_creation)
  18. db.createTables()
  19. logfiles = []
  20. inputfile = open(options.inputfile, 'r')
  21. for line in inputfile:
  22. conf = line.split()
  23. logfiles.append(conf[3])
  24. db.insertPage(conf[0])
  25. db.insertPage(conf[1])
  26. db.insertConfig(conf[0], conf[1], conf[2], conf[3])
  27. inputfile.close()
  28. lu = logutil.LogUtil()
  29. for logfilepath in logfiles:
  30. try:
  31. logfile = open(logfilepath, 'r')
  32. for line in logfile:
  33. requester = lu.getRequester(line)
  34. if len(db.searchUser('ip', 'like', requester).fetchall()) == 0:
  35. db.insertUser(requester)
  36. document_requested = lu.getRequestedDocument(line)
  37. if len(db.searchPage('page', 'like', document_requested).fetchall()) == 0:
  38. db.insertPage(document_requested)
  39. request_date = lu.getRequestDate(line)
  40. server_response_status = lu.getServerResponseStatus(line)
  41. response_size = lu.getResponseSize(line)
  42. db.insertAccess(requester, document_requested, request_date, server_response_status, response_size)
  43. except IOError:
  44. print 'Arquivo '+ logfilepath +' nao encontrado'