PageRenderTime 80ms CodeModel.GetById 32ms RepoModel.GetById 1ms app.codeStats 0ms

/tracker.py

https://gitlab.com/jan.raddatz/myimmitracker-analyzer
Python | 163 lines | 155 code | 1 blank | 7 comment | 0 complexity | 0a67dec2e6ff48529f7b146bb879c980 MD5 | raw file
  1. #pip install --proxy proxy:8080 requests
  2. import requests
  3. #pip install --proxy proxy:8080 bs4
  4. from bs4 import BeautifulSoup
  5. # pip install --proxy proxy:8080 ansicolors
  6. from colors import black, red, green, yellow, blue, magenta, cyan, white
  7. from datetime import datetime
  8. import sys
  9. ## Expression of interest case.
  10. #
  11. class CCase(object):
  12. ## Username of case.
  13. #
  14. m_Username = ""
  15. ## EOI date of effect.
  16. #
  17. m_EoiDateOfEffect = datetime.today
  18. ## EOI score.
  19. #
  20. m_Score = 0
  21. ## EOI status.
  22. #
  23. m_Status = 'Unknown'
  24. ## Constructor.
  25. #
  26. def __init__(self, p_Username, p_EoiDateOfEffect, p_Score, p_Status):
  27. self.m_Username = p_Username
  28. self.m_EoiDateOfEffect = p_EoiDateOfEffect
  29. self.m_Score = p_Score
  30. self.m_Status = p_Status
  31. ## Representation.
  32. #
  33. def __repr__(self):
  34. return str(self.m_EoiDateOfEffect) + ' - ' + str(self.m_Score) + ' - ' + self.m_Status
  35. ## String.
  36. #
  37. def __str__(self):
  38. return str(self.m_EoiDateOfEffect) + ' - ' + str(self.m_Score) + ' - ' + self.m_Status
  39. class bcolors:
  40. HEADER = '\033[95m'
  41. OKBLUE = '\033[94m'
  42. OKGREEN = '\033[92m'
  43. WARNING = '\033[93m'
  44. FAIL = '\033[91m'
  45. ENDC = '\033[0m'
  46. BOLD = '\033[1m'
  47. UNDERLINE = '\033[4m'
  48. class CELLS:
  49. COMMENTS = 0
  50. WATCH = 1
  51. DATE_CREATED = 2
  52. DATE_LAST_UPDATED = 3
  53. USERNAME = 4
  54. NATIONALITY = 5
  55. ANZSCO_CODE = 6
  56. OCCUPATION = 7
  57. POINTS = 8
  58. EOI_DATE_OF_EFFECT = 9
  59. DATE_INVITED = 10
  60. STATUS = 11
  61. DAYS_TO_INVITE = 12
  62. http_proxy = "http://proxy:8080"
  63. https_proxy = "https://proxy:8080"
  64. ftp_proxy = "ftp://proxy:8080"
  65. proxyDict = {
  66. "http" : http_proxy,
  67. "https" : https_proxy,
  68. "ftp" : ftp_proxy
  69. }
  70. def scrapeURL(hostname, url_to_scrape, username, caseList):
  71. ownCase = None
  72. r = requests.get(hostname + url_to_scrape)
  73. # r = requests.get(hostname + url_to_scrape, proxies=proxyDict)
  74. soup = BeautifulSoup(r.text, 'html.parser')
  75. all_tables = soup.find_all('table')
  76. right_table = soup.find('table', id='original')
  77. rows = right_table.findAll("tr")
  78. rowCount = 0
  79. for row in rows:
  80. if(rowCount >= 2): # Skip first two rows with headings.
  81. cells = row.findAll('td')
  82. case = CCase(cells[CELLS.USERNAME].string, datetime.strptime(cells[CELLS.EOI_DATE_OF_EFFECT].string, '%d/%m/%Y'), int(cells[CELLS.POINTS].string), cells[CELLS.STATUS].string)
  83. caseList.append(case)
  84. if (case.m_Username == username):
  85. ownCase = case
  86. rowCount += 1
  87. # Check for for more data and initiate loading recursively
  88. nextlink = soup.find('a', rel='next', href=True)
  89. if nextlink is not None:
  90. oc = scrapeURL(hostname, nextlink['href'], username, caseList)
  91. if(ownCase == None):
  92. ownCase = oc
  93. return ownCase
  94. ## Main routine.
  95. #
  96. ###############################################################################
  97. def main():
  98. if(len(sys.argv) != 3):
  99. print('Tracker analyzer 1.0')
  100. print('Please specify username and and ANZSCO code')
  101. print('python tracker.py USERNAME ANZSCO')
  102. return
  103. username = sys.argv[1]
  104. anzsco = sys.argv[2]
  105. print('Scrapping data for user: ' + username + ' with ANZSCO code: ' + anzsco)
  106. hostname='https://myimmitracker.com'
  107. url_to_scrape_tmpl = '/en/trackers/expression-of-interest-sc189?filter%5B_active_slash_inactive_%5D=&filter%5B_anzsco_code_%5D=$$ANZSCO$$&filter%5B_days_to_invite_%5D=&filter%5B_eoi_date_of_effect_%5D=&filter%5B_invited_%5D=&filter%5B_nationality_%5D=&filter%5B_occupation_%5D=&filter%5B_points_%5D=&filter%5B_status_%5D=&filter%5B_username_%5D=&order_by%5B_eoi_date_of_effect_%5D=desc'
  108. url_to_scrape = url_to_scrape_tmpl.replace('$$ANZSCO$$', anzsco)
  109. cases = []
  110. ownCase = scrapeURL(hostname, url_to_scrape, username, cases)
  111. # Statistic evaluation
  112. casesInFront = 0
  113. lastClearedCase = None
  114. for case in cases:
  115. if (case.m_Status == 'Submitted' and case.m_Score >= ownCase.m_Score):
  116. if (case == ownCase):
  117. color = bcolors.OKBLUE
  118. elif (case.m_Status == "Invited"):
  119. color = bcolors.OKGREEN
  120. elif (case.m_Status == "Submitted"):
  121. color = bcolors.WARNING
  122. else:
  123. color = bcolors.ENDC
  124. print(color + str(case))
  125. if (case != ownCase and case.m_Status == 'Submitted'):
  126. if(case.m_Score > ownCase.m_Score):
  127. casesInFront += 1
  128. elif(case.m_Score == ownCase.m_Score and case.m_EoiDateOfEffect <= ownCase.m_EoiDateOfEffect):
  129. casesInFront += 1
  130. if(case.m_Score == ownCase.m_Score and case.m_Status == 'Invited' and lastClearedCase == None):
  131. lastClearedCase = case
  132. print(bcolors.OKGREEN + str(lastClearedCase))
  133. break
  134. print(bcolors.ENDC + 'Total cases found: ' + str(len(cases)))
  135. print(bcolors.OKBLUE + ' Our own case is: ' + str(ownCase))
  136. print(bcolors.OKGREEN + 'Last cleared case: ' + str(lastClearedCase))
  137. print(bcolors.HEADER + ' Cases in front: ' + str(casesInFront))
  138. print(bcolors.ENDC + 'Programm finished...')
  139. ## Startup.
  140. #
  141. ###############################################################################
  142. if __name__ == "__main__":
  143. main()