PageRenderTime 67ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/4chan_downloader.py

https://gitlab.com/8wiw/4chan_downloader
Python | 147 lines | 137 code | 5 blank | 5 comment | 6 complexity | 36b408d414275a2320348a5c888116d8 MD5 | raw file
  1. #!/usr/bin/env python3
  2. # Made by r4v10l1 (ch0colate)
  3. # https://github.com/r4v10l1
  4. try:
  5. import requests, time, sys, os
  6. from bs4 import BeautifulSoup
  7. from colorama import Fore, Style
  8. except Exception:
  9. exit(" [!] Error importing necesary modules: requests, time, bs4, sys, os, colorama")
  10. ############ EDIT ME ############
  11. useTorProxy = False # << Put here True or False
  12. debugPrint = False # << Put here True or False
  13. sessionMode = False # << Put here True or False
  14. #################################
  15. def check_variable_types():
  16. if type(useTorProxy) is not bool:
  17. exit(" [!] The variable 'useTorProxy' must be a boolean.")
  18. elif type(debugPrint) is not bool:
  19. exit(" [!] The variable 'debugPrint' must be a boolean.")
  20. def banner():
  21. print(f"{Style.BRIGHT}{Fore.GREEN} __ __ __ ")
  22. print(" / // / _____/ /_ ____ _____ ")
  23. print(" / // /_/ ___/ __ \\/ __ `/ __ \\")
  24. print("/__ __/ /__/ / / / /_/ / / / /")
  25. print(f" /_/ \\___/_/ /_/\\__,_/_/ /_/ {Style.RESET_ALL}{Fore.GREEN}Downloader{Style.RESET_ALL}")
  26. print()
  27. def log_user_start(board):
  28. with open("4chan_debug.log", "a") as DebugLog: # Append to the log file
  29. DebugLog.write("[%s] User started board: %s\n" % (time.strftime("%d %b %Y - %H:%M:%S", time.gmtime()), board))
  30. def log_user_stop(board):
  31. with open("4chan_debug.log", "a") as DebugLog: # Append to the log file
  32. DebugLog.write("[%s] User stopped board: %s\n" % (time.strftime("%d %b %Y - %H:%M:%S", time.gmtime()), board))
  33. def log_error_stop(error):
  34. with open("4chan_debug.log", "a") as DebugLog: # Append to the log file
  35. DebugLog.write("[%s] Error: %s \n" % (time.strftime("%d %b %Y - %H:%M:%S", time.gmtime()), error))
  36. def main():
  37. check_variable_types()
  38. banner()
  39. if not os.path.exists(os.path.abspath(os.path.dirname(__file__)).replace("\\", "/") + "/4chan_downloads"):
  40. os.makedirs(os.path.abspath(os.path.dirname(__file__)).replace("\\", "/") + "/4chan_downloads")
  41. s = requests.Session() # Create a session because of the cookie
  42. if useTorProxy == False:
  43. proxies = ""
  44. elif useTorProxy == True:
  45. proxies = {'http': 'socks5://127.0.0.1:9150', 'https': 'socks5://127.0.0.1:9150'}
  46. try:
  47. if sessionMode:
  48. s.get("https://4chan.org/", proxies=proxies)
  49. else:
  50. requests.get("https://4chan.org/", proxies=proxies)
  51. except Exception:
  52. exit(f" {Style.RESET_ALL}{Fore.RED}[!] We could not verify the proxies. Make sure tor is running.{Style.RESET_ALL}")
  53. try:
  54. board = input(f" {Style.BRIGHT}{Fore.BLUE}[i] Welcome to 4chan downloader! Type the board name: {Style.RESET_ALL}").lower()
  55. except KeyboardInterrupt:
  56. print()
  57. print(f"{Style.RESET_ALL}{Fore.RED} [!] Detected Ctrl+C. Exiting...{Style.RESET_ALL}")
  58. print()
  59. log_user_stop("None")
  60. exit(1)
  61. print(f"{Style.RESET_ALL}{Fore.BLUE} Starting at "+ time.strftime("%d %b %Y - %H:%M:%S", time.gmtime()) + f"{Style.RESET_ALL}")
  62. log_user_start(board)
  63. print()
  64. pageNumber = 1
  65. img_scr_old = ""
  66. double_count = 0
  67. extension = ".jpg"
  68. images = {"image/jpeg", "image/png"}
  69. try:
  70. while True:
  71. URL = "https://boards.4channel.org/" + board + "/" + str(pageNumber)
  72. if pageNumber == 1:
  73. URL = "https://boards.4channel.org/" + board + "/"
  74. if sessionMode:
  75. r = s.get(URL, proxies=proxies, allow_redirects=True)
  76. else:
  77. r = requests.get(URL, proxies=proxies, allow_redirects=True)
  78. if "Attention Required! | Cloudflare" in r.text:
  79. exit(f" {Style.RESET_ALL}{Fore.RED}[!] Cloudflare captcha needed. Exiting...{Style.RESET_ALL}")
  80. log_user_stop(board)
  81. souped = BeautifulSoup(r.text, 'html.parser')
  82. img_tags = souped.find_all('img')
  83. for img in img_tags:
  84. img_scr = img.get('src')
  85. if img_scr_old == img_scr:
  86. if double_count > 2:
  87. sys.stdout.flush()
  88. print()
  89. print(f"{Style.RESET_ALL}{Fore.BLUE} All done.{Style.RESET_ALL}")
  90. print(f"{Style.RESET_ALL}{Fore.BLUE} Stopping at "+ time.strftime("%d %b %Y - %H:%M:%S", time.gmtime()) + f"{Style.RESET_ALL}")
  91. log_user_stop(board)
  92. exit(1)
  93. else:
  94. double_count += 1
  95. if debugPrint:
  96. print(f"IMG_SCR: {img_scr}\n")
  97. if f"i.4cdn.org/{board}" in img_scr:
  98. if "http" not in img_scr:
  99. img_scr = f"https:{img_scr}"
  100. img_id = img_scr.split("/")[-1].split(".")[0].replace("s", "")
  101. sys.stdout.write(f"\r {Style.RESET_ALL}{Style.BRIGHT}[{Fore.GREEN}+{Style.RESET_ALL}{Style.BRIGHT}] Downloading {Style.RESET_ALL}{Fore.GREEN}{img_id}{Style.RESET_ALL}")
  102. sys.stdout.flush()
  103. extension = ".jpg"
  104. img_url = "https://i.4cdn.org/" + board + "/" + img_id + extension
  105. if debugPrint:
  106. print(f"IMG_URL: {img_url}\n")
  107. if sessionMode:
  108. r2 = s.get(img_url)
  109. else:
  110. r2 = requests.get(img_url)
  111. # print(r2.text) # Inspect every request for debug
  112. if "Checking your browser" in r2.text:
  113. sys.stdout.write(f"\r {Style.RESET_ALL}{Style.BRIGHT}[{Fore.RED}!{Style.RESET_ALL}{Style.BRIGHT}] 4chan is requesting a human verification for image: {Fore.RED}{img_id}{Style.RESET_ALL}")
  114. sys.stdout.flush()
  115. print()
  116. elif "404 Not Found" in r2.text:
  117. extension = ".png"
  118. if r2.headers["content-type"] in images:
  119. with open("4chan_downloads/" + img_id + extension, "wb") as i:
  120. i.write(r2.content)
  121. img_scr_old = img_scr
  122. pageNumber += 1
  123. except KeyboardInterrupt:
  124. print()
  125. print(f"{Style.RESET_ALL}{Fore.RED} [!] Detected Ctrl+C. Exiting...{Style.RESET_ALL}")
  126. print()
  127. log_user_stop(board)
  128. exit(1)
  129. except Exception as e:
  130. print(" Faltal error. Check the log for details.")
  131. log_error_stop(e)
  132. main()