PageRenderTime 59ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/mangafox.py

https://bitbucket.org/antoinealb/mangafox.py
Python | 150 lines | 98 code | 23 blank | 29 comment | 17 complexity | d15829c8be4d8f89cf680270c1c2f0a6 MD5 | raw file
  1. #!/usr/bin/env python3
  2. """
  3. Provides a command line interface to download scans on mangafox.com.
  4. (c) 2013 Antoine Albertelli
  5. """
  6. import requests
  7. from bs4 import BeautifulSoup
  8. import os
  9. import argparse
  10. import re
  11. import threading
  12. total_images = 0
  13. finished_images = 0
  14. def make_progress_bar(current, total, length=200):
  15. """Returns a string representing a progress bar, wget style."""
  16. if current > total:
  17. raise ValueError("Current > total")
  18. length = length - 2
  19. sharp_count = int(length * current / total)
  20. progress_bar = "["
  21. progress_bar += "#" * sharp_count
  22. progress_bar += ">"
  23. progress_bar += " " * (length - sharp_count - 1)
  24. progress_bar += "]"
  25. progress_bar += " {0} / {1}".format(current, total)
  26. return progress_bar
  27. def get_soup(url):
  28. """
  29. Returns a BeautifulSoup instance made with the HTML of the page at url.
  30. """
  31. page = requests.get(url)
  32. return BeautifulSoup(page.text)
  33. def clean_chapter_num(c):
  34. """
  35. Cleans the chapter number (converts it back to int if it is a round
  36. number).
  37. """
  38. if int(c)==float(c): # maybe we can do better ?
  39. return int(c)
  40. else:
  41. return c
  42. def get_page_count(soup):
  43. """
  44. Returns the number of page for a manga based on the soup from one page
  45. of this chapter.
  46. """
  47. pages = soup.find(id="top_bar").find(onchange="change_page(this)")
  48. pages = pages.find_all("option")
  49. return len(pages)-1 # -1 is for comment page
  50. def make_url(manga, chapter, page):
  51. """
  52. Returns a valid URL for a given page of a given chapter of a given manga.
  53. """
  54. # Seems that we can get pretty crazy when it comes to url formatting and
  55. # mangafox will just redirect (301) us. This allows us to always ask for
  56. # something in the 1st volume, even if asking chapter 9001 :) There is also
  57. # no need to worry about leading zeroes.
  58. url_template = "http://mangafox.me/manga/{0}/v01/c{1}/{2}.html"
  59. return url_template.format(manga, chapter, page)
  60. def save_image(url, path):
  61. """url is the url of the main page."""
  62. global total_images, finished_images
  63. total_images += 1
  64. soup = get_soup(url)
  65. image_data = requests.get(soup.find(id="image")["src"])
  66. if image_data.status_code == 200:
  67. with open(path, "wb") as outfile:
  68. for chunk in image_data.iter_content():
  69. outfile.write(chunk)
  70. else:
  71. print("Cannot open image, dafuq ?")
  72. finished_images += 1
  73. def main():
  74. """Entry point of the program."""
  75. global finished_images, total_images
  76. parser = argparse.ArgumentParser(
  77. description="Image downloader for mangafox.com")
  78. parser.add_argument("manga_name",
  79. help="Manga name on Mangafox.com. Ex : fairy_tail")
  80. parser.add_argument("chapters", action='append', nargs="+",
  81. help="Chapter number. Ex : 1 or 3-50")
  82. parser.add_argument("--no-threads", help="Use only a single thread.",
  83. action="store_true")
  84. args = parser.parse_args()
  85. range_exp = re.compile("[0123456789]+-[0123456789]+")
  86. chapters = []
  87. for expr in args.chapters[0]:
  88. print(expr)
  89. if range_exp.search(expr):
  90. start, end = map(int, expr.split("-"))
  91. chapters += range(start, end + 1)
  92. else:
  93. chapters += [float(expr)]
  94. manga = args.manga_name
  95. threads = []
  96. for chapter in chapters:
  97. os.mkdir("{0}_{1}".format(manga, clean_chapter_num(chapter)))
  98. soup = get_soup(make_url(manga, chapter, 1))
  99. page_count = get_page_count(soup)
  100. for page in range(1, page_count+1):
  101. # The leading zeroes are needed so viewing apps dont get confused.
  102. image_path = "{0}_{1}/{2:03}.jpg".format(manga, clean_chapter_num(chapter), page)
  103. page_url = make_url(manga, clean_chapter_num(chapter), page)
  104. if args.no_threads:
  105. save_image(page_url, image_path)
  106. else:
  107. thread = threading.Thread(None, save_image, None, (),
  108. {'url':page_url, 'path':image_path})
  109. thread.start()
  110. threads.append(thread)
  111. if not args.no_threads:
  112. prev_finished = finished_images
  113. while finished_images != total_images:
  114. if prev_finished != finished_images:
  115. # end="" means no line feed at the end
  116. print("\r"+make_progress_bar(finished_images, total_images), end="")
  117. prev_finished = finished_images
  118. print("\r"+make_progress_bar(finished_images, total_images))
  119. for thread in threads:
  120. thread.join()
# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()