/mangafox.py
Python | 150 lines | 98 code | 23 blank | 29 comment | 17 complexity | d15829c8be4d8f89cf680270c1c2f0a6 MD5 | raw file
- #!/usr/bin/env python3
- """
- Provides a command line interface to download scans on mangafox.com.
- (c) 2013 Antoine Albertelli
- """
- import requests
- from bs4 import BeautifulSoup
- import os
- import argparse
- import re
- import threading
# Progress counters shared by save_image() and main().
# finished_images: bumped by save_image() once it is done with an image.
finished_images = 0
# total_images: bumped by save_image() as soon as it starts on an image.
total_images = 0
def make_progress_bar(current, total, length=200):
    """Return a string representing a progress bar, wget style.

    The result looks like ``[####>     ] current / total``. For any
    ``length`` of at least 3 the bracketed part is exactly ``length``
    characters wide, including when the bar is complete.

    Args:
        current: Number of items completed so far.
        total: Total number of items. A total of 0 renders an empty bar.
        length: Width of the bracketed part, brackets included.

    Returns:
        The bar followed by `` current / total``.

    Raises:
        ValueError: If ``current`` is greater than ``total``.
    """
    if current > total:
        raise ValueError("Current > total")

    inner = length - 2  # the two brackets take one cell each

    # Nothing to download yet: draw an empty bar instead of dividing by zero.
    filled = int(inner * current / total) if total else 0

    # The ">" head always occupies one cell, so the fill may use at most
    # inner - 1 cells; without the cap a finished bar is one cell too wide.
    sharp_count = max(0, min(filled, inner - 1))

    pieces = [
        "[",
        "#" * sharp_count,
        ">",
        " " * (inner - sharp_count - 1),
        "]",
        " {0} / {1}".format(current, total),
    ]
    return "".join(pieces)
def get_soup(url):
    """
    Returns a BeautifulSoup instance made with the HTML of the page at url.

    The parser is named explicitly: when none is given, BeautifulSoup picks
    whichever parser happens to be installed, so the same page can produce
    a different tree on another machine (and a warning is emitted).
    """
    page = requests.get(url)
    return BeautifulSoup(page.text, "html.parser")
def clean_chapter_num(c):
    """
    Normalises a chapter number: a whole value such as 3.0 comes back as the
    int 3, anything with a fractional part is returned untouched.
    """
    truncated = int(c)
    return truncated if truncated == float(c) else c
def get_page_count(soup):
    """
    Counts the pages of a chapter, given the soup of any one of its pages.

    The count is read from the page selector found inside the element with
    id "top_bar".
    """
    top_bar = soup.find(id="top_bar")
    selector = top_bar.find(onchange="change_page(this)")
    options = selector.find_all("option")
    # One of the options is the comment page rather than a scan.
    return len(options) - 1
def make_url(manga, chapter, page):
    """
    Builds the reader URL for one page of one chapter of a manga.
    """
    # The volume is hard-wired to "v01" and no zero padding is applied:
    # per the original author's note, mangafox answers loosely formatted
    # URLs with a 301 redirect to the real location, so asking for any
    # chapter inside the first volume still works.
    return "http://mangafox.me/manga/{0}/v01/c{1}/{2}.html".format(
        manga, chapter, page)
# Guards the module-level progress counters, which save_image() updates
# from several threads at once ("+=" on a global is not atomic).
_COUNTER_LOCK = threading.Lock()


def save_image(url, path):
    """Download the scan shown on a reader page and write it to disk.

    Args:
        url: URL of the reader page (the HTML page, not the image itself).
        path: Destination file for the image bytes.

    Side effects:
        Increments the global ``total_images`` on entry and the global
        ``finished_images`` on exit. The exit increment happens even when
        the download raises, so code waiting on the counters cannot hang
        on a failed image. The exception itself still propagates.
    """
    global total_images, finished_images
    with _COUNTER_LOCK:
        total_images += 1
    try:
        soup = get_soup(url)
        image_data = requests.get(soup.find(id="image")["src"])
        if image_data.status_code == 200:
            with open(path, "wb") as outfile:
                # The response is not streamed, so the whole body is already
                # in memory; write it in one go rather than iterating with
                # iter_content(), whose default chunk size is a single byte.
                outfile.write(image_data.content)
        else:
            print("Cannot open image, dafuq ?")
    finally:
        with _COUNTER_LOCK:
            finished_images += 1
def _parse_chapters(expressions):
    """Expand chapter expressions such as "1" or "3-50" into a flat list."""
    range_exp = re.compile("[0123456789]+-[0123456789]+")
    chapters = []
    for expr in expressions:
        print(expr)
        if range_exp.search(expr):
            start, end = map(int, expr.split("-"))
            chapters += range(start, end + 1)
        else:
            chapters += [float(expr)]
    return chapters


def _wait_with_progress(threads):
    """Join every download thread, redrawing the progress bar meanwhile."""
    # One thread is started per image, so the total is known up front and
    # does not depend on how far the workers have got with their counters.
    total = len(threads)
    if not total:
        return

    shown = None
    for thread in threads:
        while thread.is_alive():
            # join() with a timeout blocks instead of spinning on the CPU.
            thread.join(0.1)
            done = min(finished_images, total)
            if done != shown:
                # end="" means no line feed at the end; flush so the bar is
                # actually redrawn even though no newline is written.
                print("\r" + make_progress_bar(done, total), end="",
                      flush=True)
                shown = done
    print("\r" + make_progress_bar(min(finished_images, total), total))


def main():
    """Entry point of the program.

    Parses the command line, creates one directory per requested chapter
    (named ``<manga>_<chapter>``) and saves every page of the chapter in it
    as ``NNN.jpg``. Unless ``--no-threads`` is given, each page is fetched
    in its own thread and a progress bar is drawn while they run.
    """
    parser = argparse.ArgumentParser(
        description="Image downloader for mangafox.com")
    parser.add_argument("manga_name",
                        help="Manga name on Mangafox.com. Ex : fairy_tail")
    parser.add_argument("chapters", action='append', nargs="+",
                        help="Chapter number. Ex : 1 or 3-50")
    parser.add_argument("--no-threads", help="Use only a single thread.",
                        action="store_true")
    args = parser.parse_args()

    # action="append" combined with nargs="+" wraps the values in an outer
    # list, hence the [0].
    chapters = _parse_chapters(args.chapters[0])

    manga = args.manga_name
    threads = []
    for chapter in chapters:
        # Use the cleaned number everywhere so the directory name and every
        # URL (including the first-page one) agree: "5", never "5.0".
        chapter_id = clean_chapter_num(chapter)
        directory = "{0}_{1}".format(manga, chapter_id)
        # exist_ok lets an interrupted download be re-run.
        os.makedirs(directory, exist_ok=True)

        soup = get_soup(make_url(manga, chapter_id, 1))
        page_count = get_page_count(soup)
        for page in range(1, page_count + 1):
            # The leading zeroes are needed so viewing apps dont get confused.
            image_path = "{0}/{1:03}.jpg".format(directory, page)
            page_url = make_url(manga, chapter_id, page)
            if args.no_threads:
                save_image(page_url, image_path)
            else:
                thread = threading.Thread(
                    target=save_image,
                    kwargs={'url': page_url, 'path': image_path})
                thread.start()
                threads.append(thread)

    # No threads are created with --no-threads, so this returns immediately.
    _wait_with_progress(threads)


if __name__ == "__main__":
    main()