mangafox.py | searchcode

/mangafox.py

https://bitbucket.org/antoinealb/mangafox.py · Python · 150 lines · 94 code · 27 blank · 29 comment · 19 complexity · d15829c8be4d8f89cf680270c1c2f0a6 MD5 · raw file

#!/usr/bin/env python3
"""
Provides a command line interface to download scans on mangafox.com.

(c) 2013 Antoine Albertelli
"""

import requests
from bs4 import BeautifulSoup
import os
import argparse
import re
import threading

# Download counters shared between save_image() and main():
#   total_images    - bumped by save_image() when it starts working on an image
#   finished_images - bumped by save_image() when it is done with an image
# main() reads both to draw the progress bar.
total_images = finished_images = 0

def make_progress_bar(current, total, length=200):
    """
    Returns a string representing a progress bar, wget style.

    The bar looks like "[####>    ] current / total". The part between and
    including the brackets is always `length` characters wide (with a minimum
    of 3 so the arrow head always fits), whatever the progress is.

    current -- number of finished items.
    total   -- number of items overall. 0 means "nothing to do" and is drawn
               as a complete bar.
    length  -- width of the bar, brackets included.

    Raises ValueError if current > total.
    """
    if current > total:
        raise ValueError("Current > total")

    # Room between the brackets; keep at least one cell for the arrow head.
    inner = max(length - 2, 1)

    if total > 0:
        # Clamp to inner - 1 so the ">" still fits when current == total and
        # the bar keeps a constant width.
        sharp_count = min(int(inner * current / total), inner - 1)
    else:
        # Avoid dividing by zero: an empty job is shown as complete.
        sharp_count = inner - 1

    bar = "#" * sharp_count + ">" + " " * (inner - sharp_count - 1)
    return "[{0}] {1} / {2}".format(bar, current, total)

def get_soup(url):
    """
    Returns a BeautifulSoup instance made with the HTML of the page at url.

    The page is fetched with an HTTP GET. The request gives up (requests
    raises a Timeout exception) if the server stays silent for 30 seconds,
    instead of blocking forever.
    """
    page = requests.get(url, timeout=30)
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed, which can change the resulting tree
    # from one machine to another (and triggers a warning).
    return BeautifulSoup(page.text, "html.parser")



def clean_chapter_num(c):
    """
    Normalizes a chapter number: a whole value (3 or 3.0) comes back as an
    int, anything with a fractional part (10.5) comes back unchanged.
    """
    whole = int(c)
    if whole != float(c):
        # Truncating lost something, so keep the original value.
        return c
    return whole

def get_page_count(soup):
    """
    Returns the number of pages in a chapter, given the soup of any page of
    that chapter.

    The count is read from the page selector (the element whose onchange
    handler is "change_page(this)") inside the element with id "top_bar".
    """
    top_bar = soup.find(id="top_bar")
    page_selector = top_bar.find(onchange="change_page(this)")
    options = page_selector.find_all("option")
    # One of the options is the comment page, not a scan.
    return len(options) - 1

def make_url(manga, chapter, page):
    """
    Builds the URL of one page of one chapter of the given manga.
    """
    # The volume is always "v01" and numbers are not zero-padded: per the
    # original author's notes, mangafox answers with a 301 redirect to the
    # right place, so neither the real volume nor leading zeroes matter.
    parts = (manga, chapter, page)
    return "http://mangafox.me/manga/{0}/v01/c{1}/{2}.html".format(*parts)

# Serializes updates to total_images / finished_images: save_image() may run
# in many threads at once and "+=" on a global is not atomic.
_counter_lock = threading.Lock()


def save_image(url, path):
    """
    Downloads the scan shown on a reader page and writes it to path.

    url  -- URL of the reader page (not of the image itself); the image
            address is taken from the "src" of the element with id "image".
    path -- file the image bytes are written to.

    Increments total_images when it starts and finished_images when it is
    done. The second increment happens even if the download raises, so code
    waiting for the two counters to match is never left hanging. Exceptions
    from the network or the file system are not swallowed.
    """
    global total_images, finished_images
    with _counter_lock:
        total_images += 1

    try:
        soup = get_soup(url)
        image_tag = soup.find(id="image")
        if image_tag is None:
            # Not a reader page (missing chapter, error page, layout change).
            print("Cannot find image on page {0}".format(url))
            return

        image_data = requests.get(image_tag["src"])
        if image_data.status_code == 200:
            with open(path, "wb") as outfile:
                # The response is not streamed, so the body is already in
                # memory: write it in one go rather than byte by byte.
                outfile.write(image_data.content)
        else:
            print("Cannot open image, dafuq ?")
    finally:
        with _counter_lock:
            finished_images += 1

# Matches a complete "start-end" chapter range such as "3-50".
_RANGE_EXP = re.compile(r"[0-9]+-[0-9]+")


def _parse_chapters(expressions):
    """
    Expands chapter expressions into a flat list of chapter numbers.

    "3-50" becomes the ints 3..50 (both included); anything else is read as
    a single number with float(), so "10.5" is accepted. Raises ValueError
    for an expression that is neither.
    """
    chapters = []
    for expr in expressions:
        # fullmatch, not search: "1.5-3" must not be mistaken for a range.
        if _RANGE_EXP.fullmatch(expr):
            start, end = map(int, expr.split("-"))
            chapters.extend(range(start, end + 1))
        else:
            chapters.append(float(expr))
    return chapters


def _wait_for_threads(threads):
    """Joins every download thread, redrawing the progress bar meanwhile."""
    last_drawn = None
    for thread in threads:
        while thread.is_alive():
            # Joining with a short timeout lets the bar refresh regularly
            # without spinning the CPU, and ends even if a thread crashed.
            thread.join(0.1)
            done, total = finished_images, total_images
            # The counters are updated by other threads; skip snapshots that
            # make_progress_bar cannot draw (nothing registered yet, or
            # done momentarily ahead of total).
            if total and done <= total and (done, total) != last_drawn:
                # end="" means no line feed, so "\r" redraws the same line.
                print("\r" + make_progress_bar(done, total), end="", flush=True)
                last_drawn = (done, total)

    if total_images:
        done = min(finished_images, total_images)
        print("\r" + make_progress_bar(done, total_images))


def main():
    """
    Entry point of the program.

    Parses the command line (manga name, one or more chapter expressions,
    optional --no-threads), creates one "<manga>_<chapter>" directory per
    chapter and saves every page in it as "001.jpg", "002.jpg", ...
    Unless --no-threads is given, each page is downloaded in its own thread
    and a progress bar is shown until all of them are done.
    """
    parser = argparse.ArgumentParser(
        description="Image downloader for mangafox.com")
    parser.add_argument("manga_name",
                        help="Manga name on Mangafox.com. Ex : fairy_tail")
    parser.add_argument("chapters", nargs="+",
                        help="Chapter number. Ex : 1 or 3-50")
    parser.add_argument("--no-threads", action="store_true",
                        help="Use only a single thread.")
    args = parser.parse_args()

    try:
        chapters = _parse_chapters(args.chapters)
    except ValueError:
        # Prints the usage message and exits instead of showing a traceback.
        parser.error("chapters must be numbers (ex : 1) or ranges (ex : 3-50)")

    manga = args.manga_name
    threads = []

    for chapter in chapters:
        # Use the same cleaned number for the directory and for every URL.
        number = clean_chapter_num(chapter)
        directory = "{0}_{1}".format(manga, number)
        # exist_ok lets an interrupted download be run again.
        os.makedirs(directory, exist_ok=True)

        soup = get_soup(make_url(manga, number, 1))
        page_count = get_page_count(soup)
        for page in range(1, page_count + 1):
            # The leading zeroes are needed so viewing apps dont get confused.
            image_path = "{0}/{1:03}.jpg".format(directory, page)
            page_url = make_url(manga, number, page)
            if args.no_threads:
                save_image(page_url, image_path)
            else:
                thread = threading.Thread(
                    target=save_image,
                    kwargs={'url': page_url, 'path': image_path})
                thread.start()
                threads.append(thread)

    if not args.no_threads:
        _wait_for_threads(threads)

# Run the downloader only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()