# moefetch.sh
- #!/bin/sh
- # Copyright (c) 2009, edogawaconan <me@myconan.net>
- #
- # Permission to use, copy, modify, and/or distribute this software for any
- # purpose with or without fee is hereby granted, provided that the above
- # copyright notice and this permission notice appear in all copies.
- #
- # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- #
- # Lots of bugs here. Use with care
- # USE WITH CARE
- #
- # what it does: fetch every picture that has the specified TAGS.
- # requirement: wget, libxslt, openssl
- # program additional paths for: cut, sed, wc, openssl, wget, xsltproc, grep
- ADDITIONAL_PATH=
- # default server address. Danbooru only! I do not take responsibility of stupidity.
- DEFAULT_SITE="moe.imouto.org"
- # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one.
- # Structure is ${BASE_DIR}/<TAGS>
- # Absolute path only.
- # Leave empty to use whatever folder you're running this at
- BASE_DIR=
- # not user modifiable from here
# Print the program banner: name, version and copyright line.
msg_welcome() {
    printf '%s\n' "moefetch ${_version}
Copyright (c) 2009 edogawaconan <me@myconan.net>
"
}
# Sanitize a path so it can never be mistaken for a command option.
# Paths already starting with '.' or '/' pass through unchanged; anything
# else gets a './' prefix. Usage: cmd "$(safe_path "$unknown")"
# Improvement: a single case pattern replaces the printf|cut subprocess
# pair used to inspect the first character.
safe_path()
{
    case "$*" in
        .*|/*) printf "%s" "$*";;   # absolute or explicitly relative: safe as-is
        *)     printf "%s" "./$*";; # prefix so a leading '-' cannot parse as an option
    esac
}
- # Checks md5. OpenSSL should be available on anything usable.
- get_md5() { cat "$(safe_path "${1}")" | openssl dgst -md5 | tail -n 1 | sed -e 's/.*\([[:xdigit:]]\{32\}\).*/\1/'; }
- # Safely get basename.
- get_basename() { basename "$(safe_path "${1}")"; }
- # Safely get filename (basename without the extension).
- get_filename() { get_basename "${1%.*}"; }
- # Transformation for tag url.
- get_cleantags() { printf "%s " "$*" | sed -e 's/\&/%26/g;s/=/%3D/g'; }
- # Returns something if not an md5 value.
- is_not_md5() { get_filename "$1" | sed -e 's/\([0-9a-f]\{32\}\)//g'; }
# Fatal error handler: print the message and abort with status 1.
# Improvement: diagnostics now go to stderr instead of stdout, so they
# are not swallowed when the script's output is captured or piped.
# $1 - description of the failure
Err_Fatal() {
    echo "
Fatal error: ${1}" >&2
    exit 1
}
# Abort (status 1) on states that can only arise when the working
# directories were modified while the script was running.
Err_Impossible() {
    printf '%s\n' "
Impossible error. Or you modified content of the working directories when the script is running.
Please report to moefetch.googlecode.com if you see this message (complete with entire run log)"
    exit 1
}
# Print the usage message (expanding ${DEFAULT_SITE}) and exit with 2.
Err_Help() {
    cat <<EOF
moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS
COMMAND:
(quick)fetch:
Do a complete update. Add prefix quick to skip file checking
check:
Get list of new files, clean up local folder and print total new files
OPTIONS:
-n:
Skip checking repository directory.
-p PASSWORD:
Specifies password for login.
-s SITE_URL:
Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}.
-u USERNAME:
Specifies username for login.
TAGS:
Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme.
EOF
    exit 2
}
# generate link by transforming xml
# Generate_Link: walk the site's paginated XML post index and build
# ${TEMP_PREFIX}-list, one direct image URL per line.
# Globals read: SITE, TAGS, TEMP_PREFIX, _use_login, LOGIN_USER, LOGIN_PASS
# Dies via Err_Fatal when the catalog download fails or no file matches.
Generate_Link() {
echo "
Fetching XML file"
# The API returns at most 1000 posts per request; keep paging while a
# full page (>= 1000 entries) comes back.
tempnum=1000
iternum=1
> "${TEMP_PREFIX}-list"
while [ "${tempnum}" -ge 1000 ]; do
url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}"
# Append hashed credentials only when both -u and -p were supplied.
[ ${_use_login} -eq 1 ] && url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}"
wget --quiet "${url}" -O "${TEMP_PREFIX}-xml" -e continue=off || Err_Fatal "Failed download catalog file"
printf "Processing XML file... "
# xslt evilry
# Extract each post's file_url attribute, normalise every URL to the
# http://host/<md5>.<ext> shape, and keep only http lines.
xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist"
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<xsl:output method="xml" indent="yes"/>
<xsl:template match="post">
<xsl:value-of select="@file_url" />
</xsl:template>
</xsl:stylesheet>
EOF
tempnum=$(grep -c . "${TEMP_PREFIX}-templist")
iternum=$((iternum + 1))
cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
echo "${tempnum} file(s) available"
done
numfiles=$(grep -c . "${TEMP_PREFIX}-list")
echo "${numfiles} file(s) available on server"
[ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
}
# Start the spinner: remember the first frame in _last and print it.
# Improvement: '%s' keeps the frame character out of the printf format
# string (variable-as-format is a latent injection/format bug).
progress_init() {
    _last="-"
    printf '%s' "${_last}"
}
# Advance the spinner one frame: backspace over the previous character
# and print the next one in the - \ | / cycle (stored in _last).
# Improvement: the frame is passed as a '%s' argument instead of being
# interpolated into the printf format string.
progress_anim() {
    case "${_last}" in
        /) _last="-";;
        -) _last="\\";;
        \\) _last="|";;
        \|) _last="/";;
    esac
    printf '\b%s' "${_last}"
}
- progress_done() { printf "\bdone\n"; }
# getting rid of ls (as per suggestion)
# Count the entries of the directory named by "$*" without parsing ls:
# expand both the visible ('*') and hidden ('.*') globs, count every
# real match, then subtract 2 for the '.' and '..' entries the '.*'
# glob always produces.
# NOTE(review): an unmatched '*' leaves its literal pattern, which the
# -e test below rejects; an unmatched '.*' literal would slip past the
# first test and be counted — in practice '.*' always matches '.' and
# '..', so the -2 correction holds. Confirm if ported to shells with
# nullglob-style defaults.
Count_Files() {
numfiles=0
for dircontent in "${*}/"* "${*}/".*; do
# Count the word only when the glob matched (or the literal '*' name
# actually exists as a file).
if [ "${dircontent}" != "${*}/*" ] || [ -e "${dircontent}" ]; then
numfiles=$((numfiles + 1))
fi
done
# Discount '.' and '..'.
echo $((numfiles - 2))
}
# Abort early (via Err_Fatal) when any external program this script
# depends on cannot be found in PATH.
Check_Tools() {
    required="cut sed wc wget xsltproc xargs rm mkdir chown comm grep date openssl"
    for tool in ${required}; do
        if [ -z "$(command -v "${tool}")" ]; then
            Err_Fatal "${tool} doesn't exist in ${PATH}"
        fi
    done
}
# verify required folders exist and writeable
# Ensure the working tree under ${BASE_DIR} exists and is owned (or made
# writable), flag an empty repository via ISNEW, and create the scratch
# files used by later stages.
# Globals read: BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX
# Globals written: ISNEW
Check_Folders(){
    [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory."
    for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
        if [ ! -d "${BASE_DIR}/${directory}" ]; then
            mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible
        fi
        if [ ! -O "${BASE_DIR}/${directory}" ]; then
            echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it"
            chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible
        fi
    done
    # An empty repository means every remote file is new.
    [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ] && ISNEW=1
    for i in error ok list newlist templist; do
        # BUGFIX: called undefined "Fatal_Err" (name reversed) — any touch
        # failure would have died with "command not found" instead of the
        # intended fatal-error message.
        touch "${TEMP_PREFIX}-${i}" || Err_Fatal "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen"
    done
}
# Do some cleanup
# Move everything that should not be in the repository (directories,
# files not named like an md5, files no longer in the server list) into
# a timestamped trash folder, then report what was moved.
# Globals read: BASE_DIR, SITE_DIR, TARGET_DIR, TEMP_PREFIX
Cleanup_Repository() {
    # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
    printf "Cleaning up repository folder... "
    progress_init
    # BUGFIX: dropped the stray self-reference "${trash_dir}" inside this
    # assignment; it expanded empty on the first call (yielding "trash//")
    # and would nest paths on any repeated call.
    trash_dir="${BASE_DIR}/trash/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")"
    trashes="These files have been moved to ${trash_dir}:"
    has_trash=
    if [ ! -d "${trash_dir}" ]; then
        mkdir -p "${trash_dir}" || Err_Impossible
    else
        if [ ! -O "${trash_dir}" ]; then
            # BUGFIX: was chmod'ing "${BASE_DIR}/${directory}" — a stale
            # loop variable left over from Check_Folders — instead of the
            # trash directory it just tested.
            chmod -R u=rwX,g=rwX,o=rwX "${trash_dir}" || Err_Impossible
        fi
    fi
    for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
    do
        is_trash=
        # Trash: directories, non-md5 names, or files absent from the
        # freshly fetched server list.
        if [ -d "${trash}" ] || [ -n "$(is_not_md5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then
            is_trash=1
            has_trash=1
            mv -f -- "${trash}" "${trash_dir}" || Err_Impossible
            trashes="${trashes}
$(get_basename "${trash}")"
        fi
        progress_anim
    done
    # Remove the trash dir again when nothing was actually moved.
    rmdir "${trash_dir}" 2>/dev/null
    progress_done
    [ -n "${has_trash}" ] && echo "${trashes}"
}
# check files correctness
# Verify each local file's md5 against its filename, delete corrupt
# files, then compute ${TEMP_PREFIX}-newlist: server URLs not yet
# (correctly) present locally. In quick/new mode the whole server list
# becomes the new list.
# Globals read: ISNEW, NOCLEAN, ISQUICK, BASE_DIR, SITE_DIR, TARGET_DIR,
# TEMP_PREFIX
Check_Files() {
if [ ! -n "${ISNEW}" ]; then
[ -z "${NOCLEAN}" ] && Cleanup_Repository
printf "Checking for errors... "
progress_init
files_error="These files do not match its md5:"
files_notdanbooru="These files are not checked:"
has_err_filename=
has_err_md5=
> "${TEMP_PREFIX}-error"
> "${TEMP_PREFIX}-ok"
for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
do
# Skip the literal glob pattern when the directory is empty.
if [ "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*" ]; then
# Directories and files not named like an md5 are only reported,
# never touched.
if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then
files_notdanbooru="${files_notdanbooru}
$(get_basename "${file}")"
has_err_filename=1
else
# Filename is the expected digest: keep the file when its actual
# md5 agrees, otherwise delete it and record the error.
if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then
echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok"
else
rm "${file}" || Err_Fatal "Error removing ${file}"
echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error"
files_error="${files_error}
$(get_basename "${file}")"
has_err_md5=1
fi
fi
fi
progress_anim
done
progress_done
if [ ! -n "${has_err_md5}" ] && [ ! -n "${has_err_filename}" ]; then
echo "All files OK"
else
if [ -n "${has_err_md5}" ]; then
echo "${files_error}"
echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed"
fi
[ -n "${has_err_filename}" ] && echo "${files_notdanbooru}"
fi
echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally"
printf "Generating list of new files... "
progress_init
# Subtract each locally-verified file from the server list, one grep -v
# pass per entry.
# NOTE(review): the -ok lines are used as unanchored regexes (no -F) and
# this loop is O(n^2); it works because md5 filenames are regex-safe,
# but a single 'grep -v -F -f' would be simpler and faster.
cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist"
while read -r is_ok; do
grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist"
cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible
progress_anim
done < "${TEMP_PREFIX}-ok"
progress_done
echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded"
else
# Fresh repository or quick mode: every server file counts as new.
if [ -n "${ISQUICK}" ]; then
echo "Quick mode selected. Skipping check"
else
echo "Empty local repository"
fi
cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
fi
}
# start downloading the images
# Download every URL listed in ${TEMP_PREFIX}-newlist into the target
# repository folder (resuming partials); wget output goes to
# ${TEMP_PREFIX}.log. Prints "No new file" when the list is empty.
Fetch_Images() {
    if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then
        echo "No new file"
    else
        printf "Downloading files... "
        # BUGFIX: abort when the target directory cannot be entered,
        # instead of silently downloading into the current directory.
        cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}" || Err_Fatal "Cannot enter ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
        wget -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log"
    fi
}
# initialize base variables and initial command check
# Parse the command line: first argument is the command (check, fetch,
# quickfetch), then getopts options, then the tag list. Prints the help
# (exit 2) on bad usage and dies when no tag is given.
# Globals written: JOB, SITE, TAGS, NOCLEAN, LOGIN_USER, LOGIN_PASS,
# _use_login, ISQUICK, ISNEW, BASE_DIR, TARGET_DIR, SITE_DIR, TEMP_PREFIX
init()
{
    # Prepend any user-configured extra tool directories to PATH.
    if [ -n "${ADDITIONAL_PATH}" ]
    then
        PATH="${ADDITIONAL_PATH}:${PATH}"
        export PATH
    fi

    # misc variables
    ISQUICK=
    ISNEW=

    # minimum number of arguments: 2 (command and tag). If less than two, exit and print help message
    [ $# -lt 2 ] && Err_Help
    case "$1" in
        check|fetch|quickfetch)
            echo "Starting..."
            JOB="$1"
            ;;
        *)
            Err_Help
            ;;
    esac
    shift
    SITE=
    TAGS=
    has_pass=0
    has_user=0
    while getopts "s:nu:p:" opt
    do
        case "$opt" in
            s) SITE="$OPTARG";;
            n) NOCLEAN=1;;
            p)
                # Danbooru expects the sha1 of the password; hash it up front.
                LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/')
                has_pass=1
                ;;
            u)
                LOGIN_USER="$OPTARG"
                has_user=1
                ;;
        esac
    done
    # Shift past the parsed options (drops the redundant x=$OPTIND
    # bookkeeping the original carried).
    shift $((OPTIND - 1))
    if [ "$1" = -- ]; then shift; fi
    # BUGFIX: assign "$*" (positional params joined into one string) to
    # the scalar; the old TAGS="$@" form is unspecified in POSIX sh.
    TAGS="$*"
    [ -n "${SITE}" ] || SITE=${DEFAULT_SITE}
    [ -n "${TAGS}" ] || Err_Fatal "No tag specified"
    # Get base folder - default, current folder or fallback to ${HOME}
    [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD}
    [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME}
    # Force an absolute path.
    [ -n "$(echo "${BASE_DIR}" | cut -c1 | grep \/)" ] || BASE_DIR="/${BASE_DIR}"
    # BUGFIX: the deprecated/ambiguous '[ ... -a ... ]' conjunction
    # replaced with two tests joined by &&.
    [ ${has_pass} -eq 1 ] && [ ${has_user} -eq 1 ] && _use_login=1
    echo "Tags: ${TAGS}"
    # slash is not wanted for folder name
    TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
    SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
    TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
}
# Seed the script-wide state used elsewhere in the script.
init_globals()
{
    _version="1.0-rc2" # version string shown in the welcome banner
    _use_login=0       # flipped to 1 by init() when both -u and -p are given
}
# Entry point: print the banner, validate tools and folders, then run
# the requested job (JOB is set by init from the first argument).
main()
{
# removing GNU-ism as much as possible
POSIXLY_CORRECT=1
#initialize global variables
init_globals
#print welcome message
msg_welcome
# initialization
init "$@"
Check_Tools
Check_Folders
# let's do the job!
case "${JOB}" in
check)
# Fetch the server list and report what would change locally.
Generate_Link
Check_Files
;;
fetch)
# Full run: fetch list, verify local repository, download the delta.
Generate_Link
Check_Files
Fetch_Images
;;
quickfetch)
# Skip local verification entirely; treat every missing file as new.
ISNEW=1
ISQUICK=1
Generate_Link
Check_Files
Fetch_Images
;;
esac
}
# call the main routine!
main "$@"