PageRenderTime 25ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/moefetch.sh

https://code.google.com/p/moefetch/
Shell | 420 lines | 318 code | 39 blank | 63 comment | 54 complexity | d72d7e2a964e806864663b3cd929ebea MD5 | raw file
  1. #!/bin/sh
  2. # Copyright (c) 2009, edogawaconan <me@myconan.net>
  3. #
  4. # Permission to use, copy, modify, and/or distribute this software for any
  5. # purpose with or without fee is hereby granted, provided that the above
  6. # copyright notice and this permission notice appear in all copies.
  7. #
  8. # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. #
  16. # Lots of bugs here. Use with care
  17. # USE WITH CARE
  18. #
  19. # what it does: fetch every picture that has the specified TAGS.
  20. # requirement: wget, libxslt, openssl
  21. # program additional paths for: cut, sed, wc, openssl, wget, xsltproc, grep
  22. ADDITIONAL_PATH=
  23. # default server address. Danbooru only! I do not take responsibility of stupidity.
  24. DEFAULT_SITE="moe.imouto.org"
  25. # base directory. make sure it's writeable. I do not take responsibility if you don't own the folder and files as no check is done for this one.
  26. # Structure is ${BASE_DIR}/<TAGS>
  27. # Absolute path only.
  28. # Leave empty to use whatever folder you're running this at
  29. BASE_DIR=
  30. # not user modifiable from here
  31. # useless welcome message. Also version
  32. msg_welcome() {
  33. echo "moefetch ${_version}
  34. Copyright (c) 2009 edogawaconan <me@myconan.net>
  35. "
  36. }
  37. # Sanitize path. Totally safe. Usage: cmd "$(safe_path "${filename}")"
  38. safe_path()
  39. {
  40. # It all depends on the first character.
  41. start=$(printf "%s" "$*" | cut -c 1)
  42. path=
  43. case "${start}" in
  44. .|/) path="$*";; # . and / is safe. No change.
  45. *) path="./$*";; # Anything else must be prefixed with ./
  46. esac
  47. printf "%s" "${path}" # Return.
  48. }
  49. # Checks md5. OpenSSL should be available on anything usable.
  50. get_md5() { cat "$(safe_path "${1}")" | openssl dgst -md5 | tail -n 1 | sed -e 's/.*\([[:xdigit:]]\{32\}\).*/\1/'; }
  51. # Safely get basename.
  52. get_basename() { basename "$(safe_path "${1}")"; }
  53. # Safely get filename (basename without the extension).
  54. get_filename() { get_basename "${1%.*}"; }
  55. # Transformation for tag url.
  56. get_cleantags() { printf "%s " "$*" | sed -e 's/\&/%26/g;s/=/%3D/g'; }
  57. # Returns something if not an md5 value.
  58. is_not_md5() { get_filename "$1" | sed -e 's/\([0-9a-f]\{32\}\)//g'; }
  59. # fatal error handler
  60. Err_Fatal() {
  61. echo "
  62. Fatal error: ${1}"
  63. exit 1
  64. }
  65. Err_Impossible() {
  66. echo "
  67. Impossible error. Or you modified content of the working directories when the script is running.
  68. Please report to moefetch.googlecode.com if you see this message (complete with entire run log)"
  69. exit 1
  70. }
  71. # help message
  72. Err_Help() {
  73. echo "moefetch.sh COMMAND [-n] [-p PASSWORD] [-s SITE_URL] [-u USERNAME] TAGS
  74. COMMAND:
  75. (quick)fetch:
  76. Do a complete update. Add prefix quick to skip file checking
  77. check:
  78. Get list of new files, clean up local folder and print total new files
  79. OPTIONS:
  80. -n:
  81. Skip checking repository directory.
  82. -p PASSWORD:
  83. Specifies password for login.
  84. -s SITE_URL:
  85. Specify URL of the Danbooru powered site you want to leech from. Default is ${DEFAULT_SITE}.
  86. -u USERNAME:
  87. Specifies username for login.
  88. TAGS:
  89. Tags you want to download. Separated by spaces. Tag name follows standard Danbooru tagging scheme."
  90. exit 2
  91. }
  92. # generate link by transforming xml
  93. Generate_Link() {
  94. echo "
  95. Fetching XML file"
  96. tempnum=1000
  97. iternum=1
  98. > "${TEMP_PREFIX}-list"
  99. while [ "${tempnum}" -ge 1000 ]; do
  100. url="http://${SITE}/post/index.xml?tags=$(get_cleantags "${TAGS}")&offset=0&limit=1000&page=${iternum}"
  101. [ ${_use_login} -eq 1 ] && url="${url}&login=${LOGIN_USER}&password_hash=${LOGIN_PASS}"
  102. wget --quiet "${url}" -O "${TEMP_PREFIX}-xml" -e continue=off || Err_Fatal "Failed download catalog file"
  103. printf "Processing XML file... "
  104. # xslt evilry
  105. xsltproc - "${TEMP_PREFIX}-xml" <<EOF | sed 's/.*\(http.*\)\(\/[a-f0-9]\{32\}\).*\.\([^\.]*\)/\1\2.\3/g' | grep ^http > "${TEMP_PREFIX}-templist"
  106. <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  107. <xsl:output method="xml" indent="yes"/>
  108. <xsl:template match="post">
  109. <xsl:value-of select="@file_url" />
  110. </xsl:template>
  111. </xsl:stylesheet>
  112. EOF
  113. tempnum=$(grep -c . "${TEMP_PREFIX}-templist")
  114. iternum=$((iternum + 1))
  115. cat "${TEMP_PREFIX}-templist" >> "${TEMP_PREFIX}-list"
  116. echo "${tempnum} file(s) available"
  117. done
  118. numfiles=$(grep -c . "${TEMP_PREFIX}-list")
  119. echo "${numfiles} file(s) available on server"
  120. [ "${numfiles}" -gt 0 ] || Err_Fatal "Error in processing list or no files can be found with specified tag(s) or site."
  121. }
  122. progress_init() {
  123. _last="-"
  124. printf "${_last}"
  125. }
  126. progress_anim() {
  127. case "${_last}" in
  128. /) _last="-";;
  129. -) _last=\\;;
  130. \\) _last=\|;;
  131. \|) _last="/";;
  132. esac
  133. printf "\b${_last}"
  134. }
  135. progress_done() { printf "\bdone\n"; }
  136. # getting rid of ls (as per suggestion)
  137. Count_Files() {
  138. numfiles=0
  139. for dircontent in "${*}/"* "${*}/".*; do
  140. if [ "${dircontent}" != "${*}/*" ] || [ -e "${dircontent}" ]; then
  141. numfiles=$((numfiles + 1))
  142. fi
  143. done
  144. echo $((numfiles - 2))
  145. }
  146. # check tools availability
  147. Check_Tools() {
  148. # verify all programs required do indeed exist
  149. commands="cut sed wc wget xsltproc xargs rm mkdir chown comm grep date openssl"
  150. for cmd in ${commands}
  151. do
  152. [ "$(command -v "${cmd}")" ] || Err_Fatal "${cmd} doesn't exist in ${PATH}"
  153. done
  154. }
  155. # verify required folders exist and writeable
  156. Check_Folders(){
  157. [ -O "${BASE_DIR}" ] || Err_Fatal "You don't own ${BASE_DIR}. Please fix ${BASE_DIR} or run this script in your own directory."
  158. for directory in temp trash deleted "${SITE_DIR}/${TARGET_DIR}"; do
  159. if [ ! -d "${BASE_DIR}/${directory}" ]; then
  160. mkdir -p "${BASE_DIR}/${directory}" || Err_Impossible
  161. fi
  162. if [ ! -O "${BASE_DIR}/${directory}" ]; then
  163. echo "You don't own the ${BASE_DIR}/${directory}, applying globally writeable permission on it"
  164. chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible
  165. fi
  166. done
  167. [ "$(Count_Files "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}")" -eq 0 ] && ISNEW=1
  168. for i in error ok list newlist templist; do
  169. touch "${TEMP_PREFIX}-${i}" || Fatal_Err "Error creating ${TEMP_PREFIX}-${i}. This shouldn't happen"
  170. done
  171. #
  172. }
  173. # Do some cleanup
  174. Cleanup_Repository() {
  175. # current dir: ${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}
  176. printf "Cleaning up repository folder... "
  177. progress_init
  178. trash_dir="${BASE_DIR}/trash/${trash_dir}/$(date -u "+${SITE_DIR}-${TARGET_DIR}-%Y%m%d-%H.%M")"
  179. trashes="These files have been moved to ${trash_dir}:"
  180. has_trash=
  181. if [ ! -d "${trash_dir}" ]; then
  182. mkdir -p "${trash_dir}" || Err_Impossible
  183. else
  184. if [ ! -O "${trash_dir}" ]; then
  185. chmod -R u=rwX,g=rwX,o=rwX "${BASE_DIR}/${directory}" || Err_Impossible
  186. fi
  187. fi
  188. for trash in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
  189. do
  190. is_trash=
  191. if [ -d "${trash}" ] || [ -n "$(is_not_md5 "${trash}")" ] || [ -z "$(grep "$(get_basename "${trash}")" "${TEMP_PREFIX}-list")" ]; then
  192. is_trash=1
  193. has_trash=1
  194. mv -f -- "${trash}" "${trash_dir}" || Err_Impossible
  195. trashes="${trashes}
  196. $(get_basename "${trash}")"
  197. fi
  198. progress_anim
  199. done
  200. rmdir "${trash_dir}" 2>/dev/null
  201. progress_done
  202. [ -n "${has_trash}" ] && echo "${trashes}"
  203. }
  204. # check files correctness
  205. Check_Files() {
  206. if [ ! -n "${ISNEW}" ]; then
  207. [ -z "${NOCLEAN}" ] && Cleanup_Repository
  208. printf "Checking for errors... "
  209. progress_init
  210. files_error="These files do not match its md5:"
  211. files_notdanbooru="These files are not checked:"
  212. has_err_filename=
  213. has_err_md5=
  214. > "${TEMP_PREFIX}-error"
  215. > "${TEMP_PREFIX}-ok"
  216. for file in "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/"*
  217. do
  218. if [ "${file}" != "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}/*" ]; then
  219. if [ -n "$(is_not_md5 "${file}")" ] || [ -d "${file}" ]; then
  220. files_notdanbooru="${files_notdanbooru}
  221. $(get_basename "${file}")"
  222. has_err_filename=1
  223. else
  224. if [ "$(get_md5 "${file}")" = "$(get_filename "${file}")" ]; then
  225. echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-ok"
  226. else
  227. rm "${file}" || Err_Fatal "Error removing ${file}"
  228. echo "$(get_basename "${file}")" >> "${TEMP_PREFIX}-error"
  229. files_error="${files_error}
  230. $(get_basename "${file}")"
  231. has_err_md5=1
  232. fi
  233. fi
  234. fi
  235. progress_anim
  236. done
  237. progress_done
  238. if [ ! -n "${has_err_md5}" ] && [ ! -n "${has_err_filename}" ]; then
  239. echo "All files OK"
  240. else
  241. if [ -n "${has_err_md5}" ]; then
  242. echo "${files_error}"
  243. echo "$(grep -c . "${TEMP_PREFIX}-error") file(s) removed"
  244. fi
  245. [ -n "${has_err_filename}" ] && echo "${files_notdanbooru}"
  246. fi
  247. echo "$(grep -c . "${TEMP_PREFIX}-ok") file(s) available locally"
  248. printf "Generating list of new files... "
  249. progress_init
  250. cp -f "${TEMP_PREFIX}-list" "${TEMP_PREFIX}-templist"
  251. while read -r is_ok; do
  252. grep -v "${is_ok}" "${TEMP_PREFIX}-templist" > "${TEMP_PREFIX}-newlist"
  253. cp -f "${TEMP_PREFIX}-newlist" "${TEMP_PREFIX}-templist" || Err_Impossible
  254. progress_anim
  255. done < "${TEMP_PREFIX}-ok"
  256. progress_done
  257. echo "$(grep -c . "${TEMP_PREFIX}-newlist") file(s) to be downloaded"
  258. else
  259. if [ -n "${ISQUICK}" ]; then
  260. echo "Quick mode selected. Skipping check"
  261. else
  262. echo "Empty local repository"
  263. fi
  264. cat "${TEMP_PREFIX}-list" > "${TEMP_PREFIX}-newlist"
  265. fi
  266. }
  267. # start downloading the images
  268. Fetch_Images() {
  269. if [ "$(grep -c . "${TEMP_PREFIX}-newlist")" -eq 0 ]; then
  270. echo "No new file"
  271. else
  272. printf "Downloading files... "
  273. cd "${BASE_DIR}/${SITE_DIR}/${TARGET_DIR}"
  274. wget -e continue=on -i "${TEMP_PREFIX}-newlist" -o "${TEMP_PREFIX}.log"
  275. fi
  276. }
  277. # initialize base variables and initial command check
  278. init()
  279. {
  280. # path initialization
  281. # check if additional path is specified
  282. if [ -n "${ADDITIONAL_PATH}" ]
  283. then
  284. # insert the additional path
  285. PATH="${ADDITIONAL_PATH}:${PATH}"
  286. export PATH
  287. fi
  288. # misc variables
  289. ISQUICK=
  290. ISNEW=
  291. # minimum number of arguments: 2 (command and tag). If less than two, exit and print help message
  292. [ $# -lt 2 ] && Err_Help
  293. case "$1" in
  294. check|fetch|quickfetch)
  295. echo "Starting..."
  296. JOB="$1"
  297. ;;
  298. *)
  299. Err_Help
  300. ;;
  301. esac
  302. shift
  303. SITE=
  304. TAGS=
  305. has_pass=0
  306. has_user=0
  307. x=1
  308. while getopts "s:nu:p:" opt
  309. do
  310. case "$opt" in
  311. s) SITE="$OPTARG";;
  312. n) NOCLEAN=1;;
  313. p)
  314. LOGIN_PASS=$(printf "%s" "$OPTARG" | openssl dgst -sha1 | sed -e 's/.*\([[:xdigit:]]\{40\}\).*/\1/')
  315. has_pass=1
  316. ;;
  317. u)
  318. LOGIN_USER="$OPTARG"
  319. has_user=1
  320. ;;
  321. esac
  322. x=$OPTIND
  323. done
  324. shift $(($x-1))
  325. if [ "$1" = -- ]; then shift; fi
  326. TAGS="$@"
  327. [ -n "${SITE}" ] || SITE=${DEFAULT_SITE}
  328. [ -n "${TAGS}" ] || Err_Fatal "No tag specified"
  329. # Get base folder - default, current folder or fallback to ${HOME}
  330. [ -n "${BASE_DIR}" ] || BASE_DIR=${PWD}
  331. [ -n "${BASE_DIR}" ] || BASE_DIR=${HOME}
  332. [ -n "$(echo "${BASE_DIR}" | cut -c1 | grep \/)" ] || BASE_DIR="/${BASE_DIR}"
  333. # see if both pass and use are set. If they're set, switch _use_login variable content to 1.
  334. [ ${has_pass} -eq 1 -a ${has_user} -eq 1 ] && _use_login=1
  335. echo "Tags: ${TAGS}"
  336. # slash is not wanted for folder name
  337. TARGET_DIR=$(echo "${TAGS}" | sed -e 's/\//_/g')
  338. SITE_DIR=$(echo "${SITE}" | sed -e 's/\/$//g;s/\//_/g')
  339. TEMP_PREFIX="${BASE_DIR}/temp/${SITE_DIR}-${TARGET_DIR}"
  340. }
  341. # global variables goes here
  342. init_globals()
  343. {
  344. _version="1.0-rc2" # version of this script
  345. _use_login=0 # variable to check whether a login is used or not
  346. }
  347. main()
  348. {
  349. # removing GNU-ism as much as possible
  350. POSIXLY_CORRECT=1
  351. #initialize global variables
  352. init_globals
  353. #print welcome message
  354. msg_welcome
  355. # initialization
  356. init "$@"
  357. Check_Tools
  358. Check_Folders
  359. # let's do the job!
  360. case "${JOB}" in
  361. check)
  362. Generate_Link
  363. Check_Files
  364. ;;
  365. fetch)
  366. Generate_Link
  367. Check_Files
  368. Fetch_Images
  369. ;;
  370. quickfetch)
  371. ISNEW=1
  372. ISQUICK=1
  373. Generate_Link
  374. Check_Files
  375. Fetch_Images
  376. ;;
  377. esac
  378. }
  379. # call the main routine!
  380. main "$@"