PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/verify_deep.rb

https://bitbucket.org/torresj/hashar-deep
Ruby | 297 lines | 186 code | 50 blank | 61 comment | 16 complexity | d221575e56003b92c72c671fd77bd867 MD5 | raw file
  1. #
  2. # verify_deep.rb
  3. # https://bitbucket.org/torresj/hashar-deep
  4. # Licensed under the terms of the MIT License, as specified below.
  5. #
  6. # Copyright (c) 2012 Jeremy Torres, https://bitbucket.org/torresj/hashar-deep
  7. #
  8. # Permission is hereby granted, free of charge, to any person obtaining
  9. # a copy of this software and associated documentation files (the
  10. # "Software"), to deal in the Software without restriction, including
  11. # without limitation the rights to use, copy, modify, merge, publish,
  12. # distribute, sublicense, and/or sell copies of the Software, and to
  13. # permit persons to whom the Software is furnished to do so, subject to
  14. # the following conditions:
  15. #
  16. # The above copyright notice and this permission notice shall be
  17. # included in all copies or substantial portions of the Software.
  18. #
  19. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  22. # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  23. # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  24. # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  25. # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26. #
  27. require 'logger'
  28. require 'active_record'
  29. require 'benchmark'
  30. require 'colorize'
  # Global verbosity switch, off by default. Printer.debug only prints when it
  # is truthy, and Hashar#initialize additionally enables ActiveRecord logging.
  31. $debug = false
  # Utility module for printing debug, info, error, and success messages, and
  # for drawing a single-line progress bar on standard output.
  #
  # Coloring relies on String#colorize, which must already be loaded (the file
  # requires the 'colorize' gem at the top).
  module Printer
    # Width of the progress meter, in characters.
    TOTAL_DOTS = 40

    # Displays a progress bar to standard output. A percent-complete, the
    # raw counts, and a meter made of '=' characters are output.
    #
    # @param [Integer] cnt The current count being processed.
    # @param [Integer] total_cnt The total count to be processed.
    # @param [Boolean] is_last Indicates cnt is the last item to be processed.
    # @return [void]
    def self.progress_bar(cnt, total_cnt, is_last = false)
      do_progress_bar(cnt, total_cnt, is_last)
    end

    # @param [Object] msg Any object.
    # @return [Boolean] true when msg is a String (or a subclass of it).
    def self.is_string?(msg)
      msg.kind_of?(String)
    end

    # Prints an informational message in light blue.
    #
    # @param [#to_s] msg The message to print.
    # @return [void]
    def self.info(msg)
      puts msg.to_s.colorize(:light_blue)
    end

    # Prints a debug message in yellow, but only when the global $debug flag
    # is set.
    #
    # @param [#to_s] msg The message to print.
    # @return [void]
    def self.debug(msg)
      puts msg.to_s.colorize(:yellow) if $debug
    end

    # Prints an error message in red.
    #
    # @param [#to_s] msg The message to print.
    # @return [void]
    def self.err(msg)
      puts msg.to_s.colorize(:red)
    end

    # Prints a success message in green.
    #
    # @param [#to_s] msg The message to print.
    # @return [void]
    def self.success(msg)
      puts msg.to_s.colorize(:green)
    end

    # Renders the progress line; implementation detail of progress_bar.
    #
    # NOTE: this helper used to sit under a bare `private`, which has no
    # effect on `def self.` methods, so it has always been publicly callable.
    # It is intentionally left public to stay backward-compatible.
    #
    # @api private
    # @param [Integer] cnt The current count being processed.
    # @param [Integer] total_cnt The total count to be processed.
    # @param [Boolean] is_last Ends the line with a newline instead of a
    #   carriage return.
    # @return [void]
    def self.do_progress_bar(cnt, total_cnt, is_last)
      total = total_cnt.to_f
      # With nothing to process the ratio would be 0/0 (NaN) and NaN#round
      # raises FloatDomainError, so an empty workload is shown as 0%.
      fraction = total > 0 ? cnt.to_f / total : 0.0

      # Part of the meter that is already "full", clamped so that a count
      # outside 0..total can never draw outside the meter.
      dots = [[(fraction * TOTAL_DOTS).round, 0].max, TOTAL_DOTS].min

      printf("%3.0f%%".colorize(:light_green) +
             " (%3.0f/%3.0f) ".colorize(:green),
             fraction * 100, cnt.to_f, total)
      print "\t[".colorize(:yellow)
      print(("=" * dots).colorize(:yellow)) unless dots.zero?
      print " " * (TOTAL_DOTS - dots)

      # The last update moves to the next line; every other update returns to
      # the start of the line so the following one overwrites it.
      terminator = is_last ? "]\n" : "]\r"
      print terminator.colorize(:yellow)

      # Flush the same stream that was written to, so the partial line shows
      # up immediately even when output is buffered.
      $stdout.flush
    end
  end
  # Hashes the files below a directory by running an external sha1deep or
  # sha256deep program over the files selected by `find`.
  module VerifyDeep
    include Printer

    # Supported algorithms mapped to the program that implements each one.
    ALGORITHMS = { :sha1 => '/usr/local/bin/sha1deep',
                   :sha256 => '/usr/local/bin/sha256deep' }.freeze

    # @return [Hash{Symbol => String}] the supported algorithms and the path
    #   of the program used for each.
    def self.algos
      ALGORITHMS
    end

    # Hashes every regular file below +directory+ whose name matches one of
    # the comma-separated patterns in +glob+.
    #
    # NOTE(review): the result is keyed by digest, so two files with identical
    # content collapse into a single entry (the last one reported wins).
    # Callers iterate the result as digest => file name, so the shape is kept.
    #
    # @param [String] directory Directory to search (recursively, via find).
    # @param [String] glob Comma-separated `find -name` patterns, e.g.
    #   "*.jpg,*.png". Whitespace around each pattern is ignored.
    # @param [Symbol] algo One of the keys of ALGORITHMS.
    # @param [Boolean] show_progress Draw a progress bar while hashing.
    # @return [Hash{String => String}] digest mapped to file name.
    # @raise [RuntimeError] if the directory does not exist, the glob holds no
    #   usable pattern, or the algorithm is unknown.
    def self.hash_dir(directory, glob, algo, show_progress=true)
      Printer.info "Directory:\t#{directory}\nGlob Patterns:\t#{glob}\n" \
                   "Show Progress:\t#{show_progress}"

      # verify directory is valid
      unless File.directory?(directory.to_s)
        raise "ERROR: '#{directory}' does not exist!"
      end

      # verify valid glob pattern; nil, "" and "," all yield no patterns
      file_patterns = glob.to_s.split(',').map(&:strip).reject(&:empty?)
      raise "ERROR: Glob is not valid!" if file_patterns.empty?
      Printer.info "File Patterns:\t#{file_patterns}"

      # Looking the program up in the table keeps ALGORITHMS the single
      # source of truth for what is supported.
      verify_prog = ALGORITHMS.fetch(algo) do
        raise "ERROR: invalid algorithm: #{algo}"
      end

      find_cmd = find_args(directory.to_s, file_patterns)

      # determine number of files to be processed
      num_files = file_cnt(find_cmd)
      Printer.info "Processing #{num_files} files"

      # -k delimits hash and file name with '*'
      cmd = find_cmd + ['-exec', verify_prog, '-k', '--', '{}', '+']
      Printer.debug "Executing CMD:\n#{cmd.join(' ')}\n"

      file_hash = {}
      cnt = 0
      # The argv form of IO.popen bypasses the shell, so quotes or other
      # metacharacters in the directory or patterns are passed through as-is.
      IO.popen(cmd) do |output|
        output.each_line do |line|
          digest, file_name = parse_hash_line(line)
          # skip anything that is not a "digest*name" line
          next if file_name.nil?
          file_hash[digest] = file_name
          next unless show_progress
          # update progress meter
          cnt += 1
          Printer.progress_bar(cnt, num_files, cnt == num_files)
        end
      end
      file_hash
    end

    # Counts the lines printed by a find command, i.e. the number of files it
    # selects.
    #
    # NOTE: this helper used to sit under a bare `private`, which has no
    # effect on `def self.` methods, so it has always been publicly callable
    # and is left public for backward compatibility.
    #
    # @api private
    # @param [Array<String>, String] find_cmd The command as an argv array
    #   (run without a shell) or as a single string (run through the shell).
    # @return [Integer] number of output lines.
    def self.file_cnt(find_cmd)
      Printer.debug "Count Cmd: #{Array(find_cmd).join(' ')}"
      IO.popen(find_cmd) { |output| output.each_line.count }
    end

    # Builds the argv for: find DIR -type f ( -name P1 -o -name P2 ... )
    #
    # @param [String] directory Directory to search.
    # @param [Array<String>] patterns Non-empty list of -name patterns.
    # @return [Array<String>] the command as an argv array.
    def self.find_args(directory, patterns)
      # Prefix every pattern with "-o -name" and drop the leading "-o".
      name_tests = patterns.flat_map { |pattern| ['-o', '-name', pattern] }.drop(1)
      ['find', directory, '-type', 'f', '(', *name_tests, ')']
    end

    # Splits one output line of the hashing program into digest and name.
    # Only the first '*' separates the two, so a '*' inside the file name is
    # preserved.
    #
    # @param [String] line One line of output.
    # @return [Array(String, String), nil] digest and file name, or nil when
    #   the line has no '*' separator.
    def self.parse_hash_line(line)
      digest, file_name = line.chomp.split('*', 2)
      return nil if file_name.nil?
      [digest.strip, file_name]
    end

    private_class_method :find_args, :parse_hash_line
  end
  157. ### Active Record Class ###
  # Database record for one hashed file. Hashar looks records up by file_name
  # and file_hash, and writes file_name, file_hash and processed_at.
  # NOTE(review): presumably backed by a `hashar_files` table via ActiveRecord
  # naming conventions -- confirm against the schema.
  158. class HasharFile < ActiveRecord::Base
  159. end
  # Performs hash generation/verification of files: hashes a directory with
  # VerifyDeep and compares the result with the HasharFile records in the
  # database.
  class Hashar
    include VerifyDeep
    include Printer

    DB_CONFIG_FILE = 'db/database.yml'
    DB_LOG_FILE = 'db/database.log'

    # Command-line entry point. Reads directory, glob string, algorithm and
    # the optional show-progress flag from ARGV, runs the verification and
    # prints the elapsed wall-clock time.
    #
    # @raise [RuntimeError] with a usage message when fewer than three
    #   arguments are given.
    # @return [void]
    def self.main
      raise "Usage: #{$0} 'directory' 'glob_str' algorithm=sha1|sha256 " \
            "[show_progress=true|false]" if ARGV.length < 3

      puts "Hashar:\tbegin".colorize(:light_magenta)
      Printer.debug "Available Algorithms: #{VerifyDeep.algos}"

      # get command line args
      dir, glob_str, algo, show_progress = ARGV
      hashar = Hashar.new(algo.to_sym)

      bm = Benchmark.measure do
        hashar.hash_dir(dir.to_s, glob_str.to_s, show_progress == "true")
        puts "Hashar:\tdone".colorize(:light_magenta)
      end

      # Benchmark::Tms#real is the elapsed wall-clock time in seconds.
      printf "Completed in %.3f seconds".colorize(:light_magenta), bm.real
      printf " (%.3f minutes)\n".colorize(:light_magenta), bm.real / 60.0
    end

    # Connects ActiveRecord to the database configured in DB_CONFIG_FILE for
    # the environment named by ENV['ENV'] ('development' when unset).
    #
    # @param [Symbol] algo Hash algorithm passed on to VerifyDeep.hash_dir.
    def initialize(algo=:sha256)
      @algo = algo
      @hash_map = {}
      Printer.debug "ENV=#{ENV['ENV']}. Hashar using hash algorithm #@algo"

      # config active record database setup, etc
      # load_file opens and closes the config file itself, so no handle leaks
      @dbconf = YAML.load_file(DB_CONFIG_FILE)
      @dbconn = ActiveRecord::Base.establish_connection(
        @dbconf[ENV.fetch('ENV', 'development')])

      return unless $debug

      # Given a path, Logger opens the log file itself instead of writing
      # through a File handle that nothing ever closes.
      ActiveRecord::Base.logger = Logger.new(DB_LOG_FILE)
      # what console colors are available?
      Printer.debug "Colorize String Colors:\n#{String.colors}"
    end

    # Hashes the matching files below +directory+ and checks each one against
    # the database.
    #
    # @param [String] directory Directory to hash.
    # @param [String] glob Comma-separated file name patterns.
    # @param [Boolean] show_progress Draw a progress bar while hashing.
    # @return [Hash{String => String}] digest mapped to file name, as
    #   produced by VerifyDeep.hash_dir.
    def hash_dir(directory, glob, show_progress=false)
      @hash_map = VerifyDeep.hash_dir(directory, glob, @algo, show_progress)
      do_process
    end

    # begin private methods
    private

    # Compares every hashed file with the database: unknown files are
    # recorded, known files are verified.
    #
    # @return [Hash{String => String}] the processed hash map.
    def do_process
      @hash_map.each do |file_hash, file_name|
        known = HasharFile.find_by_file_name(file_name)
        if known.nil?
          register_new_file(file_name, file_hash)
        elsif file_hash == known.file_hash
          Printer.success "'#{file_name}' *OK*"
          touch_processed_at(known)
        else
          # Printer.err already colors the message red
          Printer.err "!! ERROR: File '#{file_name}' has different hash!"
        end
      end
    end

    # Stores a file that is not in the database yet, unless another stored
    # file already has the same hash.
    def register_new_file(file_name, file_hash)
      duplicate = HasharFile.find_by_file_hash(file_hash)
      if duplicate
        # db contains file with same hash, different name!
        Printer.err "ERROR: File '#{file_name}' hash matches " \
                    "'#{duplicate.file_name}'"
        return
      end

      HasharFile.create(:file_name => file_name,
                        :file_hash => file_hash,
                        :processed_at => Time.now)
    end

    # Updates the processed timestamp of a verified record. A single
    # validated save replaces the former update_attribute + save! pair, which
    # wrote the row twice and skipped validation on the first write.
    def touch_processed_at(record)
      record.processed_at = Time.now
      record.save!
    rescue ActiveRecord::RecordInvalid => invalid
      Printer.err "!! ERROR: Unable to update processed timestamp for " \
                  "#{record.file_name}:\n" \
                  "#{invalid.record.errors.full_messages.join(', ')}"
    end
  end
  # Run the command-line entry point only when this file is executed
  # directly, not when it is loaded by another script.
  Hashar.main if __FILE__ == $PROGRAM_NAME