/verify_deep.rb
Ruby | 297 lines | 186 code | 50 blank | 61 comment | 16 complexity | d221575e56003b92c72c671fd77bd867 MD5 | raw file
- #
- # verify_deep.rb
- # https://bitbucket.org/torresj/hashar-deep
- # Licensed under the terms of the MIT License, as specified below.
- #
- # Copyright (c) 2012 Jeremy Torres, https://bitbucket.org/torresj/hashar-deep
- #
- # Permission is hereby granted, free of charge, to any person obtaining
- # a copy of this software and associated documentation files (the
- # "Software"), to deal in the Software without restriction, including
- # without limitation the rights to use, copy, modify, merge, publish,
- # distribute, sublicense, and/or sell copies of the Software, and to
- # permit persons to whom the Software is furnished to do so, subject to
- # the following conditions:
- #
- # The above copyright notice and this permission notice shall be
- # included in all copies or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #
require 'benchmark'
require 'logger'
require 'shellwords'
require 'yaml'

require 'active_record'
require 'colorize'
- $debug = false
# Utility module for printing debug, info, and error messages.
#
# Every method is defined on the module itself, so call them as
# Printer.info, Printer.err, and so on. Output is written to $stdout and
# colored with String#colorize.
module Printer
  # width of the progress meter
  TOTAL_DOTS = 40

  # Displays a progress bar to STDOUT. A percent-complete and '==' are
  # output.
  #
  # @param [Integer] cnt The current count being processed.
  # @param [Integer] total_cnt The total count to be processed.
  # @param [Boolean] is_last Indicates cnt is the last item to be processed.
  # @return [void]
  def self.progress_bar(cnt, total_cnt, is_last = false)
    do_progress_bar(cnt, total_cnt, is_last)
  end

  # @param [Object] msg Any object.
  # @return [Boolean] true when msg is a String (or a subclass of it).
  def self.is_string?(msg)
    msg.kind_of?(String)
  end

  # Prints msg in light blue.
  #
  # @param [Object] msg Converted with to_s before printing.
  # @return [void]
  def self.info(msg)
    puts msg.to_s.colorize(:light_blue)
  end

  # Prints msg in yellow, but only when the global $debug flag is set.
  #
  # @param [Object] msg Converted with to_s before printing.
  # @return [void]
  def self.debug(msg)
    puts msg.to_s.colorize(:yellow) if $debug
  end

  # Prints msg in red.
  #
  # @param [Object] msg Converted with to_s before printing.
  # @return [void]
  def self.err(msg)
    puts msg.to_s.colorize(:red)
  end

  # Prints msg in green.
  #
  # @param [Object] msg Converted with to_s before printing.
  # @return [void]
  def self.success(msg)
    puts msg.to_s.colorize(:green)
  end

  # Internal helper behind progress_bar. A bare `private` keyword does not
  # apply to `def self.` methods, so this has always been callable from
  # outside; it is left that way to stay backward compatible.
  #
  # @param [Integer] cnt The current count being processed.
  # @param [Integer] total_cnt The total count to be processed.
  # @param [Boolean] is_last When true the line ends with a newline,
  #   otherwise with a carriage return so the next call redraws it.
  # @return [void]
  def self.do_progress_bar(cnt, total_cnt, is_last)
    total = total_cnt.to_f
    # A zero (or negative) total would make the ratio NaN or Infinity, and
    # Float#round raises FloatDomainError for those; report 0% instead.
    fraction_done = total > 0 ? cnt.to_f / total : 0.0

    # part of the progress meter that's already "full", kept inside the
    # meter so a count larger than the total cannot overflow the bar
    dots = [[(fraction_done * TOTAL_DOTS).round, 0].max, TOTAL_DOTS].min

    # create the "meter"
    printf("%3.0f%%".colorize(:light_green) +
             " (%3.0f/%3.0f) ".colorize(:green),
           fraction_done * 100, cnt.to_f, total)
    print "\t[".colorize(:yellow)
    # part that's full already
    print "=".colorize(:yellow) * dots
    # remaining part (spaces)
    print " " * (TOTAL_DOTS - dots)

    # last item: move to the next line; otherwise go back to the start of
    # the line so the following call overwrites this bar
    closing = is_last ? "]\n" : "]\r"
    print closing.colorize(:yellow)

    # flush to avoid output buffering problems
    $stdout.flush
  end
end
# Hashes files by shelling out to `find` and the external sha1deep /
# sha256deep programs.
module VerifyDeep
  include Printer

  # Supported hash algorithms mapped to the program that computes them.
  ALGORITHMS = { :sha1 => '/usr/local/bin/sha1deep',
                 :sha256 => '/usr/local/bin/sha256deep' }

  # @return [Hash{Symbol => String}] Algorithm name => executable path.
  def self.algos
    ALGORITHMS
  end

  # Hashes every regular file below directory whose name matches one of the
  # given patterns.
  #
  # @param [String] directory Directory searched (recursively, via `find`).
  # @param [String, Array<String>] glob Comma-separated `find -name`
  #   patterns (or an array of them). Surrounding whitespace and empty
  #   entries are ignored, so "*.jpg, *.png" works as expected.
  # @param [Symbol] algo A key of ALGORITHMS.
  # @param [Boolean] show_progress Draw a progress bar while hashing.
  # @return [Hash{String => String}] Digest => file name. Because the digest
  #   is the key, files with identical content collapse into one entry (the
  #   last one reported by the hashing program wins).
  # @raise [RuntimeError] If directory is not a directory, glob contains no
  #   usable pattern, or algo is not a key of ALGORITHMS.
  def self.hash_dir(directory, glob, algo, show_progress=true)
    Printer.info "Directory:\t#{directory}\nGlob Patterns:\t#{glob}\n" \
                 "Show Progress:\t#{show_progress}"

    # verify directory is valid
    unless File.directory? directory
      raise "ERROR: '#{directory}' does not exist!"
    end

    # verify valid glob pattern
    file_patterns = parse_patterns(glob)
    raise "ERROR: Glob is not valid!" if file_patterns.empty?
    Printer.info "File Patterns:\t#{file_patterns}"

    verify_prog = ALGORITHMS[algo]
    raise "ERROR: invalid algorithm: #{algo}" unless verify_prog

    find_cmd = build_find_cmd(directory, file_patterns)

    # determine number of files to be processed
    num_files = file_cnt(find_cmd)
    Printer.info "Processing #{num_files} files"

    # -k delimits hash and file name with '*'
    cmd = "#{find_cmd} -exec #{Shellwords.escape(verify_prog)} -k -- '{}' +"
    Printer.debug "Executing CMD:\n#{cmd}\n"

    file_hash = { }
    cnt = 0
    IO.popen(cmd) do |output|
      output.each_line do |line|
        # Split on the FIRST '*' only: the digest never contains one, but
        # the file name may.
        digest, file_name = line.chomp.split('*', 2)
        # skip anything that is not a "<digest> *<file name>" record
        next if file_name.nil?

        file_hash[digest.strip] = file_name
        next unless show_progress

        # update progress meter
        cnt += 1
        Printer.progress_bar(cnt, num_files, cnt == num_files)
      end
    end
    file_hash
  end

  # Counts the files a find command would report by piping it to `wc -l`.
  # A bare `private` keyword does not apply to `def self.` methods, so this
  # helper has always been callable from outside and is left that way.
  #
  # @param [String] find_cmd A complete, shell-ready `find` command.
  # @return [Integer] Number of lines printed by find_cmd.
  def self.file_cnt(find_cmd)
    cnt_cmd = "#{find_cmd} | wc -l"
    Printer.debug "Count Cmd: #{cnt_cmd}"
    `#{cnt_cmd}`.to_i
  end

  # Turns the caller's glob argument into a clean list of patterns.
  def self.parse_patterns(glob)
    raw = glob.is_a?(String) ? glob.split(',') : Array(glob)
    raw.map { |pattern| pattern.to_s.strip }.reject(&:empty?)
  end

  # Builds the shell-escaped `find` command selecting the files to hash.
  def self.build_find_cmd(directory, file_patterns)
    name_tests = file_patterns.map do |pattern|
      "-name #{Shellwords.escape(pattern)}"
    end
    "find #{Shellwords.escape(directory)} -type f " \
      "\\( #{name_tests.join(' -o ')} \\)"
  end

  private_class_method :parse_patterns, :build_find_cmd
end
### Active Record Class ###
# ActiveRecord model holding one row per hashed file. Hashar reads and
# writes its file_name, file_hash and processed_at attributes; the backing
# table is defined outside this file (presumably `hashar_files`, following
# ActiveRecord naming conventions -- confirm against the schema).
class HasharFile < ActiveRecord::Base
end
### Performs hash generation/verification of files ###
#
# Hashes the files of a directory through VerifyDeep and reconciles the
# result with the HasharFile table: unknown files are recorded, known files
# have their stored hash compared with the freshly computed one.
class Hashar
  include VerifyDeep
  include Printer

  DB_CONFIG_FILE = 'db/database.yml'
  DB_LOG_FILE = 'db/database.log'

  # Loads the database configuration and connects ActiveRecord. The
  # configuration section is chosen by the ENV environment variable and
  # falls back to 'development'.
  #
  # @param [Symbol] algo Hash algorithm passed on to VerifyDeep.hash_dir.
  # @raise [RuntimeError] If DB_CONFIG_FILE has no section for the selected
  #   environment.
  def initialize(algo=:sha256)
    @algo = algo
    @hash_map = { }
    Printer.debug "ENV=#{ENV['ENV']}. Hashar using hash algorithm #{@algo}"

    # config active record database setup, etc
    env = ENV['ENV'] || 'development'
    # load_file opens and closes the file itself (no leaked handle)
    @dbconf = YAML.load_file(DB_CONFIG_FILE)
    db_settings = @dbconf && @dbconf[env]
    unless db_settings
      raise "ERROR: no database configuration for '#{env}' " \
            "in #{DB_CONFIG_FILE}"
    end
    @dbconn = ActiveRecord::Base.establish_connection(db_settings)

    if $debug
      # Logger opens the file in append mode and owns the handle
      ActiveRecord::Base.logger = Logger.new(DB_LOG_FILE)
      # what console colors are available?
      Printer.debug "Colorize String Colors:\n#{String.colors}"
    end
  end

  # Hashes the matching files below directory, then records or verifies
  # each one against the database.
  #
  # @param [String] directory Directory to search.
  # @param [String] glob Comma-separated file name patterns.
  # @param [Boolean] show_progress Draw a progress bar while hashing.
  # @return [Hash{String => String}] Digest => file name, as returned by
  #   VerifyDeep.hash_dir.
  def hash_dir(directory, glob, show_progress=false)
    @hash_map = VerifyDeep.hash_dir(directory, glob, @algo, show_progress)
    do_process
  end

  # Command-line entry point; reads its arguments from ARGV.
  #
  # @raise [RuntimeError] With a usage message when fewer than three
  #   arguments are given.
  # @return [void]
  def self.main
    raise "Usage: #{$0} 'directory' 'glob_str' algorithm=sha1|sha256 " \
          "[show_progress=true|false]" if ARGV.length < 3
    puts "Hashar:\tbegin".colorize(:light_magenta)
    Printer.debug "Available Algorithms: #{VerifyDeep.algos}"

    # get command line args
    dir, glob_str, algo, show_progress = ARGV
    hashar = Hashar.new(algo.to_sym)
    bm = Benchmark.measure do
      hashar.hash_dir(dir.to_s, glob_str.to_s, show_progress == "true")
      puts "Hashar:\tdone".colorize(:light_magenta)
    end

    # wall-clock time of the run
    elapsed = bm.real.to_f
    printf "Completed in %.3f seconds".colorize(:light_magenta), elapsed
    printf " (%.3f minutes)\n".colorize(:light_magenta), elapsed / 60.0
  end

  # begin private methods
  private

  # Reconciles every hashed file with the database.
  #
  # @return [Hash{String => String}] The processed hash map.
  def do_process
    @hash_map.each do |file_hash, file_name|
      known_file = HasharFile.find_by_file_name(file_name)
      if known_file.nil?
        register_new_file(file_name, file_hash)
      else
        verify_known_file(known_file, file_name, file_hash)
      end
    end
  end

  # Records a file name not yet in the database, unless another file with
  # the same hash is already stored.
  def register_new_file(file_name, file_hash)
    same_hash = HasharFile.find_by_file_hash(file_hash)
    if same_hash
      # db contains file with same hash, different name!
      Printer.err "ERROR: File '#{file_name}' hash matches " \
                  "'#{same_hash.file_name}'"
      return
    end

    HasharFile.create(:file_name => file_name,
                      :file_hash => file_hash,
                      :processed_at => Time.now)
  end

  # Compares a stored hash with the fresh one and, when they agree, updates
  # the record's processed timestamp.
  def verify_known_file(record, file_name, file_hash)
    unless file_hash == record.file_hash
      Printer.err "!! ERROR: File '#{file_name}' has different hash!"
      return
    end

    Printer.success "'#{file_name}' *OK*"
    # update the processed timestamp; save! wraps itself in a transaction
    record.processed_at = Time.now
    record.save!
  rescue ActiveRecord::RecordInvalid => invalid
    Printer.err "!! ERROR: Unable to update processed timestamp for " \
                "#{file_name}:\n" \
                "#{invalid.record.errors.full_messages.join(', ')}"
  end
end
# Run the command-line entry point only when this file is executed directly
# rather than loaded by another script.
Hashar.main if __FILE__ == $PROGRAM_NAME