PageRenderTime 51ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/finddup_suite/dupselect.php

http://rtoss.googlecode.com/
PHP | 164 lines | 133 code | 22 blank | 9 comment | 33 complexity | be888b47a8bceb4de7cfb27a7ee1cbb6 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, GPL-3.0, LGPL-3.0, GPL-2.0
  1. <?php
  // ---------------------------------------------------------------------
  // Settings
  // ---------------------------------------------------------------------

  // Plain path fragments: within a group of duplicates, mysort() orders a
  // path containing one of these after a path that does not.
  $excludes_order = array('detail');

  // Regex fragments (mysort() wraps each in '|' delimiters and matches it
  // case-insensitively): a matching path is ordered before a non-matching one.
  $includes_order = array(
      '/\d{8}(_p\d+)?',
      '/waren',
      '/kaberen',
      '/moeren',
      '/kabeura',
      '/moeura',
  );

  // Plain path fragments: dupselect_ui() never pre-checks a matching path.
  $deselects = array('this-one-needs-duplicate');

  // Number of '/' characters in a path that is still considered "normal".
  $normal_depth = 2;

  // ---------------------------------------------------------------------
  // Request dispatch: the first matching condition decides which page runs.
  // ---------------------------------------------------------------------
  if (!isset($_GET['file'])) {
      // No list chosen yet: show the available list files.
      select_file();
  } elseif (isset($_POST['generate'])) {
      // The selection form was submitted: write the delete lists.
      generate_new_list();
  } elseif (isset($_GET['calctotal'])) {
      // Size summary of the chosen list.
      calc_total();
  } else {
      // Default: show the duplicate selection form.
      dupselect_ui();
  }
  /**
   * Format a byte count as a short human-readable string.
   *
   * The value is divided by 1024 for as long as it is at least 1024 and a
   * larger unit is still allowed; the result is printed with one decimal.
   * Values below 1024 are printed unchanged with a "B" suffix.
   *
   * @param int|float $size   Size in bytes.
   * @param int       $maxcnt Largest unit to use, as an index into KB/MB/GB/TB
   *                          (1 = stop at KB, 2 = stop at MB, ...).
   *                          0 means "use every known unit".
   * @return string For example "500B", "1.5KB" or "1.0MB".
   */
  function proper_size($size, $maxcnt = 0) {
      $units = array("KB", "MB", "GB", "TB");
      // Clamp so a large $maxcnt can never index past the unit table.
      $steps = $maxcnt ? min($maxcnt, count($units)) : count($units);
      $suffix = '';
      // ">=" so that exactly 1024 becomes "1.0KB" rather than "1024B"
      // (and 1024*1024 becomes "1.0MB" rather than "1024.0KB").
      for ($i = 0; $i < $steps && $size >= 1024; $i++) {
          $size /= 1024;
          $suffix = $units[$i];
      }
      if ($suffix === '') {
          return $size . 'B';
      }
      return sprintf("%.1f", $size) . $suffix;
  }
  /**
   * Count the non-overlapping occurrences of $chr inside $str.
   *
   * @param string $str Haystack.
   * @param string $chr Needle; may be longer than one character.
   * @return int Number of occurrences; 0 when the needle is empty.
   */
  function countchar($str, $chr) {
      $needle = (string)$chr;
      // An empty needle has no meaningful count (and used to divide by zero).
      if ($needle === '') {
          return 0;
      }
      return substr_count((string)$str, $needle);
  }
  /**
   * usort() comparator for list rows.
   *
   * Ordering rules, applied in this sequence:
   *  1. larger size first;
   *  2. rows grouped by md5 (natural string order);
   *  3. when the two depths together exceed twice $normal_depth: deeper path
   *     first, equal depths ordered by path;
   *  4. for the first $excludes_order fragment found in either path: a path
   *     containing it goes last (both containing it: order by basename);
   *  5. for the first $includes_order pattern matching either path: a matching
   *     path goes first (both matching: order by basename);
   *  6. natural order of the full path.
   *
   * @param array $a Row with keys 'md5', 'size', 'path', 'depth'.
   * @param array $b Row with the same keys.
   * @return int Negative, zero or positive, as usort() expects.
   */
  function mysort($a, $b) { // row(md5,size,path,depth) $a,$b
      global $excludes_order, $includes_order, $normal_depth;

      // 1. Sizes come from the list file as numeric strings, so the loose
      //    operators compare them numerically, which is what is wanted here.
      if ($a['size'] != $b['size']) {
          return $a['size'] > $b['size'] ? -1 : 1;
      }

      // 2. Hashes must be compared as strings: with a loose comparison two
      //    different hashes that look like numbers (e.g. "0e12..." and
      //    "0e34...") are treated as equal and their groups get mixed up.
      $a_md5 = (string)$a['md5'];
      $b_md5 = (string)$b['md5'];
      if ($a_md5 !== $b_md5) {
          return strnatcmp($a_md5, $b_md5);
      }

      // 3. deeper path first
      if (($a['depth'] + $b['depth']) > $normal_depth * 2) {
          if ($a['depth'] == $b['depth']) {
              return strnatcmp($a['path'], $b['path']);
          }
          return $a['depth'] > $b['depth'] ? -1 : 1;
      }

      // 4. exclude: strpos() !== false instead of the truthiness of strstr(),
      //    which is false when the matched tail of the path is "0".
      foreach ($excludes_order as $excl) {
          $a_exclude = strpos($a['path'], $excl) !== false;
          $b_exclude = strpos($b['path'], $excl) !== false;
          if ($a_exclude || $b_exclude) {
              if ($a_exclude && $b_exclude) {
                  return strnatcmp(basename($a['path']), basename($b['path']));
              }
              return $a_exclude ? 1 : -1;
          }
      }

      // 5. include
      foreach ($includes_order as $incl) {
          $pattern = '|' . $incl . '|i';
          $a_include = preg_match($pattern, $a['path']) === 1;
          $b_include = preg_match($pattern, $b['path']) === 1;
          if ($a_include || $b_include) {
              if ($a_include && $b_include) {
                  return strnatcmp(basename($a['path']), basename($b['path']));
              }
              return $a_include ? -1 : 1;
          }
      }

      // 6. last: strnatcmp
      return strnatcmp($a['path'], $b['path']);
  }
  /**
   * Load a tab-separated list file ("md5<TAB>size<TAB>path" per line).
   *
   * Only the base name of $file is used, so the list is always read from the
   * current working directory regardless of any directory part supplied.
   *
   * @param string $file       Name of the list file.
   * @param array  $ary        Output, always reset to
   *                           array('files' => rows, 'names' => map) where
   *                           'names' maps path => array('md5', 'size') and
   *                           'files' holds rows with md5/size/path/depth
   *                           sorted with mysort().
   * @param bool   $names_only When true, 'files' is left empty and nothing is
   *                           sorted.
   * @return void
   */
  function read_and_sort($file, &$ary, $names_only = false) {
      $ary = array('files' => array(), 'names' => array());

      $name = basename($file);
      // A missing or unreadable list yields an empty result instead of warnings.
      if (!is_file($name) || !is_readable($name)) {
          return;
      }
      $lines = file($name);
      if ($lines === false) {
          return;
      }

      foreach ($lines as $line) {
          $fields = explode("\t", rtrim($line));
          // Skip blank or malformed lines; they used to raise undefined-offset
          // warnings and add a bogus entry under an empty path.
          if (count($fields) < 3) {
              continue;
          }
          $entry = array(
              'md5'  => $fields[0],
              'size' => $fields[1],
              'path' => $fields[2],
          );
          if (!$names_only) {
              // Depth = number of '/' separators in the path.
              $entry['depth'] = countchar($entry['path'], '/');
              $ary['files'][] = $entry;
          }
          $ary['names'][$entry['path']] = array('md5' => $entry['md5'], 'size' => $entry['size']);
      }

      if (!$names_only) {
          usort($ary['files'], 'mysort');
      }
  }
  /**
   * Print the opening part of every page: <html>, a <head> that declares the
   * document as Big5 and sets the title, and the opening <body> tag.
   * The caller is responsible for closing <body> and <html>.
   *
   * @return void
   */
  function hdr() {
      // The matching HTTP header is intentionally left disabled:
      // header('Content-Type: text/html; charset=big5');
      $markup = array(
          '<html>',
          '<head>',
          '<meta http-equiv="content-type" content="text/html; charset=big5"/>',
          '<title>Dup Selector</title>',
          '</head>',
          '<body>',
      );
      echo implode("\n", $markup);
  }
  /**
   * Landing page: list every *.txt file in the current directory with a link
   * to the duplicate selector and a link to the size summary for that file.
   *
   * File names and the script path are escaped before being written into the
   * page, and the file name is URL-encoded inside the links, so names that
   * contain spaces, '&', '#', quotes or markup neither break the links nor
   * inject HTML.
   *
   * @return void
   */
  function select_file() {
      // The page is declared as Big5 (see hdr()), so escape in that charset.
      // ENT_SUBSTITUTE (where available) keeps names with invalid byte
      // sequences visible instead of turning them into an empty string.
      $flags = ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);
      $self = htmlspecialchars($_SERVER['PHP_SELF'], $flags, 'BIG5');

      $lists = glob('*.txt');
      if ($lists === false) {
          // glob() reports errors with false; treat that as "no lists".
          $lists = array();
      }

      hdr();
      foreach ($lists as $f) {
          $label = htmlspecialchars($f, $flags, 'BIG5');
          // rawurlencode() output contains no HTML-special characters.
          $href = $self . '?file=' . rawurlencode($f);
          echo $label
              . ' <a href="' . $href . '">DupSelect</a>'
              . ' <a href="' . $href . '&amp;calctotal=1">CalcTotal</a><br/>';
      }
      echo '</body></html>';
  }
  /**
   * Handle the submitted selection form and write the two delete lists next
   * to the source list "<name>.txt":
   *   - "<name>-delete.lst": the selected paths, one per line;
   *   - "<name>-delete.txt": "md5<TAB>size<TAB>path" for each selected path.
   * Afterwards the selection is echoed back as a page.
   *
   * Only paths that are present in the source list are accepted; anything
   * else in the POST data is ignored rather than written to the lists with an
   * empty md5/size. A missing or malformed "files" field is treated as an
   * empty selection.
   *
   * @return void
   */
  function generate_new_list() {
      $flags = ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);

      $my_ary = array();
      read_and_sort($_GET['file'], $my_ary, true);

      // Keep only well-formed entries that the source list actually knows.
      $posted = (isset($_POST['files']) && is_array($_POST['files'])) ? $_POST['files'] : array();
      $selected = array();
      foreach ($posted as $f) {
          if (is_string($f) && isset($my_ary['names'][$f])) {
              $selected[] = $f;
          }
      }

      $rows = '';
      foreach ($selected as $f) {
          $rows .= $my_ary['names'][$f]['md5'] . "\t" . $my_ary['names'][$f]['size'] . "\t$f\n";
      }

      // Strip the 4-character extension (".txt") from the list's base name.
      $base = substr(basename($_GET['file']), 0, -4);
      $lst_ok = file_put_contents($base . '-delete.lst', implode("\n", $selected)) !== false;
      $txt_ok = file_put_contents($base . '-delete.txt', $rows) !== false;

      hdr();
      if (!$lst_ok || !$txt_ok) {
          echo '<p>Error: could not write the delete lists.</p>';
      }
      echo '<pre>';
      // Paths come from the request, so escape them (page charset is Big5).
      echo htmlspecialchars(print_r($selected, true), $flags, 'BIG5');
      echo '</pre></body></html>';
  }
  /**
   * Print every entry of the chosen list (size, md5, path) followed by the
   * total size, both human-readable and as an exact grouped number.
   *
   * Entries are taken from the path-keyed map built by read_and_sort(), so a
   * path that occurs more than once in the list is counted once.
   *
   * @return void
   */
  function calc_total() {
      // List content is echoed into HTML; escape it in the page charset (Big5).
      $flags = ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);

      $my_ary = array();
      read_and_sort($_GET['file'], $my_ary, true);

      $total = 0;
      hdr();
      echo '<pre>';
      echo " Size MD5 Filename\n";
      echo "--------------------------------------------------------------\n";
      foreach ($my_ary['names'] as $path => $info) {
          // Sizes are read as strings; convert once so number_format() and the
          // running total always receive a number.
          $bytes = (float)$info['size'];
          $total += $bytes;
          echo str_pad(number_format($bytes, 0, '.', ','), 15, ' ', STR_PAD_LEFT)
              . ' ' . htmlspecialchars($info['md5'], $flags, 'BIG5')
              // Array keys that look like integers come back as int; cast first.
              . ' ' . htmlspecialchars((string)$path, $flags, 'BIG5') . "\n";
      }
      echo 'Total: ' . proper_size($total) . ' (' . number_format($total, 0, '.', ',') . ')<br/>';
      echo '</pre></body></html>';
  }
  /**
   * Main page: show the sorted list as a form with one checkbox per file,
   * grouped by md5 (groups are separated by <hr/>).
   *
   * Pre-selection: the first file of each group is left unchecked, every
   * further file of the group is checked, and any path containing one of the
   * $deselects fragments is always left unchecked. Checked paths are what the
   * form posts as "files[]".
   *
   * Markers after the md5:
   *   (*)  the first file of a group lies deeper than $normal_depth;
   *   (**) the first later file of a group for which both it and the file
   *        listed just before it lie deeper than $normal_depth.
   *
   * @return void
   */
  function dupselect_ui() {
      global $deselects, $normal_depth;

      // Everything below is echoed into HTML; escape in the page charset (Big5).
      $flags = ENT_QUOTES | (defined('ENT_SUBSTITUTE') ? ENT_SUBSTITUTE : 0);

      $my_ary = array();
      read_and_sort($_GET['file'], $my_ary);

      $prev_md5 = '';
      $prev_depth = 0;
      $twostar = false;

      hdr();
      $action = $_SERVER['PHP_SELF'] . '?file=' . rawurlencode($_GET['file']);
      echo '<form action="' . htmlspecialchars($action, $flags, 'BIG5') . '" method="post">' . "\n";
      echo '<input type="submit" name="generate" value="generate"/>' . "\n";

      foreach ($my_ary['files'] as $f) {
          $md5 = (string)$f['md5'];
          // Strict string comparison: a loose one treats different hashes that
          // look like numbers (e.g. "0e12..." and "0e34...") as the same group.
          $is_first = ($prev_md5 !== $md5);
          if ($is_first) {
              // New group: reset the per-group state and draw a separator.
              $twostar = false;
              $prev_depth = 0;
              echo "<hr/>\n";
          }

          // First file of a group stays unchecked, the rest are pre-checked...
          $deselect = $is_first;
          // ...unless the path matches a protected fragment.
          foreach ($deselects as $d) {
              if (strpos($f['path'], $d) !== false) {
                  $deselect = true;
                  break;
              }
          }

          $is_deep = $f['depth'] > $normal_depth;
          $both_deep = $is_deep && $prev_depth > $normal_depth;
          if ($is_first && $is_deep) {
              $marker = ') <b>(*)</b>';
          } elseif (!$is_first && !$twostar && $both_deep) {
              $marker = ') <b>(**)</b>';
          } else {
              $marker = ')';
          }

          $path_html = htmlspecialchars($f['path'], $flags, 'BIG5');
          echo '<label><input type="checkbox" name="files[]" value="' . $path_html . '"'
              . (!$deselect ? ' checked=checked' : '') . '/>'
              . $path_html . ' (' . htmlspecialchars($md5, $flags, 'BIG5') . $marker
              . "</label><br/>\n";

          // Show (**) at most once per group.
          if ($both_deep) {
              $twostar = true;
          }
          $prev_md5 = $md5;
          $prev_depth = $f['depth'];
      }
      echo '</form></body></html>';
  }