PageRenderTime 57ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/maintenance/storage/testCompression.php

https://bitbucket.org/ghostfreeman/freeside-wiki
PHP | 101 lines | 69 code | 10 blank | 22 comment | 11 complexity | bc8f1dc6bbd37d445f831b92fdccae52 MD5 | raw file
Possible License(s): GPL-2.0, Apache-2.0, LGPL-3.0
  1. <?php
  2. /**
  3. * This program is free software; you can redistribute it and/or modify
  4. * it under the terms of the GNU General Public License as published by
  5. * the Free Software Foundation; either version 2 of the License, or
  6. * (at your option) any later version.
  7. *
  8. * This program is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. * GNU General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU General Public License along
  14. * with this program; if not, write to the Free Software Foundation, Inc.,
  15. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16. * http://www.gnu.org/copyleft/gpl.html
  17. *
  18. * @file
  19. * @ingroup Maintenance
  20. * @see wfWaitForSlaves()
  21. */
  22. $optionsWithArgs = array( 'start', 'limit', 'type' );
  23. require( __DIR__ . '/../commandLine.inc' );
  24. if ( !isset( $args[0] ) ) {
  25. echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] [--limit=<num-revs>] <page-title>\n";
  26. exit( 1 );
  27. }
  28. $title = Title::newFromText( $args[0] );
  29. if ( isset( $options['start'] ) ) {
  30. $start = wfTimestamp( TS_MW, strtotime( $options['start'] ) );
  31. echo "Starting from " . $wgLang->timeanddate( $start ) . "\n";
  32. } else {
  33. $start = '19700101000000';
  34. }
  35. if ( isset( $options['limit'] ) ) {
  36. $limit = $options['limit'];
  37. $untilHappy = false;
  38. } else {
  39. $limit = 1000;
  40. $untilHappy = true;
  41. }
  42. $type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob';
  43. $dbr = wfGetDB( DB_SLAVE );
  44. $res = $dbr->select(
  45. array( 'page', 'revision', 'text' ),
  46. '*',
  47. array(
  48. 'page_namespace' => $title->getNamespace(),
  49. 'page_title' => $title->getDBkey(),
  50. 'page_id=rev_page',
  51. 'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
  52. 'rev_text_id=old_id'
  53. ), __FILE__, array( 'LIMIT' => $limit )
  54. );
  55. $blob = new $type;
  56. $hashes = array();
  57. $keys = array();
  58. $uncompressedSize = 0;
  59. $t = -microtime( true );
  60. foreach ( $res as $row ) {
  61. $revision = new Revision( $row );
  62. $text = $revision->getText();
  63. $uncompressedSize += strlen( $text );
  64. $hashes[$row->rev_id] = md5( $text );
  65. $keys[$row->rev_id] = $blob->addItem( $text );
  66. if ( $untilHappy && !$blob->isHappy() ) {
  67. break;
  68. }
  69. }
  70. $serialized = serialize( $blob );
  71. $t += microtime( true );
  72. # print_r( $blob->mDiffMap );
  73. printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
  74. $type,
  75. count( $hashes ),
  76. $uncompressedSize / strlen( $serialized ),
  77. $wgLang->formatSize( $uncompressedSize ),
  78. strlen( $serialized )
  79. );
  80. printf( "Compression time: %5.2f ms\n", $t * 1000 );
  81. $t = -microtime( true );
  82. $blob = unserialize( $serialized );
  83. foreach ( $keys as $id => $key ) {
  84. $text = $blob->getItem( $key );
  85. if ( md5( $text ) != $hashes[$id] ) {
  86. echo "Content hash mismatch for rev_id $id\n";
  87. # var_dump( $text );
  88. }
  89. }
  90. $t += microtime( true );
  91. printf( "Decompression time: %5.2f ms\n", $t * 1000 );