/framework/Support/lib/Horde/Support/Numerizer/Locale/Base.php
https://github.com/finger2000/horde · PHP · 164 lines · 120 code · 14 blank · 30 comment · 4 complexity · 4818721a38e7750dba3c987995f83996 MD5 · raw file
- <?php
- /**
- * Copyright 2010-2011 Horde LLC (http://www.horde.org/)
- *
- * @author Chuck Hagenbuch <chuck@horde.org>
- * @license http://www.horde.org/licenses/bsd BSD
- * @category Horde
- * @package Support
- */
/**
 * Base numerizer locale: rewrites English number words found in free text
 * as digits (e.g. "forty two" becomes "42").
 *
 * The three public lookup tables drive the conversion and the protected
 * methods each implement one pass over the string; numerize() runs the
 * passes in order.
 *
 * @author   Chuck Hagenbuch <chuck@horde.org>
 * @license  http://www.horde.org/licenses/bsd BSD
 * @category Horde
 * @package  Support
 */
class Horde_Support_Numerizer_Locale_Base
{
    /**
     * Direct word-to-digit replacements.
     *
     * Keys are regular expression fragments (matched case-insensitively),
     * values are preg_replace() replacement strings. Entries are applied in
     * array order, so the "-teen" words must stay ahead of the single digits.
     *
     * @var array
     */
    public $DIRECT_NUMS = array(
        'eleven' => '11',
        'twelve' => '12',
        'thirteen' => '13',
        'fourteen' => '14',
        'fifteen' => '15',
        'sixteen' => '16',
        'seventeen' => '17',
        'eighteen' => '18',
        'nineteen' => '19',
        'ninteen' => '19', // Common mis-spelling
        'zero' => '0',
        'one' => '1',
        'two' => '2',
        'three' => '3',
        // The trailing (\W|$) makes these match e.g. "four" but not
        // "fourty"; the captured delimiter is put back by $1.
        'four(\W|$)' => '4$1',
        'five' => '5',
        'six(\W|$)' => '6$1',
        'seven(\W|$)' => '7$1',
        'eight(\W|$)' => '8$1',
        'nine(\W|$)' => '9$1',
        'ten' => '10',
        // NOTE: inside a character class "\b" is a backspace and "^"/"$" are
        // literal characters, so this only matches an "a" followed by one of
        // those three characters. It therefore does not fire on ordinary
        // text such as "a day". Kept unchanged for backward compatibility.
        '\ba[\b^$]' => '1',
    );

    /**
     * Words for the multiples of ten, mapped to their numeric value.
     *
     * @var array
     */
    public $TEN_PREFIXES = array(
        'twenty' => 20,
        'thirty' => 30,
        'forty' => 40,
        'fourty' => 40, // Common mis-spelling
        'fifty' => 50,
        'sixty' => 60,
        'seventy' => 70,
        'eighty' => 80,
        'ninety' => 90,
    );

    /**
     * Magnitude words, mapped to the factor they multiply by. Processed in
     * array order (smallest first).
     *
     * @var array
     */
    public $BIG_PREFIXES = array(
        'hundred' => 100,
        'thousand' => 1000,
        'million' => 1000000,
        'billion' => 1000000000,
        'trillion' => 1000000000000,
    );

    /**
     * Converts the number words in a string to digits.
     *
     * @param string $string  Text that may contain spelled-out numbers.
     *
     * @return string  The text with recognized number words replaced.
     */
    public function numerize($string)
    {
        // preprocess
        $string = $this->_splitHyphenatedWords($string);
        $string = $this->_hideAHalf($string);

        $string = $this->_directReplacements($string);
        $string = $this->_replaceTenPrefixes($string);
        $string = $this->_replaceBigPrefixes($string);
        $string = $this->_fractionalAddition($string);

        return $string;
    }

    /**
     * Collapses runs of spaces and replaces a hyphen between two non-digit
     * characters with a space ("twenty-one" becomes "twenty one").
     *
     * Hyphens next to a digit are left alone. This will mutilate
     * hyphenated-words but shouldn't matter for date extraction.
     *
     * @param string $string  Input text.
     *
     * @return string  Text with hyphenated words split.
     */
    protected function _splitHyphenatedWords($string)
    {
        // For the " +" alternative both groups are empty, so the replacement
        // is a single space.
        return preg_replace('/ +|([^\d])-([^\d])/', '$1 $2', $string);
    }

    /**
     * Takes the 'a' out of "a half" so it doesn't turn into a 1, leaving the
     * marker "haAlf" for _fractionalAddition() to pick up at the end.
     *
     * @param string $string  Input text.
     *
     * @return string  Text with "a half" replaced by the marker.
     */
    protected function _hideAHalf($string)
    {
        return str_replace('a half', 'haAlf', $string);
    }

    /**
     * Applies the easy/direct replacements from $DIRECT_NUMS.
     *
     * @param string $string  Input text.
     *
     * @return string  Text with the direct number words replaced.
     */
    protected function _directReplacements($string)
    {
        foreach ($this->DIRECT_NUMS as $dn => $dn_replacement) {
            $string = preg_replace("/$dn/i", $dn_replacement, $string);
        }
        return $string;
    }

    /**
     * Replaces twenty, thirty, etc., adding a directly following single
     * digit ("twenty 5" becomes "25").
     *
     * @param string $string  Input text (single digits already converted).
     *
     * @return string  Text with the tens words replaced.
     */
    protected function _replaceTenPrefixes($string)
    {
        foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) {
            $string = preg_replace_callback(
                '/(?:' . $tp . ')( *\d(?=[^\d]|$))*/i',
                function ($m) use ($tp_replacement) {
                    // $m[1] is absent when no single digit follows the word.
                    $unit = isset($m[1]) ? (int)$m[1] : 0;
                    return (string)($tp_replacement + $unit);
                },
                $string);
        }
        return $string;
    }

    /**
     * Replaces hundreds, thousands, millions, etc., multiplying by the
     * number that precedes the word and then folding adjacent numbers
     * together via _andition().
     *
     * A magnitude word without a preceding number counts once ("hundred"
     * becomes "100") instead of being multiplied by zero.
     *
     * @param string $string  Input text (smaller numbers already converted).
     *
     * @return string  Text with the magnitude words replaced.
     */
    protected function _replaceBigPrefixes($string)
    {
        foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) {
            $string = preg_replace_callback(
                // The count (and the spaces after it) is optional as a unit,
                // so a bare magnitude word does not swallow preceding spaces.
                '/(?:(\d+) *)?' . $bp . '/i',
                function ($m) use ($bp_replacement) {
                    $count = (isset($m[1]) && $m[1] !== '') ? (int)$m[1] : 1;
                    return (string)($bp_replacement * $count);
                },
                $string);
            $string = $this->_andition($string);
        }
        return $string;
    }

    /**
     * Repeatedly sums the first pair of neighbouring numbers in the string.
     *
     * "N and M" is always summed; "N M" is summed only when N has more
     * digits than M (so "3000 200" becomes "3200" but "19 99" is left
     * alone). Stops as soon as the first pair found does not qualify.
     *
     * @param string $string  Input text.
     *
     * @return string  Text with qualifying number pairs added together.
     */
    protected function _andition($string)
    {
        while (true) {
            if (preg_match('/(\d+)( | and )(\d+)(?=[^\w]|$)/i', $string, $sc, PREG_OFFSET_CAPTURE)) {
                if (preg_match('/and/', $sc[2][0]) || (strlen($sc[1][0]) > strlen($sc[3][0]))) {
                    // Splice the sum in place of "<first><separator><second>".
                    $string = substr($string, 0, $sc[1][1]) . ((int)$sc[1][0] + (int)$sc[3][0]) . substr($string, $sc[3][1] + strlen($sc[3][0]));
                    continue;
                }
            }
            break;
        }
        return $string;
    }

    /**
     * Adds 0.5 to a number that is followed by the "haAlf" marker left by
     * _hideAHalf() ("1 and haAlf" becomes "1.5").
     *
     * @param string $string  Input text.
     *
     * @return string  Text with "<number> ... haAlf" replaced by the sum.
     */
    protected function _fractionalAddition($string)
    {
        return preg_replace_callback(
            '/(\d+)(?: | and |-)*haAlf/i',
            function ($m) {
                return (string)((float)$m[1] + 0.5);
            },
            $string);
    }
}