PageRenderTime 48ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/classes/fGrammar.php

https://bitbucket.org/wbond/flourish/
PHP | 838 lines | 498 code | 116 blank | 224 comment | 77 complexity | bc620e15997f0bf756c0c641c89ce089 MD5 | raw file
  1. <?php
  2. /**
  3. * Provides English word inflection, notation conversion, grammar helpers and internationalization support
  4. *
  5. * @copyright Copyright (c) 2007-2011 Will Bond
  6. * @author Will Bond [wb] <will@flourishlib.com>
  7. * @license http://flourishlib.com/license
  8. *
  9. * @package Flourish
  10. * @link http://flourishlib.com/fGrammar
  11. *
  12. * @version 1.0.0b15
  13. * @changes 1.0.0b15 Added length checking to ensure blank strings are not being passed to various methods [wb, 2011-06-20]
  14. * @changes 1.0.0b14 Fixed a bug in singularization that would affect words containing the substring `mice` or `lice` [wb, 2011-02-24]
  15. * @changes 1.0.0b13 Fixed the pluralization of video [wb, 2010-08-10]
  16. * @changes 1.0.0b12 Updated ::singularize() and ::pluralize() to be able to handle underscore_CamelCase [wb, 2010-08-06]
  17. * @changes 1.0.0b11 Fixed custom camelCase to underscore_notation rules [wb, 2010-06-23]
  18. * @changes 1.0.0b10 Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
  19. * @changes 1.0.0b9 Fixed a bug with ::camelize() and human-friendly strings [wb, 2010-06-08]
  20. * @changes 1.0.0b8 Added the ::stem() method [wb, 2010-05-27]
  21. * @changes 1.0.0b7 Added the `$return_error` parameter to ::pluralize() and ::singularize() [wb, 2010-03-30]
  22. * @changes 1.0.0b6 Added missing ::compose() method [wb, 2010-03-03]
  23. * @changes 1.0.0b5 Fixed ::reset() to properly reset the singularization and pluralization rules [wb, 2009-10-28]
  24. * @changes 1.0.0b4 Added caching for various methods - provided significant performance boost to ORM [wb, 2009-06-15]
  25. * @changes 1.0.0b3 Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
  26. * @changes 1.0.0b2 Fixed a bug where some words would lose capitalization with ::pluralize() and ::singularize() [wb, 2009-01-25]
  27. * @changes 1.0.0b The initial implementation [wb, 2007-09-25]
  28. */
  29. class fGrammar
  30. {
  // Callback-friendly aliases: each constant holds the `Class::method` string
  // of the static method with the same name
  const addCamelUnderscoreRule    = 'fGrammar::addCamelUnderscoreRule';
  const addHumanizeRule           = 'fGrammar::addHumanizeRule';
  const addSingularPluralRule     = 'fGrammar::addSingularPluralRule';
  const camelize                  = 'fGrammar::camelize';
  const humanize                  = 'fGrammar::humanize';
  const inflectOnQuantity         = 'fGrammar::inflectOnQuantity';
  const joinArray                 = 'fGrammar::joinArray';
  const pluralize                 = 'fGrammar::pluralize';
  const registerJoinArrayCallback = 'fGrammar::registerJoinArrayCallback';
  const reset                     = 'fGrammar::reset';
  const singularize               = 'fGrammar::singularize';
  const stem                      = 'fGrammar::stem';
  const underscorize              = 'fGrammar::underscorize';

  /**
   * Memoized conversion results, grouped by the method that produced them
   *
   * The `camelize` entry holds two maps, indexed by the integer form of the
   * `$upper` flag.
   *
   * @var array
   */
  private static $cache = array(
    'camelize'     => array(0 => array(), 1 => array()),
    'humanize'     => array(),
    'pluralize'    => array(),
    'singularize'  => array(),
    'underscorize' => array()
  );

  /**
   * User-registered `underscore_notation` => `camelCase` overrides
   *
   * @var array
   */
  private static $camelize_rules = array();

  /**
   * User-registered overrides for ::humanize(), keyed by the input string
   *
   * @var array
   */
  private static $humanize_rules = array();

  /**
   * Optional replacement implementation for ::joinArray()
   *
   * @var callback
   */
  private static $join_array_callback = NULL;

  /**
   * Regex => replacement pairs for turning a plural noun into its singular
   *
   * Order matters: the first pattern that matches is the one applied.
   *
   * @var array
   */
  private static $plural_to_singular_rules = array(
    '([ml])ice$'                   => '\1ouse',
    '(media|info(rmation)?|news)$' => '\1',
    '(q)uizzes$'                   => '\1uiz',
    '(c)hildren$'                  => '\1hild',
    '(p)eople$'                    => '\1erson',
    '(m)en$'                       => '\1an',
    '((?!sh).)oes$'                => '\1o',
    '((?<!o)[ieu]s|[ieuo]x)es$'    => '\1',
    '([cs]h)es$'                   => '\1',
    '(ss)es$'                      => '\1',
    '([aeo]l)ves$'                 => '\1f',
    '([^d]ea)ves$'                 => '\1f',
    '(ar)ves$'                     => '\1f',
    '([nlw]i)ves$'                 => '\1fe',
    '([aeiou]y)s$'                 => '\1',
    '([^aeiou])ies$'               => '\1y',
    '(la)ses$'                     => '\1s',
    '(.)s$'                        => '\1'
  );

  /**
   * Regex => replacement pairs for turning a singular noun into its plural
   *
   * Order matters: the first pattern that matches is the one applied.
   *
   * @var array
   */
  private static $singular_to_plural_rules = array(
    '([ml])ouse$'                  => '\1ice',
    '(media|info(rmation)?|news)$' => '\1',
    '(phot|log|vide)o$'            => '\1os',
    '^(q)uiz$'                     => '\1uizzes',
    '(c)hild$'                     => '\1hildren',
    '(p)erson$'                    => '\1eople',
    '(m)an$'                       => '\1en',
    '([ieu]s|[ieuo]x)$'            => '\1es',
    '([cs]h)$'                     => '\1es',
    '(ss)$'                        => '\1es',
    '([aeo]l)f$'                   => '\1ves',
    '([^d]ea)f$'                   => '\1ves',
    '(ar)f$'                       => '\1ves',
    '([nlw]i)fe$'                  => '\1ves',
    '([aeiou]y)$'                  => '\1s',
    '([^aeiou])y$'                 => '\1ies',
    '([^o])o$'                     => '\1oes',
    's$'                           => 'ses',
    '(.)$'                         => '\1s'
  );

  /**
   * User-registered `camelCase` => `underscore_notation` overrides
   *
   * @var array
   */
  private static $underscorize_rules = array();
  /**
   * Registers an exact-match override used by ::humanize()
   *
   * The humanize cache is emptied so that results computed before the rule
   * existed are not served again.
   *
   * @param  string $non_humanized_string  The input string to override
   * @param  string $humanized_string      The output ::humanize() should return for that input
   * @return void
   */
  public static function addHumanizeRule($non_humanized_string, $humanized_string)
  {
    // The two statements are independent, so the cache is simply cleared first
    self::$cache['humanize'] = array();
    self::$humanize_rules[$non_humanized_string] = $humanized_string;
  }
  /**
   * Adds a custom `camelCase` to `underscore_notation` and `underscore_notation` to `camelCase` rule
   *
   * The first character of `$camel_case` is lower-cased before the rule is
   * stored, and the camelize and underscorize caches are emptied so stale
   * results are not reused.
   *
   * @throws fProgrammerException  When either string is empty
   *
   * @param  string $camel_case           The lower `camelCase` version of the string
   * @param  string $underscore_notation  The `underscore_notation` version of the string
   * @return void
   */
  public static function addCamelUnderscoreRule($camel_case, $underscore_notation)
  {
    // An empty side could never be looked up (::camelize() and ::underscorize()
    // reject empty input) and `$camel_case[0]` would read an invalid offset
    if (!strlen($camel_case) || !strlen($underscore_notation)) {
      throw new fProgrammerException(
        "An empty string was passed to %s",
        __CLASS__ . '::addCamelUnderscoreRule()'
      );
    }

    $camel_case = strtolower($camel_case[0]) . substr($camel_case, 1);

    self::$underscorize_rules[$camel_case]       = $underscore_notation;
    self::$camelize_rules[$underscore_notation]  = $camel_case;

    self::$cache['camelize']     = array(0 => array(), 1 => array());
    self::$cache['underscorize'] = array();
  }
  /**
   * Adds a custom singular to plural and plural to singular rule for ::pluralize() and ::singularize()
   *
   * The rule is placed ahead of all existing rules, so it takes precedence.
   * Registering the same noun again replaces the earlier custom rule. The
   * first character is captured rather than hard-coded so the case of the
   * input's first letter is carried over to the output.
   *
   * @throws fProgrammerException  When either noun is empty
   *
   * @param  string $singular  The singular version of the noun
   * @param  string $plural    The plural version of the noun
   * @return void
   */
  public static function addSingularPluralRule($singular, $plural)
  {
    if (!strlen($singular) || !strlen($plural)) {
      throw new fProgrammerException(
        "An empty string was passed to %s",
        __CLASS__ . '::addSingularPluralRule()'
      );
    }

    // Characters with special meaning in a preg_replace() replacement string
    $escapes = array('\\' => '\\\\', '$' => '\\$');

    $singular_rest = (string) substr($singular, 1);
    $plural_rest   = (string) substr($plural, 1);

    $singular_pattern = '^(' . preg_quote($singular[0], '#') . ')' . preg_quote($singular_rest, '#') . '$';
    $plural_pattern   = '^(' . preg_quote($plural[0], '#') . ')' . preg_quote($plural_rest, '#') . '$';

    // `${1}` instead of `\1`: if the rest of the noun starts with a digit,
    // `\1` followed by that digit would be read as a reference to group 10+.
    // Array union instead of array_merge(): with array_merge() an identical
    // key already present in the existing rules would overwrite the new value.
    self::$singular_to_plural_rules = array(
      $singular_pattern => '${1}' . strtr($plural_rest, $escapes)
    ) + self::$singular_to_plural_rules;

    self::$plural_to_singular_rules = array(
      $plural_pattern => '${1}' . strtr($singular_rest, $escapes)
    ) + self::$plural_to_singular_rules;

    self::$cache['pluralize']   = array();
    self::$cache['singularize'] = array();
  }
  /**
   * Converts an `underscore_notation`, human-friendly or `camelCase` string to `camelCase`
   *
   * Results are memoized per input string and per `$upper` flag.
   *
   * @throws fProgrammerException  When an empty string is passed
   *
   * @param  string  $string  The string to convert
   * @param  boolean $upper   If the camel case should be `UpperCamelCase`
   * @return string  The converted string
   */
  public static function camelize($string, $upper)
  {
    if (!strlen($string)) {
      throw new fProgrammerException(
        "An empty string was passed to %s",
        __CLASS__ . '::camelize()'
      );
    }

    // Normalize the flag to 0/1 so it can be used as a cache index
    $upper = (int)(bool) $upper;

    if (isset(self::$cache['camelize'][$upper][$string])) {
      return self::$cache['camelize'][$upper][$string];
    }

    $original = $string;

    // Handle custom rules
    if (isset(self::$camelize_rules[$string])) {
      $string = self::$camelize_rules[$string];
      if ($upper) {
        $string = ucfirst($string);
      }

    } else {
      // Make a humanized string like underscore notation
      if (strpos($string, ' ') !== FALSE) {
        $string = strtolower(preg_replace('#\s+#', '_', $string));
      }

      // Without an underscore the string is taken to already be camel case
      if (strpos($string, '_') === FALSE) {
        if ($upper) {
          $string = ucfirst($string);
        }

      // Handle underscore notation
      } else {
        $string[0] = strtolower($string[0]);
        if ($upper) {
          $string = ucfirst($string);
        }
        // __CLASS__ rather than 'self': the relative 'self' callable form
        // is deprecated as of PHP 8.2
        $string = preg_replace_callback(
          '#_([a-z0-9])#i',
          array(__CLASS__, 'camelizeCallback'),
          $string
        );
      }
    }

    self::$cache['camelize'][$upper][$original] = $string;
    return $string;
  }
  /**
   * Regex callback that upper-cases the first captured group of a match
   *
   * @param  array $match  The regular expression match
   * @return string  The upper-cased contents of capture group 1
   */
  private static function camelizeCallback($match)
  {
    $captured = $match[1];
    return strtoupper($captured);
  }
  /**
   * Builds a message, delegating to fText::compose() when that class is already loaded
   *
   * When fText is not loaded (autoloading is not triggered), the message is
   * formatted with vsprintf() instead.
   *
   * @param  string $message    The message to compose
   * @param  mixed  $component  A string or number to insert into the message
   * @param  mixed  ...
   * @return string  The composed and possibly translated message
   */
  protected static function compose($message)
  {
    // Everything after the first argument is a component for the message
    $components = func_get_args();
    array_shift($components);

    if (!class_exists('fText', FALSE)) {
      return vsprintf($message, $components);
    }

    // The components are handed over as a single array argument
    return call_user_func_array(
      array('fText', 'compose'),
      array($message, $components)
    );
  }
  /**
   * Makes an `underscore_notation`, `camelCase`, or human-friendly string into a human-friendly string
   *
   * A string that already contains a space is returned unchanged unless a
   * custom rule exists for it. Otherwise underscores become spaces, the first
   * letter of each word is upper-cased, and the listed acronyms (api, css,
   * html, id, ...) are upper-cased in full. Results are memoized.
   *
   * @throws fProgrammerException  When an empty string is passed
   *
   * @param  string $string  The string to humanize
   * @return string  The converted string
   */
  public static function humanize($string)
  {
    if (!strlen($string)) {
      throw new fProgrammerException(
        "An empty string was passed to %s",
        __CLASS__ . '::humanize()'
      );
    }

    if (isset(self::$cache['humanize'][$string])) {
      return self::$cache['humanize'][$string];
    }

    $original = $string;

    if (isset(self::$humanize_rules[$string])) {
      $string = self::$humanize_rules[$string];

    // If there is no space, it isn't already humanized
    } elseif (strpos($string, ' ') === FALSE) {

      // If we don't have an underscore we probably have camelCase
      if (strpos($string, '_') === FALSE) {
        $string = self::underscorize($string);
      }

      // __CLASS__ rather than 'self': the relative 'self' callable form
      // is deprecated as of PHP 8.2
      $string = preg_replace_callback(
        '/(\b(api|css|gif|html|id|jpg|js|mp3|pdf|php|png|sql|swf|url|xhtml|xml)\b|\b\w)/',
        array(__CLASS__, 'camelizeCallback'),
        str_replace('_', ' ', $string)
      );
    }

    self::$cache['humanize'][$original] = $string;
    return $string;
  }
  /**
   * Returns the singular or plural form of a phrase, based on the quantity specified
   *
   * Only the plural form has `%d` replaced with the quantity; the singular
   * form is returned exactly as passed.
   *
   * @param  mixed   $quantity                     The quantity (integer) or an array of objects to count
   * @param  string  $singular_form                The string to be returned for when `$quantity = 1`
   * @param  string  $plural_form                  The string to be returned for when `$quantity != 1`, use `%d` to place the quantity in the string - defaults to ::pluralize() of `$singular_form`
   * @param  boolean $use_words_for_single_digits  If the whole numbers 0 to 9 should be written out as words
   * @return string
   */
  public static function inflectOnQuantity($quantity, $singular_form, $plural_form=NULL, $use_words_for_single_digits=FALSE)
  {
    if ($plural_form === NULL) {
      $plural_form = self::pluralize($singular_form);
    }

    if (is_array($quantity)) {
      $quantity = sizeof($quantity);
    }

    if ($quantity == 1) {
      return $singular_form;
    }

    $output = $plural_form;

    // Nothing to substitute if the plural form has no placeholder
    if (strpos($output, '%d') === FALSE) {
      return $output;
    }

    // Only whole numbers from 0 to 9 have a word form. Negative or
    // fractional quantities are inserted as-is, instead of reading a missing
    // array offset or being truncated to the wrong word.
    $is_single_digit = is_numeric($quantity)
      && (int) $quantity == $quantity
      && $quantity >= 0
      && $quantity < 10;

    if ($use_words_for_single_digits && $is_single_digit) {
      // Built once per request so the words only go through ::compose() once
      static $replacements = array();
      if (!$replacements) {
        $replacements = array(
          0 => self::compose('zero'),
          1 => self::compose('one'),
          2 => self::compose('two'),
          3 => self::compose('three'),
          4 => self::compose('four'),
          5 => self::compose('five'),
          6 => self::compose('six'),
          7 => self::compose('seven'),
          8 => self::compose('eight'),
          9 => self::compose('nine')
        );
      }
      $quantity = $replacements[(int) $quantity];
    }

    return str_replace('%d', $quantity, $output);
  }
  /**
   * Returns the passed terms joined together into an English list
   *
   * Two terms are joined as `a and b`. Three or more are joined with commas
   * and the conjunction before the final term, e.g. `a, b and c` - note that
   * no comma is placed before the conjunction. If a callback was registered
   * via ::registerJoinArrayCallback(), it is called with the unmodified
   * arguments and its return value is returned instead.
   *
   * @throws fProgrammerException  When `$type` is not exactly `'and'` or `'or'`
   *
   * @param  array  $strings  An array of strings to be joined together
   * @param  string $type     The type of join to perform, `'and'` or `'or'`
   * @return string  The terms joined together
   */
  public static function joinArray($strings, $type)
  {
    $valid_types = array('and', 'or');

    // Strict comparison: with loose comparison values such as 0 or TRUE
    // would be accepted as a valid type on PHP < 8
    if (!in_array($type, $valid_types, TRUE)) {
      throw new fProgrammerException(
        'The type specified, %1$s, is invalid. Must be one of: %2$s.',
        $type,
        join(', ', $valid_types)
      );
    }

    if (self::$join_array_callback) {
      return call_user_func(self::$join_array_callback, $strings, $type);
    }

    // Accept a single scalar and ignore any custom keys
    settype($strings, 'array');
    $strings = array_values($strings);

    switch (sizeof($strings)) {
      case 0:
        return '';

      case 1:
        return $strings[0];

      case 2:
        return $strings[0] . ' ' . $type . ' ' . $strings[1];

      default:
        $last_string = array_pop($strings);
        return join(', ', $strings) . ' ' . $type . ' ' . $last_string;
    }
  }
  /**
   * Returns the plural version of a singular noun
   *
   * Only the last word of a multi-word, `camelCase` or `underscore_notation`
   * string is inflected. Results, including failures, are memoized under the
   * exact string passed in.
   *
   * @throws fProgrammerException  When an empty string is passed, or when the noun can't be pluralized and `$return_error` is `FALSE`
   *
   * @param  string  $singular_noun  The singular noun to pluralize
   * @param  boolean $return_error   If this is `TRUE` and the noun can't be pluralized, `FALSE` will be returned instead
   * @return string  The pluralized noun
   */
  public static function pluralize($singular_noun, $return_error=FALSE)
  {
    if (!strlen($singular_noun)) {
      throw new fProgrammerException(
        "An empty string was passed to %s",
        __CLASS__ . '::pluralize()'
      );
    }

    $original = $singular_noun;

    if (isset(self::$cache['pluralize'][$original])) {
      $plural_noun = self::$cache['pluralize'][$original];

    } else {
      // FALSE marks "no rule matched" and is what gets cached for failures
      $plural_noun = FALSE;

      list ($beginning, $last_word) = self::splitLastWord($original);
      foreach (self::$singular_to_plural_rules as $from => $to) {
        if (preg_match('#' . $from . '#iD', $last_word)) {
          $plural_noun = $beginning . preg_replace('#' . $from . '#iD', $to, $last_word);
          break;
        }
      }

      // Always cache under the string the caller passed, never the split-off last word
      self::$cache['pluralize'][$original] = $plural_noun;
    }

    // Checked after the cache lookup so a cached failure still honours $return_error
    if ($plural_noun === FALSE) {
      if ($return_error) {
        return FALSE;
      }
      throw new fProgrammerException('The noun specified could not be pluralized');
    }

    return $plural_noun;
  }
  /**
   * Installs a user-defined replacement for ::joinArray()
   *
   * Mainly useful for producing lists in languages other than English.
   *
   * @param  callback $callback  The function to replace ::joinArray() with - should accept the same parameters and return the same type
   * @return void
   */
  public static function registerJoinArrayCallback($callback)
  {
    // A `Class::method` string is stored in its array callback form
    $is_static_method_string = is_string($callback) && strpos($callback, '::') !== FALSE;

    self::$join_array_callback = $is_static_method_string
      ? explode('::', $callback)
      : $callback;
  }
  /**
   * Restores the class to its initial configuration
   *
   * Clears every cache, drops all custom rules and the ::joinArray()
   * callback, and reinstates the built-in inflection rule tables.
   *
   * @internal
   *
   * @return void
   */
  public static function reset()
  {
    // Custom rules and callbacks
    self::$join_array_callback = NULL;
    self::$humanize_rules      = array();
    self::$camelize_rules      = array();
    self::$underscorize_rules  = array();

    // Built-in inflection tables (the first matching pattern wins)
    self::$singular_to_plural_rules = array(
      '([ml])ouse$'                  => '\1ice',
      '(media|info(rmation)?|news)$' => '\1',
      '(phot|log|vide)o$'            => '\1os',
      '^(q)uiz$'                     => '\1uizzes',
      '(c)hild$'                     => '\1hildren',
      '(p)erson$'                    => '\1eople',
      '(m)an$'                       => '\1en',
      '([ieu]s|[ieuo]x)$'            => '\1es',
      '([cs]h)$'                     => '\1es',
      '(ss)$'                        => '\1es',
      '([aeo]l)f$'                   => '\1ves',
      '([^d]ea)f$'                   => '\1ves',
      '(ar)f$'                       => '\1ves',
      '([nlw]i)fe$'                  => '\1ves',
      '([aeiou]y)$'                  => '\1s',
      '([^aeiou])y$'                 => '\1ies',
      '([^o])o$'                     => '\1oes',
      's$'                           => 'ses',
      '(.)$'                         => '\1s'
    );

    self::$plural_to_singular_rules = array(
      '([ml])ice$'                   => '\1ouse',
      '(media|info(rmation)?|news)$' => '\1',
      '(q)uizzes$'                   => '\1uiz',
      '(c)hildren$'                  => '\1hild',
      '(p)eople$'                    => '\1erson',
      '(m)en$'                       => '\1an',
      '((?!sh).)oes$'                => '\1o',
      '((?<!o)[ieu]s|[ieuo]x)es$'    => '\1',
      '([cs]h)es$'                   => '\1',
      '(ss)es$'                      => '\1',
      '([aeo]l)ves$'                 => '\1f',
      '([^d]ea)ves$'                 => '\1f',
      '(ar)ves$'                     => '\1f',
      '([nlw]i)ves$'                 => '\1fe',
      '([aeiou]y)s$'                 => '\1',
      '([^aeiou])ies$'               => '\1y',
      '(la)ses$'                     => '\1s',
      '(.)s$'                        => '\1'
    );

    // Memoized results
    self::$cache = array(
      'camelize'     => array(0 => array(), 1 => array()),
      'humanize'     => array(),
      'pluralize'    => array(),
      'singularize'  => array(),
      'underscorize' => array()
    );
  }
  /**
   * Returns the singular version of a plural noun
   *
   * Only the last word of a multi-word, `camelCase` or `underscore_notation`
   * string is inflected. Results, including failures, are memoized under the
   * exact string passed in.
   *
   * @throws fProgrammerException  When an empty string is passed, or when the noun can't be singularized and `$return_error` is `FALSE`
   *
   * @param  string  $plural_noun   The plural noun to singularize
   * @param  boolean $return_error  If this is `TRUE` and the noun can't be singularized, `FALSE` will be returned instead
   * @return string  The singularized noun
   */
  public static function singularize($plural_noun, $return_error=FALSE)
  {
    if (!strlen($plural_noun)) {
      throw new fProgrammerException(
        "An empty string was passed to %s",
        __CLASS__ . '::singularize()'
      );
    }

    $original = $plural_noun;

    if (isset(self::$cache['singularize'][$original])) {
      $singular_noun = self::$cache['singularize'][$original];

    } else {
      // FALSE marks "no rule matched". An explicit sentinel is used rather
      // than a truthiness check because a valid result such as '0' is falsy.
      $singular_noun = FALSE;

      list ($beginning, $last_word) = self::splitLastWord($original);
      foreach (self::$plural_to_singular_rules as $from => $to) {
        if (preg_match('#' . $from . '#iD', $last_word)) {
          $singular_noun = $beginning . preg_replace('#' . $from . '#iD', $to, $last_word);
          break;
        }
      }

      // Always cache under the string the caller passed, never the split-off last word
      self::$cache['singularize'][$original] = $singular_noun;
    }

    // Checked after the cache lookup so a cached failure still honours $return_error
    if ($singular_noun === FALSE) {
      if ($return_error) {
        return FALSE;
      }
      throw new fProgrammerException('The noun specified could not be singularized');
    }

    return $singular_noun;
  }
  /**
   * Separates the final word of a string from everything that precedes it
   *
   * Space-separated strings are split after the last space. Strings that are
   * already in `underscore_notation` are split after the last underscore.
   * Anything else is treated as `camelCase` and split before its last
   * capitalized word or trailing number.
   *
   * @param  string $string  The string to split the word from
   * @return array  The first element is the beginning part of the string, the second element is the last word
   */
  private static function splitLastWord($string)
  {
    // Strings with spaces in them
    $last_space = strrpos($string, ' ');
    if ($last_space !== FALSE) {
      $cut = $last_space + 1;
      return array(substr($string, 0, $cut), substr($string, $cut));
    }

    // Underscore notation: the string is unchanged by ::underscorize()
    if ($string == self::underscorize($string)) {
      $last_underscore = strrpos($string, '_');
      if ($last_underscore === FALSE) {
        return array('', $string);
      }
      $cut = $last_underscore + 1;
      return array(substr($string, 0, $cut), substr($string, $cut));
    }

    // Camel case
    $camel_regex = '#(.*)((?<=[a-zA-Z_]|^)(?:[0-9]+|[A-Z][a-z]*)|(?<=[0-9A-Z_]|^)(?:[A-Z][a-z]*))$#D';
    if (!preg_match($camel_regex, $string, $parts)) {
      return array('', $string);
    }

    return array($parts[1], $parts[2]);
  }
  /**
   * Reduces a word to its stem using the Porter Stemming algorithm, which is useful for searching
   *
   * The algorithm is described at http://tartarus.org/~martin/PorterStemmer/.
   * The word is converted to ASCII and lower-cased first; words shorter than
   * three characters are returned without further processing.
   *
   * @param  string $word  The word to get the stem of
   * @return string  The stem of the word
   */
  public static function stem($word)
  {
    // Regex building blocks; the names follow the stemmer's conditions
    // "contains a vowel", m>0, m=1 and m>1
    $vowel_part   = '^([^aeiou][^aeiouy]*)?[aeiouy]';
    $m_gt_0_part  = $vowel_part . '[aeiou]*[^aeiou][^aeiouy]*';

    $has_vowel = '#' . $vowel_part . '#';
    $m_gt_0    = '#' . $m_gt_0_part . '#';
    $m_eq_1    = '#' . $m_gt_0_part . '([aeiouy][aeiou]*)?$#';
    $m_gt_1    = '#' . $m_gt_0_part . '[aeiouy][aeiou]*[^aeiou][^aeiouy]*#';

    // Consonant-vowel-consonant ending where the last consonant is not w, x or y
    $short_cvc = '#^[^aeiou][^aeiouy]*[aeiouy][^aeiouwxy]$#';

    $word = strtolower(fUTF8::ascii($word));

    if (strlen($word) < 3) {
      return $word;
    }

    // A leading y is upper-cased so the lower-case character classes above
    // do not match it; it is restored at the end
    if ($word[0] == 'y') {
      $word = 'Y' . substr($word, 1);
    }

    // Step 1a
    $word = preg_replace('#^(.+?)(?:(ss|i)es|([^s])s)$#', '\1\2\3', $word);

    // Step 1b
    if (preg_match('#^(.+?)eed$#', $word, $parts)) {
      if (preg_match($m_gt_0, $parts[1])) {
        $word = substr($word, 0, -1);
      }

    } elseif (preg_match('#^(.+?)(ed|ing)$#', $word, $parts)) {
      if (preg_match($has_vowel, $parts[1])) {
        $word = $parts[1];

        if (preg_match('#(at|bl|iz)$#', $word)) {
          $word .= 'e';
        } elseif (preg_match('#([^aeiouylsz])\1$#', $word)) {
          $word = substr($word, 0, -1);
        } elseif (preg_match($short_cvc, $word)) {
          $word .= 'e';
        }
      }
    }

    // Step 1c
    if (substr($word, -1) == 'y') {
      $without_y = substr($word, 0, -1);
      if (preg_match($has_vowel, $without_y)) {
        $word = $without_y . 'i';
      }
    }

    // Step 2
    $step_2_map = array(
      'ational' => 'ate', 'tional'  => 'tion', 'enci'    => 'ence',
      'anci'    => 'ance', 'izer'   => 'ize',  'bli'     => 'ble',
      'alli'    => 'al',  'entli'   => 'ent',  'eli'     => 'e',
      'ousli'   => 'ous', 'ization' => 'ize',  'ation'   => 'ate',
      'ator'    => 'ate', 'alism'   => 'al',   'iveness' => 'ive',
      'fulness' => 'ful', 'ousness' => 'ous',  'aliti'   => 'al',
      'iviti'   => 'ive', 'biliti'  => 'ble',  'logi'    => 'log'
    );
    if (preg_match('#^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$#', $word, $parts)) {
      if (preg_match($m_gt_0, $parts[1])) {
        $word = $parts[1] . strtr($parts[2], $step_2_map);
      }
    }

    // Step 3
    $step_3_map = array(
      'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
      'ical'  => 'ic', 'ful'   => '', 'ness'  => ''
    );
    if (preg_match('#^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$#', $word, $parts)) {
      if (preg_match($m_gt_0, $parts[1])) {
        $word = $parts[1] . strtr($parts[2], $step_3_map);
      }
    }

    // Step 4
    if (preg_match('#^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize|(?<=[st])ion)$#', $word, $parts)) {
      if (preg_match($m_gt_1, $parts[1])) {
        $word = $parts[1];
      }
    }

    // Step 5
    if (substr($word, -1) == 'e') {
      $without_e = substr($word, 0, -1);
      if (preg_match($m_gt_1, $without_e) || (preg_match($m_eq_1, $without_e) && !preg_match($short_cvc, $without_e))) {
        $word = $without_e;
      }
    }

    if (preg_match('#ll$#', $word) && preg_match($m_gt_1, $word)) {
      $word = substr($word, 0, -1);
    }

    // Restore a leading y that was upper-cased above
    if ($word[0] == 'Y') {
      $word = 'y' . substr($word, 1);
    }

    return $word;
  }
  /**
   * Converts a `camelCase`, human-friendly or `underscore_notation` string to `underscore_notation`
   *
   * The first character is lower-cased before anything else happens. A
   * custom rule wins if one exists; an all-lower-case string containing an
   * underscore is left alone; a string containing whitespace has each run of
   * whitespace replaced by one underscore; anything else has underscores
   * inserted at letter/digit and capital-letter boundaries. Results are
   * memoized.
   *
   * @throws fProgrammerException  When an empty string is passed
   *
   * @param  string $string  The string to convert
   * @return string  The converted string
   */
  public static function underscorize($string)
  {
    if (!strlen($string)) {
      throw new fProgrammerException(
        "An empty string was passed to %s",
        __CLASS__ . '::underscorize()'
      );
    }

    if (isset(self::$cache['underscorize'][$string])) {
      return self::$cache['underscorize'][$string];
    }

    $cache_key = $string;
    $string    = strtolower($string[0]) . substr($string, 1);

    $already_underscored = strpos($string, '_') !== FALSE && strtolower($string) == $string;

    if (isset(self::$underscorize_rules[$string])) {
      // Custom rule
      $string = self::$underscorize_rules[$string];

    } elseif ($already_underscored) {
      // Already underscore notation, nothing to do

    } elseif (strpos($string, ' ') !== FALSE) {
      // Humanized input
      $string = strtolower(preg_replace('#\s+#', '_', $string));

    } else {
      // Repeat until a pass makes no further change
      for (;;) {
        $before = $string;
        $string = preg_replace('/([a-zA-Z])([0-9])/', '\1_\2', $string);
        $string = preg_replace('/([a-z0-9A-Z])([A-Z])/', '\1_\2', $string);
        if ($before == $string) {
          break;
        }
      }
      $string = strtolower($string);
    }

    self::$cache['underscorize'][$cache_key] = $string;
    return $string;
  }
  /**
   * Private constructor: the class cannot be instantiated from outside and is used statically
   */
  private function __construct()
  {
  }
  701. }
  702. /**
  703. * Copyright (c) 2007-2011 Will Bond <will@flourishlib.com>
  704. *
  705. * Permission is hereby granted, free of charge, to any person obtaining a copy
  706. * of this software and associated documentation files (the "Software"), to deal
  707. * in the Software without restriction, including without limitation the rights
  708. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  709. * copies of the Software, and to permit persons to whom the Software is
  710. * furnished to do so, subject to the following conditions:
  711. *
  712. * The above copyright notice and this permission notice shall be included in
  713. * all copies or substantial portions of the Software.
  714. *
  715. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  716. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  717. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  718. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  719. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  720. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  721. * THE SOFTWARE.
  722. */