PageRenderTime 45ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/Zend/Search/Lucene/Index/Term.php

https://bitbucket.org/mercysam/zfs
PHP | 143 lines | 61 code | 16 blank | 66 comment | 13 complexity | 1042e44ee443ffbd3a32f31e2db44087 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /**
  22. * A Term represents a word from text. This is the unit of search. It is
  23. * composed of two elements, the text of the word, as a string, and the name of
  24. * the field that the text occurred in, an interned string.
  25. *
  26. * Note that terms may represent more than words from text fields, but also
  27. * things like dates, email addresses, urls, etc.
  28. *
  29. * @category Zend
  30. * @package Zend_Search_Lucene
  31. * @subpackage Index
  32. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  33. * @license http://framework.zend.com/license/new-bsd New BSD License
  34. */
  class Zend_Search_Lucene_Index_Term
  {
      /**
       * Field name or field number (depending on context)
       *
       * @var mixed
       */
      public $field;

      /**
       * Term value
       *
       * @var string
       */
      public $text;

      /**
       * Object constructor
       *
       * When no field is given, the term is bound to the default search field
       * reported by Zend_Search_Lucene::getDefaultSearchField().
       *
       * @param string $text  Term value
       * @param mixed  $field Field name or field number; null selects the default search field
       */
      public function __construct($text, $field = null)
      {
          $this->field = ($field === null) ? Zend_Search_Lucene::getDefaultSearchField() : $field;
          $this->text  = $text;
      }

      /**
       * Returns term key
       *
       * The key is the field and the text joined by a NUL byte.
       *
       * @return string
       */
      public function key()
      {
          return $this->field . chr(0) . $this->text;
      }

      /**
       * Get term prefix
       *
       * Returns the leading part of $str holding at most $length UTF-8
       * characters. Character boundaries are derived from lead bytes only
       * (continuation bytes are not validated). A trailing sequence whose
       * announced width runs past the end of the string is left out.
       *
       * @param string  $str
       * @param integer $length Maximum number of characters in the prefix
       * @return string
       */
      public static function getPrefix($str, $length)
      {
          // The byte length does not change while scanning, so compute it once.
          $totalBytes  = strlen($str);
          $prefixBytes = 0;
          $prefixChars = 0;

          while ($prefixBytes < $totalBytes && $prefixChars < $length) {
              $charBytes = self::_getCharByteLength($str[$prefixBytes]);

              if ($prefixBytes + $charBytes > $totalBytes) {
                  // wrong character: the sequence is truncated by the end of the string
                  break;
              }

              $prefixChars++;
              $prefixBytes += $charBytes;
          }

          return substr($str, 0, $prefixBytes);
      }

      /**
       * Get UTF-8 string length
       *
       * Counts characters using the same lead-byte rules as getPrefix().
       * Counting stops at a trailing sequence whose announced width runs
       * past the end of the string; that sequence is not counted.
       *
       * @param string $str
       * @return integer Number of characters
       */
      public static function getLength($str)
      {
          // The byte length does not change while scanning, so compute it once.
          $totalBytes = strlen($str);
          $bytes      = 0;
          $chars      = 0;

          while ($bytes < $totalBytes) {
              $charBytes = self::_getCharByteLength($str[$bytes]);

              if ($bytes + $charBytes > $totalBytes) {
                  // wrong character: the sequence is truncated by the end of the string
                  break;
              }

              $chars++;
              $bytes += $charBytes;
          }

          return $chars;
      }

      /**
       * Get the byte width of a UTF-8 sequence from its first byte
       *
       * A byte whose two top bits are not both set (ASCII or a stray
       * continuation byte) counts as a one-byte character. Otherwise bits
       * 0x20 and 0x10 extend the width to three and four bytes respectively;
       * no width above four is ever reported.
       *
       * @param string $leadByte First byte of the sequence
       * @return integer Width in bytes (1 to 4)
       */
      private static function _getCharByteLength($leadByte)
      {
          $code = ord($leadByte);

          if (($code & 0xC0) != 0xC0) {
              return 1;
          }
          if (!($code & 0x20)) {
              return 2;
          }
          if (!($code & 0x10)) {
              return 3;
          }

          return 4;
      }
  }