PageRenderTime 41ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/Search/Lucene/Index/Term.php

https://bitbucket.org/jfrubiom/zendframework-1.x
PHP | 144 lines | 61 code | 16 blank | 67 comment | 13 complexity | 177ecfbc2b5136b06c033b1a0ad8dbcf MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: Term.php 24593 2012-01-05 20:35:02Z matthew $
  21. */
  22. /**
  23. * A Term represents a word from text. This is the unit of search. It is
  24. * composed of two elements, the text of the word, as a string, and the name of
  25. * the field that the text occurred in, an interned string.
  26. *
  27. * Note that terms may represent more than words from text fields, but also
  28. * things like dates, email addresses, urls, etc.
  29. *
  30. * @category Zend
  31. * @package Zend_Search_Lucene
  32. * @subpackage Index
  33. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  34. * @license http://framework.zend.com/license/new-bsd New BSD License
  35. */
  class Zend_Search_Lucene_Index_Term
  {
      /**
       * Field name or field number (depending on context)
       *
       * @var mixed
       */
      public $field;

      /**
       * Term value
       *
       * @var string
       */
      public $text;

      /**
       * Object constructor
       *
       * @param string $text  Term value
       * @param mixed  $field Field name or field number. When null, the value
       *                      returned by Zend_Search_Lucene::getDefaultSearchField()
       *                      is stored instead.
       */
      public function __construct($text, $field = null)
      {
          $this->field = ($field === null) ? Zend_Search_Lucene::getDefaultSearchField() : $field;
          $this->text  = $text;
      }

      /**
       * Returns term key
       *
       * The key is the field and the text joined by a NUL byte (chr(0)).
       *
       * @return string
       */
      public function key()
      {
          return $this->field . chr(0) . $this->text;
      }

      /**
       * Get term prefix
       *
       * Returns the leading part of $str that holds at most $length UTF-8
       * characters. A trailing multi-byte sequence that is cut off by the end
       * of the string is not included in the result.
       *
       * @param string  $str
       * @param integer $length Maximum number of characters in the prefix
       * @return string
       */
      public static function getPrefix($str, $length)
      {
          // Byte length is loop-invariant, so compute it only once.
          $totalBytes  = strlen($str);
          $prefixBytes = 0;
          $prefixChars = 0;

          while ($prefixBytes < $totalBytes && $prefixChars < $length) {
              $charBytes = self::_getCharBytes(ord($str[$prefixBytes]));

              if ($prefixBytes + $charBytes > $totalBytes) {
                  // wrong character: the sequence runs past the end of the string
                  break;
              }

              $prefixChars++;
              $prefixBytes += $charBytes;
          }

          return substr($str, 0, $prefixBytes);
      }

      /**
       * Get UTF-8 string length
       *
       * Counts characters rather than bytes. Counting stops at a trailing
       * multi-byte sequence that is cut off by the end of the string; that
       * incomplete character is not counted.
       *
       * @param string $str
       * @return integer Number of characters
       */
      public static function getLength($str)
      {
          // Byte length is loop-invariant, so compute it only once.
          $totalBytes = strlen($str);
          $bytes      = 0;
          $chars      = 0;

          while ($bytes < $totalBytes) {
              $charBytes = self::_getCharBytes(ord($str[$bytes]));

              if ($bytes + $charBytes > $totalBytes) {
                  // wrong character: the sequence runs past the end of the string
                  break;
              }

              $chars++;
              $bytes += $charBytes;
          }

          return $chars;
      }

      /**
       * Get the byte length of a UTF-8 sequence from its first byte
       *
       * Only the lead byte is inspected; continuation bytes are not validated.
       * A byte whose two highest bits are not both set (ASCII, or a stray
       * continuation byte) counts as a one-byte character.
       *
       * @param integer $leadByte Ordinal value (0-255) of the first byte
       * @return integer Sequence length in bytes (1 to 4)
       */
      private static function _getCharBytes($leadByte)
      {
          if (($leadByte & 0xC0) != 0xC0) {
              return 1;
          }
          if (!($leadByte & 0x20)) {
              // 110xxxxx
              return 2;
          }
          if (!($leadByte & 0x10)) {
              // 1110xxxx
              return 3;
          }

          // 1111xxxx
          return 4;
      }
  }