/Python/libraries/recognizers-number/recognizers_number/number/spanish/extractors.py

https://github.com/Microsoft/Recognizers-Text · Python · 224 lines · 184 code · 38 blank · 2 comment · 6 complexity · c0b1d8e1713ab66b5e486dc4d01c9ef0 MD5 · raw file

  1. from typing import Pattern, List, NamedTuple
  2. from recognizers_text.utilities import RegExpUtility
  3. from recognizers_number.number.models import NumberMode, LongFormatMode
  4. from recognizers_number.resources import BaseNumbers
  5. from recognizers_number.resources.spanish_numeric import SpanishNumeric
  6. from recognizers_number.number.extractors import ReVal, ReRe, BaseNumberExtractor, BasePercentageExtractor
  7. from recognizers_number.number.constants import Constants
  8. class SpanishNumberExtractor(BaseNumberExtractor):
  9. @property
  10. def regexes(self) -> List[ReVal]:
  11. return self.__regexes
  12. @property
  13. def ambiguity_filters_dict(self) -> List[ReRe]:
  14. return self.__ambiguity_filters_dict
  15. @property
  16. def _extract_type(self) -> str:
  17. return Constants.SYS_NUM
  18. def __init__(self, mode: NumberMode = NumberMode.DEFAULT):
  19. self.__regexes: List[ReVal] = list()
  20. cardinal_ex: SpanishCardinalExtractor = None
  21. if mode is NumberMode.PURE_NUMBER:
  22. cardinal_ex = SpanishCardinalExtractor(
  23. SpanishNumeric.PlaceHolderPureNumber)
  24. elif mode is NumberMode.CURRENCY:
  25. self.__regexes.append(
  26. ReVal(re=SpanishNumeric.CurrencyRegex, val='IntegerNum'))
  27. if cardinal_ex is None:
  28. cardinal_ex = SpanishCardinalExtractor()
  29. self.__regexes.extend(cardinal_ex.regexes)
  30. fraction_ex = SpanishFractionExtractor(mode)
  31. self.__regexes.extend(fraction_ex.regexes)
  32. ambiguity_filters_dict: List[ReRe] = list()
  33. if mode != NumberMode.Unit:
  34. for key, value in SpanishNumeric.AmbiguityFiltersDict.items():
  35. ambiguity_filters_dict.append(ReRe(reKey=RegExpUtility.get_safe_reg_exp(key),
  36. reVal=RegExpUtility.get_safe_reg_exp(value)))
  37. self.__ambiguity_filters_dict = ambiguity_filters_dict
  38. class SpanishCardinalExtractor(BaseNumberExtractor):
  39. @property
  40. def regexes(self) -> List[ReVal]:
  41. return self.__regexes
  42. @property
  43. def _extract_type(self) -> str:
  44. return Constants.SYS_NUM_CARDINAL
  45. def __init__(self, placeholder: str = SpanishNumeric.PlaceHolderDefault):
  46. self.__regexes: List[ReVal] = list()
  47. # Add integer regexes
  48. integer_ex = SpanishIntegerExtractor(placeholder)
  49. self.__regexes.extend(integer_ex.regexes)
  50. # Add double regexes
  51. double_ex = SpanishDoubleExtractor(placeholder)
  52. self.__regexes.extend(double_ex.regexes)
  53. class SpanishIntegerExtractor(BaseNumberExtractor):
  54. @property
  55. def regexes(self) -> List[
  56. NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  57. return self.__regexes
  58. @property
  59. def _extract_type(self) -> str:
  60. return Constants.SYS_NUM_INTEGER
  61. def __init__(self, placeholder: str = SpanishNumeric.PlaceHolderDefault):
  62. self.__regexes = [
  63. ReVal(
  64. re=SpanishNumeric.NumbersWithPlaceHolder(placeholder),
  65. val='IntegerNum'),
  66. ReVal(
  67. re=SpanishNumeric.NumbersWithSuffix,
  68. val='IntegerNum'),
  69. ReVal(
  70. re=self._generate_format_regex(LongFormatMode.INTEGER_DOT,
  71. placeholder),
  72. val='IntegerNum'),
  73. ReVal(
  74. re=self._generate_format_regex(LongFormatMode.INTEGER_BLANK,
  75. placeholder),
  76. val='IntegerNum'),
  77. ReVal(
  78. re=self._generate_format_regex(
  79. LongFormatMode.INTEGER_NO_BREAK_SPACE, placeholder),
  80. val='IntegerNum'),
  81. ReVal(
  82. re=SpanishNumeric.RoundNumberIntegerRegexWithLocks,
  83. val='IntegerNum'),
  84. ReVal(
  85. re=SpanishNumeric.NumbersWithDozenSuffix,
  86. val='IntegerNum'),
  87. ReVal(
  88. re=SpanishNumeric.AllIntRegexWithLocks,
  89. val='IntegerSpa'),
  90. ReVal(
  91. re=SpanishNumeric.AllIntRegexWithDozenSuffixLocks,
  92. val='IntegerSpa')
  93. ]
  94. class SpanishDoubleExtractor(BaseNumberExtractor):
  95. @property
  96. def regexes(self) -> List[
  97. NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  98. return self.__regexes
  99. @property
  100. def _extract_type(self) -> str:
  101. return Constants.SYS_NUM_DOUBLE
  102. def __init__(self, placeholder: str = SpanishNumeric.PlaceHolderDefault):
  103. self.__regexes = [
  104. ReVal(
  105. re=SpanishNumeric.DoubleDecimalPointRegex(placeholder),
  106. val='DoubleNum'),
  107. ReVal(
  108. re=SpanishNumeric.DoubleWithoutIntegralRegex(placeholder),
  109. val='DoubleNum'),
  110. ReVal(
  111. re=SpanishNumeric.DoubleWithMultiplierRegex,
  112. val='DoubleNum'),
  113. ReVal(
  114. re=SpanishNumeric.DoubleWithRoundNumber,
  115. val='DoubleNum'),
  116. ReVal(
  117. re=SpanishNumeric.DoubleAllFloatRegex,
  118. val='DoubleSpa'),
  119. ReVal(
  120. re=SpanishNumeric.DoubleExponentialNotationRegex,
  121. val='DoublePow'),
  122. ReVal(
  123. re=SpanishNumeric.DoubleCaretExponentialNotationRegex,
  124. val='DoublePow'),
  125. ReVal(
  126. re=self._generate_format_regex(LongFormatMode.DOUBLE_DOT_COMMA,
  127. placeholder),
  128. val='DoubleNum'),
  129. ReVal(
  130. re=self._generate_format_regex(
  131. LongFormatMode.DOUBLE_NO_BREAK_SPACE_COMMA,
  132. placeholder),
  133. val='DoubleNum')
  134. ]
  135. class SpanishFractionExtractor(BaseNumberExtractor):
  136. @property
  137. def regexes(self) -> List[
  138. NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  139. return self.__regexes
  140. @property
  141. def _extract_type(self) -> str:
  142. return Constants.SYS_NUM_FRACTION
  143. def __init__(self, mode):
  144. self.__regexes = [
  145. ReVal(
  146. re=SpanishNumeric.FractionNotationRegex,
  147. val='FracNum'),
  148. ReVal(
  149. re=SpanishNumeric.FractionNotationWithSpacesRegex,
  150. val='FracNum'),
  151. ReVal(
  152. re=SpanishNumeric.FractionNounRegex,
  153. val='FracSpa'),
  154. ReVal(
  155. re=SpanishNumeric.FractionNounWithArticleRegex,
  156. val='FracSpa')
  157. ]
  158. if mode != NumberMode.Unit:
  159. self.__regexes.append(
  160. ReVal(
  161. re=SpanishNumeric.FractionPrepositionRegex,
  162. val='FracSpa'))
  163. class SpanishOrdinalExtractor(BaseNumberExtractor):
  164. @property
  165. def regexes(self) -> List[
  166. NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  167. return self.__regexes
  168. @property
  169. def _extract_type(self) -> str:
  170. return Constants.SYS_NUM_ORDINAL
  171. def __init__(self):
  172. self.__regexes = [
  173. ReVal(
  174. re=SpanishNumeric.OrdinalSuffixRegex,
  175. val='OrdinalNum'),
  176. ReVal(
  177. re=SpanishNumeric.OrdinalNounRegex,
  178. val='OrdSpa')
  179. ]
  180. class SpanishPercentageExtractor(BasePercentageExtractor):
  181. def __init__(self):
  182. super().__init__(SpanishNumberExtractor())
  183. def get_definitions(self) -> List[str]:
  184. return [
  185. SpanishNumeric.NumberWithPrefixPercentage
  186. ]