/Python/libraries/recognizers-number/recognizers_number/number/portuguese/extractors.py

https://github.com/Microsoft/Recognizers-Text · Python · 245 lines · 203 code · 40 blank · 2 comment · 6 complexity · 6e28aa9d92dcb6109b9b47118ddbd1df MD5 · raw file

  1. import regex
  2. from typing import Pattern, List, NamedTuple
  3. from recognizers_text.utilities import RegExpUtility
  4. from recognizers_number.number.models import NumberMode, LongFormatMode
  5. from recognizers_number.resources import BaseNumbers
  6. from recognizers_number.resources.portuguese_numeric import PortugueseNumeric
  7. from recognizers_number.number.extractors import ReVal, ReRe, BaseNumberExtractor, BasePercentageExtractor
  8. from recognizers_number.number.constants import Constants
  9. class PortugueseNumberExtractor(BaseNumberExtractor):
  10. @property
  11. def regexes(self) -> List[ReVal]:
  12. return self.__regexes
  13. @property
  14. def ambiguity_filters_dict(self) -> List[ReRe]:
  15. return self.__ambiguity_filters_dict
  16. @property
  17. def _extract_type(self) -> str:
  18. return Constants.SYS_NUM
  19. @property
  20. def _negative_number_terms(self) -> Pattern:
  21. return self.__negative_number_terms
  22. def __init__(self, mode: NumberMode = NumberMode.DEFAULT):
  23. self.__negative_number_terms = RegExpUtility.get_safe_reg_exp(
  24. PortugueseNumeric.NegativeNumberTermsRegex)
  25. self.__regexes: List[ReVal] = list()
  26. cardinal_ex: PortugueseCardinalExtractor = None
  27. if mode is NumberMode.PURE_NUMBER:
  28. cardinal_ex = PortugueseCardinalExtractor(
  29. PortugueseNumeric.PlaceHolderPureNumber)
  30. elif mode is NumberMode.CURRENCY:
  31. self.__regexes.append(ReVal(re=RegExpUtility.get_safe_reg_exp(
  32. PortugueseNumeric.CurrencyRegex), val='IntegerNum'))
  33. if cardinal_ex is None:
  34. cardinal_ex = PortugueseCardinalExtractor()
  35. self.__regexes.extend(cardinal_ex.regexes)
  36. fraction_ex = PortugueseFractionExtractor(mode)
  37. self.__regexes.extend(fraction_ex.regexes)
  38. ambiguity_filters_dict: List[ReRe] = list()
  39. if mode != NumberMode.Unit:
  40. for key, value in PortugueseNumeric.AmbiguityFiltersDict.items():
  41. ambiguity_filters_dict.append(ReRe(reKey=RegExpUtility.get_safe_reg_exp(key),
  42. reVal=RegExpUtility.get_safe_reg_exp(value)))
  43. self.__ambiguity_filters_dict = ambiguity_filters_dict
  44. class PortugueseCardinalExtractor(BaseNumberExtractor):
  45. @property
  46. def regexes(self) -> List[ReVal]:
  47. return self.__regexes
  48. @property
  49. def _extract_type(self) -> str:
  50. return Constants.SYS_NUM_CARDINAL
  51. def __init__(self, placeholder: str = PortugueseNumeric.PlaceHolderDefault):
  52. self.__regexes: List[ReVal] = list()
  53. # Add integer regexes
  54. integer_ex = PortugueseIntegerExtractor(placeholder)
  55. self.__regexes.extend(integer_ex.regexes)
  56. # Add double regexes
  57. double_ex = PortugueseDoubleExtractor(placeholder)
  58. self.__regexes.extend(double_ex.regexes)
  59. class PortugueseIntegerExtractor(BaseNumberExtractor):
  60. @property
  61. def regexes(self) -> List[NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  62. return self.__regexes
  63. @property
  64. def _extract_type(self) -> str:
  65. return Constants.SYS_NUM_INTEGER
  66. def __init__(self, placeholder: str = PortugueseNumeric.PlaceHolderDefault):
  67. self.__regexes = [
  68. ReVal(
  69. re=RegExpUtility.get_safe_reg_exp(
  70. PortugueseNumeric.NumbersWithPlaceHolder(placeholder)),
  71. val='IntegerNum'),
  72. ReVal(
  73. re=RegExpUtility.get_safe_reg_exp(
  74. PortugueseNumeric.NumbersWithSuffix, regex.S),
  75. val='IntegerNum'),
  76. ReVal(
  77. re=RegExpUtility.get_safe_reg_exp(self._generate_format_regex(
  78. LongFormatMode.INTEGER_DOT, placeholder)),
  79. val='IntegerNum'),
  80. ReVal(
  81. re=RegExpUtility.get_safe_reg_exp(self._generate_format_regex(
  82. LongFormatMode.INTEGER_BLANK, placeholder)),
  83. val='IntegerNum'),
  84. ReVal(
  85. re=RegExpUtility.get_safe_reg_exp(self._generate_format_regex(
  86. LongFormatMode.INTEGER_NO_BREAK_SPACE, placeholder)),
  87. val='IntegerNum'),
  88. ReVal(
  89. re=RegExpUtility.get_safe_reg_exp(
  90. PortugueseNumeric.RoundNumberIntegerRegexWithLocks),
  91. val='IntegerNum'),
  92. ReVal(
  93. re=RegExpUtility.get_safe_reg_exp(
  94. PortugueseNumeric.NumbersWithDozenSuffix),
  95. val='IntegerNum'),
  96. ReVal(
  97. re=RegExpUtility.get_safe_reg_exp(
  98. PortugueseNumeric.AllIntRegexWithLocks),
  99. val='IntegerPor'),
  100. ReVal(
  101. re=RegExpUtility.get_safe_reg_exp(
  102. PortugueseNumeric.AllIntRegexWithDozenSuffixLocks),
  103. val='IntegerPor')
  104. ]
  105. class PortugueseDoubleExtractor(BaseNumberExtractor):
  106. @property
  107. def regexes(self) -> List[NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  108. return self.__regexes
  109. @property
  110. def _extract_type(self) -> str:
  111. return Constants.SYS_NUM_DOUBLE
  112. def __init__(self, placeholder):
  113. self.__regexes = [
  114. ReVal(
  115. re=RegExpUtility.get_safe_reg_exp(
  116. PortugueseNumeric.DoubleDecimalPointRegex(placeholder)),
  117. val='DoubleNum'),
  118. ReVal(
  119. re=RegExpUtility.get_safe_reg_exp(
  120. PortugueseNumeric.DoubleWithoutIntegralRegex(placeholder)),
  121. val='DoubleNum'),
  122. ReVal(
  123. re=PortugueseNumeric.DoubleWithMultiplierRegex,
  124. val='DoubleNum'),
  125. ReVal(
  126. re=RegExpUtility.get_safe_reg_exp(
  127. PortugueseNumeric.DoubleWithRoundNumber),
  128. val='DoubleNum'),
  129. ReVal(
  130. re=RegExpUtility.get_safe_reg_exp(
  131. PortugueseNumeric.DoubleAllFloatRegex),
  132. val='DoublePor'),
  133. ReVal(
  134. re=RegExpUtility.get_safe_reg_exp(
  135. PortugueseNumeric.DoubleExponentialNotationRegex),
  136. val='DoublePow'),
  137. ReVal(
  138. re=RegExpUtility.get_safe_reg_exp(
  139. PortugueseNumeric.DoubleCaretExponentialNotationRegex),
  140. val='DoublePow'),
  141. ReVal(
  142. re=RegExpUtility.get_safe_reg_exp(self._generate_format_regex(
  143. LongFormatMode.DOUBLE_DOT_COMMA, placeholder)),
  144. val='DoubleNum'),
  145. ReVal(
  146. re=RegExpUtility.get_safe_reg_exp(self._generate_format_regex(
  147. LongFormatMode.DOUBLE_NO_BREAK_SPACE_COMMA, placeholder)),
  148. val='DoubleNum')
  149. ]
  150. class PortugueseFractionExtractor(BaseNumberExtractor):
  151. @property
  152. def regexes(self) -> List[NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  153. return self.__regexes
  154. @property
  155. def _extract_type(self) -> str:
  156. return Constants.SYS_NUM_FRACTION
  157. def __init__(self, mode):
  158. self.__regexes = [
  159. ReVal(
  160. re=RegExpUtility.get_safe_reg_exp(
  161. PortugueseNumeric.FractionNotationWithSpacesRegex),
  162. val='FracNum'),
  163. ReVal(
  164. re=RegExpUtility.get_safe_reg_exp(
  165. PortugueseNumeric.FractionNotationRegex),
  166. val='FracNum'),
  167. ReVal(
  168. re=RegExpUtility.get_safe_reg_exp(
  169. PortugueseNumeric.FractionNounRegex),
  170. val='FracPor'),
  171. ReVal(
  172. re=RegExpUtility.get_safe_reg_exp(
  173. PortugueseNumeric.FractionNounWithArticleRegex),
  174. val='FracPor')
  175. ]
  176. if mode != NumberMode.Unit:
  177. self.__regexes.append(ReVal(
  178. re=RegExpUtility.get_safe_reg_exp(
  179. PortugueseNumeric.FractionPrepositionRegex),
  180. val='FracPor'))
  181. class PortugueseOrdinalExtractor(BaseNumberExtractor):
  182. @property
  183. def regexes(self) -> List[NamedTuple('re_val', [('re', Pattern), ('val', str)])]:
  184. return self.__regexes
  185. @property
  186. def _extract_type(self) -> str:
  187. return Constants.SYS_NUM_ORDINAL
  188. def __init__(self):
  189. self.__regexes = [
  190. ReVal(
  191. re=RegExpUtility.get_safe_reg_exp(
  192. PortugueseNumeric.OrdinalSuffixRegex),
  193. val='OrdinalNum'),
  194. ReVal(
  195. re=RegExpUtility.get_safe_reg_exp(
  196. PortugueseNumeric.OrdinalEnglishRegex),
  197. val='OrdinalPor')
  198. ]
  199. class PortuguesePercentageExtractor(BasePercentageExtractor):
  200. def __init__(self):
  201. super().__init__(PortugueseNumberExtractor(NumberMode.DEFAULT))
  202. def get_definitions(self) -> List[str]:
  203. return [
  204. PortugueseNumeric.NumberWithSuffixPercentage
  205. ]