/lib/galaxy/util/inflection.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 388 lines · 365 code · 10 blank · 13 comment · 18 complexity · 648ab339f0132d70a807585674fb6ba6 MD5 · raw file

#!/usr/bin/env python
# Copyright (c) 2006 Bermi Ferrer Martinez
#
# bermi a-t bermilabs - com
# See the end of this file for the free software, open source license (BSD-style).
import re
  7. class Base:
  8. '''Locale inflectors must inherit from this base class inorder to provide
  9. the basic Inflector functionality'''
  10. def cond_plural(self, number_of_records, word) :
  11. '''Returns the plural form of a word if first parameter is greater than 1'''
  12. if number_of_records != 1:
  13. return self.pluralize(word)
  14. else :
  15. return word
  16. def titleize(self, word, uppercase = '') :
  17. '''Converts an underscored or CamelCase word into a English sentence.
  18. The titleize function converts text like "WelcomePage",
  19. "welcome_page" or "welcome page" to this "Welcome Page".
  20. If second parameter is set to 'first' it will only
  21. capitalize the first character of the title.'''
  22. if(uppercase == 'first'):
  23. return self.humanize(self.underscore(word)).capitalize()
  24. else :
  25. return self.humanize(self.underscore(word)).title()
  26. def camelize(self, word):
  27. ''' Returns given word as CamelCased
  28. Converts a word like "send_email" to "SendEmail". It
  29. will remove non alphanumeric character from the word, so
  30. "who's online" will be converted to "WhoSOnline"'''
  31. return ''.join(w[0].upper() + w[1:] for w in re.sub('[^A-Z^a-z^0-9^:]+', ' ', word).split(' '))
  32. def underscore(self, word) :
  33. ''' Converts a word "into_it_s_underscored_version"
  34. Convert any "CamelCased" or "ordinary Word" into an
  35. "underscored_word".
  36. This can be really useful for creating friendly URLs.'''
  37. return re.sub('[^A-Z^a-z^0-9^\/]+','_', \
  38. re.sub('([a-z\d])([A-Z])','\\1_\\2', \
  39. re.sub('([A-Z]+)([A-Z][a-z])','\\1_\\2', re.sub('::', '/',word)))).lower()
  40. def humanize(self, word, uppercase = '') :
  41. '''Returns a human-readable string from word
  42. Returns a human-readable string from word, by replacing
  43. underscores with a space, and by upper-casing the initial
  44. character by default.
  45. If you need to uppercase all the words you just have to
  46. pass 'all' as a second parameter.'''
  47. if(uppercase == 'first'):
  48. return re.sub('_id$', '', word).replace('_',' ').capitalize()
  49. else :
  50. return re.sub('_id$', '', word).replace('_',' ').title()
  51. def variablize(self, word) :
  52. '''Same as camelize but first char is lowercased
  53. Converts a word like "send_email" to "sendEmail". It
  54. will remove non alphanumeric character from the word, so
  55. "who's online" will be converted to "whoSOnline"'''
  56. word = self.camelize(word)
  57. return word[0].lower()+word[1:]
  58. def tableize(self, class_name) :
  59. ''' Converts a class name to its table name according to rails
  60. naming conventions. Example. Converts "Person" to "people" '''
  61. return self.pluralize(self.underscore(class_name))
  62. def classify(self, table_name) :
  63. '''Converts a table name to its class name according to rails
  64. naming conventions. Example: Converts "people" to "Person" '''
  65. return self.camelize(self.singularize(table_name))
  66. def ordinalize(self, number) :
  67. '''Converts number to its ordinal English form.
  68. This method converts 13 to 13th, 2 to 2nd ...'''
  69. tail = 'th'
  70. if number % 100 == 11 or number % 100 == 12 or number % 100 == 13:
  71. tail = 'th'
  72. elif number % 10 == 1 :
  73. tail = 'st'
  74. elif number % 10 == 2 :
  75. tail = 'nd'
  76. elif number % 10 == 3 :
  77. tail = 'rd'
  78. return str(number)+tail
  79. def unaccent(self, text) :
  80. '''Transforms a string to its unaccented version.
  81. This might be useful for generating "friendly" URLs'''
  82. find = u'\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF'
  83. replace = u'AAAAAAACEEEEIIIIDNOOOOOOUUUUYTsaaaaaaaceeeeiiiienoooooouuuuyty'
  84. return self.string_replace(text, find, replace)
  85. def string_replace (self, word, find, replace) :
  86. '''This function returns a copy of word, translating
  87. all occurrences of each character in find to the
  88. corresponding character in replace'''
  89. for k in range(0,len(find)) :
  90. word = re.sub(find[k], replace[k], word)
  91. return word
  92. def urlize(self, text) :
  93. '''Transform a string its unaccented and underscored
  94. version ready to be inserted in friendly URLs'''
  95. return re.sub('^_|_$','',self.underscore(self.unaccent(text)))
  96. def demodulize(self, module_name) :
  97. return self.humanize(self.underscore(re.sub('^.*::','',module_name)))
  98. def modulize(self, module_description) :
  99. return self.camelize(self.singularize(module_description))
  100. def foreignKey(self, class_name, separate_class_name_and_id_with_underscore = 1) :
  101. ''' Returns class_name in underscored form, with "_id" tacked on at the end.
  102. This is for use in dealing with the database.'''
  103. if separate_class_name_and_id_with_underscore :
  104. tail = '_id'
  105. else :
  106. tail = 'id'
  107. return self.underscore(self.demodulize(class_name))+tail;
  108. class English (Base):
  109. """
  110. Inflector for pluralize and singularize English nouns.
  111. This is the default Inflector for the Inflector obj
  112. """
  113. def pluralize(self, word) :
  114. '''Pluralizes English nouns.'''
  115. rules = [
  116. ['(?i)(quiz)$' , '\\1zes'],
  117. ['^(?i)(ox)$' , '\\1en'],
  118. ['(?i)([m|l])ouse$' , '\\1ice'],
  119. ['(?i)(matr|vert|ind)ix|ex$' , '\\1ices'],
  120. ['(?i)(x|ch|ss|sh)$' , '\\1es'],
  121. ['(?i)([^aeiouy]|qu)ies$' , '\\1y'],
  122. ['(?i)([^aeiouy]|qu)y$' , '\\1ies'],
  123. ['(?i)(hive)$' , '\\1s'],
  124. ['(?i)(?:([^f])fe|([lr])f)$' , '\\1\\2ves'],
  125. ['(?i)sis$' , 'ses'],
  126. ['(?i)([ti])um$' , '\\1a'],
  127. ['(?i)(buffal|tomat)o$' , '\\1oes'],
  128. ['(?i)(bu)s$' , '\\1ses'],
  129. ['(?i)(alias|status)' , '\\1es'],
  130. ['(?i)(octop|vir)us$' , '\\1i'],
  131. ['(?i)(ax|test)is$' , '\\1es'],
  132. ['(?i)s$' , 's'],
  133. ['(?i)$' , 's']
  134. ]
  135. uncountable_words = ['equipment', 'information', 'rice', 'money', 'species', 'series', 'fish', 'sheep']
  136. irregular_words = {
  137. 'person' : 'people',
  138. 'man' : 'men',
  139. 'child' : 'children',
  140. 'sex' : 'sexes',
  141. 'move' : 'moves'
  142. }
  143. lower_cased_word = word.lower();
  144. for uncountable_word in uncountable_words:
  145. if lower_cased_word[-1*len(uncountable_word):] == uncountable_word :
  146. return word
  147. for irregular in irregular_words.keys():
  148. match = re.search('('+irregular+')$',word, re.IGNORECASE)
  149. if match:
  150. return re.sub('(?i)'+irregular+'$', match.expand('\\1')[0]+irregular_words[irregular][1:], word)
  151. for rule in range(len(rules)):
  152. match = re.search(rules[rule][0], word, re.IGNORECASE)
  153. if match :
  154. groups = match.groups()
  155. for k in range(0,len(groups)) :
  156. if groups[k] == None :
  157. rules[rule][1] = rules[rule][1].replace('\\'+str(k+1), '')
  158. return re.sub(rules[rule][0], rules[rule][1], word)
  159. return word
  160. def singularize (self, word) :
  161. '''Singularizes English nouns.'''
  162. rules = [
  163. ['(?i)(quiz)zes$' , '\\1'],
  164. ['(?i)(matr)ices$' , '\\1ix'],
  165. ['(?i)(vert|ind)ices$' , '\\1ex'],
  166. ['(?i)^(ox)en' , '\\1'],
  167. ['(?i)(alias|status)es$' , '\\1'],
  168. ['(?i)([octop|vir])i$' , '\\1us'],
  169. ['(?i)(cris|ax|test)es$' , '\\1is'],
  170. ['(?i)(shoe)s$' , '\\1'],
  171. ['(?i)(o)es$' , '\\1'],
  172. ['(?i)(bus)es$' , '\\1'],
  173. ['(?i)([m|l])ice$' , '\\1ouse'],
  174. ['(?i)(x|ch|ss|sh)es$' , '\\1'],
  175. ['(?i)(m)ovies$' , '\\1ovie'],
  176. ['(?i)(s)eries$' , '\\1eries'],
  177. ['(?i)([^aeiouy]|qu)ies$' , '\\1y'],
  178. ['(?i)([lr])ves$' , '\\1f'],
  179. ['(?i)(tive)s$' , '\\1'],
  180. ['(?i)(hive)s$' , '\\1'],
  181. ['(?i)([^f])ves$' , '\\1fe'],
  182. ['(?i)(^analy)ses$' , '\\1sis'],
  183. ['(?i)((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$' , '\\1\\2sis'],
  184. ['(?i)([ti])a$' , '\\1um'],
  185. ['(?i)(n)ews$' , '\\1ews'],
  186. ['(?i)s$' , ''],
  187. ];
  188. uncountable_words = ['equipment', 'information', 'rice', 'money', 'species', 'series', 'fish', 'sheep','sms'];
  189. irregular_words = {
  190. 'people' : 'person',
  191. 'men' : 'man',
  192. 'children' : 'child',
  193. 'sexes' : 'sex',
  194. 'moves' : 'move'
  195. }
  196. lower_cased_word = word.lower();
  197. for uncountable_word in uncountable_words:
  198. if lower_cased_word[-1*len(uncountable_word):] == uncountable_word :
  199. return word
  200. for irregular in irregular_words.keys():
  201. match = re.search('('+irregular+')$',word, re.IGNORECASE)
  202. if match:
  203. return re.sub('(?i)'+irregular+'$', match.expand('\\1')[0]+irregular_words[irregular][1:], word)
  204. for rule in range(len(rules)):
  205. match = re.search(rules[rule][0], word, re.IGNORECASE)
  206. if match :
  207. groups = match.groups()
  208. for k in range(0,len(groups)) :
  209. if groups[k] == None :
  210. rules[rule][1] = rules[rule][1].replace('\\'+str(k+1), '')
  211. return re.sub(rules[rule][0], rules[rule][1], word)
  212. return word
  213. class Inflector:
  214. """
  215. Inflector for pluralizing and singularizing nouns.
  216. It provides methods for helping on creating programs
  217. based on naming conventions like on Ruby on Rails.
  218. """
  219. def __init__( self, Inflector = English ) :
  220. assert callable(Inflector), "Inflector should be a callable obj"
  221. self.Inflector = apply(Inflector);
  222. def pluralize(self, word) :
  223. '''Pluralizes nouns.'''
  224. return self.Inflector.pluralize(word)
  225. def singularize(self, word) :
  226. '''Singularizes nouns.'''
  227. return self.Inflector.singularize(word)
  228. def cond_plural(self, number_of_records, word) :
  229. '''Returns the plural form of a word if first parameter is greater than 1'''
  230. return self.Inflector.cond_plural(number_of_records, word)
  231. def titleize(self, word, uppercase = '') :
  232. '''Converts an underscored or CamelCase word into a sentence.
  233. The titleize function converts text like "WelcomePage",
  234. "welcome_page" or "welcome page" to this "Welcome Page".
  235. If the "uppercase" parameter is set to 'first' it will only
  236. capitalize the first character of the title.'''
  237. return self.Inflector.titleize(word, uppercase)
  238. def camelize(self, word):
  239. ''' Returns given word as CamelCased
  240. Converts a word like "send_email" to "SendEmail". It
  241. will remove non alphanumeric character from the word, so
  242. "who's online" will be converted to "WhoSOnline"'''
  243. return self.Inflector.camelize(word)
  244. def underscore(self, word) :
  245. ''' Converts a word "into_it_s_underscored_version"
  246. Convert any "CamelCased" or "ordinary Word" into an
  247. "underscored_word".
  248. This can be really useful for creating friendly URLs.'''
  249. return self.Inflector.underscore(word)
  250. def humanize(self, word, uppercase = '') :
  251. '''Returns a human-readable string from word
  252. Returns a human-readable string from word, by replacing
  253. underscores with a space, and by upper-casing the initial
  254. character by default.
  255. If you need to uppercase all the words you just have to
  256. pass 'all' as a second parameter.'''
  257. return self.Inflector.humanize(word, uppercase)
  258. def variablize(self, word) :
  259. '''Same as camelize but first char is lowercased
  260. Converts a word like "send_email" to "sendEmail". It
  261. will remove non alphanumeric character from the word, so
  262. "who's online" will be converted to "whoSOnline"'''
  263. return self.Inflector.variablize(word)
  264. def tableize(self, class_name) :
  265. ''' Converts a class name to its table name according to rails
  266. naming conventions. Example. Converts "Person" to "people" '''
  267. return self.Inflector.tableize(class_name)
  268. def classify(self, table_name) :
  269. '''Converts a table name to its class name according to rails
  270. naming conventions. Example: Converts "people" to "Person" '''
  271. return self.Inflector.classify(table_name)
  272. def ordinalize(self, number) :
  273. '''Converts number to its ordinal form.
  274. This method converts 13 to 13th, 2 to 2nd ...'''
  275. return self.Inflector.ordinalize(number)
  276. def unaccent(self, text) :
  277. '''Transforms a string to its unaccented version.
  278. This might be useful for generating "friendly" URLs'''
  279. return self.Inflector.unaccent(text)
  280. def urlize(self, text) :
  281. '''Transform a string its unaccented and underscored
  282. version ready to be inserted in friendly URLs'''
  283. return self.Inflector.urlize(text)
  284. def demodulize(self, module_name) :
  285. return self.Inflector.demodulize(module_name)
  286. def modulize(self, module_description) :
  287. return self.Inflector.modulize(module_description)
  288. def foreignKey(self, class_name, separate_class_name_and_id_with_underscore = 1) :
  289. ''' Returns class_name in underscored form, with "_id" tacked on at the end.
  290. This is for use in dealing with the database.'''
  291. return self.Inflector.foreignKey(class_name, separate_class_name_and_id_with_underscore)
# Copyright (c) 2006 Bermi Ferrer Martinez
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software to deal in this software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of this software, and to permit
# persons to whom this software is furnished to do so, subject to the following
# condition:
#
# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THIS SOFTWARE.