PageRenderTime 38ms CodeModel.GetById 13ms app.highlight 20ms RepoModel.GetById 2ms app.codeStats 0ms

/lib/galaxy/util/inflection.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 388 lines | 373 code | 7 blank | 8 comment | 2 complexity | 648ab339f0132d70a807585674fb6ba6 MD5 | raw file
  1#!/usr/bin/env python
  2
  3# Copyright (c) 2006 Bermi Ferrer Martinez
  4#
  5# bermi a-t bermilabs - com
  6# See the end of this file for the free software, open source license (BSD-style).
  7
  8import re
  9
 10class Base:
 11    '''Locale inflectors must inherit from this base class inorder to provide
 12    the basic Inflector functionality'''
 13
 14    def cond_plural(self, number_of_records, word) :
 15        '''Returns the plural form of a word if first parameter is greater than 1'''
 16
 17        if number_of_records != 1:
 18            return self.pluralize(word)
 19        else :
 20            return word
 21
 22
 23    def titleize(self, word, uppercase = '') :
 24        '''Converts an underscored or CamelCase word into a English sentence.
 25            The titleize function converts text like "WelcomePage",
 26            "welcome_page" or  "welcome page" to this "Welcome Page".
 27            If second parameter is set to 'first' it will only
 28            capitalize the first character of the title.'''
 29
 30        if(uppercase == 'first'):
 31            return self.humanize(self.underscore(word)).capitalize()
 32        else :
 33            return self.humanize(self.underscore(word)).title()
 34
 35
 36    def camelize(self, word):
 37        ''' Returns given word as CamelCased
 38        Converts a word like "send_email" to "SendEmail". It
 39        will remove non alphanumeric character from the word, so
 40        "who's online" will be converted to "WhoSOnline"'''
 41        return ''.join(w[0].upper() + w[1:] for w in re.sub('[^A-Z^a-z^0-9^:]+', ' ', word).split(' '))
 42
 43    def underscore(self, word) :
 44        ''' Converts a word "into_it_s_underscored_version"
 45        Convert any "CamelCased" or "ordinary Word" into an
 46        "underscored_word".
 47        This can be really useful for creating friendly URLs.'''
 48
 49        return  re.sub('[^A-Z^a-z^0-9^\/]+','_', \
 50                re.sub('([a-z\d])([A-Z])','\\1_\\2', \
 51                re.sub('([A-Z]+)([A-Z][a-z])','\\1_\\2', re.sub('::', '/',word)))).lower()
 52
 53
 54    def humanize(self, word, uppercase = '') :
 55        '''Returns a human-readable string from word
 56        Returns a human-readable string from word, by replacing
 57        underscores with a space, and by upper-casing the initial
 58        character by default.
 59        If you need to uppercase all the words you just have to
 60        pass 'all' as a second parameter.'''
 61
 62        if(uppercase == 'first'):
 63            return re.sub('_id$', '', word).replace('_',' ').capitalize()
 64        else :
 65            return re.sub('_id$', '', word).replace('_',' ').title()
 66
 67
 68    def variablize(self, word) :
 69        '''Same as camelize but first char is lowercased
 70        Converts a word like "send_email" to "sendEmail". It
 71        will remove non alphanumeric character from the word, so
 72        "who's online" will be converted to "whoSOnline"'''
 73        word = self.camelize(word)
 74        return word[0].lower()+word[1:]
 75
 76    def tableize(self, class_name) :
 77        ''' Converts a class name to its table name according to rails
 78        naming conventions. Example. Converts "Person" to "people" '''
 79        return self.pluralize(self.underscore(class_name))
 80
 81
 82    def classify(self, table_name) :
 83        '''Converts a table name to its class name according to rails
 84        naming conventions. Example: Converts "people" to "Person" '''
 85        return self.camelize(self.singularize(table_name))
 86
 87
 88    def ordinalize(self, number) :
 89        '''Converts number to its ordinal English form.
 90        This method converts 13 to 13th, 2 to 2nd ...'''
 91        tail = 'th'
 92        if number % 100 == 11 or number % 100 == 12 or number % 100 == 13:
 93            tail = 'th'
 94        elif number % 10 == 1 :
 95            tail = 'st'
 96        elif number % 10 == 2 :
 97            tail = 'nd'
 98        elif number % 10 == 3 :
 99            tail = 'rd'
100
101        return str(number)+tail
102
103
104    def unaccent(self, text) :
105        '''Transforms a string to its unaccented version.
106        This might be useful for generating "friendly" URLs'''
107        find = u'\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5\u00C6\u00C7\u00C8\u00C9\u00CA\u00CB\u00CC\u00CD\u00CE\u00CF\u00D0\u00D1\u00D2\u00D3\u00D4\u00D5\u00D6\u00D8\u00D9\u00DA\u00DB\u00DC\u00DD\u00DE\u00DF\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5\u00E6\u00E7\u00E8\u00E9\u00EA\u00EB\u00EC\u00ED\u00EE\u00EF\u00F0\u00F1\u00F2\u00F3\u00F4\u00F5\u00F6\u00F8\u00F9\u00FA\u00FB\u00FC\u00FD\u00FE\u00FF'
108        replace = u'AAAAAAACEEEEIIIIDNOOOOOOUUUUYTsaaaaaaaceeeeiiiienoooooouuuuyty'
109        return self.string_replace(text, find, replace)
110
111    def string_replace (self, word, find, replace) :
112        '''This function returns a copy of word, translating
113        all occurrences of each character in find to the
114        corresponding character in replace'''
115        for k in range(0,len(find)) :
116            word = re.sub(find[k], replace[k], word)
117
118        return word
119
120    def urlize(self, text) :
121        '''Transform a string its unaccented and underscored
122        version ready to be inserted in friendly URLs'''
123        return re.sub('^_|_$','',self.underscore(self.unaccent(text)))
124
125
126    def demodulize(self, module_name) :
127        return self.humanize(self.underscore(re.sub('^.*::','',module_name)))
128
129    def modulize(self, module_description) :
130        return self.camelize(self.singularize(module_description))
131
132    def foreignKey(self, class_name, separate_class_name_and_id_with_underscore = 1) :
133        ''' Returns class_name in underscored form, with "_id" tacked on at the end.
134        This is for use in dealing with the database.'''
135        if separate_class_name_and_id_with_underscore :
136            tail = '_id'
137        else :
138            tail = 'id'
139        return self.underscore(self.demodulize(class_name))+tail;
140
141
class English(Base):
    """
    Inflector for pluralize and singularize English nouns.

    This is the default Inflector for the Inflector obj

    Each rule table below is a sequence of (pattern, replacement) pairs that
    is scanned in order; the first pattern that matches the word wins.
    Patterns are applied case-insensitively.
    """

    _PLURAL_RULES = (
        (r'(quiz)$', r'\1zes'),
        (r'^(ox)$', r'\1en'),
        (r'([ml])ouse$', r'\1ice'),
        (r'(matr|vert|ind)(?:ix|ex)$', r'\1ices'),
        (r'(x|ch|ss|sh)$', r'\1es'),
        # Already-plural "-ies" words are left as they are.
        (r'([^aeiouy]|qu)ies$', r'\1ies'),
        (r'([^aeiouy]|qu)y$', r'\1ies'),
        (r'(hive)$', r'\1s'),
        (r'(?:([^f])fe|([lr])f)$', r'\1\2ves'),
        (r'sis$', 'ses'),
        (r'([ti])um$', r'\1a'),
        (r'(buffal|tomat)o$', r'\1oes'),
        (r'(bu)s$', r'\1ses'),
        (r'(alias|status)$', r'\1es'),
        (r'(octop|vir)us$', r'\1i'),
        (r'(ax|test)is$', r'\1es'),
        (r's$', 's'),
        (r'$', 's'),
    )

    _SINGULAR_RULES = (
        (r'(quiz)zes$', r'\1'),
        (r'(matr)ices$', r'\1ix'),
        (r'(vert|ind)ices$', r'\1ex'),
        (r'^(ox)en', r'\1'),
        (r'(alias|status)es$', r'\1'),
        (r'(octop|vir)i$', r'\1us'),
        (r'(cris|ax|test)es$', r'\1is'),
        (r'(shoe)s$', r'\1'),
        (r'(o)es$', r'\1'),
        (r'(bus)es$', r'\1'),
        (r'([ml])ice$', r'\1ouse'),
        (r'(x|ch|ss|sh)es$', r'\1'),
        (r'(m)ovies$', r'\1ovie'),
        (r'(s)eries$', r'\1eries'),
        (r'([^aeiouy]|qu)ies$', r'\1y'),
        (r'([lr])ves$', r'\1f'),
        (r'(tive)s$', r'\1'),
        (r'(hive)s$', r'\1'),
        (r'([^f])ves$', r'\1fe'),
        (r'(^analy)ses$', r'\1sis'),
        (r'(analy|ba|diagno|parenthe|progno|synop|the)ses$', r'\1sis'),
        (r'([ti])a$', r'\1um'),
        (r'(n)ews$', r'\1ews'),
        # A word ending in "ss" (class, boss) is already singular.
        (r'(ss)$', r'\1'),
        (r's$', ''),
    )

    _PLURAL_UNCOUNTABLE = ('equipment', 'information', 'rice', 'money',
                           'species', 'series', 'fish', 'sheep')
    _SINGULAR_UNCOUNTABLE = _PLURAL_UNCOUNTABLE + ('sms',)

    _PLURAL_IRREGULAR = (
        ('person', 'people'),
        ('man', 'men'),
        ('child', 'children'),
        ('sex', 'sexes'),
        ('move', 'moves'),
    )
    _SINGULAR_IRREGULAR = (
        ('people', 'person'),
        ('men', 'man'),
        ('children', 'child'),
        ('sexes', 'sex'),
        ('moves', 'move'),
    )

    def _is_uncountable(self, word, uncountable_words):
        '''Tell whether word is, or ends in a separate word that is, uncountable.

        The uncountable term must be the whole word, follow a non-letter
        ("user_equipment", "bighorn sheep") or start a CamelCase component
        ("UserEquipment"); a bare suffix such as the "rice" in "price"
        does not count.'''
        lowered = word.lower()
        for uncountable in uncountable_words:
            if not lowered.endswith(uncountable):
                continue
            start = len(lowered) - len(uncountable)
            if start == 0:
                return True
            # Slices instead of indexes so odd-length case mappings cannot raise.
            before = word[start - 1:start]
            first = word[start:start + 1]
            if not before.isalpha():
                return True
            if before.islower() and first.isupper():
                return True
        return False

    def _apply_irregular(self, word, irregular_words):
        '''Return word with a trailing irregular form swapped for its
        counterpart, or None when no irregular form matches.

        The first letter of the matched text is kept so that the original
        capitalisation survives ("Person" -> "People").'''
        for source, target in irregular_words:
            match = re.search('(' + source + ')$', word, re.IGNORECASE)
            if match:
                return word[:match.start()] + match.group(1)[0] + target[1:]
        return None

    def _apply_rules(self, word, rules):
        '''Apply the first (pattern, replacement) rule whose pattern matches
        word and return the result; return word unchanged if none matches.'''
        for pattern, replacement in rules:
            regex = re.compile(pattern, re.IGNORECASE)
            match = regex.search(word)
            if match is None:
                continue
            # Strip references to groups that did not take part in the match;
            # older Python versions raise on them instead of substituting ''.
            for index, group in enumerate(match.groups(), 1):
                if group is None:
                    replacement = replacement.replace('\\%d' % index, '')
            return regex.sub(replacement, word)
        return word

    def pluralize(self, word):
        '''Pluralizes English nouns.

        Uncountable words are returned unchanged, irregular words are
        swapped for their listed plural, and everything else goes through
        the ordered plural rule table.'''
        if self._is_uncountable(word, self._PLURAL_UNCOUNTABLE):
            return word

        irregular = self._apply_irregular(word, self._PLURAL_IRREGULAR)
        if irregular is not None:
            return irregular

        return self._apply_rules(word, self._PLURAL_RULES)

    def singularize(self, word):
        '''Singularizes English nouns.

        Uncountable words are returned unchanged, irregular words are
        swapped for their listed singular, and everything else goes through
        the ordered singular rule table.'''
        if self._is_uncountable(word, self._SINGULAR_UNCOUNTABLE):
            return word

        irregular = self._apply_irregular(word, self._SINGULAR_IRREGULAR)
        if irregular is not None:
            return irregular

        return self._apply_rules(word, self._SINGULAR_RULES)
270
271
class Inflector(object):
    """
    Inflector for pluralizing and singularizing nouns.

    It provides methods for helping on creating programs
    based on naming conventions like on Ruby on Rails.

    Every method simply forwards to the locale inflector instance created
    in the constructor and stored in self.Inflector.
    """

    def __init__(self, Inflector=English):
        '''Instantiate the locale inflector to delegate to.

        Inflector is a callable (normally a class such as English) that is
        called with no arguments to build the delegate.'''
        assert callable(Inflector), "Inflector should be a callable obj"
        # Direct call instead of the apply() builtin, which Python 3 removed.
        self.Inflector = Inflector()

    def pluralize(self, word):
        '''Pluralizes nouns.'''
        return self.Inflector.pluralize(word)

    def singularize(self, word):
        '''Singularizes nouns.'''
        return self.Inflector.singularize(word)

    def cond_plural(self, number_of_records, word):
        '''Returns the plural form of a word unless the first parameter is exactly 1'''
        return self.Inflector.cond_plural(number_of_records, word)

    def titleize(self, word, uppercase=''):
        '''Converts an underscored or CamelCase word into a sentence.
            The titleize function converts text like "WelcomePage",
            "welcome_page" or  "welcome page" to this "Welcome Page".
            If the "uppercase" parameter is set to 'first' it will only
            capitalize the first character of the title.'''
        return self.Inflector.titleize(word, uppercase)

    def camelize(self, word):
        ''' Returns given word as CamelCased
        Converts a word like "send_email" to "SendEmail". It
        will remove non alphanumeric character from the word, so
        "who's online" will be converted to "WhoSOnline"'''
        return self.Inflector.camelize(word)

    def underscore(self, word):
        ''' Converts a word "into_it_s_underscored_version"
        Convert any "CamelCased" or "ordinary Word" into an
        "underscored_word".
        This can be really useful for creating friendly URLs.'''
        return self.Inflector.underscore(word)

    def humanize(self, word, uppercase=''):
        '''Returns a human-readable string from word
        Returns a human-readable string from word, by replacing
        underscores with a space. The "uppercase" parameter is passed
        straight through to the locale inflector, which decides how the
        result is capitalized.'''
        return self.Inflector.humanize(word, uppercase)

    def variablize(self, word):
        '''Same as camelize but first char is lowercased
        Converts a word like "send_email" to "sendEmail". It
        will remove non alphanumeric character from the word, so
        "who's online" will be converted to "whoSOnline"'''
        return self.Inflector.variablize(word)

    def tableize(self, class_name):
        ''' Converts a class name to its table name according to rails
        naming conventions. Example. Converts "Person" to "people" '''
        return self.Inflector.tableize(class_name)

    def classify(self, table_name):
        '''Converts a table name to its class name according to rails
        naming conventions. Example: Converts "people" to "Person" '''
        return self.Inflector.classify(table_name)

    def ordinalize(self, number):
        '''Converts number to its ordinal form.
        This method converts 13 to 13th, 2 to 2nd ...'''
        return self.Inflector.ordinalize(number)

    def unaccent(self, text):
        '''Transforms a string to its unaccented version.
        This might be useful for generating "friendly" URLs'''
        return self.Inflector.unaccent(text)

    def urlize(self, text):
        '''Transform a string its unaccented and underscored
        version ready to be inserted in friendly URLs'''
        return self.Inflector.urlize(text)

    def demodulize(self, module_name):
        '''Forwards module_name to the locale inflector's demodulize().'''
        return self.Inflector.demodulize(module_name)

    def modulize(self, module_description):
        '''Forwards module_description to the locale inflector's modulize().'''
        return self.Inflector.modulize(module_description)

    def foreignKey(self, class_name, separate_class_name_and_id_with_underscore=1):
        ''' Returns class_name in underscored form, with "_id" tacked on at the end.
        This is for use in dealing with the database.'''
        return self.Inflector.foreignKey(class_name, separate_class_name_and_id_with_underscore)
372
373
374# Copyright (c) 2006 Bermi Ferrer Martinez
375# Permission is hereby granted, free of charge, to any person obtaining a copy
376# of this software to deal in this software without restriction, including
377# without limitation the rights to use, copy, modify, merge, publish,
378# distribute, sublicense, and/or sell copies of this software, and to permit
379# persons to whom this software is furnished to do so, subject to the following
380# condition:
381#
382# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
383# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
384# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
385# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
386# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
387# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN
388# THIS SOFTWARE.