/ svn-cassiopeia --username yuuma.tomita@gmail.com/cas_helper/xldb/utils/parsers/kansuji2arabic.py
Python | 70 lines | 62 code | 5 blank | 3 comment | 22 complexity | 452b943f22fc720ef417c4876228b1d5 MD5 | raw file
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
-
- import re
-
# NOTE(review): every non-ASCII character of this block had been replaced by
# "?" before it reached review.  The tables below are a reconstruction based
# on the surviving ASCII digit in each literal and on the number of lost
# characters per literal -- confirm against an intact copy of the file.
# Characters are written as \u escapes so that another lossy transcode of the
# source cannot silently corrupt them again.

# Index in the tuple == numeric value; each string lists every accepted
# spelling of that digit (ASCII, kanji, formal "daiji" kanji, full-width).
_KANSUJI_DIGIT_FORMS = (
    u'0\u3007\uff10',        # 0  U+3007 (kanji zero), U+FF10 (full-width 0)
    u'1\u4e00\u58f1\uff11',  # 1  U+4E00, U+58F1 (formal), U+FF11
    u'2\u4e8c\u5f10\uff12',  # 2  U+4E8C, U+5F10 (formal), U+FF12
    u'3\u4e09\u53c2\uff13',  # 3  U+4E09, U+53C2 (formal), U+FF13
    u'4\u56db\uff14',        # 4  U+56DB, U+FF14
    u'5\u4e94\uff15',        # 5  U+4E94, U+FF15
    u'6\u516d\uff16',        # 6  U+516D, U+FF16
    u'7\u4e03\uff17',        # 7  U+4E03, U+FF17
    u'8\u516b\uff18',        # 8  U+516B, U+FF18
    u'9\u4e5d\uff19',        # 9  U+4E5D, U+FF19
)
_KANSUJI_DIGITS = {
    ch: value
    for value, forms in enumerate(_KANSUJI_DIGIT_FORMS)
    for ch in forms
}

# Multipliers that apply inside one four-digit group (10, 100, 1000).
_KANSUJI_SMALL_UNITS = {
    u'\u5341': 10,    # U+5341 ten
    u'\u62fe': 10,    # U+62FE ten (formal)
    u'\u767e': 100,   # U+767E hundred
    u'\u5343': 1000,  # U+5343 thousand
}

# Markers that start a new four-digit group (10**4, 10**8, 10**12).
_KANSUJI_GROUP_UNITS = {
    u'\u4e07': 10 ** 4,   # U+4E07 ten thousand
    u'\u842c': 10 ** 4,   # U+842C ten thousand (traditional form)
    u'\u5104': 10 ** 8,   # U+5104 hundred million
    u'\u5146': 10 ** 12,  # U+5146 trillion
}

# Thousands separators tolerated *inside* a number: ASCII and full-width comma.
_KANSUJI_SEPARATORS = u',\uff0c'

# A number is one or more numeral characters, optionally followed by
# separator-joined runs of numerals.  Requiring a numeral on both sides of a
# separator keeps ordinary punctuation ("Hello, world", "1, 2") out of the
# match.  Digits are listed explicitly instead of using \d, which on
# Python 3 would also match unrelated Unicode digits.
_KANNUM_PATTERN = re.compile(
    u'(?P<kansuji>[{numerals}]+(?:[{separators}]+[{numerals}]+)*)'.format(
        numerals=re.escape(u''.join(
            list(_KANSUJI_DIGITS)
            + list(_KANSUJI_SMALL_UNITS)
            + list(_KANSUJI_GROUP_UNITS)
        )),
        separators=re.escape(_KANSUJI_SEPARATORS),
    )
)


def _kansuji_value(token):
    """Return the integer value of one matched numeral run.

    The run is read right to left.  ``place`` is the multiplier inside the
    current four-digit group and ``group`` is the multiplier of the group
    itself, so plain positional digits and unit-based notation can be mixed
    freely in the same run.
    """
    total = 0
    place = 1
    group = 1
    # True while a 10/100/1000 unit has been read but no digit has been
    # attached to it yet; such a unit stands for an implicit "one".
    unit_pending = False
    for ch in reversed(token):
        if ch in _KANSUJI_SEPARATORS:
            continue
        if ch in _KANSUJI_GROUP_UNITS:
            if unit_pending:
                total += place * group
                unit_pending = False
            group = _KANSUJI_GROUP_UNITS[ch]
            place = 1
        elif ch in _KANSUJI_SMALL_UNITS:
            if unit_pending:
                total += place * group
            place = _KANSUJI_SMALL_UNITS[ch]
            unit_pending = True
        else:
            total += _KANSUJI_DIGITS[ch] * place * group
            place *= 10
            unit_pending = False
    if unit_pending:
        total += place * group
    return total


def _replace_kansuji(matched):
    """Return the replacement text for one regex match."""
    token = matched.group('kansuji')
    # A run made only of group markers carries no digit to scale, so it is
    # left as it was instead of being rewritten to "0".
    if all(ch in _KANSUJI_GROUP_UNITS or ch in _KANSUJI_SEPARATORS
           for ch in token):
        return token
    return u'%d' % _kansuji_value(token)


def kansuji2arabic(text):
    """Replace every kanji / full-width / ASCII numeral run in *text*.

    Each run is rewritten as a plain ASCII decimal integer; everything else
    in *text* is returned untouched.  Commas between digits are treated as
    thousands separators and removed; commas that are not surrounded by
    numerals are ordinary punctuation and are preserved.  A 10/100/1000 unit
    with no digit in front of it counts as one of that unit.

    :param text: the text to convert.
    :returns: *text* with every numeral run replaced by its decimal value.
    """
    # re.sub scans non-overlapping matches left to right and never rescans
    # the replacement, without re-slicing the text once per match.
    return _KANNUM_PATTERN.sub(_replace_kansuji, text)
-
try:
    _k2a_text_type = unicode  # Python 2: the builtin text type
except NameError:
    _k2a_text_type = str  # Python 3: ``unicode`` no longer exists


def k2a(str):  # parameter name kept so existing keyword callers still work
    """Shortcut for ``kansuji2arabic`` that first coerces its argument to text.

    :param str: any object; it is converted with the interpreter's text type
        (``unicode`` on Python 2, ``str`` on Python 3) before conversion.
    :returns: whatever ``kansuji2arabic`` returns for that text.
    """
    return kansuji2arabic(_k2a_text_type(str))