PageRenderTime 26ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/ svn-cassiopeia --username yuuma.tomita@gmail.com/cas_helper/xldb/utils/parsers/kansuji2arabic.py

http://svn-cassiopeia.googlecode.com/
Python | 70 lines | 62 code | 5 blank | 3 comment | 22 complexity | 452b943f22fc720ef417c4876228b1d5 MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import re
  4. def kansuji2arabic(text):
  5. """???????????????"""
  6. KANNUM_PATTERN = re.compile(u'(?P<kansuji>[????????????????????????????????,\d]+)')
  7. index = 0
  8. while index < len(text):
  9. matched = KANNUM_PATTERN.search(text[index:])
  10. if matched:
  11. kansuji = matched.group('kansuji')
  12. startindex = matched.start('kansuji') + index
  13. endindex = matched.end('kansuji') + index
  14. result = 0
  15. digit = 1
  16. numgroup = 1
  17. kanindex = len(kansuji)
  18. while kanindex > 0:
  19. c = kansuji[(kanindex - 1):kanindex]
  20. kanindex -= 1
  21. if c == u'??0':
  22. digit *= 10
  23. elif c in u'??':
  24. digit = 10
  25. elif c == u'?':
  26. digit = 100
  27. elif c == u'?':
  28. digit = 1000
  29. elif c in u'??':
  30. numgroup = 10000
  31. digit = 1
  32. elif c in u'?':
  33. numgroup = 10000*10000
  34. digit = 1
  35. elif c in u'?':
  36. numgroup = 10000*10000*10000
  37. digit = 1
  38. elif c in u'?,':
  39. pass
  40. else:
  41. if c in u'???1':
  42. result += digit * numgroup
  43. elif c in u'???2':
  44. result += 2 * digit * numgroup
  45. elif c in u'???3':
  46. result += 3 * digit * numgroup
  47. elif c in u'??4':
  48. result += 4 * digit * numgroup
  49. elif c in u'??5':
  50. result += 5 * digit * numgroup
  51. elif c in u'??6':
  52. result += 6 * digit * numgroup
  53. elif c in u'??7':
  54. result += 7 * digit * numgroup
  55. elif c in u'??8':
  56. result += 8 * digit * numgroup
  57. elif c in u'??9':
  58. result += 9 * digit * numgroup
  59. digit *= 10
  60. text = u'%s%d%s' % (text[:startindex], result, text[endindex:])
  61. index = startindex + len('%d' % (result,))
  62. else:
  63. break
  64. return text
  65. k2a = lambda str: kansuji2arabic(unicode(str)) #extended shortcut