PageRenderTime 47ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/Programs/guitar_format_converter/chordpro_parser.py

https://gitlab.com/petern3/guitar_music
Python | 348 lines | 336 code | 6 blank | 6 comment | 0 complexity | 9bcaddfe8c5b9c818483608f48e64a58 MD5 | raw file
  1. '''
  2. '''
  3. import re
  4. BLANK = "Blank"
  5. CHORD = "Chord"
  6. LYRIC = "Lyric"
  7. LABEL = "Label"
  8. INFO = "Info"
  9. TAB = "Tab"
  10. CHORD_REGEX = re.compile("(?P<chord>(?P<root>[A-G][#b]?)(?P<mod>(m|sus|add|maj|dim|aug)?[0-9]?)*(?P<bass>\/[A-G][#b])?)")
  11. INSTRUCTION_REGEX = re.compile("\(?(?P<instruction>[Rr]ing|[Mm]ute|[Rr]ake|PM|pm)\)?")
  12. LABEL_TYPES = [
  13. "intro",
  14. "verse",
  15. "chorus",
  16. "bridge",
  17. "solo",
  18. "tab",
  19. "tag",
  20. "ending",
  21. "outro",
  22. "repeat",
  23. ]
  24. INFO_TYPES = [
  25. "title",
  26. "subtitle",
  27. "author",
  28. "album",
  29. "copyright",
  30. "ccli",
  31. "capo",
  32. "key",
  33. "time",
  34. "tempo",
  35. ]
  36. def chord_regex_gen():
  37. ''' Generates the chord regular expression '''
  38. regex = "( *({0}|{1}))+".format(CHORD_REGEX.pattern, INSTRUCTION_REGEX.pattern)
  39. return re.compile(regex)
  40. def label_regex_gen():
  41. ''' Generates the label regular expression '''
  42. middle = ""
  43. for label in LABEL_TYPES:
  44. middle = middle + label + "|"
  45. regex = " *(?P<pre>pre)?[ \-]?(?P<label>{0}) *(?P<value>x?[0-9]*)".format(middle[:-1])
  46. return re.compile(regex, re.IGNORECASE)
  47. def info_regex_gen():
  48. ''' Generates the info regular expression '''
  49. middle = ""
  50. for info in INFO_TYPES:
  51. middle = middle + info + "|"
  52. regex = " *(?P<info>{0})\W+(?P<value>[0-9]+|.+)".format(middle[:-1])
  53. return re.compile(regex, re.IGNORECASE)
  54. def tab_regex_gen():
  55. ''' Generates the tab regular expression '''
  56. regex = "\s*(\|{0,2}[A-Gb]?\|{0,2}[-x0-9|:]{4,})"
  57. return re.compile(regex)
  58. LINE_TYPES = {
  59. BLANK: re.compile("(\s*)"),
  60. CHORD: chord_regex_gen(),
  61. LABEL: label_regex_gen(),
  62. INFO: info_regex_gen(),
  63. TAB: tab_regex_gen()
  64. }
  65. def find_inline_type(text):
  66. ''' Decides whether text is a chord or instruction '''
  67. if CHORD_REGEX.fullmatch(text) is not None:
  68. return "Chord"
  69. if INSTRUCTION_REGEX.fullmatch(text) is not None:
  70. return "Instruction"
  71. return "Inline"
  72. def find_line_type(text):
  73. ''' Takes an estimate of the type of line it is '''
  74. for (line_type, line_regex) in LINE_TYPES.items():
  75. if line_regex.fullmatch(text):
  76. return line_type
  77. return LYRIC
  78. def create_line(text):
  79. ''' Creates a line of the correct category '''
  80. category = find_line_type(text)
  81. return eval("{}Line({})".format(category, repr(text)))
  82. class Inline(object):
  83. def __init__(self, text, spacing=0, category="Inline"):
  84. self.text = text
  85. self.spacing = spacing
  86. self.category = category
  87. if self.category != "Inline":
  88. assert(find_inline_type(self.text) == self.category)
  89. def __repr__(self):
  90. ''' Representation of the object '''
  91. return "{}('{}', {})".format(self.category, self.text, self.spacing)
  92. def __str__(self):
  93. ''' String representation of the object '''
  94. return self.get_chopro()
  95. def get_chopro(self):
  96. ''' Returns the ChordPro representation of the object '''
  97. return "[{}]".format(self.text)
  98. class Instruction(Inline):
  99. def __init__(self, text, spacing=0):
  100. super().__init__(text, spacing, "Instruction")
  101. def get_plaintext(self):
  102. ''' Returns the plain text representation of the object '''
  103. return "({})".format(self.text)
  104. class Chord(Inline):
  105. def __init__(self, text, spacing=0):
  106. super().__init__(text, spacing, "Chord")
  107. self.find_chord_parts()
  108. def find_chord_parts(self):
  109. ''' Extracts the chord parts '''
  110. group_dict = CHORD_REGEX.fullmatch(self.text).groupdict()
  111. self.root = group_dict['root']
  112. self.mod = group_dict['mod']
  113. self.bass = group_dict['bass']
  114. def transpose(self, semitones):
  115. ''' Transposes the chord '''
  116. pass
  117. def get_plaintext(self):
  118. ''' Returns the plain text representation of the object '''
  119. # TODO: Extract from root, mod and bass
  120. return "{}".format(self.text)
  121. class Line(object):
  122. ''' Contains a line of a song '''
  123. def __init__(self, text="", category=""):
  124. self.text = text.rstrip()
  125. self.category = category # find_line_type(self.text)
  126. if self.category != "":
  127. assert(find_line_type(self.text) == self.category)
  128. def __repr__(self):
  129. ''' Representation of the object '''
  130. return "{}Line('{}')".format(self.category, self.text)
  131. def __str__(self):
  132. ''' String representation of the object '''
  133. return self.get_chopro()
  134. def get_chopro(self):
  135. ''' Placeholder for when is cast into a specific line '''
  136. return self.text
  137. class BlankLine(Line):
  138. def __init__(self, text=""):
  139. super().__init__(text, BLANK)
  140. def __repr__(self):
  141. ''' Representation of the object '''
  142. return "{}Line()".format(self.category)
  143. def get_chopro(self):
  144. ''' Returns the ChordPro representation of the object '''
  145. return self.get_plaintext()
  146. def get_plaintext(self):
  147. ''' Returns the plain text representation of the object '''
  148. return ""
  149. class ChordLine(Line):
  150. def __init__(self, text):
  151. super().__init__(text, CHORD)
  152. self.find_chords()
  153. def find_chords(self):
  154. ''' Extracts the chords '''
  155. #~ line_list = self.text.split(" ")
  156. chord_list = []
  157. count = 0
  158. count_latched = 0
  159. inline_word = ""
  160. for char in self.text + " ":
  161. if char != " ":
  162. if inline_word == "": # Just started a word
  163. count_latched = count
  164. inline_word += char
  165. else:
  166. if inline_word != "": # Just ended a word
  167. # Now process the inline word
  168. chord = CHORD_REGEX.fullmatch(inline_word)
  169. instruction = INSTRUCTION_REGEX.fullmatch(inline_word)
  170. if chord is not None:
  171. chord_list.append(Chord(chord.groupdict()["chord"], count_latched))
  172. elif instruction is not None:
  173. chord_list.append(Instruction(instruction.groupdict()["instruction"].lower(), count))
  174. inline_word = ""
  175. count += 1
  176. self.chord_list = chord_list
  177. def get_chopro(self):
  178. ''' Returns the ChordPro representation of the object '''
  179. return self.text # TODO: Convert this to processed version
  180. def get_plaintext(self):
  181. ''' Returns the plain text representation of the object '''
  182. return self.text # TODO: Convert this to processed version
  183. class LyricLine(Line):
  184. def __init__(self, text, chords=None):
  185. super().__init__(text, LYRIC)
  186. self.chords = chords
  187. def __repr__(self):
  188. ''' Representation of the object '''
  189. return "{}Line('{}', {})".format(self.category, self.text, repr(self.chords))
  190. def set_chords(self, chords):
  191. ''' Sets the chords associated with the lyrics '''
  192. assert(isinstance(chords, ChordLine))
  193. self.chords = chords
  194. def get_chopro(self):
  195. ''' Returns the ChordPro representation of the object '''
  196. ### TODO: Maybe switch this with get_plaintext (and in other Lines)?
  197. new_text = self.text
  198. if self.chords is not None:
  199. for chord in reversed(self.chords.chord_list):
  200. pre_chord = new_text[0:chord.spacing]
  201. post_chord = new_text[chord.spacing:]
  202. new_text = pre_chord + str(chord) + post_chord
  203. return new_text
  204. def get_plaintext(self):
  205. ''' Returns the plain text representation of the object '''
  206. ### TODO: Actually write this function
  207. new_text = self.text
  208. if self.chords is not None:
  209. for chord in reversed(self.chords.chord_list):
  210. pre_chord = new_text[0:chord.spacing]
  211. post_chord = new_text[chord.spacing:]
  212. new_text = pre_chord + str(chord) + post_chord
  213. return new_text
  214. class LabelLine(Line):
  215. def __init__(self, text):
  216. super().__init__(text, LABEL)
  217. self.find_label()
  218. def __repr__(self):
  219. ''' Representation of the object '''
  220. return "{}Line('{} {}')".format(self.category, self.label, self.value)
  221. def find_label(self):
  222. ''' Extracts the label type '''
  223. group_dict = LINE_TYPES[self.category].fullmatch(self.text).groupdict()
  224. ## Get Label Type
  225. if group_dict["pre"] is not None:
  226. self.label = "PRE-" + group_dict["label"].upper()
  227. else:
  228. self.label = group_dict["label"].upper()
  229. ## Get Value
  230. if group_dict["value"].isdigit():
  231. self.value = int(group_dict["value"])
  232. elif group_dict["value"] == "":
  233. self.value = None
  234. else:
  235. self.value = group_dict["value"]
  236. def get_chopro(self):
  237. ''' Returns the ChordPro representation of the object '''
  238. if self.value==None:
  239. return "{{c:{}}}".format(self.label)
  240. else:
  241. return "{{c:{} {}}}".format(self.label, self.value)
  242. def get_plaintext(self):
  243. ''' Returns the plain text representation of the object '''
  244. if self.value==None:
  245. return "{}".format(self.label)
  246. else:
  247. return "{} {}".format(self.label, self.value)
  248. class InfoLine(Line):
  249. def __init__(self, text):
  250. super().__init__(text, INFO)
  251. self.find_info()
  252. def __repr__(self):
  253. ''' Representation of the object '''
  254. return "{}Line('{}:{}')".format(self.category, self.info, self.value)
  255. def find_info(self):
  256. ''' Extracts the info type '''
  257. group_dict = LINE_TYPES[self.category].fullmatch(self.text).groupdict()
  258. ## Get Info
  259. self.info = group_dict["info"].lower()
  260. ## Get Value
  261. if group_dict["value"].isdigit():
  262. self.value = int(group_dict["value"])
  263. else:
  264. self.value = group_dict["value"]
  265. def get_chopro(self):
  266. ''' Returns the ChordPro representation of the object '''
  267. return "{{{}:{}}}".format(self.info, self.value)
  268. def get_plaintext(self):
  269. ''' Returns the plain text representation of the object '''
  270. return "{}: {}".format(self.info, self.value)
  271. class TabLine(Line):
  272. def __init__(self, text):
  273. super().__init__(text, TAB)
if __name__ == "__main__":
    # When run as a script, execute any doctests found in this module and
    # print every example as it runs.
    import doctest
    doctest.testmod(verbose=True)
    #~ doctest.testfile("regex_doctests.py")