PageRenderTime 36ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/mnemosyne/mnemosyne/example_scripts/import_rosetta_stone.py

https://gitlab.com/sampablokuper/mnemosyne-proj
Python | 165 lines | 114 code | 22 blank | 29 comment | 30 complexity | 36ca5cfa486658d45f052a5d4bbbd879 MD5 | raw file
  1. #
  2. # import_rosetta_stone.py <Peter.Bienstman>
  3. #
  4. # Script to import audio and pictures from the Rosetta Stone V2 into a Sentence
  5. # card type.
  6. # Adapt it to your own needs. It uses external Linux tools, so it needs to be
  7. # modified to run under Windows.
  8. import os
  9. import shutil
  10. from mnemosyne.script import Mnemosyne
  11. # 'data_dir = None' will use the default sysem location, edit as appropriate.
  12. data_dir = None
  13. mnemosyne = Mnemosyne(data_dir)
  14. # Answer questions coming from libmnemosyne.
  15. def show_question(question, option0, option1, option2):
  16. # Aswer 'no' when adding duplicate cards.
  17. if question.startswith("There is already"):
  18. return 2
  19. # Answer 'yes' for making tag active.
  20. if question.startswith("Make tag"):
  21. return 0
  22. else:
  23. raise NotImplementedError
  24. mnemosyne.main_widget().show_question = show_question
  25. # This script will add tags like TRS Arabic::Unit 1::Lesson 1
  26. tag_prefix = "TRS Arabic"
  27. # Card type.
  28. card_type = mnemosyne.card_type_with_id("6::Arabic MSA sentences")
  29. # Directory containing foreign language, with directories like ARA01_01
  30. # and PCT01_01
  31. foreign_directory = "/home/pbienst/tmp/trs_arabic"
  32. # Directory containing native language, to generate translations.
  33. native_directory = "/home/pbienst/tmp/trs_english"
  34. # Subdirectory in the media directory to used.
  35. media_subdir = "trs_ara"
  36. full_media_subdir = os.path.join(mnemosyne.database().media_dir(), media_subdir)
  37. if not os.path.exists(full_media_subdir):
  38. os.mkdir(full_media_subdir)
  39. # Codec that was used to encode the foreign language.
  40. foreign_codec = "iso-8859-6"
  41. native_codec = "latin-1"
  42. # Extract txt.
  43. def get_txt(directory, codec):
  44. txt = {}
  45. for path in sorted(os.listdir(directory)):
  46. subdir = os.path.join(directory, path)
  47. if os.path.isdir(subdir) and not path.startswith("PCT"):
  48. # Determine unit and lesson number.
  49. unit, lesson = path[3:].split("_")
  50. unit = int(unit)
  51. if unit not in txt:
  52. txt[unit] = {}
  53. lesson = int(lesson)
  54. # Determine sentences.
  55. txt_file = file(os.path.join(subdir,
  56. [x for x in os.listdir(subdir) if x.endswith(".TXT")][0]))
  57. entries = unicode(txt_file.read(), codec, errors="ignore") \
  58. .replace(unichr(336), "\'") \
  59. .replace(unichr(213), "\'") \
  60. .replace(unichr(210), "\"") \
  61. .replace(unichr(211), "\"") \
  62. .split("@")[1:-1]
  63. assert len(entries) == 40
  64. txt[unit][lesson] = entries
  65. return txt
  66. foreign_txt = get_txt(foreign_directory, foreign_codec)
  67. native_txt = get_txt(native_directory, native_codec)
  68. # Extract images.
  69. def extract_images(directory):
  70. images = {}
  71. for path in sorted(os.listdir(directory)):
  72. subdir = os.path.join(directory, path)
  73. if os.path.isdir(subdir) and path.startswith("PCT"):
  74. # Detemine unit and lesson number.
  75. unit, lesson = path[3:].split("_")
  76. unit = int(unit)
  77. if unit not in images:
  78. images[unit] = {}
  79. lesson = int(lesson)
  80. img_dir = os.path.join(subdir,
  81. [x for x in os.listdir(subdir) if x.startswith("P")][0])
  82. img_list = []
  83. for img in sorted(os.listdir(img_dir)):
  84. full_path = os.path.join(img_dir, img)
  85. if img.endswith("JPG"):
  86. shutil.copyfile(full_path, os.path.join(full_media_subdir, img))
  87. img_list.append(media_subdir + "/" + img)
  88. if img.endswith("PCT"):
  89. os.system("convert " + full_path + " " + \
  90. os.path.join(full_media_subdir, img).replace("PCT", "JPG"))
  91. img_list.append(\
  92. media_subdir + "/" + img.replace("PCT", "JPG"))
  93. images[unit][lesson] = img_list
  94. return images
  95. images = extract_images(foreign_directory)
  96. # Extract sound.
  97. def extract_sound(directory):
  98. sound = {}
  99. for path in sorted(os.listdir(directory)):
  100. subdir = os.path.join(directory, path)
  101. if os.path.isdir(subdir) and not path.startswith("PCT"):
  102. # Determine unit and lesson number.
  103. unit, lesson = path[3:].split("_")
  104. unit = int(unit)
  105. if unit not in sound:
  106. sound[unit] = {}
  107. lesson = int(lesson)
  108. snd_dir = os.path.join(subdir,
  109. [x for x in os.listdir(subdir) if x.endswith("S")][0])
  110. snd_list = []
  111. for snd in sorted(os.listdir(snd_dir)):
  112. full_path = os.path.join(snd_dir, snd)
  113. if snd.endswith("SWA"):
  114. os.system("mplayer -vo null -vc dummy -af resample=44100 -ao pcm:waveheader " \
  115. + full_path + " && lame audiodump.wav " + \
  116. os.path.join(full_media_subdir, snd).replace("SWA", "MP3"))
  117. # High bitrate version, not really needed.
  118. #os.system("mplayer -vo null -vc dummy -af resample=44100 -ao pcm:waveheader " \
  119. # + full_path + " && lame -h --resample 44.1 -b 128 audiodump.wav " + \
  120. # os.path.join(full_media_subdir, snd).replace("SWA", "MP3"))
  121. snd_list.append(\
  122. media_subdir + "/" + snd.replace("SWA", "MP3"))
  123. sound[unit][lesson] = snd_list
  124. return sound
  125. sound = extract_sound(foreign_directory)
  126. for unit in foreign_txt:
  127. for lesson in foreign_txt[unit]:
  128. print "unit", unit, "lesson", lesson
  129. for i in range(40):
  130. print foreign_txt[unit][lesson][i]
  131. print native_txt[unit][lesson][i].replace(unichr(336), "\'")
  132. print images[unit][lesson][i]
  133. print sound[unit][lesson][i]
  134. print
  135. fact_data = {"f": "["+foreign_txt[unit][lesson][i] + "]",
  136. "p_1": "<audio src=\"" + sound[unit][lesson][i] + "\">",
  137. "m_1": native_txt[unit][lesson][i] + \
  138. "\n<img src=\"" + images[unit][lesson][i] + "\">"}
  139. mnemosyne.controller().create_new_cards(fact_data,
  140. card_type, grade=-1, tag_names=[tag_prefix + "::Unit " + str(unit)\
  141. + "::Lesson " + str(lesson)])
  142. print
  143. mnemosyne.finalise()