PageRenderTime 28ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/script/import_scripts/phorum.rb

https://gitlab.com/Ruwan-Ranganath/discourse
Ruby | 218 lines | 134 code | 48 blank | 36 comment | 10 complexity | 4fb247a06324a5623203987e391fb305 MD5 | raw file
  1. require "mysql2"
  2. require File.expand_path(File.dirname(__FILE__) + "/base.rb")
  3. class ImportScripts::Phorum < ImportScripts::Base
  4. PHORUM_DB = "piwik"
  5. TABLE_PREFIX = "pw_"
  6. BATCH_SIZE = 1000
  # Sets up the importer and opens the connection to the source database.
  #
  # The superclass initializer runs first. The Mysql2 connection settings are
  # hard-coded here, except for the schema name, which comes from PHORUM_DB;
  # edit them to match the server holding the Phorum data.
  def initialize
    super

    connection_settings = {
      host: "localhost",
      username: "root",
      password: "pa$$word",
      database: PHORUM_DB
    }
    @client = Mysql2::Client.new(connection_settings)
  end
  # Runs the three import stages in a fixed order: users first, then
  # categories, then posts.
  def execute
    [:import_users, :import_categories, :import_posts].each { |stage| send(stage) }
  end
  # Imports the active rows of the `#{TABLE_PREFIX}users` table, BATCH_SIZE
  # rows at a time, handing each page to `create_users`.
  #
  # Rows whose username is blank are skipped. The `admin` column is mapped to
  # a boolean (true only when it equals 1) and the two timestamp columns are
  # converted with `Time.zone.at`.
  def import_users
    puts '', "creating users"

    users_table = "#{TABLE_PREFIX}users"

    # Count exactly the rows that are paged through below (active users only)
    # so the total handed to create_users matches what is really imported.
    total_count = mysql_query(
      "SELECT count(*) count FROM #{users_table} WHERE #{users_table}.active = 1;"
    ).first['count']

    batches(BATCH_SIZE) do |offset|
      # ORDER BY is required for LIMIT/OFFSET paging: without it the server
      # may return rows in a different order for each page, so users could be
      # imported twice or missed entirely.
      results = mysql_query(
        "SELECT user_id id, username, email, real_name name, date_added created_at,
                date_last_active last_seen_at, admin
         FROM #{users_table}
         WHERE #{users_table}.active = 1
         ORDER BY user_id
         LIMIT #{BATCH_SIZE}
         OFFSET #{offset};")

      # An empty page means every row has been consumed.
      break if results.size < 1

      create_users(results, total: total_count, offset: offset) do |user|
        next if user['username'].blank?

        { id: user['id'],
          email: user['email'],
          username: user['username'],
          name: user['name'],
          created_at: Time.zone.at(user['created_at']),
          last_seen_at: Time.zone.at(user['last_seen_at']),
          admin: user['admin'] == 1 }
      end
    end
  end
  # Reads every row of the `#{TABLE_PREFIX}forums` table (ordered by forum_id)
  # and passes them to `create_categories`. Forums whose `active` column is 0
  # yield nil from the block; the rest are mapped to id/name/description.
  def import_categories
    puts "", "importing categories..."

    forums_sql = "
      SELECT forum_id id, name, description, active
      FROM #{TABLE_PREFIX}forums
      ORDER BY forum_id ASC
    "
    categories = mysql_query(forums_sql).to_a

    create_categories(categories) do |forum|
      unless forum['active'] == 0
        {
          id: forum['id'],
          name: forum["name"],
          description: forum["description"]
        }
      end
    end

    # uncomment below lines to create permalink
    # categories.each do |category|
    #   Permalink.create(url: "list.php?#{category['id']}", category_id: category_id_from_imported_category_id(category['id'].to_i))
    # end
  end
  # Imports the `#{TABLE_PREFIX}messages` table, BATCH_SIZE rows at a time,
  # handing each page to `create_posts`.
  #
  # A message whose parent_id is 0 becomes a topic (it gets a category and a
  # title); any other message becomes a reply in the topic of its parent.
  # Replies whose parent cannot be resolved are logged and skipped (the block
  # yields nil for them). Authors that cannot be resolved are mapped to -1.
  def import_posts
    puts "", "creating topics and posts"

    total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}messages").first["count"]

    batches(BATCH_SIZE) do |offset|
      # datestamp alone is not unique, so message_id is added as a tie-breaker:
      # LIMIT/OFFSET paging needs a total order, otherwise rows sharing a
      # timestamp can move between pages and be imported twice or skipped.
      results = mysql_query("
        SELECT m.message_id id,
               m.parent_id,
               m.forum_id category_id,
               m.subject title,
               m.user_id user_id,
               m.body raw,
               m.closed closed,
               m.datestamp created_at
        FROM #{TABLE_PREFIX}messages m
        ORDER BY m.datestamp, m.message_id
        LIMIT #{BATCH_SIZE}
        OFFSET #{offset};
      ").to_a

      # An empty page means every row has been consumed.
      break if results.empty?

      create_posts(results, total: total_count, offset: offset) do |m|
        mapped = {
          id: m['id'],
          user_id: user_id_from_imported_user_id(m['user_id']) || -1,
          raw: process_raw_post(m['raw'], m['id']),
          created_at: Time.zone.at(m['created_at'])
        }

        if m['parent_id'] == 0
          # Thread starter: becomes a new topic.
          mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i)
          mapped[:title] = CGI.unescapeHTML(m['title'])
        else
          # Reply: attach it to the topic its parent was imported into.
          parent = topic_lookup_from_imported_post_id(m['parent_id'])
          unless parent
            # to_s keeps the log line from raising when the subject is NULL.
            puts "Parent post #{m['parent_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"].to_s[0..40]}"
            next nil
          end
          mapped[:topic_id] = parent[:topic_id]
        end

        mapped
      end

      # uncomment below lines to create permalink
      # results.each do |post|
      #   if post['parent_id'] == 0
      #     topic = topic_lookup_from_imported_post_id(post['id'].to_i)
      #     Permalink.create(url: "read.php?#{post['category_id']},#{post['id']}", topic_id: topic[:topic_id].to_i)
      #   end
      # end
    end
  end
  # Rewrites a raw forum message body: HTML-wrapped smilies and links are
  # unwrapped, HTML entities are decoded, and a set of BBCode tags is either
  # converted (quotes, links, code, lists, video embeds) or stripped
  # (img, color, user).
  #
  # Paired-tag patterns use lazy quantifiers so that several tags on the same
  # line are converted one by one instead of being merged into a single match.
  #
  # @param raw [String, nil] message body as read from the source database
  # @param import_id [Object] id of the message being converted (not used here)
  # @return [String] a new string; the argument is never mutated
  def process_raw_post(raw, import_id)
    # to_s guards against NULL bodies; dup keeps the caller's string untouched.
    s = raw.to_s.dup

    # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
    s.gsub!(/<!-- s(\S+) --><img (?:[^>]+) \/><!-- s(?:\S+) -->/, '\1')

    # Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
    s.gsub!(/<!-- \w --><a(?:.+?)href="(\S+?)"(?:.*?)>(.+?)<\/a><!-- \w -->/, '[\2](\1)')

    # Many phpbb bbcode tags have a hash attached to them. Examples:
    #   [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
    #   [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
    s.gsub!(/:(?:\w{8})\]/, ']')

    # Remove mybb video tags.
    s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')

    s = CGI.unescapeHTML(s)

    # phpBB shortens link text like this, which breaks our markdown processing:
    #   [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
    #
    # Work around it for now:
    s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')

    # [QUOTE]...[/QUOTE]
    s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }

    # [URL=...]...[/URL]
    s.gsub!(/\[url="?(.+?)"?\](.+?)\[\/url\]/i) { "[#{$2}](#{$1})" }

    # [IMG]...[/IMG]
    s.gsub!(/\[\/?img\]/i, "")

    # convert list tags to ul and list=1 tags to ol
    # (basically, we're only missing list=a here...)
    s.gsub!(/\[list\](.*?)\[\/list\]/m, '[ul]\1[/ul]')
    s.gsub!(/\[list=1\](.*?)\[\/list\]/m, '[ol]\1[/ol]')
    # convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
    s.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')

    # [CODE]...[/CODE]
    s.gsub!(/\[\/?code\]/i, "\n```\n")
    # [HIGHLIGHT]...[/HIGHLIGHT]
    s.gsub!(/\[\/?highlight\]/i, "\n```\n")

    # [YOUTUBE]<id>[/YOUTUBE]
    s.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [youtube=425,350]id[/youtube]
    s.gsub!(/\[youtube="?(.+?)"?\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }

    # [MEDIA=youtube]id[/MEDIA]
    s.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [ame="youtube_link"]title[/ame]
    s.gsub!(/\[ame="?(.+?)"?\](.+?)\[\/ame\]/i) { "\n#{$1}\n" }

    # [VIDEO=youtube;<id>]...[/VIDEO]
    s.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

    # [USER=706]@username[/USER]
    s.gsub!(/\[user="?(.+?)"?\](.+?)\[\/user\]/i) { $2 }

    # Remove the color tag
    s.gsub!(/\[color=[#a-z0-9]+\]/i, "")
    s.gsub!(/\[\/color\]/i, "")

    s.gsub!(/\[hr\]/i, "<hr>")

    s
  end
  # Sends +sql+ to the Mysql2 client stored in @client and returns whatever
  # the client returns, with the `cache_rows` query option switched off.
  def mysql_query(sql)
    query_options = { cache_rows: false }
    @client.query(sql, query_options)
  end
  169. end
  # Script entry point: build the Phorum importer and start the run.
  importer = ImportScripts::Phorum.new
  importer.perform