PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/getlyric.rb

https://bitbucket.org/eamoon/bin
Ruby | 252 lines | 190 code | 31 blank | 31 comment | 21 complexity | 1ad660b9a8901652d9c1c3e9bdcbcbf6 MD5 | raw file
  1. #!/usr/bin/env ruby
  2. #
  3. # File: getlyric.rb
  4. # Author: eweb
  5. # Copyright eweb, 2013-2021
  6. # Contents:
  7. #
  8. # Date: Author: Comments:
  9. # 5th Nov 2013 eweb #0008 Maintenance
  10. # 29th Nov 2013 eweb #0008 Match divs over lines
  11. # 24th Jun 2014 eweb #0008 Reorg
  12. # 20th Sep 2014 eweb #0008 tidy html
  13. # 25th Mar 2015 eweb #0008 Apostrophes
  14. # 26th May 2015 eweb #0008 Translate entities
  15. # 7th Sep 2015 eweb #0008 encoding
  16. # 16th Dec 2015 eweb #0008 handle commas
  17. # 29th Dec 2016 eweb #0008 handle exclamation marks
  18. # 28th Oct 2017 eweb #0008 tidy up
  19. # 7th Apr 2018 eweb #0007 rubocop
  20. # 19th Jul 2018 eweb #0008 fetch from genius
  21. # 25th Nov 2018 eweb #0008 return inner_text
  22. # 25th Nov 2018 eweb #0008 dryed up
  23. # 6th Dec 2018 eweb #0008 return inner_html for wikia
  24. # 18th Aug 2019 eweb #0008 wikia moved to fandom
  25. # 29th Nov 2020 eweb #0008 genius first try 3 times
  26. # 9th May 2021 eweb #0007 URI.open
  27. # 4th Jun 2021 eweb #0008 trim hyphens
  28. #
  29. require 'nokogiri'
  30. require 'open-uri'
  31. require 'cgi'
  32. def save_lyrics(lyric)
  33. lyric.gsub!('&', '&')
  34. lyric.gsub!('&lt;', '<')
  35. lyric.gsub!('&gt;', '>')
  36. lyric.gsub!(/&#([0-9]+);/) { $1.to_i.chr }
  37. lyric.gsub!(/\n\n \n\n/, "\n\n")
  38. lyric.gsub!('<i>', '(')
  39. lyric.gsub!('</i>', ')')
  40. puts lyric
  41. puts '**** Contains entities' if lyric[/&.+;/]
  42. IO.popen('pbcopy', 'w').puts lyric
  43. end
  44. def tidy(lyric)
  45. lyric = lyric.gsub(/<div.+?<\/div>/m, '')
  46. lyric = lyric.gsub(/<script.+?<\/script>/m, '')
  47. lyric = lyric.gsub(/<!--.+?-->/m, '')
  48. lyric = lyric.gsub(/<br>\n/, "\n")
  49. lyric = lyric.gsub(/<br>/, "\n")
  50. lyric = lyric.gsub(/<p>/, '')
  51. lyric = lyric.gsub(/<\/p>/, "\n")
  52. lyric.strip
  53. end
  54. def process_lyric(lyric)
  55. lyric = tidy(lyric)
  56. if lyric && lyric != ''
  57. save_lyrics(lyric)
  58. true
  59. end
  60. end
  61. def fetch_lyricsmania
  62. artist = ARGV[0] if ARGV.any?
  63. song = ARGV[1..].join(' ') if ARGV.length > 1
  64. artist = artist.downcase
  65. song = song.downcase
  66. artist.tr!(' ', '_')
  67. artist.gsub!(/[^a-z0-9_]/, '')
  68. song.tr!(' ', '_')
  69. song.gsub!(/[^a-z0-9_]/, '')
  70. url = "http://www.lyricsmania.com/#{song}_lyrics_#{artist}.html"
  71. puts url
  72. begin
  73. doc = Nokogiri::HTML(URI.open(url))
  74. lyric = doc.xpath("id('songlyrics_h')").inner_text
  75. process_lyric(lyric)
  76. rescue StandardError => e
  77. puts "#{e.class} #{e.message}"
  78. end
  79. end
  80. def fetch_lyrics_wikia
  81. artist = ARGV[0].dup if ARGV.any?
  82. song = ARGV[1..].join(' ').dup if ARGV.length > 1
  83. artist.tr!(' ', '_')
  84. song.tr!(' ', '_')
  85. artist = CGI.escape(artist)
  86. song = CGI.escape(song)
  87. url = "https://lyrics.fandom.com/#{artist}:#{song}"
  88. puts url
  89. begin
  90. doc = Nokogiri::HTML(URI.open(url))
  91. lyric = doc.xpath("//div[@class='lyricbox']").inner_html
  92. if lyric =~ /Unfortunately, we are not licensed to display the full lyrics/
  93. lyric = nil
  94. end
  95. if lyric =~ /Category:Instrumental/ && lyric =~ /TrebleClef/
  96. lyric = 'Instrumental'
  97. end
  98. process_lyric(lyric)
  99. rescue StandardError => e
  100. puts "#{e.class} #{e.message}"
  101. end
  102. end
  103. def fetch_lyricsmode
  104. artist = ARGV[0].dup if ARGV.any?
  105. song = ARGV[1..].join(' ').dup if ARGV.length > 1
  106. artist.tr!(' ', '_')
  107. song.tr!(' ', '_')
  108. song.gsub!('?', '%3F')
  109. artist = artist.downcase
  110. song = song.downcase
  111. url = "https://www.lyricsmode.com/lyrics/#{artist[0]}/#{artist}/#{song}.html"
  112. puts url
  113. begin
  114. doc = Nokogiri::HTML(URI.open(url))
  115. lyric = doc.xpath("id('songlyrics_h')").inner_text
  116. process_lyric(lyric)
  117. rescue StandardError => e
  118. puts "#{e.class} #{e.message}"
  119. end
  120. end
  121. def fetch_azlyrics
  122. artist = ARGV[0].dup if ARGV.any?
  123. song = ARGV[1..].join(' ').dup if ARGV.length > 1
  124. artist.tr!(' ', '_')
  125. song.tr!(' ', '_')
  126. song.gsub!('?', '%3F')
  127. artist = artist.downcase
  128. song = song.downcase
  129. # strips leading 'The '
  130. artist.gsub!(/^The /, '')
  131. song.gsub!(/^The /, '')
  132. artist = artist.downcase
  133. song = song.downcase
  134. artist.gsub!(/[^a-z0-9]/, '')
  135. song.gsub!(/[^a-z0-9]/, '')
  136. url = "https://www.azlyrics.com/lyrics/#{artist}/#{song}.html"
  137. puts url
  138. begin
  139. doc = Nokogiri::HTML(URI.open(url))
  140. lyric = doc.xpath("id('songlyrics_h')").inner_text
  141. process_lyric(lyric)
  142. rescue StandardError => e
  143. puts "#{e.class} #{e.message}"
  144. end
  145. end
  146. def fetch_lyricstime
  147. artist = ARGV[0].dup if ARGV.any?
  148. song = ARGV[1..].join(' ').dup if ARGV.length > 1
  149. artist = artist.downcase
  150. song = song.downcase
  151. artist.gsub!(/[^a-z0-9]+/, '-')
  152. song.gsub!(/[^a-z0-9]+/, '-')
  153. url = "http://www.lyricstime.com/#{artist}-#{song}-lyrics.html"
  154. puts url
  155. begin
  156. doc = Nokogiri::HTML(URI.open(url))
  157. lyric = doc.xpath("id('songlyrics')/p").inner_html
  158. process_lyric(lyric)
  159. rescue StandardError => e
  160. puts e
  161. end
  162. end
  163. def fetch_irishmusicdb
  164. artist = ARGV[0].dup if ARGV.any?
  165. song = ARGV[1..].join(' ').dup if ARGV.length > 1
  166. artist = artist.downcase
  167. song = song.downcase
  168. artist.gsub!(/[^a-z0-9]+/, '')
  169. song.gsub!(/[^a-z0-9]+/, '-')
  170. url = "http://irishmusicdb.com/#{artist[0]}/#{artist}"
  171. puts url
  172. begin
  173. doc = Nokogiri::HTML(URI.open(url))
  174. href = doc.css('a').detect { |a| a.attribute('href').to_s =~ /lyrics/ }.attribute('href')
  175. url = "#{url}/#{href}"
  176. puts url
  177. doc = Nokogiri::HTML(URI.open(url))
  178. lyric = doc.xpath("id('songlyrics')/p").inner_text
  179. process_lyric(lyric)
  180. rescue StandardError => e
  181. puts e
  182. end
  183. end
  184. # https://genius.com/Champion-jack-dupree-im-tired-of-moanin-lyrics
  185. # https://genius.com/champion-jack-dupree-im-tired-of-moaning-lyrics
  186. def fetch_genius
  187. artist = ARGV[0].dup if ARGV.any?
  188. song = ARGV[1..].join(' ').dup if ARGV.length > 1
  189. artist = artist.downcase
  190. song = song.downcase
  191. artist.gsub!(/[^a-z0-9]+/, '-')
  192. artist[0] = artist[0].upcase
  193. song.delete!("'")
  194. song.gsub!(/[^a-z0-9]+/, '-')
  195. song.gsub!(/^-/, '')
  196. song.gsub!(/-$/, '')
  197. url = "https://genius.com/#{artist}-#{song}-lyrics"
  198. puts url
  199. begin
  200. 3.times.detect do
  201. doc = Nokogiri::HTML(URI.open(url))
  202. lyric = doc.xpath("//div[@class='lyrics']").inner_text
  203. if process_lyric(lyric)
  204. true
  205. else
  206. puts url
  207. false
  208. end
  209. end
  210. rescue StandardError => e
  211. puts e
  212. end
  213. end
  214. if ARGV.length < 2
  215. elsif fetch_genius
  216. elsif fetch_lyrics_wikia
  217. elsif fetch_lyricsmode
  218. elsif fetch_azlyrics
  219. end