PageRenderTime 51ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/Assets/scrapers/metadata.imdb.com/imdb.xml

#
XML | 246 lines | 246 code | 0 blank | 0 comment | 0 complexity | 9283fd5c66bf0abcc5691359ff82d2bf MD5 | raw file
Possible License(s): GPL-2.0
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <scraper framework="1.1" date="2011-10-28">
  <!-- NfoUrl: looks for an IMDb link in buffer $$1 and writes a url element
       (http://akas.imdb.com/title/tt + digits + /) plus an id element
       (tt + digits) into buffer 3, which is also this function's dest.
       NOTE(review): buffer numbering and the meaning of dest="3+" (presumably
       append instead of overwrite) are assumed from the XBMC scraper format;
       confirm against the scraper documentation. -->
  3. <NfoUrl dest="3">
     <!-- Rule 1: links of the form imdb.xxx/Title?digits. The expression carries
          clear="yes" (presumably empties buffer 3 when nothing matches). -->
  4. <RegExp input="$$1" output="&lt;url&gt;http://akas.imdb.com/title/tt\1/&lt;/url&gt;&lt;id&gt;tt\1&lt;/id&gt;" dest="3">
  5. <expression clear="yes" noclean="1">imdb....?/Title\?([0-9]*)</expression>
  6. </RegExp>
     <!-- Rule 2: links of the form imdb.xxx/title/tt + digits. Same output
          template as rule 1, written with dest="3+". -->
  7. <RegExp input="$$1" output="&lt;url&gt;http://akas.imdb.com/title/tt\1/&lt;/url&gt;&lt;id&gt;tt\1&lt;/id&gt;" dest="3+">
  8. <expression noclean="1">imdb....?/title/tt([0-9]*)</expression>
  9. </RegExp>
  10. </NfoUrl>
  <!-- CreateSearchUrl: builds the IMDb title search URL
       http://akas.imdb.com/find?s=tt;q= followed by the content of $$1 and
       then the content of buffer 4, and stores it as a url element in buffer 3.
       The function declares SearchStringEncoding="iso-8859-1".
       NOTE(review): $$1 is presumably the search title and $$2 the year as
       supplied by the host application; verify against the caller. -->
  11. <CreateSearchUrl dest="3" SearchStringEncoding="iso-8859-1">
  12. <RegExp input="$$1" output="&lt;url&gt;http://akas.imdb.com/find?s=tt;q=\1$$4&lt;/url&gt;" dest="3">
      <!-- Nested rule: when $$2 is non-empty, buffer 4 becomes %20( + $$2 + ).
           clear="yes" is set, so buffer 4 is presumably emptied when $$2 has
           no content. The outer output template reads buffer 4 as $$4. -->
  13. <RegExp input="$$2" output="%20(\1)" dest="4">
  14. <expression clear="yes">(.+)</expression>
  15. </RegExp>
      <!-- Empty outer expression: presumably the whole of $$1 becomes \1. -->
  16. <expression noclean="1"/>
  17. </RegExp>
  18. </CreateSearchUrl>
  <!-- GetSearchResults: turns the page text in $$1 into a results document.
       The nested rules collect entity elements (title, year, url, id) in
       buffer 5; the outer rule wraps buffer 5 in an XML declaration and a
       results element and writes that to buffer 8 (the function's dest).
       Rule order matters because the rules share buffers 4, 5 and 7.
       NOTE(review): the first two nested rules look like they handle the case
       where the search landed directly on a title page; confirm. -->
  19. <GetSearchResults dest="8">
  20. <RegExp input="$$5" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;" dest="8">
      <!-- Capture a title id (characters t and 0-9) from a link ending in
           combined, faq, releaseinfo or vote; stored in buffer 7. -->
  21. <RegExp input="$$1" output="\1" dest="7">
  22. <expression clear="yes">/title/([t0-9]*)/(combined|faq|releaseinfo|vote)</expression>
  23. </RegExp>
      <!-- Build a single entity from the page's meta title tag: \1 is the
           title, \2 the four digit year; url and id come from buffer 7.
           Written to buffer 5 without "+", with clear="yes". -->
  24. <RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\1&lt;/title&gt;&lt;year&gt;\2&lt;/year&gt;&lt;url cache=&quot;$$7-main.html&quot;&gt;http://akas.imdb.com/title/$$7/&lt;/url&gt;&lt;id&gt;$$7&lt;/id&gt;&lt;/entity&gt;" dest="5">
  25. <expression clear="yes" noclean="1">&lt;meta name=&quot;title&quot; content=&quot;(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
  26. </RegExp>
      <!-- Copy everything from the first title link onwards into buffer 4 so
           the repeating rule below only scans that part of the page. -->
  27. <RegExp input="$$1" output="\1" dest="4">
  28. <expression noclean="1">(&gt;&lt;a href=&quot;/title.*)</expression>
  29. </RegExp>
      <!-- For each title link in buffer 4 (repeat="yes"): \1 is the title id,
           \2 the link text, \3 the year. One entity per match goes to
           buffer 5 with dest="5+". -->
  30. <RegExp input="$$4" output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;year&gt;\3&lt;/year&gt;&lt;url cache=&quot;\1-main.html&quot;&gt;http://akas.imdb.com/title/\1/&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;" dest="5+">
  31. <expression repeat="yes" noclean="1,2">&gt;&lt;a href=&quot;/title/([t0-9]*)/[^&gt;]*&gt;(?:&amp;#x22;)?([^&lt;]*?)(?:&amp;#x22;)?&lt;/a&gt; *\([^\(]*?([0-9]{4})</expression>
  32. </RegExp>
      <!-- Empty outer expression: presumably all of buffer 5 becomes \1. -->
  33. <expression clear="yes" noclean="1"/>
  34. </RegExp>
  35. </GetSearchResults>
  <!-- GetDetails: assembles the details document for one title.
       Nested rules read the title page text from $$1 and the title id from
       $$2, and accumulate child elements in buffer 5 (first rule writes with
       dest="5", all later ones with dest="5+"). The outer rule wraps buffer 5
       in a details element and writes it to buffer 3 (the function's dest).
       Many rules emit chain elements naming other scraper functions that are
       not defined in this part of the file.
       NOTE(review): that $$1 is the fetched page and $$2 the id, that an empty
       expression passes the whole input through, that conditional="x" gates a
       rule on an add-on setting, and that chain elements invoke the named
       function are all assumed from the XBMC scraper format; confirm. -->
  36. <GetDetails dest="3">
  37. <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
      <!-- id: taken from $$2. This rule uses dest="5" without "+", so it
           starts buffer 5 afresh. -->
  38. <RegExp input="$$2" output="&lt;id&gt;\1&lt;/id&gt;" dest="5">
  39. <expression/>
  40. </RegExp>
      <!-- originaltitle: group 2 of the meta title match (group 1 is the
           optional "IMDb - " prefix, group 3 the year). -->
  41. <RegExp input="$$1" output="&lt;originaltitle&gt;\2&lt;/originaltitle&gt;" dest="5+">
  42. <expression>&lt;meta name=&quot;title&quot; content=&quot;(IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
  43. </RegExp>
      <!-- Chain to GetIMDBAKATitlesById with the tt id extracted from $$2. -->
  44. <RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBAKATitlesById&quot;&gt;\1&lt;/chain&gt;" dest="5+">
  45. <expression>(tt[t0-9]*)</expression>
  46. </RegExp>
      <!-- year: four digits inside the parentheses of the meta title. -->
  47. <RegExp input="$$1" output="&lt;year&gt;\2&lt;/year&gt;" dest="5+">
  48. <expression>&lt;meta name=&quot;title&quot; content=&quot;([^&quot;]*?) \([^\(]*?([0-9]{4})\)</expression>
  49. </RegExp>
      <!-- top250: the number following the text "Top 250 #". -->
  50. <RegExp input="$$1" output="&lt;top250&gt;\1&lt;/top250&gt;" dest="5+">
  51. <expression>Top 250 #([0-9]*)&lt;/</expression>
  52. </RegExp>
      <!-- mpaa: text of the contentRating span after the MPAA heading. -->
  53. <RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
  54. <expression>MPAA&lt;/a&gt;\)&lt;/h4&gt;\n?&lt;span itemprop=&quot;contentRating&quot;&gt;([^&lt;]*)</expression>
  55. </RegExp>
      <!-- tagline: text following the "Taglines:" h4 heading. -->
  56. <RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
  57. <expression>&lt;h4[^&gt;]*?&gt;Taglines:&lt;/h4&gt;\n?([^&lt;]*)</expression>
  58. </RegExp>
      <!-- runtime: first run of digits after the "Runtime:" h4 heading. -->
  59. <RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
  60. <expression trim="1">&lt;h4[^&gt;]*?&gt;Runtime:&lt;/h4&gt;[^0-9]*([0-9]+)</expression>
  61. </RegExp>
      <!-- Chains for rating and genres, each passed the id in $$2. -->
  62. <RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBRatingById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  63. <expression/>
  64. </RegExp>
  65. <RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBGenresById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  66. <expression/>
  67. </RegExp>
      <!-- country: link text of every /country/ link (repeat="yes"). -->
  68. <RegExp input="$$1" output="&lt;country&gt;\1&lt;/country&gt;" dest="5+">
  69. <expression repeat="yes">"/country/[^&gt;]+&gt;([^&lt;]+)&lt;/a&gt;</expression>
  70. </RegExp>
      <!-- studio: link text of a /company/ link. No repeat attribute here,
           so presumably only the first match is used. -->
  71. <RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
  72. <expression>"/company/[^&gt;]+&gt;([^&lt;]+)&lt;/a&gt;</expression>
  73. </RegExp>
      <!-- outline and plot: both filled from the same description paragraph. -->
  74. <RegExp input="$$1" output="&lt;outline&gt;\1&lt;/outline&gt;&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
  75. <expression trim="1">&lt;p&gt;\n&lt;p itemprop=&quot;description&quot;&gt;([^&lt;]+)&lt;/p&gt;</expression>
  76. </RegExp>
      <!-- Chain to GetIMDBPlotById with the id in $$2. -->
  77. <RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBPlotById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  78. <expression/>
  79. </RegExp>
      <!-- Credits, variant marked conditional="!fullcredits": cast, directors
           and writers chains. -->
  80. <RegExp conditional="!fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBScaledCastById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  81. <expression/>
  82. </RegExp>
  83. <RegExp conditional="!fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBDirectorsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  84. <expression/>
  85. </RegExp>
  86. <RegExp conditional="!fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBWritersById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  87. <expression/>
  88. </RegExp>
      <!-- Credits, variant marked conditional="fullcredits": the "Full"
           counterparts of the three chains above. -->
  89. <RegExp conditional="fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBScaledFullCastById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  90. <expression/>
  91. </RegExp>
  92. <RegExp conditional="fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBFullDirectorsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  93. <expression/>
  94. </RegExp>
  95. <RegExp conditional="fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBFullWritersById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  96. <expression/>
  97. </RegExp>
      <!-- Artwork chains, each gated by its own conditional name:
           tmdbthumbs, impawards, movieposterdb, fanart. -->
  98. <RegExp conditional="tmdbthumbs" input="$$2" output="&lt;chain function=&quot;GetTMDBThumbsByIdChain&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  99. <expression/>
  100. </RegExp>
  101. <RegExp conditional="impawards" input="$$2" output="&lt;chain function=&quot;GetIMPAThumbsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  102. <expression/>
  103. </RegExp>
  104. <RegExp conditional="movieposterdb" input="$$2" output="&lt;chain function=&quot;GetMoviePosterDBThumbs&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  105. <expression/>
  106. </RegExp>
  107. <RegExp conditional="fanart" input="$$2" output="&lt;chain function=&quot;GetTMDBFanartByIdChain&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  108. <expression/>
  109. </RegExp>
      <!-- YouTube trailer chain (conditional="ytrailer"). The two nested rules
           fill buffer 6 with the title (group 2 of the first pattern) and
           buffer 9 with the year (group 2 of the second pattern); the chain
           argument is then $$6=$$9. -->
  110. <RegExp conditional="ytrailer" input="$$1" output="&lt;chain function=&quot;GetYoutubeTrailer&quot;&gt;$$6=$$9&lt;/chain&gt;" dest="5+">
  111. <RegExp input="$$1" output="\2" dest="6">
  112. <expression>&lt;meta name=&quot;title&quot; content=&quot;(IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
  113. </RegExp>
  114. <RegExp input="$$1" output="\2" dest="9">
  115. <expression>&lt;meta name=&quot;title&quot; content=&quot;([^&quot;]*?) \([^\(]*?([0-9]{4})\)</expression>
  116. </RegExp>
  117. <expression/>
  118. </RegExp>
      <!-- HD-Trailers.net chains. Each outer rule matches $INFO[TrailerQ]
           against one literal (480p, 720p, 1080p), so at most one of the three
           is expected to fire for a given TrailerQ value. Inside each, buffer 6
           is set to the title from the meta title tag and passed as the
           chain argument. -->
  119. <RegExp input="$INFO[TrailerQ]" output="&lt;chain function=&quot;GetHDTrailersnet480p&quot;&gt;$$6&lt;/chain&gt;" dest="5+">
  120. <RegExp input="$$1" output="\1" dest="6">
  121. <expression>&lt;meta name=&quot;title&quot; content=&quot;(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
  122. </RegExp>
  123. <expression>480p</expression>
  124. </RegExp>
  125. <RegExp input="$INFO[TrailerQ]" output="&lt;chain function=&quot;GetHDTrailersnet720p&quot;&gt;$$6&lt;/chain&gt;" dest="5+">
  126. <RegExp input="$$1" output="\1" dest="6">
  127. <expression>&lt;meta name=&quot;title&quot; content=&quot;(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
  128. </RegExp>
  129. <expression>720p</expression>
  130. </RegExp>
  131. <RegExp input="$INFO[TrailerQ]" output="&lt;chain function=&quot;GetHDTrailersnet1080p&quot;&gt;$$6&lt;/chain&gt;" dest="5+">
  132. <RegExp input="$$1" output="\1" dest="6">
  133. <expression>&lt;meta name=&quot;title&quot; content=&quot;(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
  134. </RegExp>
  135. <expression>1080p</expression>
  136. </RegExp>
      <!-- Chain to GetIMDBThumbsById; unlike the other artwork chains this one
           has no conditional attribute. -->
  137. <RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBThumbsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
  138. <expression/>
  139. </RegExp>
      <!-- Empty outer expression: presumably all of buffer 5 becomes \1 of the
           details wrapper above. -->
  140. <expression noclean="1"/>
  141. </RegExp>
  142. </GetDetails>
  <!-- GetIMDBAKATitlesById: takes a title id in $$1 and emits a details element
       holding one url element for http://akas.imdb.com/title/ + id + /combined.
       The url carries cache="id-combined.html" and function="ParseIMDBAKATitles",
       the function defined directly below in this file. Output goes to buffer 5.
       NOTE(review): presumably the host fetches that url and hands the page to
       the named function; confirm against the scraper documentation. -->
  143. <GetIMDBAKATitlesById dest="5">
  144. <RegExp input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBAKATitles&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;" dest="5">
  145. <expression noclean="1" />
  146. </RegExp>
  147. </GetIMDBAKATitlesById>
  <!-- ParseIMDBAKATitles: picks the display title from the page text in $$1 and
       emits it as a details element with a single title child (buffer 5).
       Working buffers as used below:
         2  = the title that is finally emitted (read by the outer rule)
         4  = candidate title, copied into 2 whenever it is non-empty
         5  = country match for $INFO[akatitles]
         9  = title taken from the page's meta title tag
         10 = best English-language candidate found so far
         11 = scratch result of each "also known as" pattern
         12 = scratch result of each country check
       Rules write to 10 and 4 without "+", so a later successful rule
       presumably replaces the value left by an earlier one; the sequence
       therefore acts as a lowest-to-highest priority list. Do not reorder.
       NOTE(review): overwrite and clear="yes" semantics are assumed from the
       XBMC scraper format; confirm against the scraper documentation. -->
  148. <ParseIMDBAKATitles dest="5">
  149. <RegExp input="$$2" output="&lt;details&gt;&lt;title&gt;\1&lt;/title&gt;&lt;/details&gt;" dest="5">
       <!-- Starting value: text of the first h1 element. -->
  150. <RegExp input="$$1" output="\1" dest="2">
  151. <expression>&lt;h1&gt;([^&lt;]*)</expression>
  152. </RegExp>
       <!-- English-title selection. The children of this rule fill buffer 10;
            the rule itself then copies buffer 10 into buffer 4. -->
  153. <RegExp input="$$10" output="\1" dest="4">
       <!-- Buffer 9: title from the meta title tag; buffer 10 starts as a
            copy of it. -->
  154. <RegExp input="$$1" output="\1" dest="9">
  155. <expression>&lt;meta name=&quot;title&quot; content=&quot;(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
  156. </RegExp>
  157. <RegExp input="$$9" output="\1" dest="10">
  158. <expression />
  159. </RegExp>
       <!-- Each pair below: (a) try one pattern on the page and store the
            quoted title in buffer 11, (b) if buffer 11 is non-empty, copy it
            into buffer 10. First pair: a line mentioning Hong Kong and
            English. -->
  160. <RegExp input="$$1" output="\1" dest="11">
  161. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+Hong Kong [&lt;em&gt;][^&quot;]+English</expression>
  162. </RegExp>
  163. <RegExp input="$$11" output="\1" dest="10">
  164. <expression>(.+)</expression>
  165. </RegExp>
       <!-- Canada followed by "(English" or "(imdb". -->
  166. <RegExp input="$$1" output="\1" dest="11">
  167. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((Canada)\s(&lt;em&gt;)?\((English|imdb))</expression>
  168. </RegExp>
  169. <RegExp input="$$11" output="\1" dest="10">
  170. <expression>(.+)</expression>
  171. </RegExp>
       <!-- UK directly followed by a br tag (optionally with an em tag). -->
  172. <RegExp input="$$1" output="\1" dest="11">
  173. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(UK(&lt;em&gt;)?&lt;br&gt;)</expression>
  174. </RegExp>
  175. <RegExp input="$$11" output="\1" dest="10">
  176. <expression>(.+)</expression>
  177. </RegExp>
       <!-- "International (English title)" followed by a comma or a tag. -->
  178. <RegExp input="$$1" output="\1" dest="11">
  179. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)?(,|&lt;)( |b)</expression>
  180. </RegExp>
  181. <RegExp input="$$11" output="\1" dest="10">
  182. <expression>(.+)</expression>
  183. </RegExp>
       <!-- "International (English title)" followed by "(imdb". -->
  184. <RegExp input="$$1" output="\1" dest="11">
  185. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)? (&lt;em&gt;)?\(imdb</expression>
  186. </RegExp>
  187. <RegExp input="$$11" output="\1" dest="10">
  188. <expression>(.+)</expression>
  189. </RegExp>
       <!-- Country checks: when a /country/ link with text UK closes its div,
            buffer 10 is reset to the meta title in buffer 9. The same check
            follows for USA. -->
  190. <RegExp input="$$1" output="\1" dest="12">
  191. <expression clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(UK&lt;/a&gt;&lt;/div&gt;)</expression>
  192. </RegExp>
  193. <RegExp input="$$12" output="$$9" dest="10">
  194. <expression>(.+)</expression>
  195. </RegExp>
  196. <RegExp input="$$1" output="\1" dest="12">
  197. <expression clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(USA&lt;/a&gt;&lt;/div&gt;)</expression>
  198. </RegExp>
  199. <RegExp input="$$12" output="$$9" dest="10">
  200. <expression>(.+)</expression>
  201. </RegExp>
       <!-- USA directly followed by a br tag (optionally with an em tag). -->
  202. <RegExp input="$$1" output="\1" dest="11">
  203. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(USA(&lt;em&gt;)?&lt;br&gt;)</expression>
  204. </RegExp>
  205. <RegExp input="$$11" output="\1" dest="10">
  206. <expression>(.+)</expression>
  207. </RegExp>
       <!-- USA followed by "(English" or "(imdb". -->
  208. <RegExp input="$$1" output="\1" dest="11">
  209. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((English|imdb))</expression>
  210. </RegExp>
  211. <RegExp input="$$11" output="\1" dest="10">
  212. <expression>(.+)</expression>
  213. </RegExp>
       <!-- USA followed by "(new title"; last and therefore presumably the
            highest priority of the English candidates. -->
  214. <RegExp input="$$1" output="\1" dest="11">
  215. <expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((new title))</expression>
  216. </RegExp>
  217. <RegExp input="$$11" output="\1" dest="10">
  218. <expression>(.+)</expression>
  219. </RegExp>
       <!-- Empty expression closing the English-title rule opened above. -->
  220. <expression noclean="1" />
  221. </RegExp>
       <!-- If buffer 4 is non-empty, it replaces the h1 title in buffer 2. -->
  222. <RegExp input="$$4" output="\1" dest="2">
  223. <expression>(.+)</expression>
  224. </RegExp>
       <!-- User preference: the value of $INFO[akatitles] is inserted into the
            pattern; a quoted title on a matching line goes to buffer 4 and
            from there to buffer 2. This rule has no clear attribute, so
            buffer 4 presumably keeps its previous value when nothing matches. -->
  225. <RegExp input="$$1" output="\1" dest="4">
  226. <expression>&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+$INFO[akatitles]</expression>
  227. </RegExp>
  228. <RegExp input="$$4" output="\1" dest="2">
  229. <expression>(.+)</expression>
  230. </RegExp>
       <!-- If a /country/ link text starts with the $INFO[akatitles] value,
            buffer 4 is set to the meta title in buffer 9. -->
  231. <RegExp input="$$1" output="\1" dest="5">
  232. <expression>&lt;a href=&quot;/country/[^&gt;]+&gt;($INFO[akatitles])</expression>
  233. </RegExp>
  234. <RegExp input="$$5" output="$$9" dest="4">
  235. <expression>($INFO[akatitles])</expression>
  236. </RegExp>
       <!-- If $INFO[akatitles] contains the text "Keep Original", buffer 4 is
            set to the meta title in buffer 9. -->
  237. <RegExp input="$INFO[akatitles]" output="$$9" dest="4">
  238. <expression>Keep Original</expression>
  239. </RegExp>
       <!-- Final copy of buffer 4 into buffer 2 before the outer rule emits
            the title element. -->
  240. <RegExp input="$$4" output="\1" dest="2">
  241. <expression>(.+)</expression>
  242. </RegExp>
  243. <expression noclean="1" />
  244. </RegExp>
  245. </ParseIMDBAKATitles>
  246. </scraper>