PageRenderTime 26ms CodeModel.GetById 10ms app.highlight 9ms RepoModel.GetById 2ms app.codeStats 0ms

/Assets/scrapers/metadata.imdb.com/imdb.xml

#
XML | 246 lines | 246 code | 0 blank | 0 comment | 0 complexity | 9283fd5c66bf0abcc5691359ff82d2bf MD5 | raw file
  1<?xml version="1.0" encoding="UTF-8"?>
  2<scraper framework="1.1" date="2011-10-28">
  3	<NfoUrl dest="3">
   		<!-- Looks for an IMDb link in $$1 and emits a <url>/<id> pair for the matching akas.imdb.com title page into buffer 3. -->
   		<!-- "imdb....?" is "imdb" followed by three or four arbitrary characters, so different host suffixes are accepted. -->
   		<!-- Old link form: /Title?<digits>. At least one digit is required so a link without a number can never yield the empty ID "tt". -->
  4		<RegExp input="$$1" output="&lt;url&gt;http://akas.imdb.com/title/tt\1/&lt;/url&gt;&lt;id&gt;tt\1&lt;/id&gt;" dest="3">
  5			<expression clear="yes" noclean="1">imdb....?/Title\?([0-9]+)</expression>
  6		</RegExp>
   		<!-- Link form /title/tt<digits>; written with dest="3+" (presumably appended to whatever the first rule produced). Same one-digit minimum applies. -->
  7		<RegExp input="$$1" output="&lt;url&gt;http://akas.imdb.com/title/tt\1/&lt;/url&gt;&lt;id&gt;tt\1&lt;/id&gt;" dest="3+">
  8			<expression noclean="1">imdb....?/title/tt([0-9]+)</expression>
  9		</RegExp>
 10	</NfoUrl>
 11	<CreateSearchUrl SearchStringEncoding="iso-8859-1" dest="3">
   		<!-- Assembles the akas.imdb.com title-search URL from $$1, followed by the contents of buffer 4. -->
 12		<RegExp dest="3" input="$$1" output="&lt;url&gt;http://akas.imdb.com/find?s=tt;q=\1$$4&lt;/url&gt;">
   			<!-- Buffer 4 is the optional suffix "%20(<$$2>)", built only when $$2 has at least one character (presumably the release year; confirm against the caller). -->
 13			<RegExp dest="4" input="$$2" output="%20(\1)">
 14				<expression clear="yes">(.+)</expression>
 15			</RegExp>
 16			<expression noclean="1"></expression>
 17		</RegExp>
 18	</CreateSearchUrl>
 19	<GetSearchResults dest="8">
   		<!-- Turns the downloaded search page in $$1 into a <results> document in buffer 8; the <entity> items are collected in buffer 5. -->
 20		<RegExp input="$$5" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;" dest="8">
   			<!-- Buffer 7: title ID taken from a /title/<id>/(combined|faq|releaseinfo|vote) link. Presumably only present when the search landed directly on a title page. -->
 21			<RegExp input="$$1" output="\1" dest="7">
 22				<expression clear="yes">/title/([t0-9]*)/(combined|faq|releaseinfo|vote)</expression>
 23			</RegExp>
   			<!-- Single entity built from the page's <meta name="title"> tag (title, year) plus the ID in $$7. -->
   			<!-- The optional "IMDb - " prefix is skipped, as GetDetails already does for the same tag, so it cannot leak into the result title. Optional &#x22; quote entities around the title are skipped too. -->
 24			<RegExp input="$$1" output="&lt;entity&gt;&lt;title&gt;\1&lt;/title&gt;&lt;year&gt;\2&lt;/year&gt;&lt;url cache=&quot;$$7-main.html&quot;&gt;http://akas.imdb.com/title/$$7/&lt;/url&gt;&lt;id&gt;$$7&lt;/id&gt;&lt;/entity&gt;" dest="5">
 25				<expression clear="yes" noclean="1">&lt;meta name=&quot;title&quot; content=&quot;(?:IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
 26			</RegExp>
   			<!-- Buffer 4: the page text from the first '><a href="/title' onwards, i.e. the part that holds the result links. -->
 27			<RegExp input="$$1" output="\1" dest="4">
 28				<expression noclean="1">(&gt;&lt;a href=&quot;/title.*)</expression>
 29			</RegExp>
   			<!-- One entity per result link in $$4: \1 = title ID, \2 = link text (title), \3 = four-digit year from the parenthesis after the link. Written with dest="5+". -->
 30			<RegExp input="$$4" output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;year&gt;\3&lt;/year&gt;&lt;url cache=&quot;\1-main.html&quot;&gt;http://akas.imdb.com/title/\1/&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;" dest="5+">
 31				<expression repeat="yes" noclean="1,2">&gt;&lt;a href=&quot;/title/([t0-9]*)/[^&gt;]*&gt;(?:&amp;#x22;)?([^&lt;]*?)(?:&amp;#x22;)?&lt;/a&gt; *\([^\(]*?([0-9]{4})</expression>
 32			</RegExp>
 33			<expression clear="yes" noclean="1"/>
 34		</RegExp>
 35	</GetSearchResults>
 36	<GetDetails dest="3">
   		<!-- Builds the <details> document for one title. Reads the title page from $$1 and the title ID from $$2; every fragment is collected in buffer 5 and finally wrapped in <details> into buffer 3. -->
   		<!-- The chained functions named below are not defined in this file; presumably they come from shared scraper modules. -->
 37		<RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
   			<!-- <id>: the title ID from $$2. This is the first write to buffer 5 (plain dest="5"); all later rules use dest="5+". -->
 38			<RegExp input="$$2" output="&lt;id&gt;\1&lt;/id&gt;" dest="5">
 39				<expression/>
 40			</RegExp>
   			<!-- <originaltitle>: from the <meta name="title"> tag; group 1 is the optional "IMDb - " prefix, group 2 the title. -->
 41			<RegExp input="$$1" output="&lt;originaltitle&gt;\2&lt;/originaltitle&gt;" dest="5+">
 42				<expression>&lt;meta name=&quot;title&quot; content=&quot;(IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
 43			</RegExp>
   			<!-- Chain to GetIMDBAKATitlesById (defined in this file) with the tt ID. -->
 44			<RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBAKATitlesById&quot;&gt;\1&lt;/chain&gt;" dest="5+">
 45				<expression>(tt[t0-9]*)</expression>
 46			</RegExp>
   			<!-- <year>: the four digits inside the parenthesis that follows the title in the meta tag. -->
 47			<RegExp input="$$1" output="&lt;year&gt;\2&lt;/year&gt;" dest="5+">
 48				<expression>&lt;meta name=&quot;title&quot; content=&quot;([^&quot;]*?) \([^\(]*?([0-9]{4})\)</expression>
 49			</RegExp>
   			<!-- Simple page fields: Top 250 rank, MPAA rating text, tagline, runtime (first run of digits after the Runtime heading). -->
 50			<RegExp input="$$1" output="&lt;top250&gt;\1&lt;/top250&gt;" dest="5+">
 51				<expression>Top 250 #([0-9]*)&lt;/</expression>
 52			</RegExp>
 53			<RegExp input="$$1" output="&lt;mpaa&gt;\1&lt;/mpaa&gt;" dest="5+">
 54				<expression>MPAA&lt;/a&gt;\)&lt;/h4&gt;\n?&lt;span itemprop=&quot;contentRating&quot;&gt;([^&lt;]*)</expression>
 55			</RegExp>
 56			<RegExp input="$$1" output="&lt;tagline&gt;\1&lt;/tagline&gt;" dest="5+">
 57				<expression>&lt;h4[^&gt;]*?&gt;Taglines:&lt;/h4&gt;\n?([^&lt;]*)</expression>
 58			</RegExp>
 59			<RegExp input="$$1" output="&lt;runtime&gt;\1&lt;/runtime&gt;" dest="5+">
 60				<expression trim="1">&lt;h4[^&gt;]*?&gt;Runtime:&lt;/h4&gt;[^0-9]*([0-9]+)</expression>
 61			</RegExp>
   			<!-- Rating and genres are delegated to chained functions, keyed by the title ID. -->
 62			<RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBRatingById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 63				<expression/>
 64			</RegExp>
 65			<RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBGenresById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 66				<expression/>
 67			</RegExp>
   			<!-- <country>: one element per /country/ link (repeat="yes"). <studio>: taken from a /company/ link (no repeat attribute). -->
 68			<RegExp input="$$1" output="&lt;country&gt;\1&lt;/country&gt;" dest="5+">
 69				<expression repeat="yes">"/country/[^&gt;]+&gt;([^&lt;]+)&lt;/a&gt;</expression>
 70			</RegExp>
 71			<RegExp input="$$1" output="&lt;studio&gt;\1&lt;/studio&gt;" dest="5+">
 72				<expression>"/company/[^&gt;]+&gt;([^&lt;]+)&lt;/a&gt;</expression>
 73			</RegExp>
   			<!-- The page description paragraph is written as both <outline> and <plot>; GetIMDBPlotById is chained afterwards. -->
 74			<RegExp input="$$1" output="&lt;outline&gt;\1&lt;/outline&gt;&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
 75				<expression trim="1">&lt;p&gt;\n&lt;p itemprop=&quot;description&quot;&gt;([^&lt;]+)&lt;/p&gt;</expression>
 76			</RegExp>
 77			<RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBPlotById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 78				<expression/>
 79			</RegExp>
   			<!-- Cast, directors and writers: the "fullcredits" setting selects between the regular chains (!fullcredits) and the Full* variants (fullcredits). -->
 80			<RegExp conditional="!fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBScaledCastById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 81				<expression/>
 82			</RegExp>
 83			<RegExp conditional="!fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBDirectorsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 84				<expression/>
 85			</RegExp>
 86			<RegExp conditional="!fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBWritersById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 87				<expression/>
 88			</RegExp>
 89			<RegExp conditional="fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBScaledFullCastById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 90				<expression/>
 91			</RegExp>
 92			<RegExp conditional="fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBFullDirectorsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 93				<expression/>
 94			</RegExp>
 95			<RegExp conditional="fullcredits" input="$$2" output="&lt;chain function=&quot;GetIMDBFullWritersById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 96				<expression/>
 97			</RegExp>
   			<!-- Artwork chains, each gated by its own setting: tmdbthumbs, impawards, movieposterdb, fanart. -->
 98			<RegExp conditional="tmdbthumbs" input="$$2" output="&lt;chain function=&quot;GetTMDBThumbsByIdChain&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
 99				<expression/>
100			</RegExp>
101			<RegExp conditional="impawards" input="$$2" output="&lt;chain function=&quot;GetIMPAThumbsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
102				<expression/>
103			</RegExp>
104			<RegExp conditional="movieposterdb" input="$$2" output="&lt;chain function=&quot;GetMoviePosterDBThumbs&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
105				<expression/>
106			</RegExp>
107			<RegExp conditional="fanart" input="$$2" output="&lt;chain function=&quot;GetTMDBFanartByIdChain&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
108				<expression/>
109			</RegExp>
   			<!-- YouTube trailer (setting "ytrailer"): passes "<title>=<year>", with the title (prefix stripped) in buffer 6 and the year in buffer 9. -->
110			<RegExp conditional="ytrailer" input="$$1" output="&lt;chain function=&quot;GetYoutubeTrailer&quot;&gt;$$6=$$9&lt;/chain&gt;" dest="5+">
111				<RegExp input="$$1" output="\2" dest="6">
112					<expression>&lt;meta name=&quot;title&quot; content=&quot;(IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
113				</RegExp>
114				<RegExp input="$$1" output="\2" dest="9">
115					<expression>&lt;meta name=&quot;title&quot; content=&quot;([^&quot;]*?) \([^\(]*?([0-9]{4})\)</expression>
116				</RegExp>
117				<expression/>
118			</RegExp>
   			<!-- HD-Trailers.net: the value of the TrailerQ setting is matched against 480p / 720p / 1080p to pick the chain; the title is passed in buffer 6. -->
   			<!-- The title patterns skip an optional "IMDb - " prefix with a non-capturing group, so \1 is the bare title, consistent with the originaltitle and ytrailer rules above. -->
119			<RegExp input="$INFO[TrailerQ]" output="&lt;chain function=&quot;GetHDTrailersnet480p&quot;&gt;$$6&lt;/chain&gt;" dest="5+">
120				<RegExp input="$$1" output="\1" dest="6">
121					<expression>&lt;meta name=&quot;title&quot; content=&quot;(?:IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
122				</RegExp>
123				<expression>480p</expression>
124			</RegExp>
125			<RegExp input="$INFO[TrailerQ]" output="&lt;chain function=&quot;GetHDTrailersnet720p&quot;&gt;$$6&lt;/chain&gt;" dest="5+">
126				<RegExp input="$$1" output="\1" dest="6">
127					<expression>&lt;meta name=&quot;title&quot; content=&quot;(?:IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
128				</RegExp>
129				<expression>720p</expression>
130			</RegExp>
131			<RegExp input="$INFO[TrailerQ]" output="&lt;chain function=&quot;GetHDTrailersnet1080p&quot;&gt;$$6&lt;/chain&gt;" dest="5+">
132				<RegExp input="$$1" output="\1" dest="6">
133					<expression>&lt;meta name=&quot;title&quot; content=&quot;(?:IMDb - )?(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
134				</RegExp>
135				<expression>1080p</expression>
136			</RegExp>
   			<!-- IMDb's own thumbnails are chained last and without any condition. -->
137			<RegExp input="$$2" output="&lt;chain function=&quot;GetIMDBThumbsById&quot;&gt;$$2&lt;/chain&gt;" dest="5+">
138				<expression/>
139			</RegExp>
140			<expression noclean="1"/>
141		</RegExp>
142	</GetDetails>
143	<GetIMDBAKATitlesById dest="5">
   		<!-- Emits a <details> stub whose <url> targets the "combined" page of title $$1 (cache name $$1-combined.html) and names ParseIMDBAKATitles as the function for that page. -->
144		<RegExp dest="5" input="$$1" output="&lt;details&gt;&lt;url cache=&quot;$$1-combined.html&quot; function=&quot;ParseIMDBAKATitles&quot;&gt;http://akas.imdb.com/title/$$1/combined&lt;/url&gt;&lt;/details&gt;">
145			<expression noclean="1"/>
146		</RegExp>
147	</GetIMDBAKATitlesById>
148	<ParseIMDBAKATitles dest="5">
   		<!-- Chooses the display <title> from the page in $$1 (the "combined" page requested by GetIMDBAKATitlesById). -->
   		<!-- Buffer 2 carries the current choice and is what the outer rule finally wraps in <details><title>. The rules below are order-dependent: a later rule that matches replaces the earlier choice. -->
   		<!-- NOTE(review): several patterns contain "[<em>]", which is a character class (one of the characters "<", "e", "m", ">"), not the literal tag "<em>". Confirm whether "(<em>)?" was intended before changing anything. -->
149		<RegExp input="$$2" output="&lt;details&gt;&lt;title&gt;\1&lt;/title&gt;&lt;/details&gt;" dest="5">
   			<!-- Starting choice: the text of the page's <h1>. -->
150			<RegExp input="$$1" output="\1" dest="2">
151				<expression>&lt;h1&gt;([^&lt;]*)</expression>
152			</RegExp>
   			<!-- Built-in preference chain: works in buffers 9-12 and leaves its pick in buffer 10, which this rule copies to buffer 4. -->
153			<RegExp input="$$10" output="\1" dest="4">
   				<!-- Buffer 9: the title from the <meta name="title"> tag; it seeds buffer 10 and is reused below as the "original title" value. -->
154				<RegExp input="$$1" output="\1" dest="9">
155					<expression>&lt;meta name=&quot;title&quot; content=&quot;(?:&amp;#x22;)?([^&quot;]*?)(?:&amp;#x22;)? \([^\(]*?([0-9]{4})\)</expression>
156				</RegExp>
157				<RegExp input="$$9" output="\1" dest="10">
158					<expression />
159				</RegExp>
   				<!-- Each pair below captures a quoted AKA title for one market into buffer 11, then copies it to buffer 10 only if buffer 11 is non-empty ("(.+)"). -->
   				<!-- Hong Kong entry followed by "English". -->
160				<RegExp input="$$1" output="\1" dest="11">
161					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+Hong Kong [&lt;em&gt;][^&quot;]+English</expression>
162				</RegExp>
163				<RegExp input="$$11" output="\1" dest="10">
164					<expression>(.+)</expression>
165				</RegExp>
   				<!-- Canada entry marked "(English" or "(imdb". -->
166				<RegExp input="$$1" output="\1" dest="11">
167					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((Canada)\s(&lt;em&gt;)?\((English|imdb))</expression>
168				</RegExp>
169				<RegExp input="$$11" output="\1" dest="10">
170					<expression>(.+)</expression>
171				</RegExp>
   				<!-- UK entry that ends the line (UK directly followed by <br>). -->
172				<RegExp input="$$1" output="\1" dest="11">
173					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(UK(&lt;em&gt;)?&lt;br&gt;)</expression>
174				</RegExp>
175				<RegExp input="$$11" output="\1" dest="10">
176					<expression>(.+)</expression>
177				</RegExp>
   				<!-- "International (English title)" followed by a comma or a tag. -->
178				<RegExp input="$$1" output="\1" dest="11">
179					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)?(,|&lt;)( |b)</expression>
180				</RegExp>
181				<RegExp input="$$11" output="\1" dest="10">
182					<expression>(.+)</expression>
183				</RegExp>
   				<!-- "International (English title)" followed by "(imdb". -->
184				<RegExp input="$$1" output="\1" dest="11">
185					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&quot;]+International\s(&lt;em&gt;)?\(English title\)(&lt;/em&gt;)? (&lt;em&gt;)?\(imdb</expression>
186				</RegExp>
187				<RegExp input="$$11" output="\1" dest="10">
188					<expression>(.+)</expression>
189				</RegExp>
   				<!-- If a /country/ link reads UK and closes its <div> right after, buffer 10 is reset to the meta-tag title in buffer 9. -->
190				<RegExp input="$$1" output="\1" dest="12">
191					<expression clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(UK&lt;/a&gt;&lt;/div&gt;)</expression>
192				</RegExp>
193				<RegExp input="$$12" output="$$9" dest="10">
194					<expression>(.+)</expression>
195				</RegExp>
   				<!-- Same reset for a USA /country/ link. -->
196				<RegExp input="$$1" output="\1" dest="12">
197					<expression clear="yes">&lt;a href=&quot;/country/[^&gt;]+&gt;(USA&lt;/a&gt;&lt;/div&gt;)</expression>
198				</RegExp>
199				<RegExp input="$$12" output="$$9" dest="10">
200					<expression>(.+)</expression>
201				</RegExp>
   				<!-- USA entries come last, so they win when present: plain USA before <br>, then USA "(English"/"(imdb", then USA "(new title". -->
202				<RegExp input="$$1" output="\1" dest="11">
203					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+(USA(&lt;em&gt;)?&lt;br&gt;)</expression>
204				</RegExp>
205				<RegExp input="$$11" output="\1" dest="10">
206					<expression>(.+)</expression>
207				</RegExp>
208				<RegExp input="$$1" output="\1" dest="11">
209					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((English|imdb))</expression>
210				</RegExp>
211				<RegExp input="$$11" output="\1" dest="10">
212					<expression>(.+)</expression>
213				</RegExp>
214				<RegExp input="$$1" output="\1" dest="11">
215					<expression clear="yes">&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+((USA)\s(&lt;em&gt;)?\((new title))</expression>
216				</RegExp>
217				<RegExp input="$$11" output="\1" dest="10">
218					<expression>(.+)</expression>
219				</RegExp>
220				<expression noclean="1" />
221			</RegExp>
   			<!-- A non-empty pick from the built-in chain (buffer 4) replaces the <h1> title in buffer 2. -->
222			<RegExp input="$$4" output="\1" dest="2">
223				<expression>(.+)</expression>
224			</RegExp>
   			<!-- User preference: a quoted AKA title whose entry matches the "akatitles" setting; the setting value is inserted into the pattern as-is. -->
225			<RegExp input="$$1" output="\1" dest="4">
226				<expression>&gt;\s*?&quot;([^&lt;]+)&quot;[^&lt;]+[&lt;em&gt;]?[^&quot;]+$INFO[akatitles]</expression>
227			</RegExp>
228			<RegExp input="$$4" output="\1" dest="2">
229				<expression>(.+)</expression>
230			</RegExp>
   			<!-- If a /country/ link matches the akatitles setting, the meta-tag title in buffer 9 is used instead. Buffer 5 is only a scratch value here; the outer rule writes the final result to it afterwards. -->
231			<RegExp input="$$1" output="\1" dest="5">
232				<expression>&lt;a href=&quot;/country/[^&gt;]+&gt;($INFO[akatitles])</expression>
233			</RegExp>
234			<RegExp input="$$5" output="$$9" dest="4">
235				<expression>($INFO[akatitles])</expression>
236			</RegExp>
   			<!-- When the akatitles setting contains "Keep Original", the meta-tag title in buffer 9 is used as well. -->
237			<RegExp input="$INFO[akatitles]" output="$$9" dest="4">
238				<expression>Keep Original</expression>
239			</RegExp>
   			<!-- Final copy of the chosen title (buffer 4) into buffer 2 before the outer rule emits it. -->
240			<RegExp input="$$4" output="\1" dest="2">
241				<expression>(.+)</expression>
242			</RegExp>
243			<expression noclean="1" />
244		</RegExp>
245	</ParseIMDBAKATitles>
246</scraper>