PageRenderTime 28ms CodeModel.GetById 41ms RepoModel.GetById 71ms app.codeStats 0ms

/lib/imdb/utils.py

https://gitlab.com/akila-33/Sick-Beard
Python | 1296 lines | 1182 code | 30 blank | 84 comment | 118 complexity | 167762bf6db9f05c8f8e6b40163d6609 MD5 | raw file
  1. """
  2. utils module (imdb package).
  3. This module provides basic utilities for the imdb package.
  4. Copyright 2004-2012 Davide Alberani <da@erlug.linux.it>
  5. 2009 H. Turgut Uyar <uyar@tekir.org>
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of the GNU General Public License as published by
  8. the Free Software Foundation; either version 2 of the License, or
  9. (at your option) any later version.
  10. This program is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. GNU General Public License for more details.
  14. You should have received a copy of the GNU General Public License
  15. along with this program; if not, write to the Free Software
  16. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. """
  18. from __future__ import generators
  19. import re
  20. import string
  21. import logging
  22. from copy import copy, deepcopy
  23. from time import strptime, strftime
  24. from imdb import VERSION
  25. from imdb import linguistics
  26. from imdb._exceptions import IMDbParserError
  27. # Logger for imdb.utils module.
  28. _utils_logger = logging.getLogger('imdbpy.utils')
  29. # The regular expression for the "long" year format of IMDb, like
  30. # "(1998)" and "(1986/II)", where the optional roman number (that I call
  31. # "imdbIndex" after the slash is used for movies with the same title
  32. # and year of release.
  33. # XXX: probably L, C, D and M are far too much! ;-)
  34. re_year_index = re.compile(r'\(([0-9\?]{4}(/[IVXLCDM]+)?)\)')
  35. re_extended_year_index = re.compile(r'\((TV episode|TV Series|TV mini-series|TV|Video|Video Game)? ?((?:[0-9\?]{4})(?:-[0-9\?]{4})?)(?:/([IVXLCDM]+)?)?\)')
  36. re_remove_kind = re.compile(r'\((TV episode|TV Series|TV mini-series|TV|Video|Video Game)? ?')
  37. # Match only the imdbIndex (for name strings).
  38. re_index = re.compile(r'^\(([IVXLCDM]+)\)$')
  39. # Match things inside parentheses.
  40. re_parentheses = re.compile(r'(\(.*\))')
  41. # Match the number of episodes.
  42. re_episodes = re.compile('\s?\((\d+) episodes\)', re.I)
  43. re_episode_info = re.compile(r'{\s*(.+?)?\s?(\([0-9\?]{4}-[0-9\?]{1,2}-[0-9\?]{1,2}\))?\s?(\(#[0-9]+\.[0-9]+\))?}')
  44. # Common suffixes in surnames.
  45. _sname_suffixes = ('de', 'la', 'der', 'den', 'del', 'y', 'da', 'van',
  46. 'e', 'von', 'the', 'di', 'du', 'el', 'al')
  47. def canonicalName(name):
  48. """Return the given name in canonical "Surname, Name" format.
  49. It assumes that name is in the 'Name Surname' format."""
  50. # XXX: some statistics (as of 17 Apr 2008, over 2288622 names):
  51. # - just a surname: 69476
  52. # - single surname, single name: 2209656
  53. # - composed surname, composed name: 9490
  54. # - composed surname, single name: 67606
  55. # (2: 59764, 3: 6862, 4: 728)
  56. # - single surname, composed name: 242310
  57. # (2: 229467, 3: 9901, 4: 2041, 5: 630)
  58. # - Jr.: 8025
  59. # Don't convert names already in the canonical format.
  60. if name.find(', ') != -1: return name
  61. if isinstance(name, unicode):
  62. joiner = u'%s, %s'
  63. sur_joiner = u'%s %s'
  64. sur_space = u' %s'
  65. space = u' '
  66. else:
  67. joiner = '%s, %s'
  68. sur_joiner = '%s %s'
  69. sur_space = ' %s'
  70. space = ' '
  71. sname = name.split(' ')
  72. snl = len(sname)
  73. if snl == 2:
  74. # Just a name and a surname: how boring...
  75. name = joiner % (sname[1], sname[0])
  76. elif snl > 2:
  77. lsname = [x.lower() for x in sname]
  78. if snl == 3: _indexes = (0, snl-2)
  79. else: _indexes = (0, snl-2, snl-3)
  80. # Check for common surname prefixes at the beginning and near the end.
  81. for index in _indexes:
  82. if lsname[index] not in _sname_suffixes: continue
  83. try:
  84. # Build the surname.
  85. surn = sur_joiner % (sname[index], sname[index+1])
  86. del sname[index]
  87. del sname[index]
  88. try:
  89. # Handle the "Jr." after the name.
  90. if lsname[index+2].startswith('jr'):
  91. surn += sur_space % sname[index]
  92. del sname[index]
  93. except (IndexError, ValueError):
  94. pass
  95. name = joiner % (surn, space.join(sname))
  96. break
  97. except ValueError:
  98. continue
  99. else:
  100. name = joiner % (sname[-1], space.join(sname[:-1]))
  101. return name
  102. def normalizeName(name):
  103. """Return a name in the normal "Name Surname" format."""
  104. if isinstance(name, unicode):
  105. joiner = u'%s %s'
  106. else:
  107. joiner = '%s %s'
  108. sname = name.split(', ')
  109. if len(sname) == 2:
  110. name = joiner % (sname[1], sname[0])
  111. return name
  112. def analyze_name(name, canonical=None):
  113. """Return a dictionary with the name and the optional imdbIndex
  114. keys, from the given string.
  115. If canonical is None (default), the name is stored in its own style.
  116. If canonical is True, the name is converted to canonical style.
  117. If canonical is False, the name is converted to normal format.
  118. raise an IMDbParserError exception if the name is not valid.
  119. """
  120. original_n = name
  121. name = name.strip()
  122. res = {}
  123. imdbIndex = ''
  124. opi = name.rfind('(')
  125. cpi = name.rfind(')')
  126. # Strip notes (but not if the name starts with a parenthesis).
  127. if opi not in (-1, 0) and cpi > opi:
  128. if re_index.match(name[opi:cpi+1]):
  129. imdbIndex = name[opi+1:cpi]
  130. name = name[:opi].rstrip()
  131. else:
  132. # XXX: for the birth and death dates case like " (1926-2004)"
  133. name = re_parentheses.sub('', name).strip()
  134. if not name:
  135. raise IMDbParserError('invalid name: "%s"' % original_n)
  136. if canonical is not None:
  137. if canonical:
  138. name = canonicalName(name)
  139. else:
  140. name = normalizeName(name)
  141. res['name'] = name
  142. if imdbIndex:
  143. res['imdbIndex'] = imdbIndex
  144. return res
  145. def build_name(name_dict, canonical=None):
  146. """Given a dictionary that represents a "long" IMDb name,
  147. return a string.
  148. If canonical is None (default), the name is returned in the stored style.
  149. If canonical is True, the name is converted to canonical style.
  150. If canonical is False, the name is converted to normal format.
  151. """
  152. name = name_dict.get('canonical name') or name_dict.get('name', '')
  153. if not name: return ''
  154. if canonical is not None:
  155. if canonical:
  156. name = canonicalName(name)
  157. else:
  158. name = normalizeName(name)
  159. imdbIndex = name_dict.get('imdbIndex')
  160. if imdbIndex:
  161. name += ' (%s)' % imdbIndex
  162. return name
# XXX: here only for backward compatibility.  Find and remove any dependency.
# These module-level names are computed once, at import time, by calling
# the helpers of the linguistics module with None as the language.
_articles = linguistics.GENERIC_ARTICLES
_unicodeArticles = linguistics.toUnicode(_articles)
articlesDicts = linguistics.articlesDictsForLang(None)
spArticles = linguistics.spArticlesForLang(None)
  168. def canonicalTitle(title, lang=None):
  169. """Return the title in the canonic format 'Movie Title, The';
  170. beware that it doesn't handle long imdb titles, but only the
  171. title portion, without year[/imdbIndex] or special markup.
  172. The 'lang' argument can be used to specify the language of the title.
  173. """
  174. isUnicode = isinstance(title, unicode)
  175. articlesDicts = linguistics.articlesDictsForLang(lang)
  176. try:
  177. if title.split(', ')[-1].lower() in articlesDicts[isUnicode]:
  178. return title
  179. except IndexError:
  180. pass
  181. if isUnicode:
  182. _format = u'%s, %s'
  183. else:
  184. _format = '%s, %s'
  185. ltitle = title.lower()
  186. spArticles = linguistics.spArticlesForLang(lang)
  187. for article in spArticles[isUnicode]:
  188. if ltitle.startswith(article):
  189. lart = len(article)
  190. title = _format % (title[lart:], title[:lart])
  191. if article[-1] == ' ':
  192. title = title[:-1]
  193. break
  194. ## XXX: an attempt using a dictionary lookup.
  195. ##for artSeparator in (' ', "'", '-'):
  196. ## article = _articlesDict.get(ltitle.split(artSeparator)[0])
  197. ## if article is not None:
  198. ## lart = len(article)
  199. ## # check titles like "una", "I'm Mad" and "L'abbacchio".
  200. ## if title[lart:] == '' or (artSeparator != ' ' and
  201. ## title[lart:][1] != artSeparator): continue
  202. ## title = '%s, %s' % (title[lart:], title[:lart])
  203. ## if artSeparator == ' ': title = title[1:]
  204. ## break
  205. return title
  206. def normalizeTitle(title, lang=None):
  207. """Return the title in the normal "The Title" format;
  208. beware that it doesn't handle long imdb titles, but only the
  209. title portion, without year[/imdbIndex] or special markup.
  210. The 'lang' argument can be used to specify the language of the title.
  211. """
  212. isUnicode = isinstance(title, unicode)
  213. stitle = title.split(', ')
  214. articlesDicts = linguistics.articlesDictsForLang(lang)
  215. if len(stitle) > 1 and stitle[-1].lower() in articlesDicts[isUnicode]:
  216. sep = ' '
  217. if stitle[-1][-1] in ("'", '-'):
  218. sep = ''
  219. if isUnicode:
  220. _format = u'%s%s%s'
  221. _joiner = u', '
  222. else:
  223. _format = '%s%s%s'
  224. _joiner = ', '
  225. title = _format % (stitle[-1], sep, _joiner.join(stitle[:-1]))
  226. return title
  227. def _split_series_episode(title):
  228. """Return the series and the episode titles; if this is not a
  229. series' episode, the returned series title is empty.
  230. This function recognize two different styles:
  231. "The Series" An Episode (2005)
  232. "The Series" (2004) {An Episode (2005) (#season.episode)}"""
  233. series_title = ''
  234. episode_or_year = ''
  235. if title[-1:] == '}':
  236. # Title of the episode, as in the plain text data files.
  237. begin_eps = title.rfind('{')
  238. if begin_eps == -1: return '', ''
  239. series_title = title[:begin_eps].rstrip()
  240. # episode_or_year is returned with the {...}
  241. episode_or_year = title[begin_eps:].strip()
  242. if episode_or_year[:12] == '{SUSPENDED}}': return '', ''
  243. # XXX: works only with tv series; it's still unclear whether
  244. # IMDb will support episodes for tv mini series and tv movies...
  245. elif title[0:1] == '"':
  246. second_quot = title[1:].find('"') + 2
  247. if second_quot != 1: # a second " was found.
  248. episode_or_year = title[second_quot:].lstrip()
  249. first_char = episode_or_year[0:1]
  250. if not first_char: return '', ''
  251. if first_char != '(':
  252. # There is not a (year) but the title of the episode;
  253. # that means this is an episode title, as returned by
  254. # the web server.
  255. series_title = title[:second_quot]
  256. ##elif episode_or_year[-1:] == '}':
  257. ## # Title of the episode, as in the plain text data files.
  258. ## begin_eps = episode_or_year.find('{')
  259. ## if begin_eps == -1: return series_title, episode_or_year
  260. ## series_title = title[:second_quot+begin_eps].rstrip()
  261. ## # episode_or_year is returned with the {...}
  262. ## episode_or_year = episode_or_year[begin_eps:]
  263. return series_title, episode_or_year
  264. def is_series_episode(title):
  265. """Return True if 'title' is an series episode."""
  266. title = title.strip()
  267. if _split_series_episode(title)[0]: return 1
  268. return 0
  269. def analyze_title(title, canonical=None, canonicalSeries=None,
  270. canonicalEpisode=None, _emptyString=u''):
  271. """Analyze the given title and return a dictionary with the
  272. "stripped" title, the kind of the show ("movie", "tv series", etc.),
  273. the year of production and the optional imdbIndex (a roman number
  274. used to distinguish between movies with the same title and year).
  275. If canonical is None (default), the title is stored in its own style.
  276. If canonical is True, the title is converted to canonical style.
  277. If canonical is False, the title is converted to normal format.
  278. raise an IMDbParserError exception if the title is not valid.
  279. """
  280. # XXX: introduce the 'lang' argument?
  281. if canonical is not None:
  282. canonicalSeries = canonicalEpisode = canonical
  283. original_t = title
  284. result = {}
  285. title = title.strip()
  286. year = _emptyString
  287. kind = _emptyString
  288. imdbIndex = _emptyString
  289. series_title, episode_or_year = _split_series_episode(title)
  290. if series_title:
  291. # It's an episode of a series.
  292. series_d = analyze_title(series_title, canonical=canonicalSeries)
  293. oad = sen = ep_year = _emptyString
  294. # Plain text data files format.
  295. if episode_or_year[0:1] == '{' and episode_or_year[-1:] == '}':
  296. match = re_episode_info.findall(episode_or_year)
  297. if match:
  298. # Episode title, original air date and #season.episode
  299. episode_or_year, oad, sen = match[0]
  300. episode_or_year = episode_or_year.strip()
  301. if not oad:
  302. # No year, but the title is something like (2005-04-12)
  303. if episode_or_year and episode_or_year[0] == '(' and \
  304. episode_or_year[-1:] == ')' and \
  305. episode_or_year[1:2] != '#':
  306. oad = episode_or_year
  307. if oad[1:5] and oad[5:6] == '-':
  308. try:
  309. ep_year = int(oad[1:5])
  310. except (TypeError, ValueError):
  311. pass
  312. if not oad and not sen and episode_or_year.startswith('(#'):
  313. sen = episode_or_year
  314. elif episode_or_year.startswith('Episode dated'):
  315. oad = episode_or_year[14:]
  316. if oad[-4:].isdigit():
  317. try:
  318. ep_year = int(oad[-4:])
  319. except (TypeError, ValueError):
  320. pass
  321. episode_d = analyze_title(episode_or_year, canonical=canonicalEpisode)
  322. episode_d['kind'] = u'episode'
  323. episode_d['episode of'] = series_d
  324. if oad:
  325. episode_d['original air date'] = oad[1:-1]
  326. if ep_year and episode_d.get('year') is None:
  327. episode_d['year'] = ep_year
  328. if sen and sen[2:-1].find('.') != -1:
  329. seas, epn = sen[2:-1].split('.')
  330. if seas:
  331. # Set season and episode.
  332. try: seas = int(seas)
  333. except: pass
  334. try: epn = int(epn)
  335. except: pass
  336. episode_d['season'] = seas
  337. if epn:
  338. episode_d['episode'] = epn
  339. return episode_d
  340. # First of all, search for the kind of show.
  341. # XXX: Number of entries at 17 Apr 2008:
  342. # movie: 379,871
  343. # episode: 483,832
  344. # tv movie: 61,119
  345. # tv series: 44,795
  346. # video movie: 57,915
  347. # tv mini series: 5,497
  348. # video game: 5,490
  349. # More up-to-date statistics: http://us.imdb.com/database_statistics
  350. if title.endswith('(TV)'):
  351. kind = u'tv movie'
  352. title = title[:-4].rstrip()
  353. elif title.endswith('(V)'):
  354. kind = u'video movie'
  355. title = title[:-3].rstrip()
  356. elif title.endswith('(video)'):
  357. kind = u'video movie'
  358. title = title[:-7].rstrip()
  359. elif title.endswith('(mini)'):
  360. kind = u'tv mini series'
  361. title = title[:-6].rstrip()
  362. elif title.endswith('(VG)'):
  363. kind = u'video game'
  364. title = title[:-4].rstrip()
  365. # Search for the year and the optional imdbIndex (a roman number).
  366. yi = re_year_index.findall(title)
  367. if not yi:
  368. yi = re_extended_year_index.findall(title)
  369. if yi:
  370. yk, yiy, yii = yi[-1]
  371. yi = [(yiy, yii)]
  372. if yk == 'TV episode':
  373. kind = u'episode'
  374. elif yk == 'TV':
  375. kind = u'tv movie'
  376. elif yk == 'TV Series':
  377. kind = u'tv series'
  378. elif yk == 'Video':
  379. kind = u'video movie'
  380. elif yk == 'TV mini-series':
  381. kind = u'tv mini series'
  382. elif yk == 'Video Game':
  383. kind = u'video game'
  384. title = re_remove_kind.sub('(', title)
  385. if yi:
  386. last_yi = yi[-1]
  387. year = last_yi[0]
  388. if last_yi[1]:
  389. imdbIndex = last_yi[1][1:]
  390. year = year[:-len(imdbIndex)-1]
  391. i = title.rfind('(%s)' % last_yi[0])
  392. if i != -1:
  393. title = title[:i-1].rstrip()
  394. # This is a tv (mini) series: strip the '"' at the begin and at the end.
  395. # XXX: strip('"') is not used for compatibility with Python 2.0.
  396. if title and title[0] == title[-1] == '"':
  397. if not kind:
  398. kind = u'tv series'
  399. title = title[1:-1].strip()
  400. elif title.endswith('(TV series)'):
  401. kind = u'tv series'
  402. title = title[:-11].rstrip()
  403. if not title:
  404. raise IMDbParserError('invalid title: "%s"' % original_t)
  405. if canonical is not None:
  406. if canonical:
  407. title = canonicalTitle(title)
  408. else:
  409. title = normalizeTitle(title)
  410. # 'kind' is one in ('movie', 'episode', 'tv series', 'tv mini series',
  411. # 'tv movie', 'video movie', 'video game')
  412. result['title'] = title
  413. result['kind'] = kind or u'movie'
  414. if year and year != '????':
  415. if '-' in year:
  416. result['series years'] = year
  417. year = year[:4]
  418. try:
  419. result['year'] = int(year)
  420. except (TypeError, ValueError):
  421. pass
  422. if imdbIndex:
  423. result['imdbIndex'] = imdbIndex
  424. if isinstance(_emptyString, str):
  425. result['kind'] = str(kind or 'movie')
  426. return result
  427. _web_format = '%d %B %Y'
  428. _ptdf_format = '(%Y-%m-%d)'
  429. def _convertTime(title, fromPTDFtoWEB=1, _emptyString=u''):
  430. """Convert a time expressed in the pain text data files, to
  431. the 'Episode dated ...' format used on the web site; if
  432. fromPTDFtoWEB is false, the inverted conversion is applied."""
  433. try:
  434. if fromPTDFtoWEB:
  435. from_format = _ptdf_format
  436. to_format = _web_format
  437. else:
  438. from_format = u'Episode dated %s' % _web_format
  439. to_format = _ptdf_format
  440. t = strptime(title, from_format)
  441. title = strftime(to_format, t)
  442. if fromPTDFtoWEB:
  443. if title[0] == '0': title = title[1:]
  444. title = u'Episode dated %s' % title
  445. except ValueError:
  446. pass
  447. if isinstance(_emptyString, str):
  448. try:
  449. title = str(title)
  450. except UnicodeDecodeError:
  451. pass
  452. return title
def build_title(title_dict, canonical=None, canonicalSeries=None,
                canonicalEpisode=None, ptdf=0, lang=None, _doYear=1,
                _emptyString=u''):
    """Given a dictionary that represents a "long" IMDb title,
    return a string.

    If canonical is None (default), the title is returned in the stored style.
    If canonical is True, the title is converted to canonical style.
    If canonical is False, the title is converted to normal format.

    canonicalSeries and canonicalEpisode are passed on as the 'canonical'
    argument when the titles of the series and of the episode are built.
    lang can be used to specify the language of the title.
    If ptdf is true, the plain text data files format is used.
    If _doYear is false, the "(year[/imdbIndex])" part is not appended.
    An empty title is rendered as _emptyString.
    """
    # NOTE(review): only canonicalSeries is overridden here, while
    # analyze_title overrides canonicalEpisode too - confirm it's intended.
    if canonical is not None:
        canonicalSeries = canonical
    pre_title = _emptyString
    kind = title_dict.get('kind')
    episode_of = title_dict.get('episode of')
    if kind == 'episode' and episode_of is not None:
        # Works with both Movie instances and plain dictionaries.
        # The year of the series is shown only in the ptdf format.
        doYear = 0
        if ptdf:
            doYear = 1
        pre_title = build_title(episode_of, canonical=canonicalSeries,
                                ptdf=0, _doYear=doYear,
                                _emptyString=_emptyString)
        # Minimal dictionary used to build the title of the episode alone.
        ep_dict = {'title': title_dict.get('title', ''),
                   'imdbIndex': title_dict.get('imdbIndex')}
        ep_title = ep_dict['title']
        if not ptdf:
            # Web format: the year of the episode is shown and a title
            # like "(2005-04-12)" becomes "Episode dated 12 April 2005".
            doYear = 1
            ep_dict['year'] = title_dict.get('year', '????')
            if ep_title[0:1] == '(' and ep_title[-1:] == ')' and \
                    ep_title[1:5].isdigit():
                ep_dict['title'] = _convertTime(ep_title, fromPTDFtoWEB=1,
                                                _emptyString=_emptyString)
        else:
            # ptdf format: no year for the episode and an "Episode dated"
            # title is converted back to the "(YYYY-MM-DD)" form.
            doYear = 0
            if ep_title.startswith('Episode dated'):
                ep_dict['title'] = _convertTime(ep_title, fromPTDFtoWEB=0,
                                                _emptyString=_emptyString)
        episode_title = build_title(ep_dict,
                                    canonical=canonicalEpisode, ptdf=ptdf,
                                    _doYear=doYear, _emptyString=_emptyString)
        if ptdf:
            oad = title_dict.get('original air date', _emptyString)
            # Append the air date only if it looks like YYYY-MM-DD and
            # it's not already part of the episode title.
            if len(oad) == 10 and oad[4] == '-' and oad[7] == '-' and \
                    episode_title.find(oad) == -1:
                episode_title += ' (%s)' % oad
            seas = title_dict.get('season')
            if seas is not None:
                # Build the "(#season.episode)" part.
                episode_title += ' (#%s' % seas
                episode = title_dict.get('episode')
                if episode is not None:
                    episode_title += '.%s' % episode
                episode_title += ')'
            episode_title = '{%s}' % episode_title
        return '%s %s' % (pre_title, episode_title)
    title = title_dict.get('title', '')
    if not title: return _emptyString
    if canonical is not None:
        if canonical:
            title = canonicalTitle(title, lang=lang)
        else:
            title = normalizeTitle(title, lang=lang)
    if pre_title:
        title = '%s %s' % (pre_title, title)
    # Titles of tv (mini) series are wrapped in double quotes.
    if kind in (u'tv series', u'tv mini series'):
        title = '"%s"' % title
    if _doYear:
        imdbIndex = title_dict.get('imdbIndex')
        year = title_dict.get('year') or u'????'
        if isinstance(_emptyString, str):
            year = str(year)
        title += ' (%s' % year
        if imdbIndex:
            title += '/%s' % imdbIndex
        title += ')'
    # Suffix for the kind of the show; nothing is added for the other kinds.
    if kind:
        if kind == 'tv movie':
            title += ' (TV)'
        elif kind == 'video movie':
            title += ' (V)'
        elif kind == 'tv mini series':
            title += ' (mini)'
        elif kind == 'video game':
            title += ' (VG)'
    return title
  539. def split_company_name_notes(name):
  540. """Return two strings, the first representing the company name,
  541. and the other representing the (optional) notes."""
  542. name = name.strip()
  543. notes = u''
  544. if name.endswith(')'):
  545. fpidx = name.find('(')
  546. if fpidx != -1:
  547. notes = name[fpidx:]
  548. name = name[:fpidx].rstrip()
  549. return name, notes
  550. def analyze_company_name(name, stripNotes=False):
  551. """Return a dictionary with the name and the optional 'country'
  552. keys, from the given string.
  553. If stripNotes is true, tries to not consider optional notes.
  554. raise an IMDbParserError exception if the name is not valid.
  555. """
  556. if stripNotes:
  557. name = split_company_name_notes(name)[0]
  558. o_name = name
  559. name = name.strip()
  560. country = None
  561. if name.endswith(']'):
  562. idx = name.rfind('[')
  563. if idx != -1:
  564. country = name[idx:]
  565. name = name[:idx].rstrip()
  566. if not name:
  567. raise IMDbParserError('invalid name: "%s"' % o_name)
  568. result = {'name': name}
  569. if country:
  570. result['country'] = country
  571. return result
  572. def build_company_name(name_dict, _emptyString=u''):
  573. """Given a dictionary that represents a "long" IMDb company name,
  574. return a string.
  575. """
  576. name = name_dict.get('name')
  577. if not name:
  578. return _emptyString
  579. country = name_dict.get('country')
  580. if country is not None:
  581. name += ' %s' % country
  582. return name
  583. class _LastC:
  584. """Size matters."""
  585. def __cmp__(self, other):
  586. if isinstance(other, self.__class__): return 0
  587. return 1
  588. _last = _LastC()
def cmpMovies(m1, m2):
    """Compare two movies by year, in reverse order; the imdbIndex is checked
    for movies with the same year of production and title.

    m1 and m2 must provide a dictionary-like get method.  The return value
    is -1, 0 or 1, as for a cmp-style comparison function: -1 means that
    m1 comes first.  Two episodes are compared by series first and then,
    in reverse order, by season and episode number."""
    # Sort tv series' episodes.
    m1e = m1.get('episode of')
    m2e = m2.get('episode of')
    if m1e is not None and m2e is not None:
        # Episodes of different series: the order is the one of the series.
        cmp_series = cmpMovies(m1e, m2e)
        if cmp_series != 0:
            return cmp_series
        m1s = m1.get('season')
        m2s = m2.get('season')
        if m1s is not None and m2s is not None:
            # Higher seasons (and then higher episodes) come first.
            if m1s < m2s:
                return 1
            elif m1s > m2s:
                return -1
            m1p = m1.get('episode')
            m2p = m2.get('episode')
            if m1p < m2p:
                return 1
            elif m1p > m2p:
                return -1
    # For an episode the year of the series is used; a missing year or
    # a year that is not a number counts as 0.
    try:
        if m1e is None: m1y = int(m1.get('year', 0))
        else: m1y = int(m1e.get('year', 0))
    except ValueError:
        m1y = 0
    try:
        if m2e is None: m2y = int(m2.get('year', 0))
        else: m2y = int(m2e.get('year', 0))
    except ValueError:
        m2y = 0
    # The most recent movie comes first.
    if m1y > m2y: return -1
    if m1y < m2y: return 1
    # Ok, these movies have the same production year...
    #m1t = m1.get('canonical title', _last)
    #m2t = m2.get('canonical title', _last)
    # It should works also with normal dictionaries (returned from searches).
    #if m1t is _last and m2t is _last:
    m1t = m1.get('title', _last)
    m2t = m2.get('title', _last)
    if m1t < m2t: return -1
    if m1t > m2t: return 1
    # Ok, these movies have the same title...
    # The higher imdbIndex comes first.
    m1i = m1.get('imdbIndex', _last)
    m2i = m2.get('imdbIndex', _last)
    if m1i > m2i: return -1
    if m1i < m2i: return 1
    m1id = getattr(m1, 'movieID', None)
    # Introduce this check even for other comparisons functions?
    # XXX: is it safe to check without knowning the data access system?
    #      probably not a great idea.  Check for 'kind', instead?
    if m1id is not None:
        # Last resort: the higher movieID comes first.
        m2id = getattr(m2, 'movieID', None)
        if m1id > m2id: return -1
        elif m1id < m2id: return 1
    return 0
  647. def cmpPeople(p1, p2):
  648. """Compare two people by billingPos, name and imdbIndex."""
  649. p1b = getattr(p1, 'billingPos', None) or _last
  650. p2b = getattr(p2, 'billingPos', None) or _last
  651. if p1b > p2b: return 1
  652. if p1b < p2b: return -1
  653. p1n = p1.get('canonical name', _last)
  654. p2n = p2.get('canonical name', _last)
  655. if p1n is _last and p2n is _last:
  656. p1n = p1.get('name', _last)
  657. p2n = p2.get('name', _last)
  658. if p1n > p2n: return 1
  659. if p1n < p2n: return -1
  660. p1i = p1.get('imdbIndex', _last)
  661. p2i = p2.get('imdbIndex', _last)
  662. if p1i > p2i: return 1
  663. if p1i < p2i: return -1
  664. return 0
  665. def cmpCompanies(p1, p2):
  666. """Compare two companies."""
  667. p1n = p1.get('long imdb name', _last)
  668. p2n = p2.get('long imdb name', _last)
  669. if p1n is _last and p2n is _last:
  670. p1n = p1.get('name', _last)
  671. p2n = p2.get('name', _last)
  672. if p1n > p2n: return 1
  673. if p1n < p2n: return -1
  674. p1i = p1.get('country', _last)
  675. p2i = p2.get('country', _last)
  676. if p1i > p2i: return 1
  677. if p1i < p2i: return -1
  678. return 0
# References to titles, names and characters.
# In the text, a reference is followed by " (qv)"; a title is enclosed
# between underscores, a name between single quotes and a character
# between hash signs.  In every pattern, group 1 is the enclosed text.
# XXX: find better regexp!
re_titleRef = re.compile(r'_(.+?(?: \([0-9\?]{4}(?:/[IVXLCDM]+)?\))?(?: \(mini\)| \(TV\)| \(V\)| \(VG\))?)_ \(qv\)')
# FIXME: doesn't match persons with ' in the name.
re_nameRef = re.compile(r"'([^']+?)' \(qv\)")
# XXX: good choice?  Are there characters with # in the name?
# NOTE(review): the excluded char is the single quote, not the hash sign;
# it looks like it was copied from re_nameRef - confirm it's intended.
re_characterRef = re.compile(r"#([^']+?)# \(qv\)")
# Functions used to filter the text strings.
# They all share the same signature: the string to be filtered and the
# dictionaries of the references to titles, names and characters.
def modNull(s, titlesRefs, namesRefs, charactersRefs):
    """Do nothing: the string is returned as it is and the references
    dictionaries are ignored."""
    return s
  690. def modClearTitleRefs(s, titlesRefs, namesRefs, charactersRefs):
  691. """Remove titles references."""
  692. return re_titleRef.sub(r'\1', s)
  693. def modClearNameRefs(s, titlesRefs, namesRefs, charactersRefs):
  694. """Remove names references."""
  695. return re_nameRef.sub(r'\1', s)
  696. def modClearCharacterRefs(s, titlesRefs, namesRefs, charactersRefs):
  697. """Remove characters references"""
  698. return re_characterRef.sub(r'\1', s)
  699. def modClearRefs(s, titlesRefs, namesRefs, charactersRefs):
  700. """Remove titles, names and characters references."""
  701. s = modClearTitleRefs(s, {}, {}, {})
  702. s = modClearCharacterRefs(s, {}, {}, {})
  703. return modClearNameRefs(s, {}, {}, {})
  704. def modifyStrings(o, modFunct, titlesRefs, namesRefs, charactersRefs):
  705. """Modify a string (or string values in a dictionary or strings
  706. in a list), using the provided modFunct function and titlesRefs
  707. namesRefs and charactersRefs references dictionaries."""
  708. # Notice that it doesn't go any deeper than the first two levels in a list.
  709. if isinstance(o, (unicode, str)):
  710. return modFunct(o, titlesRefs, namesRefs, charactersRefs)
  711. elif isinstance(o, (list, tuple, dict)):
  712. _stillorig = 1
  713. if isinstance(o, (list, tuple)): keys = xrange(len(o))
  714. else: keys = o.keys()
  715. for i in keys:
  716. v = o[i]
  717. if isinstance(v, (unicode, str)):
  718. if _stillorig:
  719. o = copy(o)
  720. _stillorig = 0
  721. o[i] = modFunct(v, titlesRefs, namesRefs, charactersRefs)
  722. elif isinstance(v, (list, tuple)):
  723. modifyStrings(o[i], modFunct, titlesRefs, namesRefs,
  724. charactersRefs)
  725. return o
  726. def date_and_notes(s):
  727. """Parse (birth|death) date and notes; returns a tuple in the
  728. form (date, notes)."""
  729. s = s.strip()
  730. if not s: return (u'', u'')
  731. notes = u''
  732. if s[0].isdigit() or s.split()[0].lower() in ('c.', 'january', 'february',
  733. 'march', 'april', 'may', 'june',
  734. 'july', 'august', 'september',
  735. 'october', 'november',
  736. 'december', 'ca.', 'circa',
  737. '????,'):
  738. i = s.find(',')
  739. if i != -1:
  740. notes = s[i+1:].strip()
  741. s = s[:i]
  742. else:
  743. notes = s
  744. s = u''
  745. if s == '????': s = u''
  746. return s, notes
  747. class RolesList(list):
  748. """A list of Person or Character instances, used for the currentRole
  749. property."""
  750. def __unicode__(self):
  751. return u' / '.join([unicode(x) for x in self])
  752. def __str__(self):
  753. # FIXME: does it make sense at all? Return a unicode doesn't
  754. # seem right, in __str__.
  755. return u' / '.join([unicode(x).encode('utf8') for x in self])
  756. # Replace & with &amp;, but only if it's not already part of a charref.
  757. #_re_amp = re.compile(r'(&)(?!\w+;)', re.I)
  758. #_re_amp = re.compile(r'(?<=\W)&(?=[^a-zA-Z0-9_#])')
  759. _re_amp = re.compile(r'&(?![^a-zA-Z0-9_#]{1,5};)')
  760. def escape4xml(value):
  761. """Escape some chars that can't be present in a XML value."""
  762. if isinstance(value, int):
  763. value = str(value)
  764. value = _re_amp.sub('&amp;', value)
  765. value = value.replace('"', '&quot;').replace("'", '&apos;')
  766. value = value.replace('<', '&lt;').replace('>', '&gt;')
  767. if isinstance(value, unicode):
  768. value = value.encode('ascii', 'xmlcharrefreplace')
  769. return value
  770. def _refsToReplace(value, modFunct, titlesRefs, namesRefs, charactersRefs):
  771. """Return three lists - for movie titles, persons and characters names -
  772. with two items tuples: the first item is the reference once escaped
  773. by the user-provided modFunct function, the second is the same
  774. reference un-escaped."""
  775. mRefs = []
  776. for refRe, refTemplate in [(re_titleRef, u'_%s_ (qv)'),
  777. (re_nameRef, u"'%s' (qv)"),
  778. (re_characterRef, u'#%s# (qv)')]:
  779. theseRefs = []
  780. for theRef in refRe.findall(value):
  781. # refTemplate % theRef values don't change for a single
  782. # _Container instance, so this is a good candidate for a
  783. # cache or something - even if it's so rarely used that...
  784. # Moreover, it can grow - ia.update(...) - and change if
  785. # modFunct is modified.
  786. goodValue = modFunct(refTemplate % theRef, titlesRefs, namesRefs,
  787. charactersRefs)
  788. # Prevents problems with crap in plain text data files.
  789. # We should probably exclude invalid chars and string that
  790. # are too long in the re_*Ref expressions.
  791. if '_' in goodValue or len(goodValue) > 128:
  792. continue
  793. toReplace = escape4xml(goodValue)
  794. # Only the 'value' portion is replaced.
  795. replaceWith = goodValue.replace(theRef, escape4xml(theRef))
  796. theseRefs.append((toReplace, replaceWith))
  797. mRefs.append(theseRefs)
  798. return mRefs
  799. def _handleTextNotes(s):
  800. """Split text::notes strings."""
  801. ssplit = s.split('::', 1)
  802. if len(ssplit) == 1:
  803. return s
  804. return u'%s<notes>%s</notes>' % (ssplit[0], ssplit[1])
  805. def _normalizeValue(value, withRefs=False, modFunct=None, titlesRefs=None,
  806. namesRefs=None, charactersRefs=None):
  807. """Replace some chars that can't be present in a XML text."""
  808. # XXX: use s.encode(encoding, 'xmlcharrefreplace') ? Probably not
  809. # a great idea: after all, returning a unicode is safe.
  810. if isinstance(value, (unicode, str)):
  811. if not withRefs:
  812. value = _handleTextNotes(escape4xml(value))
  813. else:
  814. # Replace references that were accidentally escaped.
  815. replaceLists = _refsToReplace(value, modFunct, titlesRefs,
  816. namesRefs, charactersRefs)
  817. value = modFunct(value, titlesRefs or {}, namesRefs or {},
  818. charactersRefs or {})
  819. value = _handleTextNotes(escape4xml(value))
  820. for replaceList in replaceLists:
  821. for toReplace, replaceWith in replaceList:
  822. value = value.replace(toReplace, replaceWith)
  823. else:
  824. value = unicode(value)
  825. return value
  826. def _tag4TON(ton, addAccessSystem=False, _containerOnly=False):
  827. """Build a tag for the given _Container instance;
  828. both open and close tags are returned."""
  829. tag = ton.__class__.__name__.lower()
  830. what = 'name'
  831. if tag == 'movie':
  832. value = ton.get('long imdb title') or ton.get('title', '')
  833. what = 'title'
  834. else:
  835. value = ton.get('long imdb name') or ton.get('name', '')
  836. value = _normalizeValue(value)
  837. extras = u''
  838. crl = ton.currentRole
  839. if crl:
  840. if not isinstance(crl, list):
  841. crl = [crl]
  842. for cr in crl:
  843. crTag = cr.__class__.__name__.lower()
  844. crValue = cr['long imdb name']
  845. crValue = _normalizeValue(crValue)
  846. crID = cr.getID()
  847. if crID is not None:
  848. extras += u'<current-role><%s id="%s">' \
  849. u'<name>%s</name></%s>' % (crTag, crID,
  850. crValue, crTag)
  851. else:
  852. extras += u'<current-role><%s><name>%s</name></%s>' % \
  853. (crTag, crValue, crTag)
  854. if cr.notes:
  855. extras += u'<notes>%s</notes>' % _normalizeValue(cr.notes)
  856. extras += u'</current-role>'
  857. theID = ton.getID()
  858. if theID is not None:
  859. beginTag = u'<%s id="%s"' % (tag, theID)
  860. if addAccessSystem and ton.accessSystem:
  861. beginTag += ' access-system="%s"' % ton.accessSystem
  862. if not _containerOnly:
  863. beginTag += u'><%s>%s</%s>' % (what, value, what)
  864. else:
  865. beginTag += u'>'
  866. else:
  867. if not _containerOnly:
  868. beginTag = u'<%s><%s>%s</%s>' % (tag, what, value, what)
  869. else:
  870. beginTag = u'<%s>' % tag
  871. beginTag += extras
  872. if ton.notes:
  873. beginTag += u'<notes>%s</notes>' % _normalizeValue(ton.notes)
  874. return (beginTag, u'</%s>' % tag)
# Tags that must be renamed while building the XML, keyed by the full
# dotted path of the element containing them.  Every value is a
# (tagName, useTitle) tuple, as read by _tagAttr: tagName replaces the
# original key as the name of the tag and, when useTitle is True, the
# escaped original key is stored in the "key" attribute of the tag.
TAGS_TO_MODIFY = {
    'movie.parents-guide': ('item', True),
    'movie.number-of-votes': ('item', True),
    'movie.soundtrack.item': ('item', True),
    'movie.quotes': ('quote', False),
    'movie.quotes.quote': ('line', False),
    'movie.demographic': ('item', True),
    'movie.episodes': ('season', True),
    'movie.episodes.season': ('episode', True),
    'person.merchandising-links': ('item', True),
    'person.genres': ('item', True),
    'person.quotes': ('quote', False),
    'person.keywords': ('item', True),
    'character.quotes': ('item', True),
    'character.quotes.item': ('quote', False),
    'character.quotes.item.quote': ('line', False)
}
# Identity translation table, covering every 8-bit char.
_allchars = string.maketrans('', '')
# Despite its name, this is the set of chars to DELETE: every char except
# lowercase ASCII letters, '-' and digits.  It's passed as the
# "deletechars" argument of str.translate in _tagAttr, so that only those
# chars are kept in a tag name.
_keepchars = _allchars.translate(_allchars, string.ascii_lowercase + '-' +
                                 string.digits)
  895. def _tagAttr(key, fullpath):
  896. """Return a tuple with a tag name and a (possibly empty) attribute,
  897. applying the conversions specified in TAGS_TO_MODIFY and checking
  898. that the tag is safe for a XML document."""
  899. attrs = {}
  900. _escapedKey = escape4xml(key)
  901. if fullpath in TAGS_TO_MODIFY:
  902. tagName, useTitle = TAGS_TO_MODIFY[fullpath]
  903. if useTitle:
  904. attrs['key'] = _escapedKey
  905. elif not isinstance(key, unicode):
  906. if isinstance(key, str):
  907. tagName = unicode(key, 'ascii', 'ignore')
  908. else:
  909. strType = str(type(key)).replace("<type '", "").replace("'>", "")
  910. attrs['keytype'] = strType
  911. tagName = unicode(key)
  912. else:
  913. tagName = key
  914. if isinstance(key, int):
  915. attrs['keytype'] = 'int'
  916. origTagName = tagName
  917. tagName = tagName.lower().replace(' ', '-')
  918. tagName = str(tagName).translate(_allchars, _keepchars)
  919. if origTagName != tagName:
  920. if 'key' not in attrs:
  921. attrs['key'] = _escapedKey
  922. if (not tagName) or tagName[0].isdigit() or tagName[0] == '-':
  923. # This is a fail-safe: we should never be here, since unpredictable
  924. # keys must be listed in TAGS_TO_MODIFY.
  925. # This will proably break the DTD/schema, but at least it will
  926. # produce a valid XML.
  927. tagName = 'item'
  928. _utils_logger.error('invalid tag: %s [%s]' % (_escapedKey, fullpath))
  929. attrs['key'] = _escapedKey
  930. return tagName, u' '.join([u'%s="%s"' % i for i in attrs.items()])
def _seq2xml(seq, _l=None, withRefs=False, modFunct=None,
             titlesRefs=None, namesRefs=None, charactersRefs=None,
             _topLevel=True, key2infoset=None, fullpath=''):
    """Convert a sequence or a dictionary to a list of XML
    unicode strings.

    *seq* -- a dictionary, a list/tuple, a _Container instance or any
             other value (text, ints, floats and the like).
    *_l* -- the list to which the XML pieces are appended; a new list is
            created if it's None.  The same list is shared by every
            recursive call, and it's the returned value.
    *withRefs*, *modFunct*, *titlesRefs*, *namesRefs*, *charactersRefs* --
            passed unchanged to _normalizeValue for the leaf values.
    *_topLevel* -- True only for the outermost call; recursive calls
            always pass False.
    *key2infoset* -- optional mapping from keys to the name of an info
            set, used to add the "infoset" attribute to top-level tags.
    *fullpath* -- dotted path of the current element, used by _tagAttr.
    """
    if _l is None:
        _l = []
    if isinstance(seq, dict):
        for key in seq:
            value = seq[key]
            if isinstance(key, _Container):
                # Here we're assuming that a _Container is never a top-level
                # key (otherwise we should handle key2infoset).
                openTag, closeTag = _tag4TON(key)
                # So that fullpath will contains something meaningful.
                tagName = key.__class__.__name__.lower()
            else:
                tagName, attrs = _tagAttr(key, fullpath)
                openTag = u'<%s' % tagName
                if attrs:
                    openTag += ' %s' % attrs
                if _topLevel and key2infoset and key in key2infoset:
                    openTag += u' infoset="%s"' % key2infoset[key]
                # Record the type of numeric values as an attribute.
                if isinstance(value, int):
                    openTag += ' type="int"'
                elif isinstance(value, float):
                    openTag += ' type="float"'
                openTag += u'>'
                closeTag = u'</%s>' % tagName
            # The content of the value goes between the two tags.
            _l.append(openTag)
            _seq2xml(value, _l, withRefs, modFunct, titlesRefs,
                     namesRefs, charactersRefs, _topLevel=False,
                     fullpath='%s.%s' % (fullpath, tagName))
            _l.append(closeTag)
    elif isinstance(seq, (list, tuple)):
        # The tag shared by every item of the sequence; it's left open
        # since a type attribute can be added for each item.
        tagName, attrs = _tagAttr('item', fullpath)
        beginTag = u'<%s' % tagName
        if attrs:
            beginTag += u' %s' % attrs
        #beginTag += u'>'
        closeTag = u'</%s>' % tagName
        for item in seq:
            if isinstance(item, _Container):
                # A _Container brings its own tags: no wrapper around it.
                _seq2xml(item, _l, withRefs, modFunct, titlesRefs,
                         namesRefs, charactersRefs, _topLevel=False,
                         fullpath='%s.%s' % (fullpath,
                                             item.__class__.__name__.lower()))
            else:
                openTag = beginTag
                if isinstance(item, int):
                    openTag += ' type="int"'
                elif isinstance(item, float):
                    openTag += ' type="float"'
                openTag += u'>'
                _l.append(openTag)
                _seq2xml(item, _l, withRefs, modFunct, titlesRefs,
                         namesRefs, charactersRefs, _topLevel=False,
                         fullpath='%s.%s' % (fullpath, tagName))
                _l.append(closeTag)
    else:
        if isinstance(seq, _Container):
            # Both the open and the close tag built by _tag4TON.
            _l.extend(_tag4TON(seq))
        else:
            # Text, ints, floats and the like.
            _l.append(_normalizeValue(seq, withRefs=withRefs,
                                      modFunct=modFunct,
                                      titlesRefs=titlesRefs,
                                      namesRefs=namesRefs,
                                      charactersRefs=charactersRefs))
    return _l
# Head of the XML output: the XML declaration and the DOCTYPE.
# The %s is left in place for later string formatting - presumably the
# name of the root element; verify against the code using _xmlHead.
_xmlHead = u"""<?xml version="1.0"?>
<!DOCTYPE %s SYSTEM "http://imdbpy.sf.net/dtd/imdbpy{VERSION}.dtd">
"""
# {VERSION} becomes the first two chars of VERSION, once the dots and
# anything from 'dev' onward are removed.
_xmlHead = _xmlHead.replace('{VERSION}',
                            VERSION.replace('.', '').split('dev')[0][:2])
class _Container(object):
    """Base class for Movie, Person, Character and Company classes."""
    # The default sets of information retrieved.
    default_info = ()

    # Aliases for some not-so-intuitive keys.
    keys_alias = {}

    # List of keys to modify; __init__ copies them into the keys of the
    # keys_tomodify dictionary of every instance.
    keys_tomodify_list = ()

    # Function used to compare two instances of this class.
    cmpFunct = None

    # Regular expression used to build the 'full-size (headshot|cover url)';
    # it matches the '._V1._SX<digits>_SY<digits>_' portion of a URL.
    _re_fullsizeURL = re.compile(r'\._V1\._SX(\d+)_SY(\d+)_')
  1018. def __init__(self, myID=None, data=None, notes=u'',
  1019. currentRole=u'', roleID=None, roleIsPerson=False,
  1020. accessSystem=None, titlesRefs=None, namesRefs=None,
  1021. charactersRefs=None, modFunct=None, *args, **kwds):
  1022. """Initialize a Movie, Person, Character or Company object.
  1023. *myID* -- your personal identifier for this object.
  1024. *data* -- a dictionary used to initialize the object.
  1025. *notes* -- notes for the person referred in the currentRole
  1026. attribute; e.g.: '(voice)' or the alias used in the
  1027. movie credits.
  1028. *accessSystem* -- a string representing the data access system used.
  1029. *currentRole* -- a Character instance representing the current role
  1030. or duty of a person in this movie, or a Person
  1031. object representing the actor/actress who played
  1032. a given character in a Movie. If a string is
  1033. passed, an object is automatically build.
  1034. *roleID* -- if available, the characterID/personID of the currentRole
  1035. object.
  1036. *roleIsPerson* -- when False (default) the currentRole is assumed
  1037. to be a Character object, otherwise a Person.
  1038. *titlesRefs* -- a dictionary with references to movies.
  1039. *namesRefs* -- a dictionary with references to persons.
  1040. *charactersRefs* -- a dictionary with references to characters.
  1041. *modFunct* -- function called returning text fields.
  1042. """
  1043. self.reset()
  1044. self.accessSystem = accessSystem
  1045. self.myID = myID
  1046. if data is None: data = {}
  1047. self.set_data(data, override=1)
  1048. self.notes = notes
  1049. if titlesRefs is None: titlesRefs = {}
  1050. self.update_titlesRefs(titlesRefs)
  1051. if namesRefs is None: namesRefs = {}
  1052. self.update_namesRefs(namesRefs)
  1053. if charactersRefs is None: charactersRefs = {}
  1054. self.update_charactersRefs(charactersRefs)
  1055. self.set_mod_funct(modFunct)
  1056. self.keys_tomodify = {}
  1057. for item in self.keys_tomodify_list:
  1058. self.keys_tomodify[item] = None
  1059. self._roleIsPerson = roleIsPerson
  1060. if not roleIsPerson:
  1061. from imdb.Character import Character
  1062. self._roleClass = Character
  1063. else:
  1064. from imdb.Person import Person
  1065. self._roleClass = Person
  1066. self.currentRole = currentRole
  1067. if roleID:
  1068. self.roleID = roleID
  1069. self._init(*args, **kwds)
  1070. def _get_roleID(self):
  1071. """Return the characterID or personID of the currentRole object."""
  1072. if not self.__role:
  1073. return None
  1074. if isinstance(self.__role, list):
  1075. return [x.getID() for x in self.__role]
  1076. return self.currentRole.getID()
  1077. def _set_roleID(self, roleID):
  1078. """Set the characterID or personID of the currentRole object."""
  1079. if not self.__role:
  1080. # XXX: needed? Just ignore it? It's probably safer to
  1081. # ignore it, to prevent some bugs in the parsers.
  1082. #raise IMDbError,"Can't set ID of an empty Character/Person object."
  1083. pass
  1084. if not self._roleIsPerson:
  1085. if not isinstance(roleID, (list, tuple)):
  1086. self.currentRole.characterID = roleID
  1087. else:
  1088. for index, item in enumerate(roleID):
  1089. self.__role[index].characterID = item
  1090. else:
  1091. if not isinstance(roleID, (list, tuple)):
  1092. self.currentRole.personID = roleID
  1093. else:
  1094. for index, item in enumerate(roleID):
  1095. self.__role[index].personID = item
    # Read/write attribute backed by _get_roleID and _set_roleID.
    roleID = property(_get_roleID, _set_roleID,
                    doc="the characterID or personID of the currentRole object.")
  1098. def _get_currentRole(self):
  1099. """Return a Character or Person instance."""
  1100. if self.__role:
  1101. return self.__role
  1102. return self._roleClass(name=u'', accessSystem=self.accessSystem,
  1103. modFunct=self.modFunct)
  1104. def _set_currentRole(self, role):
  1105. """Set self.currentRole to a Character or Person instance."""
  1106. if isinstance(role, (unicode, str)):
  1107. if not role:
  1108. self.__role = None
  1109. else:
  1110. self.__role = self._roleClass(name=role, modFunct=self.modFunct,
  1111. accessSystem=self.accessSystem)
  1112. elif isinstance(role, (list, tuple)):
  1113. self.__role = RolesList()
  1114. for item in role:
  1115. if isinstance(item, (unicode, str)):
  1116. self.__role.append(self._roleClass(name=item,
  1117. accessSystem=self.accessSystem,
  1118. modFunct=self.modFunct))
  1119. else:
  1120. self.__role.append(item)
  1121. if not self.__role:
  1122. self.__role = None
  1123. else:
  1124. self.__role = role
    # Read/write attribute backed by _get_currentRole and _set_currentRole.
    currentRole = property(_get_currentRole, _set_currentRole,
                            doc="The role of a Person in a Movie" + \
                            " or the interpreter of a Character in a Movie.")
    def _init(self, **kwds):
        """Do nothing; called by __init__ as its last step, with the extra
        arguments it received - presumably meant to be overridden by the
        subclasses."""
        pass
  1129. def reset(self):
  1130. """Reset the object."""
  1131. self.data = {}
  1132. self.myID = None
  1133. self.notes = u''
  1134. self.titlesRefs = {}
  1135. self.namesRefs = {}
  1136. self.charactersRefs = {}
  1137. self.modFunct = modClearRefs
  1138. self.current_info = []
  1139. self.infoset2keys = {}
  1140. self.key2infoset = {}
  1141. self.__role = None
  1142. self._reset()
    def _reset(self):
        """Do nothing; called by reset() as its last step - presumably
        meant to be overridden by the subclasses."""
        pass
  1144. def clear(self):
  1145. """Reset the dictionary."""
  1146. self.data.clear()
  1147. self.notes = u''
  1148. self.titlesRefs = {}
  1149. self.namesRefs = {}
  1150. self.charactersRefs = {}
  1151. self.current_info = []
  1152. self.infoset2keys = {}
  1153. self.key2infoset = {}
  1154. self.__role = None
  1155. self._clear()
    def _clear(self):
        """Do nothing; called by clear() as its last step - presumably
        meant to be overridden by the subclasses."""
        pass
    def get_current_info(self):
        """Return the current set of information retrieved.

        The list stored in the object is returned, not a copy of it."""
        return self.current_info
  1160. def update_infoset_map(self, infoset, keys, mainInfoset):
  1161. """Update the mappings between infoset and keys."""
  1162. if keys is None:
  1163. keys = []
  1164. if mainInfoset is not None:
  1165. theIS = mainInfoset
  1166. else:
  1167. theIS = infoset
  1168. self.infoset2keys[theIS] = keys
  1169. for key in keys:
  1170. self.key2infoset[key] = theIS
    def set_current_info(self, ci):
        """Set the current set of information retrieved.

        *ci* -- the new value of current_info, stored as it is; the
                mappings between info sets and keys are not touched.
        """
        # XXX:Remove? It's never used and there's no way to update infoset2keys.
        self.current_info = ci
  1175. def add_to_current_info(self, val, keys=None, mainInfoset=None):
  1176. """Add a set of information to the current list."""
  1177. if val not in self.current_info:
  1178. self.current_info.append(val)
  1179. self.update_infoset_map(val, keys, mainInfoset)
    def has_current_info(self, val):
        """Return true if the given set of information is in the list.

        *val* -- the name of the set of information to look for in
                 current_info.
        """
        return val in self.current_info
  1183. def set_mod_f