PageRenderTime 50ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/server/superdesk/io/io_tests.py

https://gitlab.com/wilane/superdesk
Python | 155 lines | 119 code | 23 blank | 13 comment | 1 complexity | 3afa24296c9f1d9144a086f92e6c02eb MD5 | raw file
  1. # -*- coding: utf-8; -*-
  2. #
  3. # This file is part of Superdesk.
  4. #
  5. # Copyright 2013, 2014 Sourcefabric z.u. and contributors.
  6. #
  7. # For the full copyright and license information, please see the
  8. # AUTHORS and LICENSE files distributed with this source code, or
  9. # at https://www.sourcefabric.org/superdesk/license
  10. import os
  11. import unittest
  12. from superdesk.etree import etree, get_word_count
  13. from superdesk.io import get_xml_parser
  14. from .newsml_1_2 import NewsMLOneParser
  15. from .newsml_2_0 import NewsMLTwoParser
  16. from .nitf import NITFParser
  17. def get_etree(filename):
  18. dirname = os.path.dirname(os.path.realpath(__file__))
  19. with open(os.path.join(dirname, 'fixtures', filename)) as f:
  20. return etree.fromstring(f.read().encode('utf-8'))
  21. class UtilsTest(unittest.TestCase):
  22. def test_get_word_count(self):
  23. self.assertEqual(2, get_word_count('plain text'), 'plain text')
  24. self.assertEqual(2, get_word_count('<p> html text </p>'), 'paragraph')
  25. self.assertEqual(22, get_word_count(
  26. '<doc><p xml:lang="en-US">The weather was superb today in Norfolk, Virginia. Made me want to take\n'
  27. 'out my boat, manufactured by the <org value="acm" idsrc="iptc.org">Acme Boat Company</org>.</p></doc>'))
  28. def test_get_xml_parser_newsmlg2(self):
  29. etree = get_etree('snep.xml')
  30. self.assertIsInstance(get_xml_parser(etree), NewsMLTwoParser)
  31. def test_get_xml_parser_nitf(self):
  32. etree = get_etree('nitf-fishing.xml')
  33. self.assertIsInstance(get_xml_parser(etree), NITFParser)
  34. def test_get_xml_parser_newsml12(self):
  35. etree = get_etree('afp.xml')
  36. self.assertIsInstance(get_xml_parser(etree), NewsMLOneParser)
  37. class ItemTest(unittest.TestCase):
  38. def setUpFixture(self, filename):
  39. self.tree = get_etree(filename)
  40. provider = {'name': 'Test'}
  41. self.item = get_xml_parser(self.tree).parse_message(self.tree, provider)[0]
  42. class TextParserTest(ItemTest):
  43. def setUp(self):
  44. self.setUpFixture('text.xml')
  45. def test_instance(self):
  46. self.assertTrue(self.item)
  47. def test_parse_id(self):
  48. self.assertEquals("tag:reuters.com,0000:newsml_L4N0BT5PJ", self.item.get('guid'))
  49. self.assertEquals('263518268', self.item.get('version'))
  50. self.assertEquals(self.item.get('guid'), self.item.get('uri'))
  51. def test_parse_item_meta(self):
  52. self.assertEquals("text", self.item.get('type'))
  53. self.assertEquals("2013-03-01T15:09:04", self.item.get('versioncreated').isoformat())
  54. self.assertEquals("2013-03-01T15:09:04", self.item.get('firstcreated').isoformat())
  55. self.assertEquals("Editorial Note", self.item.get('ednote'))
  56. def test_parse_content_meta(self):
  57. self.assertEquals('3', self.item.get('urgency'))
  58. self.assertEquals("SOCCER-ENGLAND/CHELSEA-BENITEZ", self.item["slugline"])
  59. self.assertEquals("Soccer-Smiling Benitez pleads for support "
  60. "after midweek outburst against opponent", self.item["headline"])
  61. # self.assertEquals("Reuters", self.item["creditline"])
  62. self.assertEquals("Bangalore", self.item["dateline"])
  63. self.assertEquals("SOCCER-ENGLAND/CHELSEA-BENITEZ:Soccer-Smiling Benitez pleads for support after midweek outburst", self.item.get('description')) # noqa
  64. # def test_parse_rights_info(self):
  65. # self.assertEquals("Thomson Reuters", self.item.get('copyrightholder'))
  66. # self.assertEquals("(c) Copyright Thomson Reuters 2013. Click For Restrictions - http://about.reuters.com/fulllegal.asp", self.item.get('copyrightnotice')) # noqa
  67. def test_content_set(self):
  68. self.assertEqual("<p>By Toby Davis</p>", self.item.get('body_html'))
  69. self.assertEqual(569, self.item.get('word_count'))
  70. self.assertIsInstance(self.item.get('body_html'), type(''))
  71. def test_language(self):
  72. self.assertEquals('en', self.item.get('language'))
  73. def test_subject(self):
  74. self.assertEquals(2, len(self.item.get('subject')))
  75. self.assertIn({'qcode': '15054000', 'name': 'soccer'}, self.item.get('subject'))
  76. def test_pubstatus(self):
  77. self.assertEquals('usable', self.item.get('pubstatus'))
  78. class PictureParserTest(ItemTest):
  79. def setUp(self):
  80. self.setUpFixture('picture.xml')
  81. def test_type(self):
  82. self.assertEquals('picture', self.item.get('type'))
  83. def test_content_set(self):
  84. self.assertEquals(3, len(self.item.get('renditions')))
  85. remote = self.item.get('renditions').get('baseImage')
  86. self.assertTrue(remote)
  87. self.assertEquals("tag:reuters.com,0000:binary_GM1E9341HD701-BASEIMAGE", remote.get('residRef'))
  88. self.assertEquals(772617, remote.get('sizeinbytes'))
  89. self.assertEquals("image/jpeg", remote.get('mimetype'))
  90. self.assertEquals("http://content.reuters.com/auth-server/content/tag:reuters.com,0000:newsml_GM1E9341HD701:360624134/tag:reuters.com,0000:binary_GM1E9341HD701-BASEIMAGE", remote.get('href')) # noqa
  91. def test_byline(self):
  92. self.assertEquals('MARKO DJURICA', self.item.get('byline'))
  93. def test_place(self):
  94. self.assertEquals(2, len(self.item.get('place')))
  95. self.assertIn({'name': 'NAIROBI'}, self.item['place'])
  96. self.assertIn({'name': 'Kenya'}, self.item['place'])
  97. class SNEPParserTest(ItemTest):
  98. def setUp(self):
  99. self.setUpFixture('snep.xml')
  100. def test_content_set(self):
  101. self.assertEquals(2, len(self.item.get('groups')))
  102. group = self.item.get('groups')[0]
  103. self.assertTrue(group)
  104. self.assertEquals("root", group.get('id'))
  105. self.assertEquals("grpRole:SNEP", group.get('role'))
  106. self.assertEquals(1, len(group.get('refs')))
  107. self.assertEquals("main", group.get('refs')[0].get('idRef'))
  108. group = self.item.get('groups')[1]
  109. self.assertEquals(10, len(group.get('refs')))
  110. self.assertEquals("main", group.get('id'))
  111. ref = group.get('refs')[0]
  112. self.assertTrue(ref)
  113. self.assertEquals("tag:reuters.com,0000:newsml_BRE9220HA", ref.get('residRef'))
  114. self.assertEquals("application/vnd.iptc.g2.packageitem+xml", ref.get('contentType'))
  115. self.assertEquals("icls:composite", ref.get('itemClass'))
  116. self.assertEquals("At least 15 killed on Kenya coast on election day", ref.get('headline'))