PageRenderTime 67ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/tests/test_format.py

http://github.com/pydata/pandas
Python | 2944 lines | 2877 code | 53 blank | 14 comment | 50 complexity | 38377b1d5fb1564014011ef00c40c1d2 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. from __future__ import print_function
  2. # -*- coding: utf-8 -*-
  3. import re
  4. from pandas.compat import range, zip, lrange, StringIO, PY3, lzip, u
  5. import pandas.compat as compat
  6. import itertools
  7. import os
  8. import sys
  9. from textwrap import dedent
  10. import warnings
  11. from numpy import nan
  12. from numpy.random import randn
  13. import numpy as np
  14. from pandas import DataFrame, Series, Index, _np_version_under1p7, Timestamp, MultiIndex
  15. import pandas.core.format as fmt
  16. import pandas.util.testing as tm
  17. import pandas.core.common as com
  18. from pandas.util.terminal import get_terminal_size
  19. import pandas
  20. import pandas.tslib as tslib
  21. import pandas as pd
  22. from pandas.core.config import (set_option, get_option,
  23. option_context, reset_option)
  24. from datetime import datetime
# Module-level template frame; TestDataFrameFormatting.setUp gives each test
# its own copy of it.
_frame = DataFrame(tm.getSeriesData())
  26. def curpath():
  27. pth, _ = os.path.split(os.path.abspath(__file__))
  28. return pth
  29. def has_info_repr(df):
  30. r = repr(df)
  31. c1 = r.split('\n')[0].startswith("<class")
  32. c2 = r.split('\n')[0].startswith(r"&lt;class") # _repr_html_
  33. return c1 or c2
  34. def has_non_verbose_info_repr(df):
  35. has_info = has_info_repr(df)
  36. r = repr(df)
  37. nv = len(r.split('\n')) == 4 # 1. <class>, 2. Index, 3. Columns, 4. dtype
  38. return has_info and nv
  39. def has_horizontally_truncated_repr(df):
  40. try: # Check header row
  41. fst_line = np.array(repr(df).splitlines()[0].split())
  42. cand_col = np.where(fst_line=='...')[0][0]
  43. except:
  44. return False
  45. # Make sure each row has this ... in the same place
  46. r = repr(df)
  47. for ix,l in enumerate(r.splitlines()):
  48. if not r.split()[cand_col] == '...':
  49. return False
  50. return True
  51. def has_vertically_truncated_repr(df):
  52. r = repr(df)
  53. only_dot_row = False
  54. for row in r.splitlines():
  55. if re.match('^[\.\ ]+$',row):
  56. only_dot_row = True
  57. return only_dot_row
  58. def has_truncated_repr(df):
  59. return has_horizontally_truncated_repr(df) or has_vertically_truncated_repr(df)
  60. def has_doubly_truncated_repr(df):
  61. return has_horizontally_truncated_repr(df) and has_vertically_truncated_repr(df)
  62. def has_expanded_repr(df):
  63. r = repr(df)
  64. for line in r.split('\n'):
  65. if line.endswith('\\'):
  66. return True
  67. return False
  68. class TestDataFrameFormatting(tm.TestCase):
  69. _multiprocess_can_split_ = True
  70. def setUp(self):
  71. self.warn_filters = warnings.filters
  72. warnings.filterwarnings('ignore',
  73. category=FutureWarning,
  74. module=".*format")
  75. self.frame = _frame.copy()
    def tearDown(self):
        # Put back the warning-filter list that setUp stored on the instance.
        warnings.filters = self.warn_filters
  78. def test_repr_embedded_ndarray(self):
  79. arr = np.empty(10, dtype=[('err', object)])
  80. for i in range(len(arr)):
  81. arr['err'][i] = np.random.randn(i)
  82. df = DataFrame(arr)
  83. repr(df['err'])
  84. repr(df)
  85. df.to_string()
  86. def test_eng_float_formatter(self):
  87. self.frame.ix[5] = 0
  88. fmt.set_eng_float_format()
  89. result = repr(self.frame)
  90. fmt.set_eng_float_format(use_eng_prefix=True)
  91. repr(self.frame)
  92. fmt.set_eng_float_format(accuracy=0)
  93. repr(self.frame)
  94. self.reset_display_options()
  95. def test_repr_tuples(self):
  96. buf = StringIO()
  97. df = DataFrame({'tups': lzip(range(10), range(10))})
  98. repr(df)
  99. df.to_string(col_space=10, buf=buf)
  100. def test_repr_truncation(self):
  101. max_len = 20
  102. with option_context("display.max_colwidth", max_len):
  103. df = DataFrame({'A': np.random.randn(10),
  104. 'B': [tm.rands(np.random.randint(max_len - 1,
  105. max_len + 1)) for i in range(10)]})
  106. r = repr(df)
  107. r = r[r.find('\n') + 1:]
  108. _strlen = fmt._strlen_func()
  109. for line, value in lzip(r.split('\n'), df['B']):
  110. if _strlen(value) + 1 > max_len:
  111. self.assertIn('...', line)
  112. else:
  113. self.assertNotIn('...', line)
  114. with option_context("display.max_colwidth", 999999):
  115. self.assertNotIn('...', repr(df))
  116. with option_context("display.max_colwidth", max_len + 2):
  117. self.assertNotIn('...', repr(df))
  118. def test_repr_chop_threshold(self):
  119. df = DataFrame([[0.1, 0.5],[0.5, -0.1]])
  120. pd.reset_option("display.chop_threshold") # default None
  121. self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')
  122. with option_context("display.chop_threshold", 0.2 ):
  123. self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0')
  124. with option_context("display.chop_threshold", 0.6 ):
  125. self.assertEqual(repr(df), ' 0 1\n0 0 0\n1 0 0')
  126. with option_context("display.chop_threshold", None ):
  127. self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1')
  128. def test_repr_obeys_max_seq_limit(self):
  129. import pandas.core.common as com
  130. with option_context("display.max_seq_items",2000):
  131. self.assertTrue(len(com.pprint_thing(lrange(1000))) > 1000)
  132. with option_context("display.max_seq_items",5):
  133. self.assertTrue(len(com.pprint_thing(lrange(1000)))< 100)
  134. def test_repr_is_valid_construction_code(self):
  135. import pandas as pd
  136. # for the case of Index, where the repr is traditional rather then stylized
  137. idx = pd.Index(['a','b'])
  138. res = eval("pd."+repr(idx))
  139. tm.assert_series_equal(Series(res),Series(idx))
  140. def test_repr_should_return_str(self):
  141. # http://docs.python.org/py3k/reference/datamodel.html#object.__repr__
  142. # http://docs.python.org/reference/datamodel.html#object.__repr__
  143. # "...The return value must be a string object."
  144. # (str on py2.x, str (unicode) on py3)
  145. data = [8, 5, 3, 5]
  146. index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"),
  147. u("\u03c6")]
  148. cols = [u("\u03c8")]
  149. df = DataFrame(data, columns=cols, index=index1)
  150. self.assertTrue(type(df.__repr__()) == str) # both py2 / 3
  151. def test_repr_no_backslash(self):
  152. with option_context('mode.sim_interactive', True):
  153. df = DataFrame(np.random.randn(10, 4))
  154. self.assertTrue('\\' not in repr(df))
  155. def test_expand_frame_repr(self):
  156. df_small = DataFrame('hello', [0], [0])
  157. df_wide = DataFrame('hello', [0], lrange(10))
  158. df_tall = DataFrame('hello', lrange(30), lrange(5))
  159. with option_context('mode.sim_interactive', True):
  160. with option_context('display.max_columns', 10,
  161. 'display.width',20,
  162. 'display.max_rows', 20,
  163. 'display.show_dimensions', True):
  164. with option_context('display.expand_frame_repr', True):
  165. self.assertFalse(has_truncated_repr(df_small))
  166. self.assertFalse(has_expanded_repr(df_small))
  167. self.assertFalse(has_truncated_repr(df_wide))
  168. self.assertTrue(has_expanded_repr(df_wide))
  169. self.assertTrue(has_vertically_truncated_repr(df_tall))
  170. self.assertTrue(has_expanded_repr(df_tall))
  171. with option_context('display.expand_frame_repr', False):
  172. self.assertFalse(has_truncated_repr(df_small))
  173. self.assertFalse(has_expanded_repr(df_small))
  174. self.assertFalse(has_horizontally_truncated_repr(df_wide))
  175. self.assertFalse(has_expanded_repr(df_wide))
  176. self.assertTrue(has_vertically_truncated_repr(df_tall))
  177. self.assertFalse(has_expanded_repr(df_tall))
  178. def test_repr_non_interactive(self):
  179. # in non interactive mode, there can be no dependency on the
  180. # result of terminal auto size detection
  181. df = DataFrame('hello', lrange(1000), lrange(5))
  182. with option_context('mode.sim_interactive', False,
  183. 'display.width', 0,
  184. 'display.height', 0,
  185. 'display.max_rows',5000):
  186. self.assertFalse(has_truncated_repr(df))
  187. self.assertFalse(has_expanded_repr(df))
  188. def test_repr_max_columns_max_rows(self):
  189. term_width, term_height = get_terminal_size()
  190. if term_width < 10 or term_height < 10:
  191. raise nose.SkipTest("terminal size too small, "
  192. "{0} x {1}".format(term_width, term_height))
  193. def mkframe(n):
  194. index = ['%05d' % i for i in range(n)]
  195. return DataFrame(0, index, index)
  196. df6 = mkframe(6)
  197. df10 = mkframe(10)
  198. with option_context('mode.sim_interactive', True):
  199. with option_context('display.width', term_width * 2):
  200. with option_context('display.max_rows', 5,
  201. 'display.max_columns', 5):
  202. self.assertFalse(has_expanded_repr(mkframe(4)))
  203. self.assertFalse(has_expanded_repr(mkframe(5)))
  204. self.assertFalse(has_expanded_repr(df6))
  205. self.assertTrue(has_doubly_truncated_repr(df6))
  206. with option_context('display.max_rows', 20,
  207. 'display.max_columns', 10):
  208. # Out off max_columns boundary, but no extending
  209. # since not exceeding width
  210. self.assertFalse(has_expanded_repr(df6))
  211. self.assertFalse(has_truncated_repr(df6))
  212. with option_context('display.max_rows', 9,
  213. 'display.max_columns', 10):
  214. # out vertical bounds can not result in exanded repr
  215. self.assertFalse(has_expanded_repr(df10))
  216. self.assertTrue(has_vertically_truncated_repr(df10))
  217. # width=None in terminal, auto detection
  218. with option_context('display.max_columns', 100,
  219. 'display.max_rows', term_width * 20,
  220. 'display.width', None):
  221. df = mkframe((term_width // 7) - 2)
  222. self.assertFalse(has_expanded_repr(df))
  223. df = mkframe((term_width // 7) + 2)
  224. com.pprint_thing(df._repr_fits_horizontal_())
  225. self.assertTrue(has_expanded_repr(df))
  226. def test_to_string_repr_unicode(self):
  227. buf = StringIO()
  228. unicode_values = [u('\u03c3')] * 10
  229. unicode_values = np.array(unicode_values, dtype=object)
  230. df = DataFrame({'unicode': unicode_values})
  231. df.to_string(col_space=10, buf=buf)
  232. # it works!
  233. repr(df)
  234. idx = Index(['abc', u('\u03c3a'), 'aegdvg'])
  235. ser = Series(np.random.randn(len(idx)), idx)
  236. rs = repr(ser).split('\n')
  237. line_len = len(rs[0])
  238. for line in rs[1:]:
  239. try:
  240. line = line.decode(get_option("display.encoding"))
  241. except:
  242. pass
  243. if not line.startswith('dtype:'):
  244. self.assertEqual(len(line), line_len)
  245. # it works even if sys.stdin in None
  246. _stdin= sys.stdin
  247. try:
  248. sys.stdin = None
  249. repr(df)
  250. finally:
  251. sys.stdin = _stdin
  252. def test_to_string_unicode_columns(self):
  253. df = DataFrame({u('\u03c3'): np.arange(10.)})
  254. buf = StringIO()
  255. df.to_string(buf=buf)
  256. buf.getvalue()
  257. buf = StringIO()
  258. df.info(buf=buf)
  259. buf.getvalue()
  260. result = self.frame.to_string()
  261. tm.assert_isinstance(result, compat.text_type)
  262. def test_to_string_utf8_columns(self):
  263. n = u("\u05d0").encode('utf-8')
  264. with option_context('display.max_rows', 1):
  265. df = pd.DataFrame([1, 2], columns=[n])
  266. repr(df)
  267. def test_to_string_unicode_two(self):
  268. dm = DataFrame({u('c/\u03c3'): []})
  269. buf = StringIO()
  270. dm.to_string(buf)
  271. def test_to_string_unicode_three(self):
  272. dm = DataFrame(['\xc2'])
  273. buf = StringIO()
  274. dm.to_string(buf)
  275. def test_to_string_with_formatters(self):
  276. df = DataFrame({'int': [1, 2, 3],
  277. 'float': [1.0, 2.0, 3.0],
  278. 'object': [(1, 2), True, False]},
  279. columns=['int', 'float', 'object'])
  280. formatters = [('int', lambda x: '0x%x' % x),
  281. ('float', lambda x: '[% 4.1f]' % x),
  282. ('object', lambda x: '-%s-' % str(x))]
  283. result = df.to_string(formatters=dict(formatters))
  284. result2 = df.to_string(formatters=lzip(*formatters)[1])
  285. self.assertEqual(result, (' int float object\n'
  286. '0 0x1 [ 1.0] -(1, 2)-\n'
  287. '1 0x2 [ 2.0] -True-\n'
  288. '2 0x3 [ 3.0] -False-'))
  289. self.assertEqual(result, result2)
  290. def test_to_string_with_formatters_unicode(self):
  291. df = DataFrame({u('c/\u03c3'): [1, 2, 3]})
  292. result = df.to_string(formatters={u('c/\u03c3'):
  293. lambda x: '%s' % x})
  294. self.assertEqual(result, u(' c/\u03c3\n') +
  295. '0 1\n1 2\n2 3')
  296. def test_to_string_buffer_all_unicode(self):
  297. buf = StringIO()
  298. empty = DataFrame({u('c/\u03c3'): Series()})
  299. nonempty = DataFrame({u('c/\u03c3'): Series([1, 2, 3])})
  300. print(empty, file=buf)
  301. print(nonempty, file=buf)
  302. # this should work
  303. buf.getvalue()
  304. def test_to_string_with_col_space(self):
  305. df = DataFrame(np.random.random(size=(1, 3)))
  306. c10 = len(df.to_string(col_space=10).split("\n")[1])
  307. c20 = len(df.to_string(col_space=20).split("\n")[1])
  308. c30 = len(df.to_string(col_space=30).split("\n")[1])
  309. self.assertTrue(c10 < c20 < c30)
  310. def test_to_string_truncate_indices(self):
  311. for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex,
  312. tm.makeDateIndex, tm.makePeriodIndex ]:
  313. for column in [ tm.makeStringIndex ]:
  314. for h in [10,20]:
  315. for w in [10,20]:
  316. with option_context("display.expand_frame_repr",False):
  317. df = DataFrame(index=index(h), columns=column(w))
  318. with option_context("display.max_rows", 15):
  319. if h == 20:
  320. self.assertTrue(has_vertically_truncated_repr(df))
  321. else:
  322. self.assertFalse(has_vertically_truncated_repr(df))
  323. with option_context("display.max_columns", 15):
  324. if w == 20:
  325. self.assertTrue(has_horizontally_truncated_repr(df))
  326. else:
  327. self.assertFalse(has_horizontally_truncated_repr(df))
  328. with option_context("display.max_rows", 15,"display.max_columns", 15):
  329. if h == 20 and w == 20:
  330. self.assertTrue(has_doubly_truncated_repr(df))
  331. else:
  332. self.assertFalse(has_doubly_truncated_repr(df))
  333. def test_to_string_truncate_multilevel(self):
  334. arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
  335. ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
  336. df = pd.DataFrame(index=arrays,columns=arrays)
  337. with option_context("display.max_rows", 7,"display.max_columns", 7):
  338. self.assertTrue(has_doubly_truncated_repr(df))
  339. def test_to_html_with_col_space(self):
  340. def check_with_width(df, col_space):
  341. import re
  342. # check that col_space affects HTML generation
  343. # and be very brittle about it.
  344. html = df.to_html(col_space=col_space)
  345. hdrs = [x for x in html.split("\n") if re.search("<th[>\s]", x)]
  346. self.assertTrue(len(hdrs) > 0)
  347. for h in hdrs:
  348. self.assertTrue("min-width" in h)
  349. self.assertTrue(str(col_space) in h)
  350. df = DataFrame(np.random.random(size=(1, 3)))
  351. check_with_width(df, 30)
  352. check_with_width(df, 50)
  353. def test_to_html_with_empty_string_label(self):
  354. # GH3547, to_html regards empty string labels as repeated labels
  355. data = {'c1': ['a', 'b'], 'c2': ['a', ''], 'data': [1, 2]}
  356. df = DataFrame(data).set_index(['c1', 'c2'])
  357. res = df.to_html()
  358. self.assertTrue("rowspan" not in res)
  359. def test_to_html_unicode(self):
  360. # it works!
  361. df = DataFrame({u('\u03c3'): np.arange(10.)})
  362. df.to_html()
  363. df = DataFrame({'A': [u('\u03c3')]})
  364. df.to_html()
    def test_to_html_escaped(self):
        # With default arguments to_html is expected to escape '<', '>' and
        # '&' in column labels, index labels and cell values.
        # NOTE(review): the expected markup is compared verbatim; the
        # leading whitespace inside the literal is not visible in this
        # listing -- confirm against the upstream file.
        a = 'str<ing1 &amp;'
        b = 'stri>ng2 &amp;'

        test_dict = {'co<l1': {a: "<type 'str'>",
                               b: "<type 'str'>"},
                     'co>l2':{a: "<type 'str'>",
                              b: "<type 'str'>"}}
        rs = pd.DataFrame(test_dict).to_html()
        xp = """<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>co&lt;l1</th>
<th>co&gt;l2</th>
</tr>
</thead>
<tbody>
<tr>
<th>str&lt;ing1 &amp;amp;</th>
<td> &lt;type 'str'&gt;</td>
<td> &lt;type 'str'&gt;</td>
</tr>
<tr>
<th>stri&gt;ng2 &amp;amp;</th>
<td> &lt;type 'str'&gt;</td>
<td> &lt;type 'str'&gt;</td>
</tr>
</tbody>
</table>"""
        self.assertEqual(xp, rs)
    def test_to_html_escape_disabled(self):
        # With escape=False labels and cell values are expected to be
        # emitted as given, markup characters included.
        # NOTE(review): the expected markup is compared verbatim; the
        # leading whitespace inside the literal is not visible in this
        # listing -- confirm against the upstream file.
        a = 'str<ing1 &amp;'
        b = 'stri>ng2 &amp;'

        test_dict = {'co<l1': {a: "<b>bold</b>",
                               b: "<b>bold</b>"},
                     'co>l2': {a: "<b>bold</b>",
                               b: "<b>bold</b>"}}
        rs = pd.DataFrame(test_dict).to_html(escape=False)
        xp = """<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>co<l1</th>
<th>co>l2</th>
</tr>
</thead>
<tbody>
<tr>
<th>str<ing1 &amp;</th>
<td> <b>bold</b></td>
<td> <b>bold</b></td>
</tr>
<tr>
<th>stri>ng2 &amp;</th>
<td> <b>bold</b></td>
<td> <b>bold</b></td>
</tr>
</tbody>
</table>"""
        self.assertEqual(xp, rs)
    def test_to_html_multiindex_sparsify_false_multi_sparse(self):
        # With display.multi_sparse off, repeated outer index labels are
        # expected to be written on every row (no rowspan cells).
        # NOTE(review): the expected markup is compared verbatim; the
        # leading whitespace inside the literals is not visible in this
        # listing -- confirm against the upstream file.
        with option_context('display.multi_sparse', False):
            index = pd.MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]],
                                              names=['foo', None])

            # Case 1: MultiIndex on the rows only.
            df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index)

            result = df.to_html()
            expected = """\
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th></th>
<th>0</th>
<th>1</th>
</tr>
<tr>
<th>foo</th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<th>0</th>
<td> 0</td>
<td> 1</td>
</tr>
<tr>
<th>0</th>
<th>1</th>
<td> 2</td>
<td> 3</td>
</tr>
<tr>
<th>1</th>
<th>0</th>
<td> 4</td>
<td> 5</td>
</tr>
<tr>
<th>1</th>
<th>1</th>
<td> 6</td>
<td> 7</td>
</tr>
</tbody>
</table>"""
            self.assertEqual(result, expected)

            # Case 2: MultiIndex on both rows and columns.
            df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]],
                           columns=index[::2], index=index)

            result = df.to_html()
            expected = """\
<table border="1" class="dataframe">
<thead>
<tr>
<th></th>
<th>foo</th>
<th>0</th>
<th>1</th>
</tr>
<tr>
<th></th>
<th></th>
<th>0</th>
<th>0</th>
</tr>
<tr>
<th>foo</th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<th>0</th>
<td> 0</td>
<td> 1</td>
</tr>
<tr>
<th>0</th>
<th>1</th>
<td> 2</td>
<td> 3</td>
</tr>
<tr>
<th>1</th>
<th>0</th>
<td> 4</td>
<td> 5</td>
</tr>
<tr>
<th>1</th>
<th>1</th>
<td> 6</td>
<td> 7</td>
</tr>
</tbody>
</table>"""
            self.assertEqual(result, expected)
    def test_to_html_multiindex_sparsify(self):
        # With default options, repeated outer index labels are expected to
        # be merged into a single rowspan="2" header cell.
        # NOTE(review): the expected markup is compared verbatim; the
        # leading whitespace inside the literals is not visible in this
        # listing -- confirm against the upstream file.
        index = pd.MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]],
                                          names=['foo', None])

        # Case 1: MultiIndex on the rows only.
        df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index)

        result = df.to_html()
        expected = """<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th></th>
<th>0</th>
<th>1</th>
</tr>
<tr>
<th>foo</th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th rowspan="2" valign="top">0</th>
<th>0</th>
<td> 0</td>
<td> 1</td>
</tr>
<tr>
<th>1</th>
<td> 2</td>
<td> 3</td>
</tr>
<tr>
<th rowspan="2" valign="top">1</th>
<th>0</th>
<td> 4</td>
<td> 5</td>
</tr>
<tr>
<th>1</th>
<td> 6</td>
<td> 7</td>
</tr>
</tbody>
</table>"""
        self.assertEqual(result, expected)

        # Case 2: MultiIndex on both rows and columns.
        df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]],
                       columns=index[::2], index=index)

        result = df.to_html()
        expected = """\
<table border="1" class="dataframe">
<thead>
<tr>
<th></th>
<th>foo</th>
<th>0</th>
<th>1</th>
</tr>
<tr>
<th></th>
<th></th>
<th>0</th>
<th>0</th>
</tr>
<tr>
<th>foo</th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th rowspan="2" valign="top">0</th>
<th>0</th>
<td> 0</td>
<td> 1</td>
</tr>
<tr>
<th>1</th>
<td> 2</td>
<td> 3</td>
</tr>
<tr>
<th rowspan="2" valign="top">1</th>
<th>0</th>
<td> 4</td>
<td> 5</td>
</tr>
<tr>
<th>1</th>
<td> 6</td>
<td> 7</td>
</tr>
</tbody>
</table>"""
        self.assertEqual(result, expected)
    def test_to_html_index_formatter(self):
        # A formatter stored under the '__index__' key is expected to be
        # applied to the row labels: 0..3 are rendered as 'a'..'d'.
        # NOTE(review): the expected markup is compared verbatim; the
        # leading whitespace inside the literal is not visible in this
        # listing -- confirm against the upstream file.
        df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]],
                       columns=['foo', None], index=lrange(4))

        # Maps an integer label to the letter at that position.
        f = lambda x: 'abcd'[x]
        result = df.to_html(formatters={'__index__': f})
        expected = """\
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>foo</th>
<th>None</th>
</tr>
</thead>
<tbody>
<tr>
<th>a</th>
<td> 0</td>
<td> 1</td>
</tr>
<tr>
<th>b</th>
<td> 2</td>
<td> 3</td>
</tr>
<tr>
<th>c</th>
<td> 4</td>
<td> 5</td>
</tr>
<tr>
<th>d</th>
<td> 6</td>
<td> 7</td>
</tr>
</tbody>
</table>"""
        self.assertEqual(result, expected)
  663. def test_to_html_regression_GH6098(self):
  664. df = DataFrame({u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')],
  665. u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), u('1er')],
  666. 'données1': np.random.randn(5),
  667. 'données2': np.random.randn(5)})
  668. # it works
  669. df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_()
  670. def test_to_html_truncate(self):
  671. index = pd.DatetimeIndex(start='20010101',freq='D',periods=20)
  672. df = pd.DataFrame(index=index,columns=range(20))
  673. fmt.set_option('display.max_rows',8)
  674. fmt.set_option('display.max_columns',4)
  675. result = df._repr_html_()
  676. expected = '''\
  677. <div style="max-height:1000px;max-width:1500px;overflow:auto;">
  678. <table border="1" class="dataframe">
  679. <thead>
  680. <tr style="text-align: right;">
  681. <th></th>
  682. <th>0</th>
  683. <th>1</th>
  684. <th>...</th>
  685. <th>18</th>
  686. <th>19</th>
  687. </tr>
  688. </thead>
  689. <tbody>
  690. <tr>
  691. <th>2001-01-01</th>
  692. <td> NaN</td>
  693. <td> NaN</td>
  694. <td>...</td>
  695. <td> NaN</td>
  696. <td> NaN</td>
  697. </tr>
  698. <tr>
  699. <th>2001-01-02</th>
  700. <td> NaN</td>
  701. <td> NaN</td>
  702. <td>...</td>
  703. <td> NaN</td>
  704. <td> NaN</td>
  705. </tr>
  706. <tr>
  707. <th>2001-01-03</th>
  708. <td> NaN</td>
  709. <td> NaN</td>
  710. <td>...</td>
  711. <td> NaN</td>
  712. <td> NaN</td>
  713. </tr>
  714. <tr>
  715. <th>2001-01-04</th>
  716. <td> NaN</td>
  717. <td> NaN</td>
  718. <td>...</td>
  719. <td> NaN</td>
  720. <td> NaN</td>
  721. </tr>
  722. <tr>
  723. <th>...</th>
  724. <td>...</td>
  725. <td>...</td>
  726. <td>...</td>
  727. <td>...</td>
  728. <td>...</td>
  729. </tr>
  730. <tr>
  731. <th>2001-01-17</th>
  732. <td> NaN</td>
  733. <td> NaN</td>
  734. <td>...</td>
  735. <td> NaN</td>
  736. <td> NaN</td>
  737. </tr>
  738. <tr>
  739. <th>2001-01-18</th>
  740. <td> NaN</td>
  741. <td> NaN</td>
  742. <td>...</td>
  743. <td> NaN</td>
  744. <td> NaN</td>
  745. </tr>
  746. <tr>
  747. <th>2001-01-19</th>
  748. <td> NaN</td>
  749. <td> NaN</td>
  750. <td>...</td>
  751. <td> NaN</td>
  752. <td> NaN</td>
  753. </tr>
  754. <tr>
  755. <th>2001-01-20</th>
  756. <td> NaN</td>
  757. <td> NaN</td>
  758. <td>...</td>
  759. <td> NaN</td>
  760. <td> NaN</td>
  761. </tr>
  762. </tbody>
  763. </table>
  764. <p>20 rows × 20 columns</p>
  765. </div>'''
  766. if sys.version_info[0] < 3:
  767. expected = expected.decode('utf-8')
  768. self.assertEqual(result, expected)
  769. def test_to_html_truncate_multi_index(self):
  770. arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
  771. ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
  772. df = pd.DataFrame(index=arrays,columns=arrays)
  773. fmt.set_option('display.max_rows',7)
  774. fmt.set_option('display.max_columns',7)
  775. result = df._repr_html_()
  776. expected = '''\
  777. <div style="max-height:1000px;max-width:1500px;overflow:auto;">
  778. <table border="1" class="dataframe">
  779. <thead>
  780. <tr>
  781. <th></th>
  782. <th></th>
  783. <th colspan="2" halign="left">bar</th>
  784. <th>baz</th>
  785. <th>...</th>
  786. <th>foo</th>
  787. <th colspan="2" halign="left">qux</th>
  788. </tr>
  789. <tr>
  790. <th></th>
  791. <th></th>
  792. <th>one</th>
  793. <th>two</th>
  794. <th>one</th>
  795. <th>...</th>
  796. <th>two</th>
  797. <th>one</th>
  798. <th>two</th>
  799. </tr>
  800. </thead>
  801. <tbody>
  802. <tr>
  803. <th rowspan="2" valign="top">bar</th>
  804. <th>one</th>
  805. <td> NaN</td>
  806. <td> NaN</td>
  807. <td> NaN</td>
  808. <td>...</td>
  809. <td> NaN</td>
  810. <td> NaN</td>
  811. <td> NaN</td>
  812. </tr>
  813. <tr>
  814. <th>two</th>
  815. <td> NaN</td>
  816. <td> NaN</td>
  817. <td> NaN</td>
  818. <td>...</td>
  819. <td> NaN</td>
  820. <td> NaN</td>
  821. <td> NaN</td>
  822. </tr>
  823. <tr>
  824. <th>baz</th>
  825. <th>one</th>
  826. <td> NaN</td>
  827. <td> NaN</td>
  828. <td> NaN</td>
  829. <td>...</td>
  830. <td> NaN</td>
  831. <td> NaN</td>
  832. <td> NaN</td>
  833. </tr>
  834. <tr>
  835. <th>...</th>
  836. <th>...</th>
  837. <td>...</td>
  838. <td>...</td>
  839. <td>...</td>
  840. <td>...</td>
  841. <td>...</td>
  842. <td>...</td>
  843. <td>...</td>
  844. </tr>
  845. <tr>
  846. <th>foo</th>
  847. <th>two</th>
  848. <td> NaN</td>
  849. <td> NaN</td>
  850. <td> NaN</td>
  851. <td>...</td>
  852. <td> NaN</td>
  853. <td> NaN</td>
  854. <td> NaN</td>
  855. </tr>
  856. <tr>
  857. <th rowspan="2" valign="top">qux</th>
  858. <th>one</th>
  859. <td> NaN</td>
  860. <td> NaN</td>
  861. <td> NaN</td>
  862. <td>...</td>
  863. <td> NaN</td>
  864. <td> NaN</td>
  865. <td> NaN</td>
  866. </tr>
  867. <tr>
  868. <th>two</th>
  869. <td> NaN</td>
  870. <td> NaN</td>
  871. <td> NaN</td>
  872. <td>...</td>
  873. <td> NaN</td>
  874. <td> NaN</td>
  875. <td> NaN</td>
  876. </tr>
  877. </tbody>
  878. </table>
  879. <p>8 rows × 8 columns</p>
  880. </div>'''
  881. if sys.version_info[0] < 3:
  882. expected = expected.decode('utf-8')
  883. self.assertEqual(result, expected)
  884. def test_to_html_truncate_multi_index_sparse_off(self):
  885. arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
  886. ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
  887. df = pd.DataFrame(index=arrays,columns=arrays)
  888. fmt.set_option('display.max_rows',7)
  889. fmt.set_option('display.max_columns',7)
  890. fmt.set_option('display.multi_sparse',False)
  891. result = df._repr_html_()
  892. expected = '''\
  893. <div style="max-height:1000px;max-width:1500px;overflow:auto;">
  894. <table border="1" class="dataframe">
  895. <thead>
  896. <tr>
  897. <th></th>
  898. <th></th>
  899. <th>bar</th>
  900. <th>bar</th>
  901. <th>baz</th>
  902. <th>...</th>
  903. <th>foo</th>
  904. <th>qux</th>
  905. <th>qux</th>
  906. </tr>
  907. <tr>
  908. <th></th>
  909. <th></th>
  910. <th>one</th>
  911. <th>two</th>
  912. <th>one</th>
  913. <th>...</th>
  914. <th>two</th>
  915. <th>one</th>
  916. <th>two</th>
  917. </tr>
  918. </thead>
  919. <tbody>
  920. <tr>
  921. <th>bar</th>
  922. <th>one</th>
  923. <td> NaN</td>
  924. <td> NaN</td>
  925. <td> NaN</td>
  926. <td>...</td>
  927. <td> NaN</td>
  928. <td> NaN</td>
  929. <td> NaN</td>
  930. </tr>
  931. <tr>
  932. <th>bar</th>
  933. <th>two</th>
  934. <td> NaN</td>
  935. <td> NaN</td>
  936. <td> NaN</td>
  937. <td>...</td>
  938. <td> NaN</td>
  939. <td> NaN</td>
  940. <td> NaN</td>
  941. </tr>
  942. <tr>
  943. <th>baz</th>
  944. <th>one</th>
  945. <td> NaN</td>
  946. <td> NaN</td>
  947. <td> NaN</td>
  948. <td>...</td>
  949. <td> NaN</td>
  950. <td> NaN</td>
  951. <td> NaN</td>
  952. </tr>
  953. <tr>
  954. <th>foo</th>
  955. <th>two</th>
  956. <td> NaN</td>
  957. <td> NaN</td>
  958. <td> NaN</td>
  959. <td>...</td>
  960. <td> NaN</td>
  961. <td> NaN</td>
  962. <td> NaN</td>
  963. </tr>
  964. <tr>
  965. <th>qux</th>
  966. <th>one</th>
  967. <td> NaN</td>
  968. <td> NaN</td>
  969. <td> NaN</td>
  970. <td>...</td>
  971. <td> NaN</td>
  972. <td> NaN</td>
  973. <td> NaN</td>
  974. </tr>
  975. <tr>
  976. <th>qux</th>
  977. <th>two</th>
  978. <td> NaN</td>
  979. <td> NaN</td>
  980. <td> NaN</td>
  981. <td>...</td>
  982. <td> NaN</td>
  983. <td> NaN</td>
  984. <td> NaN</td>
  985. </tr>
  986. </tbody>
  987. </table>
  988. <p>8 rows × 8 columns</p>
  989. </div>'''
  990. if sys.version_info[0] < 3:
  991. expected = expected.decode('utf-8')
  992. self.assertEqual(result, expected)
  993. def test_nonunicode_nonascii_alignment(self):
  994. df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]])
  995. rep_str = df.to_string()
  996. lines = rep_str.split('\n')
  997. self.assertEqual(len(lines[1]), len(lines[2]))
  998. def test_unicode_problem_decoding_as_ascii(self):
  999. dm = DataFrame({u('c/\u03c3'): Series({'test': np.NaN})})
  1000. compat.text_type(dm.to_string())
  1001. def test_string_repr_encoding(self):
  1002. filepath = tm.get_data_path('unicode_series.csv')
  1003. df = pandas.read_csv(filepath, header=None, encoding='latin1')
  1004. repr(df)
  1005. repr(df[1])
  1006. def test_repr_corner(self):
  1007. # representing infs poses no problems
  1008. df = DataFrame({'foo': np.inf * np.empty(10)})
  1009. foo = repr(df)
  1010. def test_frame_info_encoding(self):
  1011. index = ['\'Til There Was You (1997)',
  1012. 'ldum klaka (Cold Fever) (1994)']
  1013. fmt.set_option('display.max_rows', 1)
  1014. df = DataFrame(columns=['a', 'b', 'c'], index=index)
  1015. repr(df)
  1016. repr(df.T)
  1017. fmt.set_option('display.max_rows', 200)
  1018. def test_pprint_thing(self):
  1019. import nose
  1020. from pandas.core.common import pprint_thing as pp_t
  1021. if PY3:
  1022. raise nose.SkipTest("doesn't work on Python 3")
  1023. self.assertEqual(pp_t('a') , u('a'))
  1024. self.assertEqual(pp_t(u('a')) , u('a'))
  1025. self.assertEqual(pp_t(None) , 'None')
  1026. self.assertEqual(pp_t(u('\u05d0'), quote_strings=True),
  1027. u("u'\u05d0'"))
  1028. self.assertEqual(pp_t(u('\u05d0'), quote_strings=False),
  1029. u('\u05d0'))
  1030. self.assertEqual(pp_t((u('\u05d0'),
  1031. u('\u05d1')), quote_strings=True),
  1032. u("(u'\u05d0', u'\u05d1')"))
  1033. self.assertEqual(pp_t((u('\u05d0'), (u('\u05d1'),
  1034. u('\u05d2'))),
  1035. quote_strings=True),
  1036. u("(u'\u05d0', (u'\u05d1', u'\u05d2'))"))
  1037. self.assertEqual(pp_t(('foo', u('\u05d0'), (u('\u05d0'),
  1038. u('\u05d0'))),
  1039. quote_strings=True),
  1040. u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))"))
  1041. # escape embedded tabs in string
  1042. # GH #2038
  1043. self.assertTrue(not "\t" in pp_t("a\tb", escape_chars=("\t",)))
  1044. def test_wide_repr(self):
  1045. with option_context('mode.sim_interactive', True, 'display.show_dimensions', True):
  1046. col = lambda l, k: [tm.rands(k) for _ in range(l)]
  1047. max_cols = get_option('display.max_columns')
  1048. df = DataFrame([col(max_cols - 1, 25) for _ in range(10)])
  1049. set_option('display.expand_frame_repr', False)
  1050. rep_str = repr(df)
  1051. assert "10 rows x %d columns" % (max_cols - 1) in rep_str
  1052. set_option('display.expand_frame_repr', True)
  1053. wide_repr = repr(df)
  1054. self.assertNotEqual(rep_str, wide_repr)
  1055. with option_context('display.width', 120):
  1056. wider_repr = repr(df)
  1057. self.assertTrue(len(wider_repr) < len(wide_repr))
  1058. reset_option('display.expand_frame_repr')
  1059. def test_wide_repr_wide_columns(self):
  1060. with option_context('mode.sim_interactive', True):
  1061. df = DataFrame(randn(5, 3), columns=['a' * 90, 'b' * 90, 'c' * 90])
  1062. rep_str = repr(df)
  1063. self.assertEqual(len(rep_str.splitlines()), 20)
  1064. def test_wide_repr_named(self):
  1065. with option_context('mode.sim_interactive', True):
  1066. col = lambda l, k: [tm.rands(k) for _ in range(l)]
  1067. max_cols = get_option('display.max_columns')
  1068. df = DataFrame([col(max_cols-1, 25) for _ in range(10)])
  1069. df.index.name = 'DataFrame Index'
  1070. set_option('display.expand_frame_repr', False)
  1071. rep_str = repr(df)
  1072. set_option('display.expand_frame_repr', True)
  1073. wide_repr = repr(df)
  1074. self.assertNotEqual(rep_str, wide_repr)
  1075. with option_context('display.width', 150):
  1076. wider_repr = repr(df)
  1077. self.assertTrue(len(wider_repr) < len(wide_repr))
  1078. for line in wide_repr.splitlines()[1::13]:
  1079. self.assertIn('DataFrame Index', line)
  1080. reset_option('display.expand_frame_repr')
  1081. def test_wide_repr_multiindex(self):
  1082. with option_context('mode.sim_interactive', True):
  1083. col = lambda l, k: [tm.rands(k) for _ in range(l)]
  1084. midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
  1085. np.array(col(10, 5))])
  1086. max_cols = get_option('display.max_columns')
  1087. df = DataFrame([col(max_cols-1, 25) for _ in range(10)],
  1088. index=midx)
  1089. df.index.names = ['Level 0', 'Level 1']
  1090. set_option('display.expand_frame_repr', False)
  1091. rep_str = repr(df)
  1092. set_option('display.expand_frame_repr', True)
  1093. wide_repr = repr(df)
  1094. self.assertNotEqual(rep_str, wide_repr)
  1095. with option_context('display.width', 150):
  1096. wider_repr = repr(df)
  1097. self.assertTrue(len(wider_repr) < len(wide_repr))
  1098. for line in wide_repr.splitlines()[1::13]:
  1099. self.assertIn('Level 0 Level 1', line)
  1100. reset_option('display.expand_frame_repr')
  1101. def test_wide_repr_multiindex_cols(self):
  1102. with option_context('mode.sim_interactive', True):
  1103. max_cols = get_option('display.max_columns')
  1104. col = lambda l, k: [tm.rands(k) for _ in range(l)]
  1105. midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)),
  1106. np.array(col(10, 5))])
  1107. mcols = pandas.MultiIndex.from_arrays([np.array(col(max_cols-1, 3)),
  1108. np.array(col(max_cols-1, 3))])
  1109. df = DataFrame([col(max_cols-1, 25) for _ in range(10)],
  1110. index=midx, columns=mcols)
  1111. df.index.names = ['Level 0', 'Level 1']
  1112. set_option('display.expand_frame_repr', False)
  1113. rep_str = repr(df)
  1114. set_option('display.expand_frame_repr', True)
  1115. wide_repr = repr(df)
  1116. self.assertNotEqual(rep_str, wide_repr)
  1117. with option_context('display.width', 150):
  1118. wider_repr = repr(df)
  1119. self.assertTrue(len(wider_repr) < len(wide_repr))
  1120. reset_option('display.expand_frame_repr')
  1121. def test_wide_repr_unicode(self):
  1122. with option_context('mode.sim_interactive', True):
  1123. col = lambda l, k: [tm.randu(k) for _ in range(l)]
  1124. max_cols = get_option('display.max_columns')
  1125. df = DataFrame([col(max_cols-1, 25) for _ in range(10)])
  1126. set_option('display.expand_frame_repr', False)
  1127. rep_str = repr(df)
  1128. set_option('display.expand_frame_repr', True)
  1129. wide_repr = repr(df)
  1130. self.assertNotEqual(rep_str, wide_repr)
  1131. with option_context('display.width', 150):
  1132. wider_repr = repr(df)
  1133. self.assertTrue(len(wider_repr) < len(wide_repr))
  1134. reset_option('display.expand_frame_repr')
  1135. def test_wide_repr_wide_long_columns(self):
  1136. with option_context('mode.sim_interactive', True):
  1137. df = DataFrame(
  1138. {'a': ['a' * 30, 'b' * 30], 'b': ['c' * 70, 'd' * 80]})
  1139. result = repr(df)
  1140. self.assertTrue('ccccc' in result)
  1141. self.assertTrue('ddddd' in result)
  1142. def test_long_series(self):
  1143. n = 1000
  1144. s = Series(np.random.randint(-50,50,n),index=['s%04d' % x for x in range(n)], dtype='int64')
  1145. import re
  1146. str_rep = str(s)
  1147. nmatches = len(re.findall('dtype',str_rep))
  1148. self.assertEqual(nmatches, 1)
def test_index_with_nan(self):
    # Checks to_string() output for frames whose index (single or
    # multi-level) contains NaN labels.
    # NOTE(review): the column spacing inside the `expected` literals below
    # looks collapsed to single spaces in this copy of the file -- confirm
    # each literal against real to_string() output before relying on it.
    # GH 2850
    df = DataFrame({'id1': {0: '1a3', 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'},
                    'id3': {0: '78d', 1: '79d'}, 'value': {0: 123, 1: 64}})

    # multi-index
    y = df.set_index(['id1', 'id2', 'id3'])
    result = y.to_string()
    expected = u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64')
    self.assertEqual(result, expected)

    # index
    y = df.set_index('id2')
    result = y.to_string()
    expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64')
    self.assertEqual(result, expected)

    # with append (this failed in 0.12)
    y = df.set_index(['id1', 'id2']).set_index('id3', append=True)
    result = y.to_string()
    expected = u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64')
    self.assertEqual(result, expected)

    # all-nan in mi
    df2 = df.copy()
    df2.ix[:,'id2'] = np.nan
    y = df2.set_index('id2')
    result = y.to_string()
    expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64')
    self.assertEqual(result, expected)

    # partial nan in mi
    # (id2 is again set to NaN for every row; only the id3 level keeps
    # real labels)
    df2 = df.copy()
    df2.ix[:,'id2'] = np.nan
    y = df2.set_index(['id2','id3'])
    result = y.to_string()
    expected = u(' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64')
    self.assertEqual(result, expected)

    # first row has NaN in all three index levels
    df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'},
                    'id3': {0: np.nan, 1: '79d'}, 'value': {0: 123, 1: 64}})

    y = df.set_index(['id1','id2','id3'])
    result = y.to_string()
    expected = u(' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64')
    self.assertEqual(result, expected)
  1188. def test_to_string(self):
  1189. from pandas import read_table
  1190. import re
  1191. # big mixed
  1192. biggie = DataFrame({'A': randn(200),
  1193. 'B': tm.makeStringIndex(200)},
  1194. index=lrange(200))
  1195. biggie['A'][:20] = nan
  1196. biggie['B'][:20] = nan
  1197. s = biggie.to_string()
  1198. buf = StringIO()
  1199. retval = biggie.to_string(buf=buf)
  1200. self.assertIsNone(retval)
  1201. self.assertEqual(buf.getvalue(), s)
  1202. tm.assert_isinstance(s, compat.string_types)
  1203. # print in right order
  1204. result = biggie.to_string(columns=['B', 'A'], col_space=17,
  1205. float_format='%.5f'.__mod__)
  1206. lines = result.split('\n')
  1207. header = lines[0].strip().split()
  1208. joined = '\n'.join([re.sub('\s+', ' ', x).strip() for x in lines[1:]])
  1209. recons = read_table(StringIO(joined), names=header,
  1210. header=None, sep=' ')
  1211. tm.assert_series_equal(recons['B'], biggie['B'])
  1212. self.assertEqual(recons['A'].count(), biggie['A'].count())
  1213. self.assertTrue((np.abs(recons['A'].dropna() -
  1214. biggie['A'].dropna()) < 0.1).all())
  1215. # expected = ['B', 'A']
  1216. # self.assertEqual(header, expected)
  1217. result = biggie.to_string(columns=['A'], col_space=17)
  1218. header = result.split('\n')[0].strip().split()
  1219. expected = ['A']
  1220. self.assertEqual(header, expected)
  1221. biggie.to_string(columns=['B', 'A'],
  1222. formatters={'A': lambda x: '%.1f' % x})
  1223. biggie.to_string(columns=['B', 'A'], float_format=str)
  1224. biggie.to_string(columns=['B', 'A'], col_space=12,
  1225. float_format=str)
  1226. frame = DataFrame(index=np.arange(200))
  1227. frame.to_string()
  1228. def test_to_string_no_header(self):
  1229. df = DataFrame({'x': [1, 2, 3],
  1230. 'y': [4, 5, 6]})
  1231. df_s = df.to_string(header=False)
  1232. expected = "0 1 4\n1 2 5\n2 3 6"
  1233. assert(df_s == expected)
  1234. def test_to_string_no_index(self):
  1235. df = DataFrame({'x': [1, 2, 3],
  1236. 'y': [4, 5, 6]})
  1237. df_s = df.to_string(index=False)
  1238. expected = " x y\n 1 4\n 2 5\n 3 6"
  1239. assert(df_s == expected)
def test_to_string_float_formatting(self):
    # Checks to_string() float rendering first with display.precision set
    # to 6, then again after the display options have been reset.
    # NOTE(review): the column spacing inside the `expected` literals looks
    # collapsed in this copy of the file -- confirm against real output.
    self.reset_display_options()
    fmt.set_option('display.precision', 6, 'display.column_space',
                   12, 'display.notebook_repr_html', False)

    df = DataFrame({'x': [0, 0.25, 3456.000, 12e+45, 1.64e+6,
                          1.7e+8, 1.253456, np.pi, -1e6]})

    df_s = df.to_string()

    # Python 2.5 just wants me to be sad. And debian 32-bit
    # sys.version_info[0] == 2 and sys.version_info[1] < 6:
    # The two branches differ only in exponent width (three digits versus
    # two); _three_digit_exp() presumably detects which one the platform
    # prints -- TODO confirm against its definition.
    if _three_digit_exp():
        expected = (' x\n0 0.00000e+000\n1 2.50000e-001\n'
                    '2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n'
                    '5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n'
                    '8 -1.00000e+006')
    else:
        expected = (' x\n0 0.00000e+00\n1 2.50000e-01\n'
                    '2 3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n'
                    '5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n'
                    '8 -1.00000e+06')
    assert(df_s == expected)

    # A column without extreme magnitudes is expected in fixed notation.
    df = DataFrame({'x': [3234, 0.253]})
    df_s = df.to_string()

    expected = (' x\n'
                '0 3234.000\n'
                '1 0.253')
    assert(df_s == expected)

    # After the reset, display.precision must read 7 again.
    self.reset_display_options()
    self.assertEqual(get_option("display.precision"), 7)

    df = DataFrame({'x': [1e9, 0.2512]})
    df_s = df.to_string()
    # Python 2.5 just wants me to be sad. And debian 32-bit
    # sys.version_info[0] == 2 and sys.version_info[1] < 6:
    if _three_digit_exp():
        expected = (' x\n'
                    '0 1.000000e+009\n'
                    '1 2.512000e-001')
    else:
        expected = (' x\n'
                    '0 1.000000e+09\n'
                    '1 2.512000e-01')
    assert(df_s == expected)
def test_to_string_small_float_values(self):
    # A column mixing 1.5 with very small magnitudes is expected to render
    # entirely in exponent notation.
    # NOTE(review): the column spacing inside the `expected` literals looks
    # collapsed in this copy of the file -- confirm against real output.
    df = DataFrame({'a': [1.5, 1e-17, -5.5e-7]})

    result = df.to_string()
    # sadness per above
    # The branch is chosen by whether '%.4g' prints a three-digit exponent
    # on this platform; the two literals differ only in exponent width.
    if '%.4g' % 1.7e8 == '1.7e+008':
        expected = (' a\n'
                    '0 1.500000e+000\n'
                    '1 1.000000e-017\n'
                    '2 -5.500000e-007')
    else:
        expected = (' a\n'
                    '0 1.500000e+00\n'
                    '1 1.000000e-17\n'
                    '2 -5.500000e-07')
    self.assertEqual(result, expected)

    # but not all exactly zero
    df = df * 0
    result = df.to_string()
    # NOTE(review): this `expected` is never compared with `result`, so the
    # case is not actually checked; its header also reads '0' although the
    # column is named 'a'. Decide on the intended output before adding an
    # assertion.
    expected = (' 0\n'
                '0 0\n'
                '1 0\n'
                '2 -0')
  1303. def test_to_string_float_index(self):
  1304. index = Index([1.5, 2, 3, 4, 5])
  1305. df = DataFrame(lrange(5), index=index)
  1306. result = df.to_string()
  1307. expected = (' 0\n'
  1308. '1.5 0\n'
  1309. '2.0 1\n'
  1310. '3.0 2\n'
  1311. '4.0 3\n'
  1312. '5.0 4')
  1313. self.assertEqual(result, expected)
  1314. def test_to_string_ascii_error(self):
  1315. data = [('0 ',
  1316. u(' .gitignore '),
  1317. u(' 5 '),
  1318. ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80'
  1319. '\xa2\xe2\x80\xa2\xe2\x80\xa2')]
  1320. df = DataFrame(data)
  1321. # it works!
  1322. repr(df)
  1323. def test_to_string_int_formatting(self):
  1324. df = DataFrame({'x': [-15, 20, 25, -35]})
  1325. self.assertTrue(issubclass(df['x'].dtype.type, np.integer))
  1326. output = df.to_string()
  1327. expected = (' x\n'
  1328. '0 -15\n'
  1329. '1 20\n'
  1330. '2 25\n'
  1331. '3 -35')
  1332. self.assertEqual(output, expected)
def test_to_string_index_formatter(self):
    # A formatter registered under the '__index__' key must be applied to
    # the index labels; here it maps positions 0, 1, 2 to 'a', 'b', 'c'.
    # NOTE(review): leading spaces and column spacing inside the `xp`
    # literal look stripped in this copy of the file -- confirm it against
    # real to_string() output.
    df = DataFrame([lrange(5), lrange(5, 10), lrange(10, 15)])

    rs = df.to_string(formatters={'__index__': lambda x: 'abc'[x]})

    # The backslashes suppress the newline after the opening quotes and the
    # one before the closing quotes.
    xp = """\
0 1 2 3 4
a 0 1 2 3 4
b 5 6 7 8 9
c 10 11 12 13 14\
"""

    self.assertEqual(rs, xp)
  1343. def test_to_string_left_justify_cols(self):
  1344. self.reset_display_options()
  1345. df = DataFrame({'x': [3234, 0.253]})
  1346. df_s = df.to_string(justify='left')
  1347. expected = (' x \n'
  1348. '0 3234.000\n'
  1349. '1 0.253')
  1350. assert(df_s == expected)
  1351. def test_to_string_format_na(self):
  1352. self.reset_display_options()
  1353. df = DataFrame({'A': [np.nan, -1, -2.1234, 3, 4],
  1354. 'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
  1355. result = df.to_string()
  1356. expected = (' A B\n'
  1357. '0 NaN NaN\n'
  1358. '1 -1.0000 foo\n'
  1359. '2 -2.1234 foooo\n'
  1360. '3 3.0000 fooooo\n'
  1361. '4 4.0000 bar')
  1362. self.assertEqual(result, expected)
  1363. df = DataFrame({'A': [np.nan, -1., -2., 3., 4.],
  1364. 'B': [np.nan, 'foo', 'foooo', 'fooooo', 'bar']})
  1365. result = df.to_string()
  1366. expected = (' A B\n'
  1367. '0 NaN NaN\n'
  1368. '1 -1 foo\n'
  1369. '2 -2 foooo\n'
  1370. '3 3 fooooo\n'
  1371. '4 4 bar')
  1372. self.assertEqual(result, expected)
  1373. def test_to_string_line_width(self):
  1374. df = pd.DataFrame(123, lrange(10, 15), lrange(30))
  1375. s = df.to_string(line_width=80)
  1376. self.assertEqual(max(len(l) for l in s.split('\n')), 80)
  1377. def test_show_dimensions(self):
  1378. df = pd.DataFrame(123, lrange(10, 15), lrange(30))
  1379. with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width',
  1380. 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', True):
  1381. self.assertTrue('5 rows' in str(df))
  1382. self.assertTrue('5 rows' in df._repr_html_())
  1383. with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width',
  1384. 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', False):
  1385. self.assertFalse('5 rows' in str(df))
  1386. self.assertFalse('5 rows' in df._repr_html_())
  1387. with option_context('display.max_rows', 2, 'display.max_columns', 2, 'display.width',
  1388. 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', 'truncate'):
  1389. self.assertTrue('5 ro…

Large files are truncated, but you can click here to view the full file