PageRenderTime 66ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/pandas/core/format.py

http://github.com/pydata/pandas
Python | 2298 lines | 2261 code | 24 blank | 13 comment | 33 complexity | b71d18b5fbb16c4590d0e20f32c22215 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. #coding: utf-8
  2. from __future__ import print_function
  3. # pylint: disable=W0141
  4. import sys
  5. import re
  6. from pandas.core.base import PandasObject
  7. from pandas.core.common import adjoin, isnull, notnull
  8. from pandas.core.index import Index, MultiIndex, _ensure_index
  9. from pandas import compat
  10. from pandas.compat import(StringIO, lzip, range, map, zip, reduce, u,
  11. OrderedDict)
  12. from pandas.util.terminal import get_terminal_size
  13. from pandas.core.config import get_option, set_option, reset_option
  14. import pandas.core.common as com
  15. import pandas.lib as lib
  16. from pandas.tslib import iNaT
  17. import numpy as np
  18. import itertools
  19. import csv
  20. from datetime import time
  21. from pandas.tseries.period import PeriodIndex, DatetimeIndex
  22. docstring_to_string = """
  23. Parameters
  24. ----------
  25. frame : DataFrame
  26. object to render
  27. buf : StringIO-like, optional
  28. buffer to write to
  29. columns : sequence, optional
  30. the subset of columns to write; default None writes all columns
  31. col_space : int, optional
  32. the minimum width of each column
  33. header : bool, optional
  34. whether to print column labels, default True
  35. index : bool, optional
  36. whether to print index (row) labels, default True
  37. na_rep : string, optional
  38. string representation of NAN to use, default 'NaN'
  39. formatters : list or dict of one-parameter functions, optional
  40. formatter functions to apply to columns' elements by position or name,
  41. default None. The result of each function must be a unicode string.
  42. List must be of length equal to the number of columns.
  43. float_format : one-parameter function, optional
  44. formatter function to apply to columns' elements if they are floats,
  45. default None. The result of this function must be a unicode string.
  46. sparsify : bool, optional
  47. Set to False for a DataFrame with a hierarchical index to print every
  48. multiindex key at each row, default True
  49. justify : {'left', 'right'}, default None
  50. Left or right-justify the column labels. If None uses the option from
  51. the print configuration (controlled by set_option), 'right' out
  52. of the box.
  53. index_names : bool, optional
  54. Prints the names of the indexes, default True
  55. force_unicode : bool, default False
  56. Always return a unicode result. Deprecated in v0.10.0 as string
  57. formatting is now rendered to unicode by default.
  58. Returns
  59. -------
  60. formatted : string (or unicode, depending on data and options)"""
  61. class CategoricalFormatter(object):
  62. def __init__(self, categorical, buf=None, length=True,
  63. na_rep='NaN', name=False, footer=True):
  64. self.categorical = categorical
  65. self.buf = buf if buf is not None else StringIO(u(""))
  66. self.name = name
  67. self.na_rep = na_rep
  68. self.length = length
  69. self.footer = footer
  70. def _get_footer(self):
  71. footer = ''
  72. if self.name:
  73. name = com.pprint_thing(self.categorical.name,
  74. escape_chars=('\t', '\r', '\n'))
  75. footer += ('Name: %s' % name if self.categorical.name is not None
  76. else '')
  77. if self.length:
  78. if footer:
  79. footer += ', '
  80. footer += "Length: %d" % len(self.categorical)
  81. levheader = 'Levels (%d): ' % len(self.categorical.levels)
  82. # TODO: should max_line_width respect a setting?
  83. levstring = np.array_repr(self.categorical.levels, max_line_width=60)
  84. indent = ' ' * (levstring.find('[') + len(levheader) + 1)
  85. lines = levstring.split('\n')
  86. levstring = '\n'.join([lines[0]] +
  87. [indent + x.lstrip() for x in lines[1:]])
  88. if footer:
  89. footer += ', '
  90. footer += levheader + levstring
  91. return compat.text_type(footer)
  92. def _get_formatted_values(self):
  93. return format_array(np.asarray(self.categorical), None,
  94. float_format=None,
  95. na_rep=self.na_rep)
  96. def to_string(self):
  97. categorical = self.categorical
  98. if len(categorical) == 0:
  99. if self.footer:
  100. return self._get_footer()
  101. else:
  102. return u('')
  103. fmt_values = self._get_formatted_values()
  104. pad_space = 10
  105. result = ['%s' % i for i in fmt_values]
  106. if self.footer:
  107. footer = self._get_footer()
  108. if footer:
  109. result.append(footer)
  110. return compat.text_type(u('\n').join(result))
  111. class SeriesFormatter(object):
  112. def __init__(self, series, buf=None, header=True, length=True,
  113. na_rep='NaN', name=False, float_format=None, dtype=True):
  114. self.series = series
  115. self.buf = buf if buf is not None else StringIO()
  116. self.name = name
  117. self.na_rep = na_rep
  118. self.length = length
  119. self.header = header
  120. if float_format is None:
  121. float_format = get_option("display.float_format")
  122. self.float_format = float_format
  123. self.dtype = dtype
  124. def _get_footer(self):
  125. footer = u('')
  126. if self.name:
  127. if getattr(self.series.index, 'freq', None):
  128. footer += 'Freq: %s' % self.series.index.freqstr
  129. if footer and self.series.name is not None:
  130. footer += ', '
  131. series_name = com.pprint_thing(self.series.name,
  132. escape_chars=('\t', '\r', '\n'))
  133. footer += ("Name: %s" %
  134. series_name) if self.series.name is not None else ""
  135. if self.length:
  136. if footer:
  137. footer += ', '
  138. footer += 'Length: %d' % len(self.series)
  139. if self.dtype:
  140. name = getattr(self.series.dtype, 'name', None)
  141. if name:
  142. if footer:
  143. footer += ', '
  144. footer += 'dtype: %s' % com.pprint_thing(name)
  145. return compat.text_type(footer)
  146. def _get_formatted_index(self):
  147. index = self.series.index
  148. is_multi = isinstance(index, MultiIndex)
  149. if is_multi:
  150. have_header = any(name for name in index.names)
  151. fmt_index = index.format(names=True)
  152. else:
  153. have_header = index.name is not None
  154. fmt_index = index.format(name=True)
  155. return fmt_index, have_header
  156. def _get_formatted_values(self):
  157. return format_array(self.series.values, None,
  158. float_format=self.float_format,
  159. na_rep=self.na_rep)
  160. def to_string(self):
  161. series = self.series
  162. if len(series) == 0:
  163. return u('')
  164. fmt_index, have_header = self._get_formatted_index()
  165. fmt_values = self._get_formatted_values()
  166. maxlen = max(len(x) for x in fmt_index)
  167. pad_space = min(maxlen, 60)
  168. result = ['%s %s'] * len(fmt_values)
  169. for i, (k, v) in enumerate(zip(fmt_index[1:], fmt_values)):
  170. idx = k.ljust(pad_space)
  171. result[i] = result[i] % (idx, v)
  172. if self.header and have_header:
  173. result.insert(0, fmt_index[0])
  174. footer = self._get_footer()
  175. if footer:
  176. result.append(footer)
  177. return compat.text_type(u('\n').join(result))
  178. def _strlen_func():
  179. if compat.PY3: # pragma: no cover
  180. _strlen = len
  181. else:
  182. encoding = get_option("display.encoding")
  183. def _strlen(x):
  184. try:
  185. return len(x.decode(encoding))
  186. except UnicodeError:
  187. return len(x)
  188. return _strlen
  189. class TableFormatter(object):
  190. is_truncated = False
  191. show_dimensions = None
  192. @property
  193. def should_show_dimensions(self):
  194. return self.show_dimensions is True or (self.show_dimensions == 'truncate' and self.is_truncated)
  195. def _get_formatter(self, i):
  196. if isinstance(self.formatters, (list, tuple)):
  197. if com.is_integer(i):
  198. return self.formatters[i]
  199. else:
  200. return None
  201. else:
  202. if com.is_integer(i) and i not in self.columns:
  203. i = self.columns[i]
  204. return self.formatters.get(i, None)
  205. class DataFrameFormatter(TableFormatter):
  206. """
  207. Render a DataFrame
  208. self.to_string() : console-friendly tabular output
  209. self.to_html() : html table
  210. self.to_latex() : LaTeX tabular environment table
  211. """
  212. __doc__ = __doc__ if __doc__ else ''
  213. __doc__ += docstring_to_string
  214. def __init__(self, frame, buf=None, columns=None, col_space=None,
  215. header=True, index=True, na_rep='NaN', formatters=None,
  216. justify=None, float_format=None, sparsify=None,
  217. index_names=True, line_width=None, max_rows=None,
  218. max_cols=None, show_dimensions=False, **kwds):
  219. self.frame = frame
  220. self.buf = buf if buf is not None else StringIO()
  221. self.show_index_names = index_names
  222. if sparsify is None:
  223. sparsify = get_option("display.multi_sparse")
  224. self.sparsify = sparsify
  225. self.float_format = float_format
  226. self.formatters = formatters if formatters is not None else {}
  227. self.na_rep = na_rep
  228. self.col_space = col_space
  229. self.header = header
  230. self.index = index
  231. self.line_width = line_width
  232. self.max_rows = max_rows
  233. self.max_cols = max_cols
  234. self.max_rows_displayed = min(max_rows or len(self.frame),
  235. len(self.frame))
  236. self.show_dimensions = show_dimensions
  237. if justify is None:
  238. self.justify = get_option("display.colheader_justify")
  239. else:
  240. self.justify = justify
  241. self.kwds = kwds
  242. if columns is not None:
  243. self.columns = _ensure_index(columns)
  244. self.frame = self.frame[self.columns]
  245. else:
  246. self.columns = frame.columns
  247. self._chk_truncate()
  248. def _chk_truncate(self):
  249. from pandas.tools.merge import concat
  250. truncate_h = self.max_cols and (len(self.columns) > self.max_cols)
  251. truncate_v = self.max_rows and (len(self.frame) > self.max_rows)
  252. # Cut the data to the information actually printed
  253. max_cols = self.max_cols
  254. max_rows = self.max_rows
  255. frame = self.frame
  256. if truncate_h:
  257. if max_cols > 1:
  258. col_num = (max_cols // 2)
  259. frame = concat( (frame.iloc[:,:col_num],frame.iloc[:,-col_num:]),axis=1 )
  260. else:
  261. col_num = max_cols
  262. frame = frame.iloc[:,:max_cols]
  263. self.tr_col_num = col_num
  264. if truncate_v:
  265. if max_rows > 1:
  266. row_num = max_rows // 2
  267. frame = concat( (frame.iloc[:row_num,:],frame.iloc[-row_num:,:]) )
  268. else:
  269. row_num = max_rows
  270. frame = frame.iloc[:max_rows,:]
  271. self.tr_row_num = row_num
  272. self.tr_frame = frame
  273. self.truncate_h = truncate_h
  274. self.truncate_v = truncate_v
  275. self.is_truncated = self.truncate_h or self.truncate_v
  276. def _to_str_columns(self):
  277. """
  278. Render a DataFrame to a list of columns (as lists of strings).
  279. """
  280. _strlen = _strlen_func()
  281. frame = self.tr_frame
  282. # may include levels names also
  283. str_index = self._get_formatted_index(frame)
  284. str_columns = self._get_formatted_column_labels(frame)
  285. if self.header:
  286. stringified = []
  287. col_headers = frame.columns
  288. for i, c in enumerate(frame):
  289. cheader = str_columns[i]
  290. max_colwidth = max(self.col_space or 0,
  291. *(_strlen(x) for x in cheader))
  292. fmt_values = self._format_col(i)
  293. fmt_values = _make_fixed_width(fmt_values, self.justify,
  294. minimum=max_colwidth)
  295. max_len = max(np.max([_strlen(x) for x in fmt_values]),
  296. max_colwidth)
  297. if self.justify == 'left':
  298. cheader = [x.ljust(max_len) for x in cheader]
  299. else:
  300. cheader = [x.rjust(max_len) for x in cheader]
  301. stringified.append(cheader + fmt_values)
  302. else:
  303. stringified = []
  304. for i, c in enumerate(frame):
  305. formatter = self._get_formatter(i)
  306. fmt_values = self._format_col(i)
  307. fmt_values = _make_fixed_width(fmt_values, self.justify)
  308. stringified.append(fmt_values)
  309. strcols = stringified
  310. if self.index:
  311. strcols.insert(0, str_index)
  312. # Add ... to signal truncated
  313. truncate_h = self.truncate_h
  314. truncate_v = self.truncate_v
  315. if truncate_h:
  316. col_num = self.tr_col_num
  317. col_width = len(strcols[col_num][0]) # infer from column header
  318. strcols.insert(col_num + 1, ['...'.center(col_width)] * (len(str_index)))
  319. if truncate_v:
  320. n_header_rows = len(str_index) - len(frame)
  321. row_num = self.tr_row_num
  322. for ix,col in enumerate(strcols):
  323. cwidth = len(strcols[ix][row_num]) # infer from above row
  324. is_dot_col = False
  325. if truncate_h:
  326. is_dot_col = ix == col_num + 1
  327. if cwidth > 3 or is_dot_col:
  328. my_str = '...'
  329. else:
  330. my_str = '..'
  331. if ix == 0:
  332. dot_str = my_str.ljust(cwidth)
  333. elif is_dot_col:
  334. dot_str = my_str.center(cwidth)
  335. else:
  336. dot_str = my_str.rjust(cwidth)
  337. strcols[ix].insert(row_num + n_header_rows, dot_str)
  338. return strcols
  339. def to_string(self):
  340. """
  341. Render a DataFrame to a console-friendly tabular output.
  342. """
  343. frame = self.frame
  344. if len(frame.columns) == 0 or len(frame.index) == 0:
  345. info_line = (u('Empty %s\nColumns: %s\nIndex: %s')
  346. % (type(self.frame).__name__,
  347. com.pprint_thing(frame.columns),
  348. com.pprint_thing(frame.index)))
  349. text = info_line
  350. else:
  351. strcols = self._to_str_columns()
  352. if self.line_width is None:
  353. text = adjoin(1, *strcols)
  354. else:
  355. text = self._join_multiline(*strcols)
  356. self.buf.writelines(text)
  357. if self.should_show_dimensions:
  358. self.buf.write("\n\n[%d rows x %d columns]"
  359. % (len(frame), len(frame.columns)))
  360. def _join_multiline(self, *strcols):
  361. lwidth = self.line_width
  362. adjoin_width = 1
  363. strcols = list(strcols)
  364. if self.index:
  365. idx = strcols.pop(0)
  366. lwidth -= np.array([len(x) for x in idx]).max() + adjoin_width
  367. col_widths = [np.array([len(x) for x in col]).max()
  368. if len(col) > 0 else 0
  369. for col in strcols]
  370. col_bins = _binify(col_widths, lwidth)
  371. nbins = len(col_bins)
  372. if self.max_rows and len(self.frame) > self.max_rows:
  373. nrows = self.max_rows + 1
  374. else:
  375. nrows = len(self.frame)
  376. str_lst = []
  377. st = 0
  378. for i, ed in enumerate(col_bins):
  379. row = strcols[st:ed]
  380. row.insert(0, idx)
  381. if nbins > 1:
  382. if ed <= len(strcols) and i < nbins - 1:
  383. row.append([' \\'] + [' '] * (nrows - 1))
  384. else:
  385. row.append([' '] * nrows)
  386. str_lst.append(adjoin(adjoin_width, *row))
  387. st = ed
  388. return '\n\n'.join(str_lst)
  389. def to_latex(self, column_format=None, longtable=False):
  390. """
  391. Render a DataFrame to a LaTeX tabular/longtable environment output.
  392. """
  393. self.escape = self.kwds.get('escape', True)
  394. #TODO: column_format is not settable in df.to_latex
  395. def get_col_type(dtype):
  396. if issubclass(dtype.type, np.number):
  397. return 'r'
  398. else:
  399. return 'l'
  400. frame = self.frame
  401. if len(frame.columns) == 0 or len(frame.index) == 0:
  402. info_line = (u('Empty %s\nColumns: %s\nIndex: %s')
  403. % (type(self.frame).__name__,
  404. frame.columns, frame.index))
  405. strcols = [[info_line]]
  406. else:
  407. strcols = self._to_str_columns()
  408. if column_format is None:
  409. dtypes = self.frame.dtypes.values
  410. if self.index:
  411. column_format = 'l%s' % ''.join(map(get_col_type, dtypes))
  412. else:
  413. column_format = '%s' % ''.join(map(get_col_type, dtypes))
  414. elif not isinstance(column_format,
  415. compat.string_types): # pragma: no cover
  416. raise AssertionError('column_format must be str or unicode, not %s'
  417. % type(column_format))
  418. def write(buf, frame, column_format, strcols, longtable=False):
  419. if not longtable:
  420. buf.write('\\begin{tabular}{%s}\n' % column_format)
  421. buf.write('\\toprule\n')
  422. else:
  423. buf.write('\\begin{longtable}{%s}\n' % column_format)
  424. buf.write('\\toprule\n')
  425. nlevels = frame.index.nlevels
  426. for i, row in enumerate(zip(*strcols)):
  427. if i == nlevels:
  428. buf.write('\\midrule\n') # End of header
  429. if longtable:
  430. buf.write('\\endhead\n')
  431. buf.write('\\midrule\n')
  432. buf.write('\\multicolumn{3}{r}{{Continued on next '
  433. 'page}} \\\\\n')
  434. buf.write('\midrule\n')
  435. buf.write('\endfoot\n\n')
  436. buf.write('\\bottomrule\n')
  437. buf.write('\\endlastfoot\n')
  438. if self.escape:
  439. crow = [(x.replace('\\', '\\textbackslash') # escape backslashes first
  440. .replace('_', '\\_')
  441. .replace('%', '\\%')
  442. .replace('$', '\\$')
  443. .replace('#', '\\#')
  444. .replace('{', '\\{')
  445. .replace('}', '\\}')
  446. .replace('~', '\\textasciitilde')
  447. .replace('^', '\\textasciicircum')
  448. .replace('&', '\\&') if x else '{}') for x in row]
  449. else:
  450. crow = [x if x else '{}' for x in row]
  451. buf.write(' & '.join(crow))
  452. buf.write(' \\\\\n')
  453. if not longtable:
  454. buf.write('\\bottomrule\n')
  455. buf.write('\\end{tabular}\n')
  456. else:
  457. buf.write('\\end{longtable}\n')
  458. if hasattr(self.buf, 'write'):
  459. write(self.buf, frame, column_format, strcols, longtable)
  460. elif isinstance(self.buf, compat.string_types):
  461. with open(self.buf, 'w') as f:
  462. write(f, frame, column_format, strcols, longtable)
  463. else:
  464. raise TypeError('buf is not a file name and it has no write '
  465. 'method')
  466. def _format_col(self, i):
  467. frame = self.tr_frame
  468. formatter = self._get_formatter(i)
  469. return format_array(
  470. (frame.iloc[:, i]).get_values(),
  471. formatter, float_format=self.float_format, na_rep=self.na_rep,
  472. space=self.col_space
  473. )
  474. def to_html(self, classes=None):
  475. """
  476. Render a DataFrame to a html table.
  477. """
  478. html_renderer = HTMLFormatter(self, classes=classes,
  479. max_rows=self.max_rows,
  480. max_cols=self.max_cols)
  481. if hasattr(self.buf, 'write'):
  482. html_renderer.write_result(self.buf)
  483. elif isinstance(self.buf, compat.string_types):
  484. with open(self.buf, 'w') as f:
  485. html_renderer.write_result(f)
  486. else:
  487. raise TypeError('buf is not a file name and it has no write '
  488. ' method')
  489. def _get_formatted_column_labels(self,frame):
  490. from pandas.core.index import _sparsify
  491. def is_numeric_dtype(dtype):
  492. return issubclass(dtype.type, np.number)
  493. columns = frame.columns
  494. if isinstance(columns, MultiIndex):
  495. fmt_columns = columns.format(sparsify=False, adjoin=False)
  496. fmt_columns = lzip(*fmt_columns)
  497. dtypes = self.frame.dtypes.values
  498. need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
  499. str_columns = list(zip(*[
  500. [' ' + y if y not in self.formatters and need_leadsp[x]
  501. else y for y in x] for x in fmt_columns]))
  502. if self.sparsify:
  503. str_columns = _sparsify(str_columns)
  504. str_columns = [list(x) for x in zip(*str_columns)]
  505. else:
  506. fmt_columns = columns.format()
  507. dtypes = self.frame.dtypes
  508. need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
  509. str_columns = [[' ' + x
  510. if not self._get_formatter(i) and need_leadsp[x]
  511. else x]
  512. for i, (col, x) in
  513. enumerate(zip(columns, fmt_columns))]
  514. if self.show_index_names and self.has_index_names:
  515. for x in str_columns:
  516. x.append('')
  517. return str_columns
  518. @property
  519. def has_index_names(self):
  520. return _has_names(self.frame.index)
  521. @property
  522. def has_column_names(self):
  523. return _has_names(self.frame.columns)
  524. def _get_formatted_index(self,frame):
  525. # Note: this is only used by to_string(), not by to_html().
  526. index = frame.index
  527. columns = frame.columns
  528. show_index_names = self.show_index_names and self.has_index_names
  529. show_col_names = (self.show_index_names and self.has_column_names)
  530. fmt = self._get_formatter('__index__')
  531. if isinstance(index, MultiIndex):
  532. fmt_index = index.format(sparsify=self.sparsify, adjoin=False,
  533. names=show_index_names,
  534. formatter=fmt)
  535. else:
  536. fmt_index = [index.format(name=show_index_names, formatter=fmt)]
  537. adjoined = adjoin(1, *fmt_index).split('\n')
  538. # empty space for columns
  539. if show_col_names:
  540. col_header = ['%s' % x for x in self._get_column_name_list()]
  541. else:
  542. col_header = [''] * columns.nlevels
  543. if self.header:
  544. return col_header + adjoined
  545. else:
  546. return adjoined
  547. def _get_column_name_list(self):
  548. names = []
  549. columns = self.frame.columns
  550. if isinstance(columns, MultiIndex):
  551. names.extend('' if name is None else name
  552. for name in columns.names)
  553. else:
  554. names.append('' if columns.name is None else columns.name)
  555. return names
  556. class HTMLFormatter(TableFormatter):
  557. indent_delta = 2
  558. def __init__(self, formatter, classes=None, max_rows=None, max_cols=None):
  559. self.fmt = formatter
  560. self.classes = classes
  561. self.frame = self.fmt.frame
  562. self.columns = self.fmt.tr_frame.columns
  563. self.elements = []
  564. self.bold_rows = self.fmt.kwds.get('bold_rows', False)
  565. self.escape = self.fmt.kwds.get('escape', True)
  566. self.max_rows = max_rows or len(self.fmt.frame)
  567. self.max_cols = max_cols or len(self.fmt.columns)
  568. self.show_dimensions = self.fmt.show_dimensions
  569. self.is_truncated = self.max_rows < len(self.fmt.frame) or self.max_cols < len(self.fmt.columns)
  570. def write(self, s, indent=0):
  571. rs = com.pprint_thing(s)
  572. self.elements.append(' ' * indent + rs)
  573. def write_th(self, s, indent=0, tags=None):
  574. if (self.fmt.col_space is not None
  575. and self.fmt.col_space > 0):
  576. tags = (tags or "")
  577. tags += 'style="min-width: %s;"' % self.fmt.col_space
  578. return self._write_cell(s, kind='th', indent=indent, tags=tags)
  579. def write_td(self, s, indent=0, tags=None):
  580. return self._write_cell(s, kind='td', indent=indent, tags=tags)
  581. def _write_cell(self, s, kind='td', indent=0, tags=None):
  582. if tags is not None:
  583. start_tag = '<%s %s>' % (kind, tags)
  584. else:
  585. start_tag = '<%s>' % kind
  586. if self.escape:
  587. # escape & first to prevent double escaping of &
  588. esc = OrderedDict(
  589. [('&', r'&amp;'), ('<', r'&lt;'), ('>', r'&gt;')]
  590. )
  591. else:
  592. esc = {}
  593. rs = com.pprint_thing(s, escape_chars=esc)
  594. self.write(
  595. '%s%s</%s>' % (start_tag, rs, kind), indent)
  596. def write_tr(self, line, indent=0, indent_delta=4, header=False,
  597. align=None, tags=None, nindex_levels=0):
  598. if tags is None:
  599. tags = {}
  600. if align is None:
  601. self.write('<tr>', indent)
  602. else:
  603. self.write('<tr style="text-align: %s;">' % align, indent)
  604. indent += indent_delta
  605. for i, s in enumerate(line):
  606. val_tag = tags.get(i, None)
  607. if header or (self.bold_rows and i < nindex_levels):
  608. self.write_th(s, indent, tags=val_tag)
  609. else:
  610. self.write_td(s, indent, tags=val_tag)
  611. indent -= indent_delta
  612. self.write('</tr>', indent)
  613. def write_result(self, buf):
  614. indent = 0
  615. frame = self.frame
  616. _classes = ['dataframe'] # Default class.
  617. if self.classes is not None:
  618. if isinstance(self.classes, str):
  619. self.classes = self.classes.split()
  620. if not isinstance(self.classes, (list, tuple)):
  621. raise AssertionError(('classes must be list or tuple, '
  622. 'not %s') % type(self.classes))
  623. _classes.extend(self.classes)
  624. self.write('<table border="1" class="%s">' % ' '.join(_classes),
  625. indent)
  626. indent += self.indent_delta
  627. indent = self._write_header(indent)
  628. indent = self._write_body(indent)
  629. self.write('</table>', indent)
  630. if self.should_show_dimensions:
  631. by = chr(215) if compat.PY3 else unichr(215) # ×
  632. self.write(u('<p>%d rows %s %d columns</p>') %
  633. (len(frame), by, len(frame.columns)))
  634. _put_lines(buf, self.elements)
  635. def _write_header(self, indent):
  636. truncate_h = self.fmt.truncate_h
  637. row_levels = self.frame.index.nlevels
  638. if not self.fmt.header:
  639. # write nothing
  640. return indent
  641. def _column_header():
  642. if self.fmt.index:
  643. row = [''] * (self.frame.index.nlevels - 1)
  644. else:
  645. row = []
  646. if isinstance(self.columns, MultiIndex):
  647. if self.fmt.has_column_names and self.fmt.index:
  648. row.append(single_column_table(self.columns.names))
  649. else:
  650. row.append('')
  651. style = "text-align: %s;" % self.fmt.justify
  652. row.extend([single_column_table(c, self.fmt.justify, style) for
  653. c in self.columns])
  654. else:
  655. if self.fmt.index:
  656. row.append(self.columns.name or '')
  657. row.extend(self.columns)
  658. return row
  659. self.write('<thead>', indent)
  660. row = []
  661. indent += self.indent_delta
  662. if isinstance(self.columns, MultiIndex):
  663. template = 'colspan="%d" halign="left"'
  664. if self.fmt.sparsify:
  665. # GH3547
  666. sentinel = com.sentinel_factory()
  667. else:
  668. sentinel = None
  669. levels = self.columns.format(sparsify=sentinel,
  670. adjoin=False, names=False)
  671. level_lengths = _get_level_lengths(levels, sentinel)
  672. inner_lvl = len(level_lengths) - 1
  673. for lnum, (records, values) in enumerate(zip(level_lengths,
  674. levels)):
  675. if truncate_h:
  676. # modify the header lines
  677. ins_col = self.fmt.tr_col_num
  678. if self.fmt.sparsify:
  679. recs_new = {}
  680. # Increment tags after ... col.
  681. for tag,span in list(records.items()):
  682. if tag >= ins_col:
  683. recs_new[tag + 1] = span
  684. elif tag + span > ins_col:
  685. recs_new[tag] = span + 1
  686. if lnum == inner_lvl:
  687. values = values[:ins_col] + (u('...'),) + \
  688. values[ins_col:]
  689. else: # sparse col headers do not receive a ...
  690. values = values[:ins_col] + \
  691. (values[ins_col - 1],) + values[ins_col:]
  692. else:
  693. recs_new[tag] = span
  694. # if ins_col lies between tags, all col headers get ...
  695. if tag + span == ins_col:
  696. recs_new[ins_col] = 1
  697. values = values[:ins_col] + (u('...'),) + \
  698. values[ins_col:]
  699. records = recs_new
  700. inner_lvl = len(level_lengths) - 1
  701. if lnum == inner_lvl:
  702. records[ins_col] = 1
  703. else:
  704. recs_new = {}
  705. for tag,span in list(records.items()):
  706. if tag >= ins_col:
  707. recs_new[tag + 1] = span
  708. else:
  709. recs_new[tag] = span
  710. recs_new[ins_col] = 1
  711. records = recs_new
  712. values = values[:ins_col] + [u('...')] + values[ins_col:]
  713. name = self.columns.names[lnum]
  714. row = [''] * (row_levels - 1) + ['' if name is None
  715. else com.pprint_thing(name)]
  716. tags = {}
  717. j = len(row)
  718. for i, v in enumerate(values):
  719. if i in records:
  720. if records[i] > 1:
  721. tags[j] = template % records[i]
  722. else:
  723. continue
  724. j += 1
  725. row.append(v)
  726. self.write_tr(row, indent, self.indent_delta, tags=tags,
  727. header=True)
  728. else:
  729. col_row = _column_header()
  730. align = self.fmt.justify
  731. if truncate_h:
  732. ins_col = row_levels + self.fmt.tr_col_num
  733. col_row.insert(ins_col, '...')
  734. self.write_tr(col_row, indent, self.indent_delta, header=True,
  735. align=align)
  736. if self.fmt.has_index_names:
  737. row = [
  738. x if x is not None else '' for x in self.frame.index.names
  739. ] + [''] * min(len(self.columns), self.max_cols)
  740. if truncate_h:
  741. ins_col = row_levels + self.fmt.tr_col_num
  742. row.insert(ins_col, '')
  743. self.write_tr(row, indent, self.indent_delta, header=True)
  744. indent -= self.indent_delta
  745. self.write('</thead>', indent)
  746. return indent
  747. def _write_body(self, indent):
  748. self.write('<tbody>', indent)
  749. indent += self.indent_delta
  750. fmt_values = {}
  751. for i in range(min(len(self.columns), self.max_cols)):
  752. fmt_values[i] = self.fmt._format_col(i)
  753. # write values
  754. if self.fmt.index:
  755. if isinstance(self.frame.index, MultiIndex):
  756. self._write_hierarchical_rows(fmt_values, indent)
  757. else:
  758. self._write_regular_rows(fmt_values, indent)
  759. else:
  760. for i in range(len(self.frame)):
  761. row = [fmt_values[j][i] for j in range(len(self.columns))]
  762. self.write_tr(row, indent, self.indent_delta, tags=None)
  763. indent -= self.indent_delta
  764. self.write('</tbody>', indent)
  765. indent -= self.indent_delta
  766. return indent
  767. def _write_regular_rows(self, fmt_values, indent):
  768. truncate_h = self.fmt.truncate_h
  769. truncate_v = self.fmt.truncate_v
  770. ncols = len(self.fmt.tr_frame.columns)
  771. nrows = len(self.fmt.tr_frame)
  772. fmt = self.fmt._get_formatter('__index__')
  773. if fmt is not None:
  774. index_values = self.fmt.tr_frame.index.map(fmt)
  775. else:
  776. index_values = self.fmt.tr_frame.index.format()
  777. for i in range(nrows):
  778. if truncate_v and i == (self.fmt.tr_row_num):
  779. str_sep_row = [ '...' for ele in row ]
  780. self.write_tr(str_sep_row, indent, self.indent_delta, tags=None,
  781. nindex_levels=1)
  782. row = []
  783. row.append(index_values[i])
  784. row.extend(fmt_values[j][i] for j in range(ncols))
  785. if truncate_h:
  786. dot_col_ix = self.fmt.tr_col_num + 1
  787. row.insert(dot_col_ix, '...')
  788. self.write_tr(row, indent, self.indent_delta, tags=None,
  789. nindex_levels=1)
  790. def _write_hierarchical_rows(self, fmt_values, indent):
  791. template = 'rowspan="%d" valign="top"'
  792. truncate_h = self.fmt.truncate_h
  793. truncate_v = self.fmt.truncate_v
  794. frame = self.fmt.tr_frame
  795. ncols = len(frame.columns)
  796. nrows = len(frame)
  797. row_levels = self.frame.index.nlevels
  798. idx_values = frame.index.format(sparsify=False, adjoin=False,
  799. names=False)
  800. idx_values = lzip(*idx_values)
  801. if self.fmt.sparsify:
  802. # GH3547
  803. sentinel = com.sentinel_factory()
  804. levels = frame.index.format(sparsify=sentinel,
  805. adjoin=False, names=False)
  806. level_lengths = _get_level_lengths(levels, sentinel)
  807. inner_lvl = len(level_lengths) - 1
  808. if truncate_v:
  809. # Insert ... row and adjust idx_values and
  810. # level_lengths to take this into account.
  811. ins_row = self.fmt.tr_row_num
  812. for lnum,records in enumerate(level_lengths):
  813. rec_new = {}
  814. for tag,span in list(records.items()):
  815. if tag >= ins_row:
  816. rec_new[tag + 1] = span
  817. elif tag + span > ins_row:
  818. rec_new[tag] = span + 1
  819. dot_row = list(idx_values[ins_row - 1])
  820. dot_row[-1] = u('...')
  821. idx_values.insert(ins_row,tuple(dot_row))
  822. else:
  823. rec_new[tag] = span
  824. # If ins_row lies between tags, all cols idx cols receive ...
  825. if tag + span == ins_row:
  826. rec_new[ins_row] = 1
  827. if lnum == 0:
  828. idx_values.insert(ins_row,tuple([u('...')]*len(level_lengths)))
  829. level_lengths[lnum] = rec_new
  830. level_lengths[inner_lvl][ins_row] = 1
  831. for ix_col in range(len(fmt_values)):
  832. fmt_values[ix_col].insert(ins_row,'...')
  833. nrows += 1
  834. for i in range(nrows):
  835. row = []
  836. tags = {}
  837. sparse_offset = 0
  838. j = 0
  839. for records, v in zip(level_lengths, idx_values[i]):
  840. if i in records:
  841. if records[i] > 1:
  842. tags[j] = template % records[i]
  843. else:
  844. sparse_offset += 1
  845. continue
  846. j += 1
  847. row.append(v)
  848. row.extend(fmt_values[j][i] for j in range(ncols))
  849. if truncate_h:
  850. row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, '...')
  851. self.write_tr(row, indent, self.indent_delta, tags=tags,
  852. nindex_levels=len(levels) - sparse_offset)
  853. else:
  854. for i in range(len(frame)):
  855. idx_values = list(zip(*frame.index.format(sparsify=False,
  856. adjoin=False,
  857. names=False)))
  858. row = []
  859. row.extend(idx_values[i])
  860. row.extend(fmt_values[j][i] for j in range(ncols))
  861. if truncate_h:
  862. row.insert(row_levels + self.fmt.tr_col_num, '...')
  863. self.write_tr(row, indent, self.indent_delta, tags=None,
  864. nindex_levels=frame.index.nlevels)
  865. def _get_level_lengths(levels, sentinel=''):
  866. from itertools import groupby
  867. def _make_grouper():
  868. record = {'count': 0}
  869. def grouper(x):
  870. if x != sentinel:
  871. record['count'] += 1
  872. return record['count']
  873. return grouper
  874. result = []
  875. for lev in levels:
  876. i = 0
  877. f = _make_grouper()
  878. recs = {}
  879. for key, gpr in groupby(lev, f):
  880. values = list(gpr)
  881. recs[i] = len(values)
  882. i += len(values)
  883. result.append(recs)
  884. return result
  885. class CSVFormatter(object):
  886. def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,
  887. cols=None, header=True, index=True, index_label=None,
  888. mode='w', nanRep=None, encoding=None, quoting=None,
  889. line_terminator='\n', chunksize=None, engine=None,
  890. tupleize_cols=False, quotechar='"', date_format=None,
  891. doublequote=True, escapechar=None):
  892. self.engine = engine # remove for 0.13
  893. self.obj = obj
  894. if path_or_buf is None:
  895. path_or_buf = StringIO()
  896. self.path_or_buf = path_or_buf
  897. self.sep = sep
  898. self.na_rep = na_rep
  899. self.float_format = float_format
  900. self.header = header
  901. self.index = index
  902. self.index_label = index_label
  903. self.mode = mode
  904. self.encoding = encoding
  905. if quoting is None:
  906. quoting = csv.QUOTE_MINIMAL
  907. self.quoting = quoting
  908. if quoting == csv.QUOTE_NONE:
  909. # prevents crash in _csv
  910. quotechar = None
  911. self.quotechar = quotechar
  912. self.doublequote = doublequote
  913. self.escapechar = escapechar
  914. self.line_terminator = line_terminator
  915. self.date_format = date_format
  916. # GH3457
  917. if not self.obj.columns.is_unique and engine == 'python':
  918. raise NotImplementedError("columns.is_unique == False not "
  919. "supported with engine='python'")
  920. self.tupleize_cols = tupleize_cols
  921. self.has_mi_columns = isinstance(obj.columns, MultiIndex
  922. ) and not self.tupleize_cols
  923. # validate mi options
  924. if self.has_mi_columns:
  925. if cols is not None:
  926. raise TypeError("cannot specify cols with a MultiIndex on the "
  927. "columns")
  928. if cols is not None:
  929. if isinstance(cols, Index):
  930. cols = cols.to_native_types(na_rep=na_rep,
  931. float_format=float_format,
  932. date_format=date_format)
  933. else:
  934. cols = list(cols)
  935. self.obj = self.obj.loc[:, cols]
  936. # update columns to include possible multiplicity of dupes
  937. # and make sure sure cols is just a list of labels
  938. cols = self.obj.columns
  939. if isinstance(cols, Index):
  940. cols = cols.to_native_types(na_rep=na_rep,
  941. float_format=float_format,
  942. date_format=date_format)
  943. else:
  944. cols = list(cols)
  945. # save it
  946. self.cols = cols
  947. # preallocate data 2d list
  948. self.blocks = self.obj._data.blocks
  949. ncols = sum(b.shape[0] for b in self.blocks)
  950. self.data = [None] * ncols
  951. if chunksize is None:
  952. chunksize = (100000 / (len(self.cols) or 1)) or 1
  953. self.chunksize = int(chunksize)
  954. self.data_index = obj.index
  955. if isinstance(obj.index, PeriodIndex):
  956. self.data_index = obj.index.to_timestamp()
  957. if (isinstance(self.data_index, DatetimeIndex) and
  958. date_format is not None):
  959. self.data_index = Index([x.strftime(date_format)
  960. if notnull(x) else ''
  961. for x in self.data_index])
  962. self.nlevels = getattr(self.data_index, 'nlevels', 1)
  963. if not index:
  964. self.nlevels = 0
  965. # original python implem. of df.to_csv
  966. # invoked by df.to_csv(engine=python)
  967. def _helper_csv(self, writer, na_rep=None, cols=None,
  968. header=True, index=True,
  969. index_label=None, float_format=None, date_format=None):
  970. if cols is None:
  971. cols = self.columns
  972. has_aliases = isinstance(header, (tuple, list, np.ndarray))
  973. if has_aliases or header:
  974. if index:
  975. # should write something for index label
  976. if index_label is not False:
  977. if index_label is None:
  978. if isinstance(self.obj.index, MultiIndex):
  979. index_label = []
  980. for i, name in enumerate(self.obj.index.names):
  981. if name is None:
  982. name = ''
  983. index_label.append(name)
  984. else:
  985. index_label = self.obj.index.name
  986. if index_label is None:
  987. index_label = ['']
  988. else:
  989. index_label = [index_label]
  990. elif not isinstance(index_label,
  991. (list, tuple, np.ndarray)):
  992. # given a string for a DF with Index
  993. index_label = [index_label]
  994. encoded_labels = list(index_label)
  995. else:
  996. encoded_labels = []
  997. if has_aliases:
  998. if len(header) != len(cols):
  999. raise ValueError(('Writing %d cols but got %d aliases'
  1000. % (len(cols), len(header))))
  1001. else:
  1002. write_cols = header
  1003. else:
  1004. write_cols = cols
  1005. encoded_cols = list(write_cols)
  1006. writer.writerow(encoded_labels + encoded_cols)
  1007. else:
  1008. encoded_cols = list(cols)
  1009. writer.writerow(encoded_cols)
  1010. if date_format is None:
  1011. date_formatter = lambda x: lib.Timestamp(x)._repr_base
  1012. else:
  1013. def strftime_with_nulls(x):
  1014. x = lib.Timestamp(x)
  1015. if notnull(x):
  1016. return x.strftime(date_format)
  1017. date_formatter = lambda x: strftime_with_nulls(x)
  1018. data_index = self.obj.index
  1019. if isinstance(self.obj.index, PeriodIndex):
  1020. data_index = self.obj.index.to_timestamp()
  1021. if isinstance(data_index, DatetimeIndex) and date_format is not None:
  1022. data_index = Index([date_formatter(x) for x in data_index])
  1023. values = self.obj.copy()
  1024. values.index = data_index
  1025. values.columns = values.columns.to_native_types(
  1026. na_rep=na_rep, float_format=float_format,
  1027. date_format=date_format)
  1028. values = values[cols]
  1029. series = {}
  1030. for k, v in compat.iteritems(values._series):
  1031. series[k] = v.values
  1032. nlevels = getattr(data_index, 'nlevels', 1)
  1033. for j, idx in enumerate(data_index):
  1034. row_fields = []
  1035. if index:
  1036. if nlevels == 1:
  1037. row_fields = [idx]
  1038. else: # handle MultiIndex
  1039. row_fields = list(idx)
  1040. for i, col in enumerate(cols):
  1041. val = series[col][j]
  1042. if lib.checknull(val):
  1043. val = na_rep
  1044. if float_format is not None and com.is_float(val):
  1045. val = float_format % val
  1046. elif isinstance(val, (np.datetime64, lib.Timestamp)):
  1047. val = date_formatter(val)
  1048. row_fields.append(val)
  1049. writer.writerow(row_fields)
  1050. def save(self):
  1051. # create the writer & save
  1052. if hasattr(self.path_or_buf, 'write'):
  1053. f = self.path_or_buf
  1054. close = False
  1055. else:
  1056. f = com._get_handle(self.path_or_buf, self.mode,
  1057. encoding=self.encoding)
  1058. close = True
  1059. try:
  1060. writer_kwargs = dict(lineterminator=self.line_terminator,
  1061. delimiter=self.sep, quoting=self.quoting,
  1062. doublequote=self.doublequote,
  1063. escapechar=self.escapechar,
  1064. quotechar=self.quotechar)
  1065. if self.encoding is not None:
  1066. writer_kwargs['encoding'] = self.encoding
  1067. self.writer = com.UnicodeWriter(f, **writer_kwargs)
  1068. else:
  1069. self.writer = csv.writer(f, **writer_kwargs)
  1070. if self.engine == 'python':
  1071. # to be removed in 0.13
  1072. self._helper_csv(self.writer, na_rep=self.na_rep,
  1073. float_format=self.float_format,
  1074. cols=self.cols, header=self.header,
  1075. index=self.index,
  1076. index_label=self.index_label,
  1077. date_format=self.date_format)
  1078. else:
  1079. self._save()
  1080. finally:
  1081. if close:
  1082. f.close()
  1083. def _save_header(self):
  1084. writer = self.writer
  1085. obj = self.obj
  1086. index_label = self.index_label
  1087. cols = self.cols
  1088. has_mi_columns = self.has_mi_columns
  1089. header = self.header
  1090. encoded_labels = []
  1091. has_aliases = isinstance(header, (tuple, list, np.ndarray))
  1092. if not (has_aliases or self.header):
  1093. return
  1094. if has_aliases:
  1095. if len(header) != len(cols):
  1096. raise ValueError(('Writing %d cols but got %d aliases'
  1097. % (len(cols), len(header))))
  1098. else:
  1099. write_cols = header
  1100. else:
  1101. write_cols = cols
  1102. if self.index:
  1103. # should write something for index label
  1104. if index_label is not False:
  1105. if index_label is None:
  1106. if isinstance(obj.index, MultiIndex):
  1107. index_label = []
  1108. for i, name in enumerate(obj.index.names):
  1109. if name is None:
  1110. name = ''
  1111. index_label.append(name)
  1112. else:
  1113. index_label = obj.index.name
  1114. if index_label is None:
  1115. index_label = ['']
  1116. else:
  1117. index_label = [index_label]
  1118. elif not isinstance(index_label, (list, tuple, np.ndarray)):
  1119. # given a string for a DF with Index
  1120. index_label = [index_label]
  1121. encoded_labels = list(index_label)
  1122. else:
  1123. encoded_labels = []
  1124. if not has_mi_columns:
  1125. encoded_labels += list(write_cols)
  1126. # write out the mi
  1127. if has_mi_columns:
  1128. columns = obj.columns
  1129. # write out the names for each level, then ALL of the values for
  1130. # each level
  1131. for i in range(columns.nlevels):
  1132. # we need at least 1 index column to write our col names
  1133. col_line = []
  1134. if self.index:
  1135. # name is the first column
  1136. col_line.append(columns.names[i])
  1137. if isinstance(index_label, list) and len(index_label) > 1:
  1138. col_line.extend([''] * (len(index_label) - 1))
  1139. col_line.extend(columns.get_level_values(i))
  1140. writer.writerow(col_line)
  1141. # add blanks for the columns, so that we
  1142. # have consistent seps
  1143. encoded_labels.extend([''] * len(columns))
  1144. # write out the index label line
  1145. writer.writerow(encoded_labels)
  1146. def _save(self):
  1147. self._save_header()
  1148. nrows = len(self.data_index)
  1149. # write in chunksize bites
  1150. chunksize = self.chunksize
  1151. chunks = int(nrows / chunksize) + 1
  1152. for i in range(chunks):
  1153. start_i = i * chunksize
  1154. end_i = min((i + 1) * chunksize, nrows)
  1155. if start_i >= end_i:
  1156. break
  1157. self._save_chunk(start_i, end_i)
  1158. def _save_chunk(self, start_i, end_i):
  1159. data_index = self.data_index
  1160. # create the data for a chunk
  1161. slicer = slice(start_i, end_i)
  1162. for i in range(len(self.blocks)):
  1163. b = self.blocks[i]
  1164. d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
  1165. float_format=self.float_format,
  1166. date_format=self.date_format)
  1167. for col_loc, col in zip(b.mgr_locs, d):
  1168. # self.data is a preallocated list
  1169. self.data[col_loc] = col
  1170. ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep,
  1171. float_format=self.float_format,
  1172. date_format=self.date_format)
  1173. lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer)
  1174. # from collections import namedtuple
  1175. # ExcelCell = namedtuple("ExcelCell",
  1176. # 'row, col, val, style, mergestart, mergeend')
  1177. class ExcelCell(object):
  1178. __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend')
  1179. __slots__ = __fields__
  1180. def __init__(self, row, col, val,
  1181. style=None, mergestart=None, mergeend=None):
  1182. self.row = row
  1183. self.col = col
  1184. self.val = val
  1185. self.style = style
  1186. self.mergestart = mergestart
  1187. self.mergeend = mergeend
  1188. header_style = {"font": {"bold": True},
  1189. "borders": {"top": "thin",
  1190. "right": "thin",
  1191. "bottom": "thin",
  1192. "left": "thin"},
  1193. "alignment": {"horizontal": "center", "vertical": "top"}}
  1194. class ExcelFormatter(object):
  1195. """
  1196. Class for formatting a DataFrame to a list of ExcelCells,
  1197. Parameters
  1198. ----------
  1199. df : dataframe
  1200. na_rep: na representation
  1201. float_format : string, default None
  1202. Format string for floating point numbers
  1203. cols : sequence, optional
  1204. Columns to write
  1205. header : boolean or list of string, default True
  1206. Write out column names. If a list of string is given it is
  1207. assumed to be aliases for the column names
  1208. index : boolean, default True
  1209. output row names (index)
  1210. index_label : string or sequence, default None
  1211. Column label for index column(s) if desired. If None is given, and
  1212. `header` and `index` are True, then the index names are used. A
  1213. sequence should be given if the DataFrame uses MultiIndex.
  1214. merge_cells : boolean, default False
  1215. Format MultiIndex and Hierarchical Rows as merged cells.
  1216. inf_rep : string, default `'inf'`
  1217. representation for np.inf values (which aren't representable in Excel)
  1218. A `'-'` sign will be added in front of -inf.
  1219. """
  1220. def __init__(self, df, na_rep='', float_format=None, cols=None,
  1221. header=True, index=True, index_label=None, merge_cells=False,
  1222. inf_rep='inf'):
  1223. self.df = df
  1224. self.rowcounter = 0
  1225. self.na_rep = na_rep
  1226. self.columns = cols
  1227. if cols is None:
  1228. self.columns = df.columns
  1229. self.float_format = float_format
  1230. self.index = index
  1231. self.index_label = index_label
  1232. self.header = header
  1233. self.merge_cells = merge_cells
  1234. self.inf_rep = inf_rep
  1235. def _format_value(self, val):
  1236. if lib.checknull(val):
  1237. val = self.na_rep
  1238. elif com.is_float(val):
  1239. if np.isposinf(val):
  1240. val = '-%s' % self.inf_rep
  1241. elif np.isneginf(val):
  1242. val = self.inf_rep
  1243. elif self.float_format is not None:
  1244. val = float(self.float_format % val)
  1245. return val
    def _format_header_mi(self):
        """
        Yield the header cells for MultiIndex columns.

        Yields nothing when ``header`` is falsy and not a list of aliases.
        With ``merge_cells`` each column level gets its own row (level
        name followed by the labels, multi-column labels as merged
        ranges); otherwise a single row of dot-joined labels is produced.
        On exhaustion ``rowcounter`` is set to the last header row used.
        """
        has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
        if not(has_aliases or self.header):
            return

        columns = self.columns
        level_strs = columns.format(sparsify=True, adjoin=False, names=False)
        level_lengths = _get_level_lengths(level_strs)
        coloffset = 0
        lnum = 0

        # With a MultiIndex on the rows, shift right so that the header
        # starts in the last index column.
        if self.index and isinstance(self.df.index, MultiIndex):
            coloffset = len(self.df.index[0]) - 1

        if self.merge_cells:
            # Format multi-index as merged cells.
            # First the name of every column level, one per row ...
            for lnum in range(len(level_lengths)):
                name = columns.names[lnum]
                yield ExcelCell(lnum, coloffset, name, header_style)

            # ... then the labels of every level; ``spans`` maps the first
            # column of each label run to the number of columns it covers.
            for lnum, (spans, levels, labels) in enumerate(zip(level_lengths,
                                                               columns.levels,
                                                               columns.labels)
                                                           ):
                values = levels.take(labels)
                for i in spans:
                    if spans[i] > 1:
                        # Label covers several columns: pass the end of
                        # the merged range as well.
                        yield ExcelCell(lnum,
                                        coloffset + i + 1,
                                        values[i],
                                        header_style,
                                        lnum,
                                        coloffset + i + spans[i])
                    else:
                        yield ExcelCell(lnum,
                                        coloffset + i + 1,
                                        values[i],
                                        header_style)
        else:
            # Format in legacy format with dots to indicate levels.
            for i, values in enumerate(zip(*level_strs)):
                v = ".".join(map(com.pprint_thing, values))
                yield ExcelCell(lnum, coloffset + i + 1, v, header_style)

        # lnum is the last header row written (0 in the legacy format).
        self.rowcounter = lnum
  1286. def _format_header_regular(self):
  1287. has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
  1288. if has_aliases or self.header:
  1289. coloffset = 0
  1290. if self.index:
  1291. coloffset = 1
  1292. if isinstance(self.df.index, MultiIndex):
  1293. coloffset = len(self.df.index[0])
  1294. colnames = self.columns
  1295. if has_aliases:
  1296. if len(self.header) != len(self.columns):
  1297. raise ValueError(('Writing %d cols but got %d aliases'
  1298. % (len(self.columns), len(self.header))))
  1299. else:
  1300. colnames = self.header
  1301. for colindex, colname in enumerate(colnames):
  1302. yield ExcelCell(self.rowcounter, colindex + coloffset, colname,
  1303. header_style)
  1304. def _format_header(self):
  1305. if isinstance(self.columns, MultiIndex):
  1306. gen = self._format_header_mi()
  1307. else:
  1308. gen = self._format_header_regular()
  1309. gen2 = ()
  1310. if self.df.index.names:
  1311. row = [x if x is not None else ''
  1312. for x in self.df.index.names] + [''] * len(self.columns)
  1313. if reduce(lambda x, y: x and y, map(lambda x: x != '', row)):
  1314. gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style)
  1315. for colindex, val in enumerate(row))
  1316. self.rowcounter += 1
  1317. return itertools.chain(gen, gen2)
  1318. def _format_body(self):
  1319. if isinstance(self.df.index, MultiIndex):
  1320. return self._format_hierarchical_rows()
  1321. else:
  1322. return self._format_regular_rows()
  1323. def _format_regular_rows(self):
  1324. has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
  1325. if has_aliases or self.header:
  1326. self.rowcounter += 1
  1327. coloffset = 0
  1328. # output index and index_label?
  1329. if self.index:
  1330. # chek aliases
  1331. # if list only take first as this is not a MultiIndex
  1332. if self.index_label and isinstance(self.index_label,
  1333. (list, tuple, np.ndarray)):
  1334. index_label = self.index_label[0]
  1335. # if string good to go
  1336. elif self.index_label and isinstance(self.index_label, str):
  1337. index_label = self.index_label
  1338. else:
  1339. index_label = self.df.index.names[0]
  1340. if index_label and self.header is not False:
  1341. if self.merge_cells:
  1342. yield ExcelCell(self.rowcounter,
  1343. 0,
  1344. index_label,
  1345. header_style)
  1346. self.rowcounter += 1
  1347. else:
  1348. yield ExcelCell(self.rowcounter - 1,
  1349. 0,
  1350. index_label,
  1351. header_style)
  1352. # write index_values
  1353. index_values = self.df.index
  1354. if isinstance(self.df.index, PeriodIndex):
  1355. index_values = self.df.index.to_timestamp()
  1356. coloffset = 1
  1357. for idx, idxval in enumerate(index_values):
  1358. yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)
  1359. # Get a frame that will account for any duplicates in the column names.
  1360. col_mapped_frame = self.df.loc[:, self.columns]
  1361. # Write the body of the frame data series by series.
  1362. for colidx in range(len(self.columns)):
  1363. series = col_mapped_frame.iloc[:, colidx]
  1364. for i, val in enumerate(series):
  1365. yield ExcelCell(self.rowcounter + i, colidx + coloffset, val)
    def _format_hierarchical_rows(self):
        """
        Yield the index and data cells for a frame with a MultiIndex.

        ``rowcounter`` is advanced past the header row first.  When any
        index label is set (and the header is not disabled) a row of index
        labels is emitted: on its own row with ``merge_cells``, otherwise
        on the previous row.  Index levels are then written column by
        column, followed by the data columns.
        """
        has_aliases = isinstance(self.header, (tuple, list, np.ndarray))
        if has_aliases or self.header:
            self.rowcounter += 1

        # Column at which the next index level / the first data column
        # is written.
        gcolidx = 0

        if self.index:
            index_labels = self.df.index.names
            # check for aliases
            if self.index_label and isinstance(self.index_label,
                                               (list, tuple, np.ndarray)):
                index_labels = self.index_label

            # if index labels are not empty go ahead and dump
            if (any(x is not None for x in index_labels)
                    and self.header is not False):

                # Without merged cells the labels go on the row above.
                if not self.merge_cells:
                    self.rowcounter -= 1

                for cidx, name in enumerate(index_labels):
                    yield ExcelCell(self.rowcounter,
                                    cidx,
                                    name,
                                    header_style)
                self.rowcounter += 1

            if self.merge_cells:
                # Format hierarchical rows as merged cells.
                level_strs = self.df.index.format(sparsify=True, adjoin=False,
                                                  names=False)
                level_lengths = _get_level_lengths(level_strs)

                # ``spans`` maps the first row of each label run to the
                # number of rows it covers.
                for spans, levels, labels in zip(level_lengths,
                                                 self.df.index.levels,
                                                 self.df.index.labels):
                    values = levels.take(labels)
                    for i in spans:
                        if spans[i] > 1:
                            # Label covers several rows: pass the end of
                            # the merged range as well.
                            yield ExcelCell(self.rowcounter + i,
                                            gcolidx,
                                            values[i],
                                            header_style,
                                            self.rowcounter + i + spans[i] - 1,
                                            gcolidx)
                        else:
                            yield ExcelCell(self.rowcounter + i,
                                            gcolidx,
                                            values[i],
                                            header_style)
                    gcolidx += 1

            else:
                # Format hierarchical rows with non-merged values.
                # zip(*index) turns the row tuples into one sequence per
                # index level.
                for indexcolvals in zip(*self.df.index):
                    for idx, indexcolval in enumerate(indexcolvals):
                        yield ExcelCell(self.rowcounter + idx,
                                        gcolidx,
                                        indexcolval,
                                        header_style)
                    gcolidx += 1

        # Get a frame that will account for any duplicates in the column names.
        col_mapped_frame = self.df.loc[:, self.columns]

        # Write the body of the frame data series by series.
        for colidx in range(len(self.columns)):
            series = col_mapped_frame.iloc[:, colidx]
            for i, val in enumerate(series):
                yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)
  1427. def get_formatted_cells(self):
  1428. for cell in itertools.chain(self._format_header(),
  1429. self._format_body()):
  1430. cell.val = self._format_value(cell.val)
  1431. yield cell
  1432. #----------------------------------------------------------------------
  1433. # Array formatters
  1434. def format_array(values, formatter, float_format=None, na_rep='NaN',
  1435. digits=None, space=None, justify='right'):
  1436. if com.is_float_dtype(values.dtype):
  1437. fmt_klass = FloatArrayFormatter
  1438. elif com.is_integer_dtype(values.dtype):
  1439. fmt_klass = IntArrayFormatter
  1440. elif com.is_datetime64_dtype(values.dtype):
  1441. fmt_klass = Datetime64Formatter
  1442. elif com.is_timedelta64_dtype(values.dtype):
  1443. fmt_klass = Timedelta64Formatter
  1444. else:
  1445. fmt_klass = GenericArrayFormatter
  1446. if space is None:
  1447. space = get_option("display.column_space")
  1448. if float_format is None:
  1449. float_format = get_option("display.float_format")
  1450. if digits is None:
  1451. digits = get_option("display.precision")
  1452. fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
  1453. float_format=float_format,
  1454. formatter=formatter, space=space,
  1455. justify=justify)
  1456. return fmt_obj.get_result()
  1457. class GenericArrayFormatter(object):
  1458. def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
  1459. space=12, float_format=None, justify='right'):
  1460. self.values = values
  1461. self.digits = digits
  1462. self.na_rep = na_rep
  1463. self.space = space
  1464. self.formatter = formatter
  1465. self.float_format = float_format
  1466. self.justify = justify
  1467. def get_result(self):
  1468. fmt_values = self._format_strings()
  1469. return _make_fixed_width(fmt_values, self.justify)
  1470. def _format_strings(self):
  1471. if self.float_format is None:
  1472. float_format = get_option("display.float_format")
  1473. if float_format is None:
  1474. fmt_str = '%% .%dg' % get_option("display.precision")
  1475. float_format = lambda x: fmt_str % x
  1476. else:
  1477. float_format = self.float_format
  1478. formatter = self.formatter if self.formatter is not None else \
  1479. (lambda x: com.pprint_thing(x, escape_chars=('\t', '\r', '\n')))
  1480. def _format(x):
  1481. if self.na_rep is not None and lib.checknull(x):
  1482. if x is None:
  1483. return 'None'
  1484. return self.na_rep
  1485. elif isinstance(x, PandasObject):
  1486. return '%s' % x
  1487. else:
  1488. # object dtype
  1489. return '%s' % formatter(x)
  1490. vals = self.values
  1491. is_float = lib.map_infer(vals, com.is_float) & notnull(vals)
  1492. leading_space = is_float.any()
  1493. fmt_values = []
  1494. for i, v in enumerate(vals):
  1495. if not is_float[i] and leading_space:
  1496. fmt_values.append(' %s' % _format(v))
  1497. elif is_float[i]:
  1498. fmt_values.append(float_format(v))
  1499. else:
  1500. fmt_values.append(' %s' % _format(v))
  1501. return fmt_values
  1502. class FloatArrayFormatter(GenericArrayFormatter):
  1503. """
  1504. """
  1505. def __init__(self, *args, **kwargs):
  1506. GenericArrayFormatter.__init__(self, *args, **kwargs)
  1507. if self.float_format is not None and self.formatter is None:
  1508. self.formatter = self.float_format
  1509. def _format_with(self, fmt_str):
  1510. def _val(x, threshold):
  1511. if notnull(x):
  1512. if (threshold is None or
  1513. abs(x) > get_option("display.chop_threshold")):
  1514. return fmt_str % x
  1515. else:
  1516. if fmt_str.endswith("e"): # engineering format
  1517. return "0"
  1518. else:
  1519. return fmt_str % 0
  1520. else:
  1521. return self.na_rep
  1522. threshold = get_option("display.chop_threshold")
  1523. fmt_values = [_val(x, threshold) for x in self.values]
  1524. return _trim_zeros(fmt_values, self.na_rep)
  1525. def _format_strings(self):
  1526. if self.formatter is not None:
  1527. fmt_values = [self.formatter(x) for x in self.values]
  1528. else:
  1529. fmt_str = '%% .%df' % (self.digits - 1)
  1530. fmt_values = self._format_with(fmt_str)
  1531. if len(fmt_values) > 0:
  1532. maxlen = max(len(x) for x in fmt_values)
  1533. else:
  1534. maxlen = 0
  1535. too_long = maxlen > self.digits + 5
  1536. abs_vals = np.abs(self.values)
  1537. # this is pretty arbitrary for now
  1538. has_large_values = (abs_vals > 1e8).any()
  1539. has_small_values = ((abs_vals < 10 ** (-self.digits)) &
  1540. (abs_vals > 0)).any()
  1541. if too_long and has_large_values:
  1542. fmt_str = '%% .%de' % (self.digits - 1)
  1543. fmt_values = self._format_with(fmt_str)
  1544. elif has_small_values:
  1545. fmt_str = '%% .%de' % (self.digits - 1)
  1546. fmt_values = self._format_with(fmt_str)
  1547. return fmt_values
  1548. class IntArrayFormatter(GenericArrayFormatter):
  1549. def _format_strings(self):
  1550. formatter = self.formatter or (lambda x: '% d' % x)
  1551. fmt_values = [formatter(x) for x in self.values]
  1552. return fmt_values
  1553. class Datetime64Formatter(GenericArrayFormatter):
  1554. def __init__(self, values, nat_rep='NaT', date_format=None, **kwargs):
  1555. super(Datetime64Formatter, self).__init__(values, **kwargs)
  1556. self.nat_rep = nat_rep
  1557. self.date_format = date_format
  1558. def _format_strings(self):
  1559. formatter = self.formatter or _get_format_datetime64_from_values(
  1560. self.values,
  1561. nat_rep=self.nat_rep,
  1562. date_format=self.date_format)
  1563. fmt_values = [formatter(x) for x in self.values]
  1564. return fmt_values
  1565. def _format_datetime64(x, tz=None, nat_rep='NaT'):
  1566. if x is None or lib.checknull(x):
  1567. return nat_rep
  1568. if tz is not None or not isinstance(x, lib.Timestamp):
  1569. x = lib.Timestamp(x, tz=tz)
  1570. return str(x)
  1571. def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None):
  1572. if x is None or lib.checknull(x):
  1573. return nat_rep
  1574. if not isinstance(x, lib.Timestamp):
  1575. x = lib.Timestamp(x)
  1576. if date_format:
  1577. return x.strftime(date_format)
  1578. else:
  1579. return x._date_repr
  1580. def _is_dates_only(values):
  1581. for d in values:
  1582. if isinstance(d, np.datetime64):
  1583. d = lib.Timestamp(d)
  1584. if d is not None and not lib.checknull(d) and d._has_time_component():
  1585. return False
  1586. return True
  1587. def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None):
  1588. if is_dates_only:
  1589. return lambda x, tz=None: _format_datetime64_dateonly(x,
  1590. nat_rep=nat_rep,
  1591. date_format=date_format)
  1592. else:
  1593. return lambda x, tz=None: _format_datetime64(x, tz=tz, nat_rep=nat_rep)
  1594. def _get_format_datetime64_from_values(values,
  1595. nat_rep='NaT',
  1596. date_format=None):
  1597. is_dates_only = _is_dates_only(values)
  1598. return _get_format_datetime64(is_dates_only=is_dates_only,
  1599. nat_rep=nat_rep,
  1600. date_format=date_format)
  1601. class Timedelta64Formatter(GenericArrayFormatter):
  1602. def _format_strings(self):
  1603. formatter = self.formatter or _get_format_timedelta64(self.values)
  1604. fmt_values = [formatter(x) for x in self.values]
  1605. return fmt_values
  1606. def _get_format_timedelta64(values):
  1607. values_int = values.astype(np.int64)
  1608. consider_values = values_int != iNaT
  1609. one_day_in_nanos = (86400 * 1e9)
  1610. even_days = np.logical_and(consider_values, values_int % one_day_in_nanos != 0).sum() == 0
  1611. all_sub_day = np.logical_and(consider_values, np.abs(values_int) >= one_day_in_nanos).sum() == 0
  1612. format_short = even_days or all_sub_day
  1613. format = "short" if format_short else "long"
  1614. def impl(x):
  1615. if x is None or lib.checknull(x):
  1616. return 'NaT'
  1617. elif format_short and com.is_integer(x) and x.view('int64') == 0:
  1618. return "0 days" if even_days else "00:00:00"
  1619. else:
  1620. return lib.repr_timedelta64(x, format=format)
  1621. return impl
  1622. def _make_fixed_width(strings, justify='right', minimum=None):
  1623. if len(strings) == 0 or justify == 'all':
  1624. return strings
  1625. _strlen = _strlen_func()
  1626. max_len = np.max([_strlen(x) for x in strings])
  1627. if minimum is not None:
  1628. max_len = max(minimum, max_len)
  1629. conf_max = get_option("display.max_colwidth")
  1630. if conf_max is not None and max_len > conf_max:
  1631. max_len = conf_max
  1632. if justify == 'left':
  1633. justfunc = lambda self, x: self.ljust(x)
  1634. else:
  1635. justfunc = lambda self, x: self.rjust(x)
  1636. def just(x):
  1637. eff_len = max_len
  1638. if conf_max is not None:
  1639. if (conf_max > 3) & (_strlen(x) > max_len):
  1640. x = x[:eff_len - 3] + '...'
  1641. return justfunc(x, eff_len)
  1642. result = [just(x) for x in strings]
  1643. return result
  1644. def _trim_zeros(str_floats, na_rep='NaN'):
  1645. """
  1646. Trims zeros and decimal points.
  1647. """
  1648. trimmed = str_floats
  1649. def _cond(values):
  1650. non_na = [x for x in values if x != na_rep]
  1651. return (len(non_na) > 0 and all([x.endswith('0') for x in non_na]) and
  1652. not(any([('e' in x) or ('E' in x) for x in non_na])))
  1653. while _cond(trimmed):
  1654. trimmed = [x[:-1] if x != na_rep else x for x in trimmed]
  1655. # trim decimal points
  1656. return [x[:-1] if x.endswith('.') and x != na_rep else x for x in trimmed]
  1657. def single_column_table(column, align=None, style=None):
  1658. table = '<table'
  1659. if align is not None:
  1660. table += (' align="%s"' % align)
  1661. if style is not None:
  1662. table += (' style="%s"' % style)
  1663. table += '><tbody>'
  1664. for i in column:
  1665. table += ('<tr><td>%s</td></tr>' % str(i))
  1666. table += '</tbody></table>'
  1667. return table
  1668. def single_row_table(row): # pragma: no cover
  1669. table = '<table><tbody><tr>'
  1670. for i in row:
  1671. table += ('<td>%s</td>' % str(i))
  1672. table += '</tr></tbody></table>'
  1673. return table
  1674. def _has_names(index):
  1675. if isinstance(index, MultiIndex):
  1676. return any([x is not None for x in index.names])
  1677. else:
  1678. return index.name is not None
  1679. #------------------------------------------------------------------------------
  1680. # Global formatting options
  1681. _initial_defencoding = None
  1682. def detect_console_encoding():
  1683. """
  1684. Try to find the most capable encoding supported by the console.
  1685. slighly modified from the way IPython handles the same issue.
  1686. """
  1687. import locale
  1688. global _initial_defencoding
  1689. encoding = None
  1690. try:
  1691. encoding = sys.stdout.encoding or sys.stdin.encoding
  1692. except AttributeError:
  1693. pass
  1694. # try again for something better
  1695. if not encoding or 'ascii' in encoding.lower():
  1696. try:
  1697. encoding = locale.getpreferredencoding()
  1698. except Exception:
  1699. pass
  1700. # when all else fails. this will usually be "ascii"
  1701. if not encoding or 'ascii' in encoding.lower():
  1702. encoding = sys.getdefaultencoding()
  1703. # GH3360, save the reported defencoding at import time
  1704. # MPL backends may change it. Make available for debugging.
  1705. if not _initial_defencoding:
  1706. _initial_defencoding = sys.getdefaultencoding()
  1707. return encoding
  1708. def get_console_size():
  1709. """Return console size as tuple = (width, height).
  1710. Returns (None,None) in non-interactive session.
  1711. """
  1712. display_width = get_option('display.width')
  1713. # deprecated.
  1714. display_height = get_option('display.height', silent=True)
  1715. # Consider
  1716. # interactive shell terminal, can detect term size
  1717. # interactive non-shell terminal (ipnb/ipqtconsole), cannot detect term
  1718. # size non-interactive script, should disregard term size
  1719. # in addition
  1720. # width,height have default values, but setting to 'None' signals
  1721. # should use Auto-Detection, But only in interactive shell-terminal.
  1722. # Simple. yeah.
  1723. if com.in_interactive_session():
  1724. if com.in_ipython_frontend():
  1725. # sane defaults for interactive non-shell terminal
  1726. # match default for width,height in config_init
  1727. from pandas.core.config import get_default_val
  1728. terminal_width = get_default_val('display.width')
  1729. terminal_height = get_default_val('display.height')
  1730. else:
  1731. # pure terminal
  1732. terminal_width, terminal_height = get_terminal_size()
  1733. else:
  1734. terminal_width, terminal_height = None, None
  1735. # Note if the User sets width/Height to None (auto-detection)
  1736. # and we're in a script (non-inter), this will return (None,None)
  1737. # caller needs to deal.
  1738. return (display_width or terminal_width, display_height or terminal_height)
  1739. class EngFormatter(object):
  1740. """
  1741. Formats float values according to engineering format.
  1742. Based on matplotlib.ticker.EngFormatter
  1743. """
  1744. # The SI engineering prefixes
  1745. ENG_PREFIXES = {
  1746. -24: "y",
  1747. -21: "z",
  1748. -18: "a",
  1749. -15: "f",
  1750. -12: "p",
  1751. -9: "n",
  1752. -6: "u",
  1753. -3: "m",
  1754. 0: "",
  1755. 3: "k",
  1756. 6: "M",
  1757. 9: "G",
  1758. 12: "T",
  1759. 15: "P",
  1760. 18: "E",
  1761. 21: "Z",
  1762. 24: "Y"
  1763. }
  1764. def __init__(self, accuracy=None, use_eng_prefix=False):
  1765. self.accuracy = accuracy
  1766. self.use_eng_prefix = use_eng_prefix
  1767. def __call__(self, num):
  1768. """ Formats a number in engineering notation, appending a letter
  1769. representing the power of 1000 of the original number. Some examples:
  1770. >>> format_eng(0) # for self.accuracy = 0
  1771. ' 0'
  1772. >>> format_eng(1000000) # for self.accuracy = 1,
  1773. # self.use_eng_prefix = True
  1774. ' 1.0M'
  1775. >>> format_eng("-1e-6") # for self.accuracy = 2
  1776. # self.use_eng_prefix = False
  1777. '-1.00E-06'
  1778. @param num: the value to represent
  1779. @type num: either a numeric value or a string that can be converted to
  1780. a numeric value (as per decimal.Decimal constructor)
  1781. @return: engineering formatted string
  1782. """
  1783. import decimal
  1784. import math
  1785. dnum = decimal.Decimal(str(num))
  1786. sign = 1
  1787. if dnum < 0: # pragma: no cover
  1788. sign = -1
  1789. dnum = -dnum
  1790. if dnum != 0:
  1791. pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
  1792. else:
  1793. pow10 = decimal.Decimal(0)
  1794. pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
  1795. pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
  1796. int_pow10 = int(pow10)
  1797. if self.use_eng_prefix:
  1798. prefix = self.ENG_PREFIXES[int_pow10]
  1799. else:
  1800. if int_pow10 < 0:
  1801. prefix = 'E-%02d' % (-int_pow10)
  1802. else:
  1803. prefix = 'E+%02d' % int_pow10
  1804. mant = sign * dnum / (10 ** pow10)
  1805. if self.accuracy is None: # pragma: no cover
  1806. format_str = u("% g%s")
  1807. else:
  1808. format_str = (u("%% .%if%%s") % self.accuracy)
  1809. formatted = format_str % (mant, prefix)
  1810. return formatted # .strip()
  1811. def set_eng_float_format(accuracy=3, use_eng_prefix=False):
  1812. """
  1813. Alter default behavior on how float is formatted in DataFrame.
  1814. Format float in engineering format. By accuracy, we mean the number of
  1815. decimal digits after the floating point.
  1816. See also EngFormatter.
  1817. """
  1818. set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix))
  1819. set_option("display.column_space", max(12, accuracy + 9))
  1820. def _put_lines(buf, lines):
  1821. if any(isinstance(x, compat.text_type) for x in lines):
  1822. lines = [compat.text_type(x) for x in lines]
  1823. buf.write('\n'.join(lines))
  1824. def _binify(cols, line_width):
  1825. adjoin_width = 1
  1826. bins = []
  1827. curr_width = 0
  1828. i_last_column = len(cols) - 1
  1829. for i, w in enumerate(cols):
  1830. w_adjoined = w + adjoin_width
  1831. curr_width += w_adjoined
  1832. if i_last_column == i:
  1833. wrap = curr_width + 1 > line_width and i > 0
  1834. else:
  1835. wrap = curr_width + 2 > line_width and i > 0
  1836. if wrap:
  1837. bins.append(i)
  1838. curr_width = w_adjoined
  1839. bins.append(len(cols))
  1840. return bins
  1841. if __name__ == '__main__':
  1842. arr = np.array([746.03, 0.00, 5620.00, 1592.36])
  1843. # arr = np.array([11111111.1, 1.55])
  1844. # arr = [314200.0034, 1.4125678]
  1845. arr = np.array([327763.3119, 345040.9076, 364460.9915, 398226.8688,
  1846. 383800.5172, 433442.9262, 539415.0568, 568590.4108,
  1847. 599502.4276, 620921.8593, 620898.5294, 552427.1093,
  1848. 555221.2193, 519639.7059, 388175.7, 379199.5854,
  1849. 614898.25, 504833.3333, 560600., 941214.2857,
  1850. 1134250., 1219550., 855736.85, 1042615.4286,
  1851. 722621.3043, 698167.1818, 803750.])
  1852. fmt = FloatArrayFormatter(arr, digits=7)
  1853. print(fmt.get_result())