/petl/test/transform/test_sorts.py
http://github.com/alimanfoo/petl · Python · 525 lines · 404 code · 106 blank · 15 comment · 8 complexity · a5deef1978d912c7773f9c17f424e6f5 MD5 · raw file
- from __future__ import absolute_import, print_function, division
- import os
- import gc
- import logging
- from datetime import datetime
- import sys
- from petl.compat import next
- from petl.test.helpers import ieq, eq_
- from petl.util import nrows
- from petl.transform.basics import cat
- from petl.transform.sorts import sort, mergesort, issorted
- logger = logging.getLogger(__name__)
- debug = logger.debug
- def test_sort_1():
- table = (('foo', 'bar'),
- ('C', '2'),
- ('A', '9'),
- ('A', '6'),
- ('F', '1'),
- ('D', '10'))
- result = sort(table, 'foo')
- expectation = (('foo', 'bar'),
- ('A', '9'),
- ('A', '6'),
- ('C', '2'),
- ('D', '10'),
- ('F', '1'))
- ieq(expectation, result)
- def test_sort_2():
- table = (('foo', 'bar'),
- ('C', '2'),
- ('A', '9'),
- ('A', '6'),
- ('F', '1'),
- ('D', '10'))
- result = sort(table, key=('foo', 'bar'))
- expectation = (('foo', 'bar'),
- ('A', '6'),
- ('A', '9'),
- ('C', '2'),
- ('D', '10'),
- ('F', '1'))
- ieq(expectation, result)
- result = sort(table) # default is lexical sort
- expectation = (('foo', 'bar'),
- ('A', '6'),
- ('A', '9'),
- ('C', '2'),
- ('D', '10'),
- ('F', '1'))
- ieq(expectation, result)
- def test_sort_3():
- table = (('foo', 'bar'),
- ('C', '2'),
- ('A', '9'),
- ('A', '6'),
- ('F', '1'),
- ('D', '10'))
- result = sort(table, 'bar')
- expectation = (('foo', 'bar'),
- ('F', '1'),
- ('D', '10'),
- ('C', '2'),
- ('A', '6'),
- ('A', '9'))
- ieq(expectation, result)
- def test_sort_4():
- table = (('foo', 'bar'),
- ('C', 2),
- ('A', 9),
- ('A', 6),
- ('F', 1),
- ('D', 10))
- result = sort(table, 'bar')
- expectation = (('foo', 'bar'),
- ('F', 1),
- ('C', 2),
- ('A', 6),
- ('A', 9),
- ('D', 10))
- ieq(expectation, result)
- def test_sort_5():
- table = (('foo', 'bar'),
- (2.3, 2),
- (1.2, 9),
- (2.3, 6),
- (3.2, 1),
- (1.2, 10))
- expectation = (('foo', 'bar'),
- (1.2, 9),
- (1.2, 10),
- (2.3, 2),
- (2.3, 6),
- (3.2, 1))
- # can use either field names or indices (from 1) to specify sort key
- result = sort(table, key=('foo', 'bar'))
- ieq(expectation, result)
- result = sort(table, key=(0, 1))
- ieq(expectation, result)
- result = sort(table, key=('foo', 1))
- ieq(expectation, result)
- result = sort(table, key=(0, 'bar'))
- ieq(expectation, result)
- def test_sort_6():
- table = (('foo', 'bar'),
- (2.3, 2),
- (1.2, 9),
- (2.3, 6),
- (3.2, 1),
- (1.2, 10))
- expectation = (('foo', 'bar'),
- (3.2, 1),
- (2.3, 6),
- (2.3, 2),
- (1.2, 10),
- (1.2, 9))
- result = sort(table, key=('foo', 'bar'), reverse=True)
- ieq(expectation, result)
- def test_sort_buffered():
- table = (('foo', 'bar'),
- ('C', 2),
- ('A', 9),
- ('A', 6),
- ('F', 1),
- ('D', 10))
- # test sort forwards
- expectation = (('foo', 'bar'),
- ('F', 1),
- ('C', 2),
- ('A', 6),
- ('A', 9),
- ('D', 10))
- result = sort(table, 'bar')
- ieq(expectation, result)
- result = sort(table, 'bar', buffersize=2)
- ieq(expectation, result)
- # sort in reverse
- expectation = (('foo', 'bar'),
- ('D', 10),
- ('A', 9),
- ('A', 6),
- ('C', 2),
- ('F', 1))
- result = sort(table, 'bar', reverse=True)
- ieq(expectation, result)
- result = sort(table, 'bar', reverse=True, buffersize=2)
- ieq(expectation, result)
- # no key
- expectation = (('foo', 'bar'),
- ('F', 1),
- ('D', 10),
- ('C', 2),
- ('A', 9),
- ('A', 6))
- result = sort(table, reverse=True)
- ieq(expectation, result)
- result = sort(table, reverse=True, buffersize=2)
- ieq(expectation, result)
- def test_sort_buffered_tempdir():
- table = (('foo', 'bar'),
- ('C', 2),
- ('A', 9),
- ('A', 6),
- ('F', 1),
- ('D', 10))
- # test sort forwards
- expectation = (('foo', 'bar'),
- ('F', 1),
- ('C', 2),
- ('A', 6),
- ('A', 9),
- ('D', 10))
- result = sort(table, 'bar')
- ieq(expectation, result)
- tempdir = 'tmp'
- if not os.path.exists(tempdir):
- os.mkdir(tempdir)
- result = sort(table, 'bar', buffersize=2, tempdir=tempdir)
- ieq(expectation, result)
- def test_sort_buffered_independent():
- table = (('foo', 'bar'),
- ('C', 2),
- ('A', 9),
- ('A', 6),
- ('F', 1),
- ('D', 10))
- expectation = (('foo', 'bar'),
- ('F', 1),
- ('C', 2),
- ('A', 6),
- ('A', 9),
- ('D', 10))
- result = sort(table, 'bar', buffersize=4)
- nrows(result) # cause data to be cached
- # check that two row iterators are independent, i.e., consuming rows
- # from one does not affect the other
- it1 = iter(result)
- it2 = iter(result)
- eq_(expectation[0], next(it1))
- eq_(expectation[1], next(it1))
- eq_(expectation[0], next(it2))
- eq_(expectation[1], next(it2))
- eq_(expectation[2], next(it2))
- eq_(expectation[2], next(it1))
- def _get_names(l):
- return [x.name for x in l]
- def test_sort_buffered_cleanup():
- table = (('foo', 'bar'),
- ('C', 2),
- ('A', 9),
- ('A', 6),
- ('F', 1),
- ('D', 10))
- result = sort(table, 'bar', buffersize=2)
- debug('initially filecache should be empty')
- eq_(None, result._filecache)
- debug('pull rows through, should populate file cache')
- eq_(5, nrows(result))
- eq_(3, len(result._filecache))
- debug('check all files exist')
- filenames = _get_names(result._filecache)
- for fn in filenames:
- assert os.path.exists(fn), fn
- debug('delete object and garbage collect')
- del result
- gc.collect()
- debug('check all files have been deleted')
- for fn in filenames:
- assert not os.path.exists(fn), fn
- import platform
- if platform.python_implementation() == 'PyPy':
- print('SKIP sort cleanup test (PyPy)', file=sys.stderr)
- else:
- def test_sort_buffered_cleanup_open_iterator():
- table = (('foo', 'bar'),
- ('C', 2),
- ('A', 9),
- ('A', 6),
- ('F', 1),
- ('D', 10))
- # check if cleanup is robust against open iterators
- result = sort(table, 'bar', buffersize=2)
- debug('pull rows through, should populate file cache')
- eq_(5, nrows(result))
- eq_(3, len(result._filecache))
- debug('check all files exist')
- filenames = _get_names(result._filecache)
- for fn in filenames:
- assert os.path.exists(fn), fn
- debug(filenames)
- debug('open an iterator')
- it = iter(result)
- next(it)
- next(it)
- debug('delete objects and garbage collect')
- del result
- del it
- gc.collect()
- for fn in filenames:
- assert not os.path.exists(fn), fn
- def test_sort_empty():
- table = (('foo', 'bar'),)
- expect = (('foo', 'bar'),)
- actual = sort(table)
- ieq(expect, actual)
- def test_sort_none():
- table = (('foo', 'bar'),
- ('C', 2),
- ('A', 9),
- ('A', None),
- ('F', 1),
- ('D', 10))
- result = sort(table, 'bar')
- print(list(result))
- expectation = (('foo', 'bar'),
- ('A', None),
- ('F', 1),
- ('C', 2),
- ('A', 9),
- ('D', 10))
- ieq(expectation, result)
- dt = datetime.now().replace
- table = (('foo', 'bar'),
- ('C', dt(hour=5)),
- ('A', dt(hour=1)),
- ('A', None),
- ('F', dt(hour=9)),
- ('D', dt(hour=17)))
- result = sort(table, 'bar')
- expectation = (('foo', 'bar'),
- ('A', None),
- ('A', dt(hour=1)),
- ('C', dt(hour=5)),
- ('F', dt(hour=9)),
- ('D', dt(hour=17)))
- ieq(expectation, result)
- # TODO test sort with native comparison
- def test_mergesort_1():
- table1 = (('foo', 'bar'),
- ('A', 6),
- ('C', 2),
- ('D', 10),
- ('A', 9),
- ('F', 1))
- table2 = (('foo', 'bar'),
- ('B', 3),
- ('D', 10),
- ('A', 10),
- ('F', 4))
- # should be same as concatenate then sort (but more efficient, esp. when
- # presorted)
- expect = sort(cat(table1, table2))
- actual = mergesort(table1, table2)
- ieq(expect, actual)
- ieq(expect, actual)
- actual = mergesort(sort(table1), sort(table2), presorted=True)
- ieq(expect, actual)
- ieq(expect, actual)
- def test_mergesort_2():
- table1 = (('foo', 'bar'),
- ('A', 9),
- ('C', 2),
- ('D', 10),
- ('A', 6),
- ('F', 1))
- table2 = (('foo', 'baz'),
- ('B', 3),
- ('D', 10),
- ('A', 10),
- ('F', 4))
- # should be same as concatenate then sort (but more efficient, esp. when
- # presorted)
- expect = sort(cat(table1, table2), key='foo')
- actual = mergesort(table1, table2, key='foo')
- ieq(expect, actual)
- ieq(expect, actual)
- actual = mergesort(sort(table1, key='foo'), sort(table2, key='foo'),
- key='foo', presorted=True)
- ieq(expect, actual)
- ieq(expect, actual)
- def test_mergesort_3():
- table1 = (('foo', 'bar'),
- ('A', 9),
- ('C', 2),
- ('D', 10),
- ('A', 6),
- ('F', 1))
- table2 = (('foo', 'baz'),
- ('B', 3),
- ('D', 10),
- ('A', 10),
- ('F', 4))
- # should be same as concatenate then sort (but more efficient, esp. when
- # presorted)
- expect = sort(cat(table1, table2), key='foo', reverse=True)
- actual = mergesort(table1, table2, key='foo', reverse=True)
- ieq(expect, actual)
- ieq(expect, actual)
- actual = mergesort(sort(table1, key='foo', reverse=True),
- sort(table2, key='foo', reverse=True),
- key='foo', reverse=True, presorted=True)
- ieq(expect, actual)
- ieq(expect, actual)
- def test_mergesort_4():
- table1 = (('foo', 'bar', 'baz'),
- (1, 'A', True),
- (2, 'B', None),
- (4, 'C', True))
- table2 = (('bar', 'baz', 'quux'),
- ('A', True, 42.0),
- ('B', False, 79.3),
- ('C', False, 12.4))
- expect = sort(cat(table1, table2), key='bar')
- actual = mergesort(table1, table2, key='bar')
- ieq(expect, actual)
- ieq(expect, actual)
- def test_mergesort_empty():
- table1 = (('foo', 'bar'),
- ('A', 9),
- ('C', 2),
- ('D', 10),
- ('F', 1))
- table2 = (('foo', 'bar'),)
- expect = table1
- actual = mergesort(table1, table2, key='foo')
- ieq(expect, actual)
- ieq(expect, actual)
- def test_issorted():
- table1 = (('foo', 'bar', 'baz'),
- ('a', 1, True),
- ('b', 3, True),
- ('b', 2))
- assert issorted(table1, key='foo')
- assert not issorted(table1, key='foo', reverse=True)
- assert not issorted(table1, key='foo', strict=True)
- table2 = (('foo', 'bar', 'baz'),
- ('b', 2, True),
- ('a', 1, True),
- ('b', 3))
- assert not issorted(table2, key='foo')
- table3 = (('foo', 'bar', 'baz'),
- ('a', 1, True),
- ('b', 2, True),
- ('b', 3))
- assert issorted(table3, key=('foo', 'bar'))
- assert issorted(table3)
- table4 = (('foo', 'bar', 'baz'),
- ('a', 1, True),
- ('b', 3, True),
- ('b', 2))
- assert not issorted(table4, key=('foo', 'bar'))
- assert not issorted(table4)
- table5 = (('foo', 'bar', 'baz'),
- ('b', 3, True),
- ('b', 2),
- ('a', 1, True))
- assert not issorted(table5, key='foo')
- assert issorted(table5, key='foo', reverse=True)
- assert not issorted(table5, key='foo', reverse=True, strict=True)