
/bangkokhotel/lib/python2.5/site-packages/whoosh/codec/base.py

https://bitbucket.org/luisrodriguez/bangkokhotel
# Copyright 2011 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#    1. Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#
#    2. Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.

"""
This module contains base classes/interfaces for "codec" objects.
"""

import random
from array import array
from struct import Struct, pack
from bisect import bisect_right

from whoosh.compat import (loads, dumps, b, bytes_type, string_type, xrange,
                           array_frombytes, array_tobytes)
from whoosh.filedb.compound import CompoundStorage
from whoosh.matching import Matcher, ReadTooFar
from whoosh.reading import TermInfo
from whoosh.spans import Span
from whoosh.system import (_INT_SIZE, _FLOAT_SIZE, pack_long, unpack_long,
                           IS_LITTLE)
from whoosh.util import byte_to_length, length_to_byte


try:
    from zlib import compress, decompress
    can_compress = True
except ImportError:
    can_compress = False


# Base classes

class Codec(object):
    # Per document value writer
    def per_document_writer(self, storage, segment):
        raise NotImplementedError

    # Inverted index writer
    def field_writer(self, storage, segment):
        raise NotImplementedError

    # Readers

    def terms_reader(self, storage, segment):
        raise NotImplementedError

    def lengths_reader(self, storage, segment):
        raise NotImplementedError

    def vector_reader(self, storage, segment):
        raise NotImplementedError

    def stored_fields_reader(self, storage, segment):
        raise NotImplementedError

    def graph_reader(self, storage, segment):
        raise NotImplementedError

    # Segments and generations

    def new_segment(self, storage, indexname):
        raise NotImplementedError

    def commit_toc(self, storage, indexname, schema, segments, generation):
        raise NotImplementedError

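# The methods above are factories: the index machinery hands a Codec a storage
# and a segment and asks it for the writer/reader objects that do the actual
# work. A minimal caller-side sketch follows; the function name and argument
# order are illustrative assumptions, not part of this module.

def _example_use_codec(codec, storage, segment):
    # Writers used when flushing a new segment to storage
    perdoc_writer = codec.per_document_writer(storage, segment)
    field_writer = codec.field_writer(storage, segment)
    # Readers used when the segment is later opened for searching
    terms = codec.terms_reader(storage, segment)
    lengths = codec.lengths_reader(storage, segment)
    return perdoc_writer, field_writer, terms, lengths
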

# Writer classes

class PerDocumentWriter(object):
    def start_doc(self, docnum):
        raise NotImplementedError

    def add_field(self, fieldname, fieldobj, value, length):
        raise NotImplementedError

    def add_vector_items(self, fieldname, fieldobj, items):
        raise NotImplementedError

    def add_vector_matcher(self, fieldname, fieldobj, vmatcher):
        def readitems():
            while vmatcher.is_active():
                text = vmatcher.id()
                weight = vmatcher.weight()
                valuestring = vmatcher.value()
                yield (text, None, weight, valuestring)
                vmatcher.next()
        self.add_vector_items(fieldname, fieldobj, readitems())

    def finish_doc(self):
        pass

    def lengths_reader(self):
        raise NotImplementedError

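# A hedged sketch of how a caller is expected to drive a PerDocumentWriter:
# one start_doc()/finish_doc() pair per document, with add_field() (and
# optionally add_vector_items()) in between. The helper name and the shape of
# field_items below are assumptions for illustration only.

def _example_write_document(pdw, docnum, field_items):
    pdw.start_doc(docnum)
    for fieldname, fieldobj, value, length in field_items:
        # value is the stored value (or None), length the indexed field length
        pdw.add_field(fieldname, fieldobj, value, length)
    pdw.finish_doc()
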
class FieldWriter(object):
    def add_postings(self, schema, lengths, items):
        start_field = self.start_field
        start_term = self.start_term
        add = self.add
        finish_term = self.finish_term
        finish_field = self.finish_field

        # items = (fieldname, text, docnum, weight, valuestring) ...
        lastfn = None
        lasttext = None
        dfl = lengths.doc_field_length
        for fieldname, text, docnum, weight, valuestring in items:
            # Items where docnum is None indicate words that should be added
            # to the spelling graph
            if docnum is None and (fieldname != lastfn or text != lasttext):
                self.add_spell_word(fieldname, text)
                lastfn = fieldname
                lasttext = text
                continue

            # This comparison is so convoluted because Python 3 removed the
            # ability to compare a string to None
            if ((lastfn is not None and fieldname < lastfn)
                or (fieldname == lastfn and lasttext is not None
                    and text < lasttext)):
                raise Exception("Postings are out of order: %r:%s .. %r:%s" %
                                (lastfn, lasttext, fieldname, text))
            if fieldname != lastfn or text != lasttext:
                if lasttext is not None:
                    finish_term()
                if fieldname != lastfn:
                    if lastfn is not None:
                        finish_field()
                    start_field(fieldname, schema[fieldname])
                    lastfn = fieldname
                start_term(text)
                lasttext = text
            length = dfl(docnum, fieldname)
            add(docnum, weight, valuestring, length)
        if lasttext is not None:
            finish_term()
            finish_field()

    def start_field(self, fieldname, fieldobj):
        raise NotImplementedError

    def start_term(self, text):
        raise NotImplementedError

    def add(self, docnum, weight, valuestring, length):
        raise NotImplementedError

    def add_spell_word(self, fieldname, text):
        raise NotImplementedError

    def finish_term(self):
        raise NotImplementedError

    def finish_field(self):
        pass

    def close(self):
        pass

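# FieldWriter.add_postings() consumes an iterator of
# (fieldname, text, docnum, weight, valuestring) tuples that must already be
# sorted by field name and then by term text; a docnum of None marks a word
# that only feeds the spelling graph. A small illustrative stream (the field
# and term names are made up):

def _example_postings_stream():
    return [
        ("content", "alfa", 1, 2.0, None),
        ("content", "alfa", 5, 1.0, None),
        ("content", "bravo", 2, 1.0, None),
        ("title", "bravo", None, None, None),  # spelling-only entry
    ]
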

# Reader classes

class TermsReader(object):
    def __contains__(self, term):
        raise NotImplementedError

    def terms(self):
        raise NotImplementedError

    def terms_from(self, fieldname, prefix):
        raise NotImplementedError

    def items(self):
        raise NotImplementedError

    def items_from(self, fieldname, prefix):
        raise NotImplementedError

    def terminfo(self, fieldname, text):
        raise NotImplementedError

    def frequency(self, fieldname, text):
        return self.terminfo(fieldname, text).weight()

    def doc_frequency(self, fieldname, text):
        return self.terminfo(fieldname, text).doc_frequency()

    def graph_reader(self, fieldname, text):
        raise NotImplementedError

    def matcher(self, fieldname, text, format_, scorer=None):
        raise NotImplementedError

    def close(self):
        pass


class VectorReader(object):
    def __contains__(self, key):
        raise NotImplementedError

    def matcher(self, docnum, fieldname, format_):
        raise NotImplementedError


class LengthsReader(object):
    def doc_count_all(self):
        raise NotImplementedError

    def doc_field_length(self, docnum, fieldname, default=0):
        raise NotImplementedError

    def field_length(self, fieldname):
        raise NotImplementedError

    def min_field_length(self, fieldname):
        raise NotImplementedError

    def max_field_length(self, fieldname):
        raise NotImplementedError

    def close(self):
        pass


class MultiLengths(LengthsReader):
    def __init__(self, lengths, offset=0):
        self.lengths = []
        self.doc_offsets = []
        self._count = 0
        for lr in lengths:
            if lr.doc_count_all():
                self.lengths.append(lr)
                self.doc_offsets.append(self._count)
                self._count += lr.doc_count_all()
        self.is_closed = False

    def _document_reader(self, docnum):
        return max(0, bisect_right(self.doc_offsets, docnum) - 1)

    def _reader_and_docnum(self, docnum):
        lnum = self._document_reader(docnum)
        offset = self.doc_offsets[lnum]
        return lnum, docnum - offset

    def doc_count_all(self):
        return self._count

    def doc_field_length(self, docnum, fieldname, default=0):
        x, y = self._reader_and_docnum(docnum)
        return self.lengths[x].doc_field_length(y, fieldname, default=default)

    def min_field_length(self):
        return min(lr.min_field_length() for lr in self.lengths)

    def max_field_length(self):
        return max(lr.max_field_length() for lr in self.lengths)

    def close(self):
        for lr in self.lengths:
            lr.close()
        self.is_closed = True

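# A worked sketch of the docnum arithmetic used by MultiLengths: with three
# sub-readers holding 10, 5 and 20 documents, doc_offsets is [0, 10, 15], and
# a "global" document number is mapped to (reader index, local docnum) with
# bisect_right. The function below is illustrative only.

def _example_offset_mapping(docnum=12):
    doc_offsets = [0, 10, 15]
    lnum = max(0, bisect_right(doc_offsets, docnum) - 1)  # 12 -> reader 1
    return lnum, docnum - doc_offsets[lnum]               # -> (1, 2)
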

class StoredFieldsReader(object):
    def __iter__(self):
        raise NotImplementedError

    def __getitem__(self, docnum):
        raise NotImplementedError

    def cell(self, docnum, fieldname):
        fielddict = self.get(docnum)
        return fielddict.get(fieldname)

    def column(self, fieldname):
        for fielddict in self:
            yield fielddict.get(fieldname)

    def close(self):
        pass


# File posting matcher middleware

class FilePostingMatcher(Matcher):
    # Subclasses need to set
    #   self._term -- (fieldname, text) or None
    #   self.scorer -- a Scorer object or None
    #   self.format -- Format object for the posting values

    def __repr__(self):
        return "%s(%r, %r, %s)" % (self.__class__.__name__, str(self.postfile),
                                   self.term(), self.is_active())

    def term(self):
        return self._term

    def items_as(self, astype):
        decoder = self.format.decoder(astype)
        for id, value in self.all_items():
            yield (id, decoder(value))

    def supports(self, astype):
        return self.format.supports(astype)

    def value_as(self, astype):
        decoder = self.format.decoder(astype)
        return decoder(self.value())

    def spans(self):
        if self.supports("characters"):
            return [Span(pos, startchar=startchar, endchar=endchar)
                    for pos, startchar, endchar in self.value_as("characters")]
        elif self.supports("positions"):
            return [Span(pos) for pos in self.value_as("positions")]
        else:
            raise Exception("Field does not support positions (%r)"
                            % self._term)

    def supports_block_quality(self):
        return self.scorer and self.scorer.supports_block_quality()

    def max_quality(self):
        # Call the scorer's max_quality() method rather than returning the
        # bound method object itself
        return self.scorer.max_quality()

    def block_quality(self):
        return self.scorer.block_quality(self)


class BlockPostingMatcher(FilePostingMatcher):
    # Subclasses need to set
    #   self.block -- BlockBase object for the current block
    #   self.i -- Numerical index to the current place in the block
    # And implement
    #   _read_block()
    #   _next_block()
    #   _skip_to_block()

    def id(self):
        return self.block.ids[self.i]

    def weight(self):
        weights = self.block.weights
        if not weights:
            weights = self.block.read_weights()
        return weights[self.i]

    def value(self):
        values = self.block.values
        if values is None:
            values = self.block.read_values()
        return values[self.i]

    def all_ids(self):
        nextoffset = self.baseoffset
        for _ in xrange(self.blockcount):
            block = self._read_block(nextoffset)
            nextoffset = block.nextoffset
            ids = block.read_ids()
            for id in ids:
                yield id

    def next(self):
        if self.i == self.block.count - 1:
            self._next_block()
            return True
        else:
            self.i += 1
            return False

    def skip_to(self, id):
        if not self.is_active():
            raise ReadTooFar

        i = self.i
        # If we're already in the block with the target ID, do nothing
        if id <= self.block.ids[i]:
            return

        # Skip to the block that would contain the target ID
        if id > self.block.maxid:
            self._skip_to_block(lambda: id > self.block.maxid)
        if not self.is_active():
            return

        # Iterate through the IDs in the block until we find or pass the
        # target
        ids = self.block.ids
        i = self.i
        while ids[i] < id:
            i += 1
            if i == len(ids):
                self._active = False
                return
        self.i = i

    def skip_to_quality(self, minquality):
        bq = self.block_quality
        if bq() > minquality:
            return 0
        return self._skip_to_block(lambda: bq() <= minquality)

    def block_min_length(self):
        return self.block.min_length()

    def block_max_length(self):
        return self.block.max_length()

    def block_max_weight(self):
        return self.block.max_weight()

    def block_max_wol(self):
        return self.block.max_wol()

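# A caller-side sketch (an assumption about typical use, not part of this
# module) of reading a block matcher: check is_active(), pull id()/weight()
# for the current posting, and call next(), which returns True each time it
# rolls over into a new block.

def _example_read_matcher(m):
    postings = []
    while m.is_active():
        postings.append((m.id(), m.weight()))
        m.next()
    return postings
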

# File TermInfo

NO_ID = 0xffffffff


class FileTermInfo(TermInfo):
    # Freq, Doc freq, min len, max length, max weight, unused, min ID, max ID
    struct = Struct("!fIBBffII")

    def __init__(self, *args, **kwargs):
        self.postings = None
        if "postings" in kwargs:
            self.postings = kwargs["postings"]
            del kwargs["postings"]
        TermInfo.__init__(self, *args, **kwargs)

    # filedb specific methods

    def add_block(self, block):
        self._weight += sum(block.weights)
        self._df += len(block)

        ml = block.min_length()
        if self._minlength is None:
            self._minlength = ml
        else:
            self._minlength = min(self._minlength, ml)

        self._maxlength = max(self._maxlength, block.max_length())
        self._maxweight = max(self._maxweight, block.max_weight())
        if self._minid is None:
            self._minid = block.ids[0]
        self._maxid = block.ids[-1]

    def to_string(self):
        # Encode the lengths as 0-255 values
        ml = 0 if self._minlength is None else length_to_byte(self._minlength)
        xl = length_to_byte(self._maxlength)
        # Convert None values to the out-of-band NO_ID constant so they can be
        # stored as unsigned ints
        mid = NO_ID if self._minid is None else self._minid
        xid = NO_ID if self._maxid is None else self._maxid

        # Pack the term info into bytes
        st = self.struct.pack(self._weight, self._df, ml, xl, self._maxweight,
                              0, mid, xid)

        if isinstance(self.postings, tuple):
            # Postings are inlined - dump them using the pickle protocol
            isinlined = 1
            st += dumps(self.postings, -1)[2:-1]
        else:
            # Append postings pointer as long to end of term info bytes
            isinlined = 0
            # It's possible for a term info to not have a pointer to postings
            # on disk, in which case postings will be None. Convert a None
            # value to -1 so it can be stored as a long.
            p = -1 if self.postings is None else self.postings
            st += pack_long(p)

        # Prepend byte indicating whether the postings are inlined to the term
        # info bytes
        return pack("B", isinlined) + st

    @classmethod
    def from_string(cls, s):
        assert isinstance(s, bytes_type)

        if isinstance(s, string_type):
            hbyte = ord(s[0])  # Python 2.x - str
        else:
            hbyte = s[0]  # Python 3 - bytes

        if hbyte < 2:
            st = cls.struct
            # Weight, Doc freq, min len, max len, max w, unused, min ID, max ID
            w, df, ml, xl, xw, _, mid, xid = st.unpack(s[1:st.size + 1])
            mid = None if mid == NO_ID else mid
            xid = None if xid == NO_ID else xid
            # Postings
            pstr = s[st.size + 1:]
            if hbyte == 0:
                p = unpack_long(pstr)[0]
            else:
                p = loads(pstr + b("."))
        else:
            # Old format was encoded as a variable length pickled tuple
            v = loads(s + b("."))
            if len(v) == 1:
                w = df = 1
                p = v[0]
            elif len(v) == 2:
                w = df = v[1]
                p = v[0]
            else:
                w, p, df = v
            # Fake values for stats which weren't stored before
            ml = 1
            xl = 255
            xw = 999999999
            mid = -1
            xid = -1

        ml = byte_to_length(ml)
        xl = byte_to_length(xl)
        obj = cls(w, df, ml, xl, xw, mid, xid)
        obj.postings = p
        return obj

    @classmethod
    def read_weight(cls, dbfile, datapos):
        return dbfile.get_float(datapos + 1)

    @classmethod
    def read_doc_freq(cls, dbfile, datapos):
        return dbfile.get_uint(datapos + 1 + _FLOAT_SIZE)

    @classmethod
    def read_min_and_max_length(cls, dbfile, datapos):
        lenpos = datapos + 1 + _FLOAT_SIZE + _INT_SIZE
        ml = byte_to_length(dbfile.get_byte(lenpos))
        xl = byte_to_length(dbfile.get_byte(lenpos + 1))
        return ml, xl

    @classmethod
    def read_max_weight(cls, dbfile, datapos):
        weightspos = datapos + 1 + _FLOAT_SIZE + _INT_SIZE + 2
        return dbfile.get_float(weightspos)

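# A small round-trip sketch for the on-disk term info format (the numbers are
# arbitrary). Note that the min/max field lengths travel through
# length_to_byte()/byte_to_length(), so they come back quantized rather than
# exact, and a non-tuple postings attribute is stored as a long file offset.
# The constructor call mirrors the positional arguments used in from_string().

def _example_terminfo_roundtrip():
    ti = FileTermInfo(1.5, 2, 1, 5, 1.0, 0, 7, postings=1024)
    ti2 = FileTermInfo.from_string(ti.to_string())
    return ti2.weight(), ti2.doc_frequency(), ti2.postings  # -> (1.5, 2, 1024)
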

# Segment base class

class Segment(object):
576    """Do not instantiate this object directly. It is used by the Index object
577    to hold information about a segment. A list of objects of this class are
578    pickled as part of the TOC file.
579    
580    The TOC file stores a minimal amount of information -- mostly a list of
581    Segment objects. Segments are the real reverse indexes. Having multiple
582    segments allows quick incremental indexing: just create a new segment for
583    the new documents, and have the index overlay the new segment over previous
584    ones for purposes of reading/search. "Optimizing" the index combines the
585    contents of existing segments into one (removing any deleted documents
586    along the way).
587    """
588
589    # These must be valid separate characters in CASE-INSENSTIVE filenames
    IDCHARS = "0123456789abcdefghijklmnopqrstuvwxyz"
    # Extension for compound segment files
    COMPOUND_EXT = ".seg"

    # self.indexname
    # self.segid

    @classmethod
    def _random_id(cls, size=12):
        return "".join(random.choice(cls.IDCHARS) for _ in xrange(size))

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, getattr(self, "segid", ""))

    def codec(self):
        raise NotImplementedError

    def segment_id(self):
        if hasattr(self, "name"):
            # Old segment class
            return self.name
        else:
            return "%s_%s" % (self.indexname, self.segid)

    def is_compound(self):
        if not hasattr(self, "compound"):
            return False
        return self.compound

    # File convenience methods

    def make_filename(self, ext):
        return "%s%s" % (self.segment_id(), ext)

    def list_files(self, storage):
        prefix = "%s." % self.segment_id()
        return [name for name in storage.list() if name.startswith(prefix)]

    def create_file(self, storage, ext, **kwargs):
        """Convenience method to create a new file in the given storage named
        with this segment's ID and the given extension. Any keyword arguments
        are passed to the storage's create_file method.
        """

        fname = self.make_filename(ext)
        return storage.create_file(fname, **kwargs)

    def open_file(self, storage, ext, **kwargs):
        """Convenience method to open a file in the given storage named with
        this segment's ID and the given extension. Any keyword arguments are
        passed to the storage's open_file method.
        """

        fname = self.make_filename(ext)
        return storage.open_file(fname, **kwargs)

    def create_compound_file(self, storage):
        segfiles = self.list_files(storage)
        assert not any(name.endswith(self.COMPOUND_EXT) for name in segfiles)
        cfile = self.create_file(storage, self.COMPOUND_EXT)
        CompoundStorage.assemble(cfile, storage, segfiles)
        for name in segfiles:
            storage.delete_file(name)

    def open_compound_file(self, storage):
        name = self.make_filename(self.COMPOUND_EXT)
        return CompoundStorage(storage, name)

    # Abstract methods dealing with document counts and deletions

    def doc_count_all(self):
        """
        Returns the total number of documents, DELETED OR UNDELETED, in this
        segment.
        """

        raise NotImplementedError

    def doc_count(self):
        """
        :returns: the number of (undeleted) documents in this segment.
        """

        raise NotImplementedError

    def has_deletions(self):
        """
        :returns: True if any documents in this segment are deleted.
        """

        raise NotImplementedError

    def deleted_count(self):
        """
        :returns: the total number of deleted documents in this segment.
        """

        raise NotImplementedError

    def delete_document(self, docnum, delete=True):
        """Deletes the given document number. The document is not actually
        removed from the index until it is optimized.

        :param docnum: The document number to delete.
        :param delete: If False, this undeletes a deleted document.
        """

        raise NotImplementedError

    def is_deleted(self, docnum):
        """:returns: True if the given document number is deleted."""

        raise NotImplementedError

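# A sketch of the file-naming convention the convenience methods above rely
# on: every file belonging to a segment is named "<segment_id><ext>", so
# list_files() can find the segment's files by prefix and
# create_compound_file() can roll them into a single "<segment_id>.seg" file.
# The ".pst" extension below is only an illustrative assumption; it is not
# defined by this module.

def _example_segment_filenames(seg):
    return seg.make_filename(".pst"), seg.make_filename(Segment.COMPOUND_EXT)
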

# Posting block format

class BlockBase(object):
    def __init__(self, postingsize, stringids=False):
        self.postingsize = postingsize
        self.stringids = stringids
        self.ids = [] if stringids else array("I")
        self.weights = array("f")
        self.values = None

        self.minlength = None
        self.maxlength = 0
        self.maxweight = 0

    def __len__(self):
        return len(self.ids)

    def __nonzero__(self):
        return bool(self.ids)

    def min_id(self):
        if self.ids:
            return self.ids[0]
        else:
            raise IndexError

    def max_id(self):
        if self.ids:
            return self.ids[-1]
        else:
            raise IndexError

    def min_length(self):
        return self.minlength

    def max_length(self):
        return self.maxlength

    def max_weight(self):
        return self.maxweight

    def add(self, id_, weight, valuestring, length=None):
        self.ids.append(id_)
        self.weights.append(weight)
        if weight > self.maxweight:
            self.maxweight = weight
        if valuestring:
            if self.values is None:
                self.values = []
            self.values.append(valuestring)
        if length:
            if self.minlength is None or length < self.minlength:
                self.minlength = length
            if length > self.maxlength:
                self.maxlength = length

    def to_file(self, postfile):
        raise NotImplementedError

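# An illustrative sketch of filling a block: add() appends the id and weight
# and keeps the running min/max length and max weight statistics that
# FileTermInfo.add_block() and the block_* matcher methods read back. The
# values below are arbitrary.

def _example_fill_block():
    block = BlockBase(postingsize=0)
    block.add(1, 1.0, None, length=3)
    block.add(7, 2.5, None, length=9)
    return block.min_id(), block.max_id(), block.max_weight()  # -> (1, 7, 2.5)
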

# Utility functions

def minimize_ids(arry, stringids, compression=0):
    amax = arry[-1]

    if stringids:
        typecode = ''
        string = dumps(arry)
    else:
        typecode = arry.typecode
        if amax <= 255:
            typecode = "B"
        elif amax <= 65535:
            typecode = "H"

        if typecode != arry.typecode:
            arry = array(typecode, iter(arry))
        if not IS_LITTLE:
            arry.byteswap()
        string = array_tobytes(arry)
    if compression:
        string = compress(string, compression)
    return (typecode, string)


def deminimize_ids(typecode, count, string, compression=0):
    if compression:
        string = decompress(string)
    if typecode == '':
        return loads(string)
    else:
        arry = array(typecode)
        array_frombytes(arry, string)
        if not IS_LITTLE:
            arry.byteswap()
        return arry

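# A round-trip sketch for the ID helpers: minimize_ids() downsizes the array
# to the smallest typecode that can hold its largest value before converting
# it to bytes, and deminimize_ids() restores it. Compression is left at 0 here.

def _example_ids_roundtrip():
    ids = array("I", [1, 5, 42])
    typecode, string = minimize_ids(ids, stringids=False)
    restored = deminimize_ids(typecode, len(ids), string)
    return typecode, list(restored)  # -> ("B", [1, 5, 42])
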

def minimize_weights(weights, compression=0):
    if all(w == 1.0 for w in weights):
        string = b("")
    else:
        if not IS_LITTLE:
            weights.byteswap()
        string = array_tobytes(weights)
    if string and compression:
        string = compress(string, compression)
    return string


def deminimize_weights(count, string, compression=0):
    if not string:
        return array("f", (1.0 for _ in xrange(count)))
    if compression:
        string = decompress(string)
    arry = array("f")
    array_frombytes(arry, string)
    if not IS_LITTLE:
        arry.byteswap()
    return arry


def minimize_values(postingsize, values, compression=0):
    if postingsize < 0:
        string = dumps(values, -1)[2:]
    elif postingsize == 0:
        string = b('')
    else:
        string = b('').join(values)
    if string and compression:
        string = compress(string, compression)
    return string


def deminimize_values(postingsize, count, string, compression=0):
    if compression:
        string = decompress(string)

    if postingsize < 0:
        return loads(string)
    elif postingsize == 0:
        return [None] * count
    else:
        return [string[i:i + postingsize] for i
                in xrange(0, len(string), postingsize)]

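# A round-trip sketch for the value helpers: with a fixed posting size the
# value strings are simply concatenated and sliced back apart; size 0 means
# "no value stored" and a negative size means the values are pickled instead.

def _example_values_roundtrip():
    values = [b("abcd"), b("efgh")]
    string = minimize_values(4, values)
    return deminimize_values(4, 2, string)  # -> [b("abcd"), b("efgh")]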