# /bangkokhotel/lib/python2.5/site-packages/whoosh/codec/base.py
# Copyright 2011 Matt Chaput. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY MATT CHAPUT ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL MATT CHAPUT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are
# those of the authors and should not be interpreted as representing official
# policies, either expressed or implied, of Matt Chaput.

28"""
29This module contains base classes/interfaces for "codec" objects.
30"""
31
32import random
33from array import array
34from struct import Struct, pack
35from bisect import bisect_right
36
37from whoosh.compat import (loads, dumps, b, bytes_type, string_type, xrange,
38 array_frombytes, array_tobytes)
39from whoosh.filedb.compound import CompoundStorage
40from whoosh.matching import Matcher, ReadTooFar
41from whoosh.reading import TermInfo
42from whoosh.spans import Span
43from whoosh.system import (_INT_SIZE, _FLOAT_SIZE, pack_long, unpack_long,
44 IS_LITTLE)
45from whoosh.util import byte_to_length, length_to_byte
46
47
48try:
49 from zlib import compress, decompress
50 can_compress = True
51except ImportError:
52 can_compress = False
53
54
55# Base classes
56
class Codec(object):
    # Per document value writer
    def per_document_writer(self, storage, segment):
        raise NotImplementedError

    # Inverted index writer
    def field_writer(self, storage, segment):
        raise NotImplementedError

    # Readers

    def terms_reader(self, storage, segment):
        raise NotImplementedError

    def lengths_reader(self, storage, segment):
        raise NotImplementedError

    def vector_reader(self, storage, segment):
        raise NotImplementedError

    def stored_fields_reader(self, storage, segment):
        raise NotImplementedError

    def graph_reader(self, storage, segment):
        raise NotImplementedError

    # Segments and generations

    def new_segment(self, storage, indexname):
        raise NotImplementedError

    def commit_toc(self, storage, indexname, schema, segments, generation):
        raise NotImplementedError


# Writer classes

class PerDocumentWriter(object):
    def start_doc(self, docnum):
        raise NotImplementedError

    def add_field(self, fieldname, fieldobj, value, length):
        raise NotImplementedError

    def add_vector_items(self, fieldname, fieldobj, items):
        raise NotImplementedError

    def add_vector_matcher(self, fieldname, fieldobj, vmatcher):
        def readitems():
            while vmatcher.is_active():
                text = vmatcher.id()
                weight = vmatcher.weight()
                valuestring = vmatcher.value()
                yield (text, None, weight, valuestring)
                vmatcher.next()
        self.add_vector_items(fieldname, fieldobj, readitems())

    def finish_doc(self):
        pass

    def lengths_reader(self):
        raise NotImplementedError

class FieldWriter(object):
    def add_postings(self, schema, lengths, items):
        start_field = self.start_field
        start_term = self.start_term
        add = self.add
        finish_term = self.finish_term
        finish_field = self.finish_field

        # items = (fieldname, text, docnum, weight, valuestring) ...
        lastfn = None
        lasttext = None
        dfl = lengths.doc_field_length
        for fieldname, text, docnum, weight, valuestring in items:
            # Items where docnum is None indicate words that should be added
            # to the spelling graph
            if docnum is None and (fieldname != lastfn or text != lasttext):
                self.add_spell_word(fieldname, text)
                lastfn = fieldname
                lasttext = text
                continue

            # This comparison is so convoluted because Python 3 removed the
            # ability to compare a string to None
            if ((lastfn is not None and fieldname < lastfn)
                or (fieldname == lastfn and lasttext is not None
                    and text < lasttext)):
                raise Exception("Postings are out of order: %r:%s .. %r:%s" %
                                (lastfn, lasttext, fieldname, text))
            if fieldname != lastfn or text != lasttext:
                if lasttext is not None:
                    finish_term()
                if fieldname != lastfn:
                    if lastfn is not None:
                        finish_field()
                    start_field(fieldname, schema[fieldname])
                    lastfn = fieldname
                start_term(text)
                lasttext = text
            length = dfl(docnum, fieldname)
            add(docnum, weight, valuestring, length)
        if lasttext is not None:
            finish_term()
            finish_field()

    def start_field(self, fieldname, fieldobj):
        raise NotImplementedError

    def start_term(self, text):
        raise NotImplementedError

    def add(self, docnum, weight, valuestring, length):
        raise NotImplementedError

    def add_spell_word(self, fieldname, text):
        raise NotImplementedError

    def finish_term(self):
        raise NotImplementedError

    def finish_field(self):
        pass

    def close(self):
        pass

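# Example (an illustrative sketch, not part of the codec API): driving
# FieldWriter.add_postings() with an already-sorted posting stream. The
# PrintingFieldWriter class and _FixedLengths helper are hypothetical
# stand-ins used only to show the expected call sequence:
# start_field / start_term / add / finish_term / finish_field.

def _example_field_writer():
    class _FixedLengths(object):
        # Minimal stand-in for a lengths reader: every field of every
        # document reports the same length
        def doc_field_length(self, docnum, fieldname, default=0):
            return 1

    class PrintingFieldWriter(FieldWriter):
        def start_field(self, fieldname, fieldobj):
            print("field %r" % fieldname)

        def start_term(self, text):
            print("  term %r" % text)

        def add(self, docnum, weight, valuestring, length):
            print("    doc=%r weight=%r len=%r" % (docnum, weight, length))

        def finish_term(self):
            print("  /term")

        def finish_field(self):
            print("/field")

    # Postings must arrive sorted by (fieldname, text, docnum), otherwise
    # add_postings() raises the out-of-order exception above
    items = [("title", u"hello", 0, 1.0, None),
             ("title", u"hello", 2, 2.0, None),
             ("title", u"world", 1, 1.0, None)]
    # A real schema maps field names to field objects; a plain dict works
    # here because start_field() above ignores the field object
    schema = {"title": None}
    PrintingFieldWriter().add_postings(schema, _FixedLengths(), items)
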
# Reader classes

class TermsReader(object):
    def __contains__(self, term):
        raise NotImplementedError

    def terms(self):
        raise NotImplementedError

    def terms_from(self, fieldname, prefix):
        raise NotImplementedError

    def items(self):
        raise NotImplementedError

    def items_from(self, fieldname, prefix):
        raise NotImplementedError

    def terminfo(self, fieldname, text):
        raise NotImplementedError

    def frequency(self, fieldname, text):
        return self.terminfo(fieldname, text).weight()

    def doc_frequency(self, fieldname, text):
        return self.terminfo(fieldname, text).doc_frequency()

    def graph_reader(self, fieldname, text):
        raise NotImplementedError

    def matcher(self, fieldname, text, format_, scorer=None):
        raise NotImplementedError

    def close(self):
        pass


class VectorReader(object):
    def __contains__(self, key):
        raise NotImplementedError

    def matcher(self, docnum, fieldname, format_):
        raise NotImplementedError

class LengthsReader(object):
    def doc_count_all(self):
        raise NotImplementedError

    def doc_field_length(self, docnum, fieldname, default=0):
        raise NotImplementedError

    def field_length(self, fieldname):
        raise NotImplementedError

    def min_field_length(self, fieldname):
        raise NotImplementedError

    def max_field_length(self, fieldname):
        raise NotImplementedError

    def close(self):
        pass

class MultiLengths(LengthsReader):
    def __init__(self, lengths, offset=0):
        self.lengths = []
        self.doc_offsets = []
        self._count = 0
        for lr in lengths:
            if lr.doc_count_all():
                self.lengths.append(lr)
                self.doc_offsets.append(self._count)
                self._count += lr.doc_count_all()
        self.is_closed = False

    def _document_reader(self, docnum):
        return max(0, bisect_right(self.doc_offsets, docnum) - 1)

    def _reader_and_docnum(self, docnum):
        lnum = self._document_reader(docnum)
        offset = self.doc_offsets[lnum]
        return lnum, docnum - offset

    def doc_count_all(self):
        return self._count

    def doc_field_length(self, docnum, fieldname, default=0):
        x, y = self._reader_and_docnum(docnum)
        return self.lengths[x].doc_field_length(y, fieldname, default=default)

    # Pass the field name through to the sub-readers, matching the
    # LengthsReader interface above

    def min_field_length(self, fieldname):
        return min(lr.min_field_length(fieldname) for lr in self.lengths)

    def max_field_length(self, fieldname):
        return max(lr.max_field_length(fieldname) for lr in self.lengths)

    def close(self):
        for lr in self.lengths:
            lr.close()
        self.is_closed = True

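# Example (an illustrative sketch): how MultiLengths maps a global document
# number onto a (sub-reader, local docnum) pair using bisect_right over the
# running doc_offsets list. The _ConstLengths class is a hypothetical
# stand-in for a real per-segment lengths reader.

def _example_multi_lengths():
    class _ConstLengths(LengthsReader):
        def __init__(self, count, length):
            self.count = count
            self.length = length

        def doc_count_all(self):
            return self.count

        def doc_field_length(self, docnum, fieldname, default=0):
            return self.length

    # Two sub-readers of 3 and 2 documents -> doc_offsets == [0, 3]
    ml = MultiLengths([_ConstLengths(3, 10), _ConstLengths(2, 20)])
    assert ml.doc_count_all() == 5
    # Global docnum 4 falls in the second reader as local docnum 1
    assert ml._reader_and_docnum(4) == (1, 1)
    assert ml.doc_field_length(4, "body") == 20
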
class StoredFieldsReader(object):
    def __iter__(self):
        raise NotImplementedError

    def __getitem__(self, docnum):
        raise NotImplementedError

    def cell(self, docnum, fieldname):
        # Look up the document's stored-field dict through __getitem__
        fielddict = self[docnum]
        return fielddict.get(fieldname)

    def column(self, fieldname):
        for fielddict in self:
            yield fielddict.get(fieldname)

    def close(self):
        pass

# File posting matcher middleware

class FilePostingMatcher(Matcher):
    # Subclasses need to set
    #   self._term -- (fieldname, text) or None
    #   self.scorer -- a Scorer object or None
    #   self.format -- Format object for the posting values
    #   self.postfile -- the posting file this matcher reads from
    #                    (used by __repr__ below)

    def __repr__(self):
        return "%s(%r, %r, %s)" % (self.__class__.__name__, str(self.postfile),
                                   self.term(), self.is_active())

    def term(self):
        return self._term

    def items_as(self, astype):
        decoder = self.format.decoder(astype)
        for id, value in self.all_items():
            yield (id, decoder(value))

    def supports(self, astype):
        return self.format.supports(astype)

    def value_as(self, astype):
        decoder = self.format.decoder(astype)
        return decoder(self.value())

    def spans(self):
        if self.supports("characters"):
            return [Span(pos, startchar=startchar, endchar=endchar)
                    for pos, startchar, endchar in self.value_as("characters")]
        elif self.supports("positions"):
            return [Span(pos) for pos in self.value_as("positions")]
        else:
            raise Exception("Field does not support positions (%r)"
                            % self._term)

    def supports_block_quality(self):
        return self.scorer and self.scorer.supports_block_quality()

    def max_quality(self):
        return self.scorer.max_quality

    def block_quality(self):
        return self.scorer.block_quality(self)

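# Example (an illustrative sketch): the two shapes of posting data that
# spans() decodes. Span is the real whoosh.spans.Span class; the literal
# values below are made up for illustration.

def _example_spans():
    # A field that supports "characters" decodes to (pos, startchar,
    # endchar) triples
    chars = [(0, 0, 5), (3, 17, 22)]
    char_spans = [Span(pos, startchar=sc, endchar=ec)
                  for pos, sc, ec in chars]
    # A field that only supports "positions" decodes to bare integers
    positions = [0, 3, 7]
    pos_spans = [Span(pos) for pos in positions]
    return char_spans, pos_spans
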
class BlockPostingMatcher(FilePostingMatcher):
    # Subclasses need to set
    #   self.block -- BlockBase object for the current block
    #   self.i -- Numerical index to the current place in the block
    #   self.baseoffset -- file offset of the first block (used by all_ids)
    #   self.blockcount -- number of blocks in the posting list
    # And implement
    #   _read_block()
    #   _next_block()
    #   _skip_to_block()

    def id(self):
        return self.block.ids[self.i]

    def weight(self):
        weights = self.block.weights
        if not weights:
            weights = self.block.read_weights()
        return weights[self.i]

    def value(self):
        values = self.block.values
        if values is None:
            values = self.block.read_values()
        return values[self.i]

    def all_ids(self):
        nextoffset = self.baseoffset
        for _ in xrange(self.blockcount):
            block = self._read_block(nextoffset)
            nextoffset = block.nextoffset
            ids = block.read_ids()
            for id in ids:
                yield id

    def next(self):
        if self.i == self.block.count - 1:
            self._next_block()
            return True
        else:
            self.i += 1
            return False

    def skip_to(self, id):
        if not self.is_active():
            raise ReadTooFar

        i = self.i
        # If we're already in the block with the target ID, do nothing
        if id <= self.block.ids[i]:
            return

        # Skip to the block that would contain the target ID
        if id > self.block.maxid:
            self._skip_to_block(lambda: id > self.block.maxid)
        if not self.is_active():
            return

        # Iterate through the IDs in the block until we find or pass the
        # target
        ids = self.block.ids
        i = self.i
        while ids[i] < id:
            i += 1
            if i == len(ids):
                self._active = False
                return
        self.i = i

    def skip_to_quality(self, minquality):
        bq = self.block_quality
        if bq() > minquality:
            return 0
        return self._skip_to_block(lambda: bq() <= minquality)

    def block_min_length(self):
        return self.block.min_length()

    def block_max_length(self):
        return self.block.max_length()

    def block_max_weight(self):
        return self.block.max_weight()

    def block_max_wol(self):
        return self.block.max_wol()

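# Example (an illustrative sketch of the algorithm above, not library API):
# the linear scan skip_to() performs inside a single block. Given a sorted
# list of ids, advance an index until it reaches or passes the target,
# reporting whether the block was exhausted.

def _scan_block(ids, i, target):
    """Return (new_index, still_active) for a within-block skip."""
    while ids[i] < target:
        i += 1
        if i == len(ids):
            return i, False  # ran off the end of the block
    return i, True

# _scan_block([2, 5, 9], 0, 6) -> (2, True): lands on 9, the first
# id >= 6. _scan_block([2, 5, 9], 0, 10) -> (3, False): block exhausted,
# so the matcher deactivates itself.
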
# File TermInfo

NO_ID = 0xffffffff


class FileTermInfo(TermInfo):
    # Freq, Doc freq, min len, max length, max weight, unused, min ID, max ID
    struct = Struct("!fIBBffII")

    def __init__(self, *args, **kwargs):
        self.postings = None
        if "postings" in kwargs:
            self.postings = kwargs["postings"]
            del kwargs["postings"]
        TermInfo.__init__(self, *args, **kwargs)

    # filedb specific methods

    def add_block(self, block):
        self._weight += sum(block.weights)
        self._df += len(block)

        ml = block.min_length()
        if self._minlength is None:
            self._minlength = ml
        else:
            self._minlength = min(self._minlength, ml)

        self._maxlength = max(self._maxlength, block.max_length())
        self._maxweight = max(self._maxweight, block.max_weight())
        if self._minid is None:
            self._minid = block.ids[0]
        self._maxid = block.ids[-1]

    def to_string(self):
        # Encode the lengths as 0-255 values
        ml = 0 if self._minlength is None else length_to_byte(self._minlength)
        xl = length_to_byte(self._maxlength)
        # Convert None values to the out-of-band NO_ID constant so they can be
        # stored as unsigned ints
        mid = NO_ID if self._minid is None else self._minid
        xid = NO_ID if self._maxid is None else self._maxid

        # Pack the term info into bytes
        st = self.struct.pack(self._weight, self._df, ml, xl, self._maxweight,
                              0, mid, xid)

        if isinstance(self.postings, tuple):
            # Postings are inlined - dump them using the pickle protocol
            isinlined = 1
            st += dumps(self.postings, -1)[2:-1]
        else:
            # Append postings pointer as long to end of term info bytes
            isinlined = 0
            # It's possible for a term info to not have a pointer to postings
            # on disk, in which case postings will be None. Convert a None
            # value to -1 so it can be stored as a long.
            p = -1 if self.postings is None else self.postings
            st += pack_long(p)

        # Prepend byte indicating whether the postings are inlined to the term
        # info bytes
        return pack("B", isinlined) + st

    @classmethod
    def from_string(cls, s):
        assert isinstance(s, bytes_type)

        if isinstance(s, string_type):
            hbyte = ord(s[0])  # Python 2.x - str
        else:
            hbyte = s[0]  # Python 3 - bytes

        if hbyte < 2:
            st = cls.struct
            # Weight, Doc freq, min len, max len, max w, unused, min ID, max ID
            w, df, ml, xl, xw, _, mid, xid = st.unpack(s[1:st.size + 1])
            mid = None if mid == NO_ID else mid
            xid = None if xid == NO_ID else xid
            # Postings
            pstr = s[st.size + 1:]
            if hbyte == 0:
                p = unpack_long(pstr)[0]
            else:
                p = loads(pstr + b("."))
        else:
            # Old format was encoded as a variable length pickled tuple
            v = loads(s + b("."))
            if len(v) == 1:
                w = df = 1
                p = v[0]
            elif len(v) == 2:
                w = df = v[1]
                p = v[0]
            else:
                w, p, df = v
            # Fake values for stats which weren't stored before
            ml = 1
            xl = 255
            xw = 999999999
            mid = -1
            xid = -1

        ml = byte_to_length(ml)
        xl = byte_to_length(xl)
        obj = cls(w, df, ml, xl, xw, mid, xid)
        obj.postings = p
        return obj

    @classmethod
    def read_weight(cls, dbfile, datapos):
        return dbfile.get_float(datapos + 1)

    @classmethod
    def read_doc_freq(cls, dbfile, datapos):
        return dbfile.get_uint(datapos + 1 + _FLOAT_SIZE)

    @classmethod
    def read_min_and_max_length(cls, dbfile, datapos):
        lenpos = datapos + 1 + _FLOAT_SIZE + _INT_SIZE
        ml = byte_to_length(dbfile.get_byte(lenpos))
        xl = byte_to_length(dbfile.get_byte(lenpos + 1))
        return ml, xl

    @classmethod
    def read_max_weight(cls, dbfile, datapos):
        weightspos = datapos + 1 + _FLOAT_SIZE + _INT_SIZE + 2
        return dbfile.get_float(weightspos)

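# Example (an illustrative sketch, assuming the TermInfo base class accepts
# the same positional arguments that from_string() passes it): round-tripping
# a FileTermInfo through to_string()/from_string(). The arguments are
# weight, doc freq, min length, max length, max weight, min ID, max ID, and
# the numbers are made up for illustration. Note that lengths are stored as
# one-byte codes, so the decoded min/max lengths are approximations.

def _example_terminfo_roundtrip():
    ti = FileTermInfo(2.5, 2, 3, 7, 1.5, 0, 42)
    ti.postings = 1024  # on-disk postings pointer (stored as a long)
    data = ti.to_string()
    ti2 = FileTermInfo.from_string(data)
    # The pointer and doc frequency are stored exactly
    assert ti2.postings == 1024
    assert ti2.doc_frequency() == 2
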
# Segment base class

class Segment(object):
    """Do not instantiate this object directly. It is used by the Index object
    to hold information about a segment. A list of objects of this class is
    pickled as part of the TOC file.

    The TOC file stores a minimal amount of information -- mostly a list of
    Segment objects. Segments are the real inverted indexes. Having multiple
    segments allows quick incremental indexing: just create a new segment for
    the new documents, and have the index overlay the new segment over previous
    ones for purposes of reading/search. "Optimizing" the index combines the
    contents of existing segments into one (removing any deleted documents
    along the way).
    """

    # These must be valid, distinct characters in CASE-INSENSITIVE filenames
    IDCHARS = "0123456789abcdefghijklmnopqrstuvwxyz"
    # Extension for compound segment files
    COMPOUND_EXT = ".seg"

    # Subclasses must set
    #   self.indexname
    #   self.segid

    @classmethod
    def _random_id(cls, size=12):
        return "".join(random.choice(cls.IDCHARS) for _ in xrange(size))

    def __repr__(self):
        return "<%s %s>" % (self.__class__.__name__, getattr(self, "segid", ""))

    def codec(self):
        raise NotImplementedError

    def segment_id(self):
        if hasattr(self, "name"):
            # Old segment class
            return self.name
        else:
            return "%s_%s" % (self.indexname, self.segid)

    def is_compound(self):
        if not hasattr(self, "compound"):
            return False
        return self.compound

    # File convenience methods

    def make_filename(self, ext):
        return "%s%s" % (self.segment_id(), ext)

    def list_files(self, storage):
        prefix = "%s." % self.segment_id()
        return [name for name in storage.list() if name.startswith(prefix)]

    def create_file(self, storage, ext, **kwargs):
        """Convenience method to create a new file in the given storage named
        with this segment's ID and the given extension. Any keyword arguments
        are passed to the storage's create_file method.
        """

        fname = self.make_filename(ext)
        return storage.create_file(fname, **kwargs)

    def open_file(self, storage, ext, **kwargs):
        """Convenience method to open a file in the given storage named with
        this segment's ID and the given extension. Any keyword arguments are
        passed to the storage's open_file method.
        """

        fname = self.make_filename(ext)
        return storage.open_file(fname, **kwargs)

    def create_compound_file(self, storage):
        segfiles = self.list_files(storage)
        assert not any(name.endswith(self.COMPOUND_EXT) for name in segfiles)
        cfile = self.create_file(storage, self.COMPOUND_EXT)
        CompoundStorage.assemble(cfile, storage, segfiles)
        for name in segfiles:
            storage.delete_file(name)

    def open_compound_file(self, storage):
        name = self.make_filename(self.COMPOUND_EXT)
        return CompoundStorage(storage, name)

    # Abstract methods dealing with document counts and deletions

    def doc_count_all(self):
        """
        :returns: the total number of documents, deleted or undeleted, in
            this segment.
        """

        raise NotImplementedError

    def doc_count(self):
        """
        :returns: the number of (undeleted) documents in this segment.
        """

        raise NotImplementedError

    def has_deletions(self):
        """
        :returns: True if any documents in this segment are deleted.
        """

        raise NotImplementedError

    def deleted_count(self):
        """
        :returns: the total number of deleted documents in this segment.
        """

        raise NotImplementedError

    def delete_document(self, docnum, delete=True):
        """Deletes the given document number. The document is not actually
        removed from the index until it is optimized.

        :param docnum: The document number to delete.
        :param delete: If False, this undeletes a deleted document.
        """

        raise NotImplementedError

    def is_deleted(self, docnum):
        """:returns: True if the given document number is deleted."""

        raise NotImplementedError

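# Example (an illustrative sketch): the filename conventions above. The
# _DemoSegment subclass is hypothetical; real codecs supply their own
# Segment subclasses that set indexname and segid, and ".trm" is a made-up
# extension used only for illustration.

def _example_segment_names():
    class _DemoSegment(Segment):
        def __init__(self, indexname):
            self.indexname = indexname
            self.segid = self._random_id()

    seg = _DemoSegment("MAIN")
    # segment_id() is "<indexname>_<random id>", e.g. "MAIN_x3k9..."
    assert seg.segment_id().startswith("MAIN_")
    # Data files are named "<segment id><extension>"
    assert seg.make_filename(".trm") == seg.segment_id() + ".trm"
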
# Posting block format

class BlockBase(object):
    def __init__(self, postingsize, stringids=False):
        self.postingsize = postingsize
        self.stringids = stringids
        self.ids = [] if stringids else array("I")
        self.weights = array("f")
        self.values = None

        self.minlength = None
        self.maxlength = 0
        self.maxweight = 0

    def __len__(self):
        return len(self.ids)

    def __nonzero__(self):
        return bool(self.ids)

    def min_id(self):
        if self.ids:
            return self.ids[0]
        else:
            raise IndexError

    def max_id(self):
        if self.ids:
            return self.ids[-1]
        else:
            raise IndexError

    def min_length(self):
        return self.minlength

    def max_length(self):
        return self.maxlength

    def max_weight(self):
        return self.maxweight

    def add(self, id_, weight, valuestring, length=None):
        self.ids.append(id_)
        self.weights.append(weight)
        if weight > self.maxweight:
            self.maxweight = weight
        if valuestring:
            if self.values is None:
                self.values = []
            self.values.append(valuestring)
        if length:
            if self.minlength is None or length < self.minlength:
                self.minlength = length
            if length > self.maxlength:
                self.maxlength = length

    def to_file(self, postfile):
        raise NotImplementedError

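# Example (an illustrative sketch): how BlockBase.add() accumulates the
# per-block statistics that back the block_min_length/block_max_weight
# style methods used for block-quality optimizations.

def _example_block_stats():
    block = BlockBase(postingsize=0)
    block.add(1, 1.0, None, length=4)
    block.add(5, 3.0, None, length=2)
    block.add(9, 2.0, None, length=7)
    assert (block.min_id(), block.max_id()) == (1, 9)
    assert block.max_weight() == 3.0
    assert (block.min_length(), block.max_length()) == (2, 7)
    assert len(block) == 3
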
# Utility functions

def minimize_ids(arry, stringids, compression=0):
    amax = arry[-1]

    if stringids:
        typecode = ''
        string = dumps(arry)
    else:
        typecode = arry.typecode
        if amax <= 255:
            typecode = "B"
        elif amax <= 65535:
            typecode = "H"

        if typecode != arry.typecode:
            arry = array(typecode, iter(arry))
        if not IS_LITTLE:
            arry.byteswap()
        string = array_tobytes(arry)
    if compression:
        string = compress(string, compression)
    return (typecode, string)


def deminimize_ids(typecode, count, string, compression=0):
    if compression:
        string = decompress(string)
    if typecode == '':
        return loads(string)
    else:
        arry = array(typecode)
        array_frombytes(arry, string)
        if not IS_LITTLE:
            arry.byteswap()
        return arry

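# Example (an illustrative sketch): minimize_ids() downsizes the array
# typecode when the largest id fits in fewer bytes, and deminimize_ids()
# reverses the process. Passing a nonzero compression level additionally
# runs the bytes through zlib (see can_compress above).

def _example_minimize_ids():
    ids = array("I", [3, 70, 200])
    typecode, string = minimize_ids(ids, stringids=False)
    assert typecode == "B"  # max id 200 fits in one unsigned byte
    restored = deminimize_ids(typecode, len(ids), string)
    assert list(restored) == [3, 70, 200]
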

def minimize_weights(weights, compression=0):
    if all(w == 1.0 for w in weights):
        string = b("")
    else:
        if not IS_LITTLE:
            weights.byteswap()
        string = array_tobytes(weights)
    if string and compression:
        string = compress(string, compression)
    return string


def deminimize_weights(count, string, compression=0):
    if not string:
        return array("f", (1.0 for _ in xrange(count)))
    if compression:
        string = decompress(string)
    arry = array("f")
    array_frombytes(arry, string)
    if not IS_LITTLE:
        arry.byteswap()
    return arry

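# Example (an illustrative sketch): the common all-1.0 case is stored as an
# empty string, which deminimize_weights() expands back using the count.

def _example_minimize_weights():
    weights = array("f", [1.0, 1.0, 1.0])
    string = minimize_weights(weights)
    assert string == b("")
    restored = deminimize_weights(3, string)
    assert list(restored) == [1.0, 1.0, 1.0]
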

def minimize_values(postingsize, values, compression=0):
    if postingsize < 0:
        string = dumps(values, -1)[2:]
    elif postingsize == 0:
        string = b('')
    else:
        string = b('').join(values)
    if string and compression:
        string = compress(string, compression)
    return string


def deminimize_values(postingsize, count, string, compression=0):
    if compression:
        string = decompress(string)

    if postingsize < 0:
        return loads(string)
    elif postingsize == 0:
        return [None] * count
    else:
        return [string[i:i + postingsize] for i
                in xrange(0, len(string), postingsize)]

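# Example (an illustrative sketch): fixed-size posting values are simply
# concatenated by minimize_values() and re-sliced by deminimize_values();
# variable-size values (postingsize < 0) go through pickle instead.

def _example_minimize_values():
    values = [b("ab"), b("cd"), b("ef")]
    string = minimize_values(2, values)
    assert string == b("abcdef")
    assert deminimize_values(2, 3, string) == values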