PageRenderTime 22ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/Data/FlexSerializer.pm

http://github.com/tsee/Data-FlexSerializer
Perl | 721 lines | 381 code | 55 blank | 285 comment | 26 complexity | 947dd516866422334c79b282eb661f6d MD5 | raw file
  1. package Data::FlexSerializer;
  2. use Moose;
  3. use MooseX::ClassAttribute;
  4. use MooseX::Types::Moose qw(ArrayRef HashRef Maybe Bool Int Str Object CodeRef);
  5. use MooseX::Types::Structured qw(Dict Tuple Map);
  6. use MooseX::Types -declare => [ qw(
  7. FormatHandler
  8. FormatBool
  9. ) ];
  10. use autodie;
  11. our $VERSION = '1.10';
  12. # Get the DEBUG constant from $Data::FlexSerializer::DEBUG or
  13. # $ENV{DATA_FLEXSERIALIZER_DEBUG}
  14. use Constant::FromGlobal DEBUG => { int => 1, default => 0, env => 1 };
  15. use List::Util qw(min);
  16. use Storable qw();
  17. use JSON::XS qw();
  18. use Sereal::Decoder qw();
  19. use Sereal::Encoder qw();
  20. use Compress::Zlib qw(Z_DEFAULT_COMPRESSION);
  21. use IO::Uncompress::AnyInflate qw();
  22. use Carp ();
  23. use Data::Dumper qw(Dumper);
  24. subtype FormatHandler,
  25. as Dict [
  26. detect => CodeRef,
  27. serialize => CodeRef,
  28. deserialize => CodeRef,
  29. ],
  30. message { 'A format needs to be passed as an hashref with "serialize", "deserialize" and "detect" keys that point to a coderef to perform the respective action' };
  31. subtype FormatBool,
  32. as Map[Str, Bool];
  33. coerce FormatBool,
  34. from ArrayRef,
  35. via { { map lc $_ => 1, @$_ } },
  36. from Str,
  37. via { { lc $_ => 1 } },
  38. ;
  39. class_has formats => (
  40. traits => ['Hash'],
  41. is => 'rw',
  42. isa => HashRef[FormatHandler],
  43. default => sub {
  44. {
  45. json => {
  46. detect => sub { $_[1] =~ /^(?:\{|\[)/ },
  47. serialize => sub { shift; goto \&JSON::XS::encode_json },
  48. deserialize => sub { shift; goto \&JSON::XS::decode_json },
  49. },
  50. storable => {
  51. detect => sub { $_[1] =~ s/^pst0// }, # this is not a real detector.
  52. # It just removes the storable
  53. # file magic if necessary.
  54. # Tho' storable needs to be last
  55. serialize => sub { shift; goto \&Storable::nfreeze },
  56. deserialize => sub { shift; goto \&Storable::thaw },
  57. },
  58. sereal => {
  59. detect => sub { shift->{sereal_decoder}->looks_like_sereal(@_) },
  60. serialize => sub { shift->{sereal_encoder}->encode(@_) },
  61. deserialize => sub { my $structure; shift->{sereal_decoder}->decode($_[0], $structure); $structure },
  62. },
  63. }
  64. },
  65. handles => {
  66. add_format => 'set',
  67. get_format => 'get',
  68. has_format => 'exists',
  69. supported_formats => 'keys',
  70. },
  71. );
  72. has output_format => (
  73. is => 'ro',
  74. isa => Str,
  75. default => 'json',
  76. );
  77. has detect_formats => (
  78. traits => ['Hash'],
  79. is => 'ro',
  80. isa => FormatBool,
  81. default => sub { { json => 1, sereal => 0, storable => 0 } },
  82. coerce => 1,
  83. handles => {
  84. detect_json => [ get => 'json' ],
  85. detect_storable => [ get => 'storable' ],
  86. detect_sereal => [ get => 'sereal' ],
  87. _set_detect_json => [ set => 'json' ],
  88. _set_detect_storable => [ set => 'storable' ],
  89. _set_detect_sereal => [ set => 'sereal' ],
  90. list_detect_formats => 'kv',
  91. }
  92. );
  93. has assume_compression => (
  94. is => 'ro',
  95. isa => Bool,
  96. default => 1,
  97. );
  98. has detect_compression => (
  99. is => 'ro',
  100. isa => Bool,
  101. default => 0,
  102. );
  103. has compress_output => (
  104. is => 'ro',
  105. isa => Bool,
  106. default => 1,
  107. );
  108. has compression_level => (
  109. is => 'ro',
  110. isa => Maybe[Int],
  111. );
  112. has sereal_encoder => (
  113. is => 'ro',
  114. isa => Object,
  115. lazy_build => 1,
  116. );
  117. sub _build_sereal_encoder { Sereal::Encoder->new }
  118. has sereal_decoder => (
  119. is => 'ro',
  120. isa => Object,
  121. lazy_build => 1,
  122. );
  123. sub _build_sereal_decoder { Sereal::Decoder->new }
  124. around BUILDARGS => sub {
  125. my ( $orig, $class, %args ) = @_;
  126. # We change the default on assume_compression to "off" if the
  127. # user sets detect_compression explicitly
  128. if (exists $args{detect_compression} and
  129. not exists $args{assume_compression}) {
  130. $args{assume_compression} = 0;
  131. }
  132. if ($args{assume_compression} and $args{detect_compression}) {
  133. die "Can't assume compression and auto-detect compression at the same time. That makes no sense.";
  134. }
  135. my %detect_formats = map {
  136. exists $args{"detect_$_"} ? ($_ => $args{"detect_$_"}) : ()
  137. } $class->supported_formats;
  138. if (%detect_formats) {
  139. if ($args{detect_formats}) {
  140. $args{detect_formats} = [ $args{detect_formats} ] unless ref $args{detect_formats};
  141. if (ref $args{detect_formats} eq 'ARRAY') {
  142. for my $format (@{$args{detect_formats}}) {
  143. die "Can't have $format in detect_formats and detect_$format set to false at the same time"
  144. if exists $detect_formats{$format} && !$detect_formats{$format};
  145. $detect_formats{$format} = 1;
  146. }
  147. } else {
  148. for my $format (keys %{$args{detect_formats}}) {
  149. die "Can't have $format in detect_formats and detect_$format set to false at the same time"
  150. if exists $detect_formats{$format}
  151. && exists $args{detect_formats}{$format}
  152. && $detect_formats{$format} != $args{detect_formats}{$format};
  153. $detect_formats{$format} = 1;
  154. }
  155. }
  156. } else {
  157. $args{detect_formats} = \%detect_formats;
  158. }
  159. }
  160. $args{output_format} = lc $args{output_format} if $args{output_format};
  161. for my $format (
  162. ( $args{output_format} ? $args{output_format} : () ),
  163. ( $args{detect_formats} ? keys %{$args{detect_formats}} : () )) {
  164. die "'$format' is not a supported format" unless $class->has_format($format);
  165. }
  166. my $rv = $class->$orig(%args);
  167. if (DEBUG) {
  168. warn "Dumping the new FlexSerializer object.\n" . Dumper($rv);
  169. }
  170. return $rv;
  171. };
  172. sub BUILD {
  173. my ($self) = @_;
  174. # build Sereal::{Decoder,Encoder} objects if necessary
  175. $self->sereal_decoder if $self->detect_sereal;
  176. $self->sereal_encoder if $self->output_format eq 'sereal';
  177. # For legacy reasons json should be on by default
  178. $self->_set_detect_json(1) unless defined $self->detect_json;
  179. $self->{serializer_coderef} = $self->make_serializer;
  180. $self->{deserializer_coderef} = $self->make_deserializer;
  181. return;
  182. }
  183. sub serialize { goto $_[0]->{serializer_coderef} }
  184. sub deserialize { goto $_[0]->{deserializer_coderef} }
  185. sub make_serializer {
  186. my $self = shift;
  187. my $compress_output = $self->compress_output;
  188. my $output_format = $self->output_format;
  189. my $comp_level;
  190. $comp_level = $self->compression_level if $compress_output;
  191. if (DEBUG) {
  192. warn(sprintf(
  193. "FlexSerializer using the following options for serialization: "
  194. . "compress_output=%s, compression_level=%s, output_format=%s",
  195. map {defined $self->{$_} ? $self->{$_} : '<undef>'}
  196. qw(compress_output compression_level output_format)
  197. ));
  198. }
  199. {
  200. no strict 'refs';
  201. my $class = ref $self;
  202. *{"$class\::__serialize_$output_format"} =
  203. $self->get_format($output_format)->{serialize}
  204. or die "PANIC: unknown output format '$output_format'";
  205. }
  206. my $code = "__serialize_$output_format(\$self, \$_)";
  207. if ($compress_output) {
  208. my $comp_level_code = defined $comp_level ? $comp_level : 'Z_DEFAULT_COMPRESSION';
  209. $code = "Compress::Zlib::compress(\\$code,$comp_level_code)";
  210. }
  211. $code = sprintf q{
  212. sub {
  213. # local *__ANON__= "__ANON__serialize__";
  214. my $self = shift;
  215. my @out;
  216. push @out, %s for @_;
  217. return wantarray ? @out
  218. : @out > 1 ? die( sprintf "You have %%d serialized structures, please call this method in list context", scalar @out )
  219. : $out[0];
  220. return @out;
  221. };
  222. }, $code;
  223. warn $code if DEBUG >= 2;
  224. my $coderef = eval $code or do{
  225. my $error = $@ || 'Zombie error';
  226. die "Couldn't create the deserialization coderef: $error\n The code is: $code\n";
  227. };
  228. return $coderef;
  229. }
  230. sub make_deserializer {
  231. my $self = shift;
  232. my $assume_compression = $self->assume_compression;
  233. my $detect_compression = $self->detect_compression;
  234. my %detectors = %{$self->detect_formats};
  235. # Move storable to the end of the detectors list.
  236. # We don't know how to detect it.
  237. delete $detectors{storable} if exists $detectors{storable};
  238. my @detectors = grep $detectors{$_}, $self->supported_formats;
  239. push @detectors, 'storable' if $self->detect_storable;
  240. if (DEBUG) {
  241. warn "Detectors: @detectors";
  242. warn("FlexSerializer using the following options for deserialization: ",
  243. join ', ', (map {defined $self->$_ ? "$_=@{[$self->$_]}" : "$_=<undef>"}
  244. qw(assume_compression detect_compression)),
  245. map { "detect_$_->[0]=$_->[1]" } $self->list_detect_formats
  246. );
  247. }
  248. my $uncompress_code;
  249. if ($assume_compression) {
  250. $uncompress_code = '
  251. local $_ = Compress::Zlib::uncompress(\$serialized);
  252. unless (defined $_) {
  253. die "You\'ve told me to assume compression but calling uncompress() on your input string returns undef";
  254. }';
  255. }
  256. elsif ($detect_compression) {
  257. $uncompress_code = '
  258. local $_;
  259. my $inflatedok = IO::Uncompress::AnyInflate::anyinflate(\$serialized => \$_);
  260. warn "FlexSerializer: Detected that the input was " . ($inflatedok ? "" : "not ") . "compressed"
  261. if DEBUG >= 3;
  262. $_ = $serialized if not $inflatedok;';
  263. }
  264. else {
  265. warn "FlexSerializer: Not using compression" if DEBUG;
  266. $uncompress_code = '
  267. local $_ = $serialized;';
  268. }
  269. my $code_detect = q!
  270. warn "FlexSerializer: %2$s that the input was %1$s" if DEBUG >= 3;
  271. warn sprintf "FlexSerializer: This was the %1$s input: '%s'",
  272. substr($_, 0, min(length($_), 100)) if DEBUG >= 3;
  273. push @out, __deserialize_%1$s($self, $_)!;
  274. my $detector = '__detect_%1$s($self, $_)';
  275. my $body = "\n$code_detect\n }";
  276. my $code = @detectors == 1
  277. # Just one detector => skip the if()else gobbledigook
  278. ? sprintf $code_detect, $detectors[0], 'Assuming'
  279. # Multiple detectors
  280. : join('', map {
  281. sprintf(
  282. ($_ == 0 ? "if ( $detector ) { $body"
  283. :$_ == $#detectors ? " else { $detector; $body"
  284. : " elsif ( $detector ) { $body"),
  285. $detectors[$_],
  286. ($_ == $#detectors ? 'Assuming' : 'Detected'),
  287. );
  288. } 0..$#detectors
  289. );
  290. $code = sprintf(q{
  291. sub {
  292. # local *__ANON__= "__ANON__deserialize__";
  293. my $self = shift;
  294. my @out;
  295. for my $serialized (@_) {
  296. %s
  297. %s
  298. }
  299. return wantarray ? @out
  300. : @out > 1 ? die( sprintf "You have %%d deserialized structures, please call this method in list context", scalar @out )
  301. : $out[0];
  302. return @out;
  303. };},
  304. $uncompress_code, $code
  305. );
  306. warn $code if DEBUG >= 2;
  307. # inject the deserializers and detectors in the symbol table
  308. # before we eval the code.
  309. for (@detectors) {
  310. my $class = ref $self;
  311. no strict 'refs';
  312. my $format = $self->get_format($_);
  313. *{"$class\::__deserialize_$_"} = $format->{deserialize};
  314. *{"$class\::__detect_$_"} = $format->{detect};
  315. }
  316. my $coderef = eval $code or do{
  317. my $error = $@ || 'Clobbed';
  318. die "Couldn't create the deserialization coderef: $error\n The code is: $code\n";
  319. };
  320. return $coderef;
  321. }
  322. sub deserialize_from_file {
  323. my $self = shift;
  324. my $file = shift;
  325. if (not defined $file or not -r $file) {
  326. Carp::croak("Need filename argument or can't read file");
  327. }
  328. open my $fh, '<', $file;
  329. local $/;
  330. my $data = <$fh>;
  331. my ($rv) = $self->deserialize($data);
  332. return $rv;
  333. }
  334. sub serialize_to_file {
  335. my $self = shift;
  336. my $data = shift;
  337. my $file = shift;
  338. if (not defined $file) {
  339. Carp::croak("Need filename argument");
  340. }
  341. open my $fh, '>', $file;
  342. print $fh $self->serialize($data);
  343. close $fh;
  344. return 1;
  345. }
  346. sub deserialize_from_fh {
  347. my $self = shift;
  348. my $fd = shift;
  349. if (not defined $fd) {
  350. Carp::croak("Need file descriptor argument");
  351. }
  352. local $/;
  353. my $data = <$fd>;
  354. my ($rv) = $self->deserialize($data);
  355. return $rv;
  356. }
  357. sub serialize_to_fh {
  358. my $self = shift;
  359. my $data = shift;
  360. my $fd = shift;
  361. if (not defined $fd) {
  362. Carp::croak("Need file descriptor argument");
  363. }
  364. print $fd $self->serialize($data);
  365. return 1;
  366. }
  367. 1;
  368. __END__
  369. =pod
  370. =encoding utf8
  371. =head1 NAME
  372. Data::FlexSerializer - Pluggable (de-)serialization to/from compressed/uncompressed JSON/Storable/Sereal/Whatever
  373. =head1 DESCRIPTION
  374. This module was written to convert away from Storable throughout the
  375. Booking.com codebase to other serialization formats such as Sereal and
  376. JSON.
  377. Since we needed to do these migrations in production we had to do them
  378. with zero downtime and deal with data stored on disk, in memcached or
  379. in a database that we could only gradually migrate to the new format
  380. as we read/wrote it.
  381. So we needed a module that deals with dynamically detecting what kind
  382. of existing serialized data you have, and can dynamically convert it
  383. to something else as it's written again.
  384. That's what this module does. Depending on the options you give it it
  385. can read/write any combination of
  386. B<compressed>/B<uncompressed>/B<maybe compressed>
  387. B<Storable>/B<JSON>/B<Sereal> data. You can also easily extend it to
  388. add support for your own input/output format in addition to the
  389. defaults.
  390. =head1 SYNOPSIS
  391. When we originally wrote this we meant to convert everything over from
  392. Storable to JSON. Since then mostly due to various issues with JSON
  393. not accurately being able to represent Perl datastructures
  394. (e.g. preserve encoding flags) we've started to migrate to
  395. L<Sereal::Encoder|Sereal> (a L<new serialization
  396. format|http://blog.booking.com/sereal-a-binary-data-serialization-format.html>
  397. we wrote) instead.
  398. However the API of this module is now slightly awkward because now it
  399. needs to deal with the possible detection and emission of multiple
  400. formats, and it still uses the JSON format by default which is no
  401. longer the recommended way to use it.
  402. # For all of the below
  403. use Data::FlexSerializer;
  404. =head2 Reading and writing compressed JSON
  405. # We *only* read/write compressed JSON by default:
  406. my $strict_serializer = Data::FlexSerializer->new;
  407. my @blobs = $strict_serializer->serialize(@perl_datastructures);
  408. my @perl_datastructures = $strict_serializer->deserialize(@blobs);
  409. =head2 Reading maybe compressed JSON and writing compressed JSON
  410. # We can optionally detect compressed JSON as well, will accept
  411. # mixed compressed/uncompressed data. This works for all the input
  412. # formats.
  413. my $lax_serializer = Data::FlexSerializer->new(
  414. detect_compression => 1,
  415. );
  416. =head2 Reading definitely compressed JSON and writing compressed JSON
  417. # If we know that all our data is compressed we can skip the
  418. # detection step. This works for all the input formats.
  419. my $lax_compress = Data::FlexSerializer->new(
  420. assume_compression => 1,
  421. compress_output => 1, # This is the default
  422. );
  423. =head2 Migrate from maybe compressed Storable to compressed JSON
  424. my $storable_to_json = Data::FlexSerializer->new(
  425. detect_compression => 1, # check whether the input is compressed
  426. detect_storable => 1, # accept Storable images as input
  427. compress_output => 1, # This is the default
  428. );
  429. =head2 Migrate from maybe compressed JSON to Sereal
  430. my $storable_to_sereal = Data::FlexSerializer->new(
  431. detect_sereal => 1,
  432. output_format => 'sereal',
  433. );
  434. =head2 Migrate from Sereal to JSON
  435. my $sereal_backcompat = Data::FlexSerializer->new(
  436. detect_sereal => 1, # accept Sereal images as input
  437. );
  438. =head2 Migrate from JSON OR Storable to Sereal
  439. my $flex_to_json = Data::FlexSerializer->new(
  440. detect_compression => 1,
  441. detect_json => 1, # this is the default
  442. detect_sereal => 1,
  443. detect_storable => 1,
  444. output_format => 'sereal',
  445. );
  446. =head2 Migrate from JSON OR Storable to Sereal with custom Sereal objects
  447. my $flex_to_json = Data::FlexSerializer->new(
  448. detect_compression => 1,
  449. detect_json => 1, # this is the default
  450. detect_sereal => 1,
  451. detect_storable => 1,
  452. output_format => 'sereal',
  453. sereal_decoder => Sereal::Decoder->new(...),
  454. sereal_encoder => Sereal::Encoder->new(...),
  455. );
  456. =head2 Add your own format using Data::Dumper.
  457. See L<the documentation for add_format|add_format> below.
  458. =head1 ATTRIBUTES
  459. This is a L<Moose>-powered module so all of these are keys you can
  460. pass to L</new>. They're all read-only after the class is constructed,
  461. so you can look but you can't touch.
  462. =head1 METHODS
  463. =head2 assume_compression
  464. C<assume_compression> is a boolean flag that makes the deserialization
  465. assume that the data will be compressed. It won't have to guess,
  466. making the deserialization faster. Defaults to true.
  467. You almost definitely want to turn L</compress_output> off too if you
  468. turn this off, unless you're doing a one-off migration or something.
  469. =head2 detect_compression
  470. C<detect_compression> is a boolean flag that also affects only the
  471. deserialization step.
  472. If set, it'll auto-detect whether the input is compressed. Mutually
  473. exclusive with C<assume_compression> (we'll die if you try to set
  474. both).
  475. If you set C<detect_compression> we'll disable this for you, since it
  476. doesn't make any sense to try to detect when you're going to assume.
  477. Defaults to false.
  478. =head2 compress_output
  479. C<compress_output> is a flag indicating whether compressed or uncompressed
  480. dumps are to be generated during the serialization. Defaults to true.
  481. You probably to turn L</assume_compression> off too if you turn this
  482. off, unless you're doing a one-off migration or something.
  483. =head2 compression_level
  484. C<compression_level> is an integer indicating the compression level (0-9).
  485. =head2 output_format
  486. C<output_format> can be either set to the string C<json> (default),
  487. C<storable>, C<sereal> or your own format that you've added via L</add_format>.
  488. =head2 detect_FORMAT_NAME
  489. Whether we should detect this incoming format. By default only
  490. C<detect_json> is true. You can also set C<detect_storable>,
  491. C<detect_sereal> or C<detect_YOUR_FORMAT> for formats added via
  492. L</add_format>.
  493. =head2 sereal_encoder
  494. =head2 sereal_decoder
  495. You can supply C<sereal_encoder> or C<sereal_decoder> arguments with
  496. your own Serial decoder/encoder objects. Handy if you want to pass
  497. custom options to the encoder or decoder.
  498. By default we create objects for you at BUILD time. So you don't need
  499. to supply this for optimization purposes either.
  500. =head1 METHODS
  501. =head2 serialize
  502. Given a list of things to serialize, this does the job on each of them and
  503. returns a list of serialized blobs.
  504. In scalar context, this will return a single serialized blob instead of a
  505. list. If called in scalar context, but passed a list of things to serialize,
  506. this will croak because the call makes no sense.
  507. =head2 deserialize
  508. The opposite of C<serialize>, doh.
  509. =head2 deserialize_from_file
  510. Given a (single!) file name, reads the file contents and deserializes them.
  511. Returns the resulting Perl data structure.
  512. Since this works on one file at a time, this doesn't return a list of
  513. data structures like C<deserialize()> does.
  514. =head2 serialize_to_file
  515. $serializer->serialize_to_file(
  516. $data_structure => '/tmp/foo/bar'
  517. );
  518. Given a (single!) Perl data structure, and a (single!) file name,
  519. serializes the data structure and writes the result to the given file.
  520. Returns true on success, dies on failure.
  521. =head1 CLASS METHODS
  522. =head2 add_format
  523. C<add_format> class method to add support for custom formats.
  524. Data::FlexSerializer->add_format(
  525. data_dumper => {
  526. serialize => sub { shift; goto \&Data::Dumper::Dumper },
  527. deserialize => sub { shift; my $VAR1; eval "$_[0]" },
  528. detect => sub { $_[1] =~ /\$[\w]+\s*=/ },
  529. }
  530. );
  531. my $flex_to_dd = Data::FlexSerializer->new(
  532. detect_data_dumper => 1,
  533. output_format => 'data_dumper',
  534. );
  535. =head1 AUTHOR
  536. Steffen Mueller <smueller@cpan.org>
  537. Ævar Arnfjörð Bjarmason <avar@cpan.org>
  538. Burak Gürsoy <burak@cpan.org>
  539. Elizabeth Matthijsen <liz@dijkmat.nl>
  540. Caio Romão Costa Nascimento <cpan@caioromao.com>
  541. Jonas Galhordas Duarte Alves <jgda@cpan.org>
  542. =head1 ACKNOWLEDGMENT
  543. This module was originally developed at and for Booking.com.
  544. With approval from Booking.com, this module was generalized
  545. and put on CPAN, for which the authors would like to express
  546. their gratitude.
  547. =head1 COPYRIGHT AND LICENSE
  548. (C) 2011, 2012, 2013 Steffen Mueller and others. All rights reserved.
  549. This code is available under the same license as Perl version
  550. 5.8.1 or higher.
  551. This program is distributed in the hope that it will be useful,
  552. but WITHOUT ANY WARRANTY; without even the implied warranty of
  553. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  554. =cut