PageRenderTime 55ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/Fuse/PDF/ContentFS.pm

https://github.com/gitpan/Fuse-PDF
Perl | 936 lines | 590 code | 100 blank | 246 comment | 104 complexity | 1cd8888c74045251f57ca040bfe885c3 MD5 | raw file
  1. #######################################################################
  2. # $URL: svn+ssh://equilibrious@equilibrious.net/home/equilibrious/svnrepos/chrisdolan/Fuse-PDF/lib/Fuse/PDF/ContentFS.pm $
  3. # $Date: 2008-06-06 22:47:54 -0500 (Fri, 06 Jun 2008) $
  4. # $Author: equilibrious $
  5. # $Revision: 767 $
  6. ########################################################################
  7. package Fuse::PDF::ContentFS;
  8. use warnings;
  9. use strict;
  10. use 5.008;
  11. use Carp qw(carp);
  12. use Readonly;
  13. use POSIX qw(:errno_h);
  14. use Fcntl qw(:mode);
  15. use English qw(-no_match_vars);
  16. use CAM::PDF;
  17. use CAM::PDF::Node;
  18. use CAM::PDF::Renderer::Images; # included so PAR picks it up
  19. use CAM::PDF::Renderer::Text; # included so PAR picks it up
  20. use Fuse::PDF::ErrnoHacks;
  21. use Fuse::PDF::FS;
  22. use Fuse::PDF::ImageTemplate;
  our $VERSION = '0.09';

  # Limits used while resolving paths and reporting sizes
  Readonly::Scalar my $PATHLEN     => 255;
  Readonly::Scalar my $BLOCKSIZE   => 4096;
  Readonly::Scalar my $ELOOP_LIMIT => 100;

  # stat() mode per node type; directories and files carry no write bits
  Readonly::Hash my %PERMS => (
     d => S_IFDIR() | oct(555),
     l => S_IFLNK() | oct(777),
     f => S_IFREG() | oct(444),
  );

  # Fixed figures reported by fs_statfs
  Readonly::Scalar my $USED_FILES  => 1000;
  Readonly::Scalar my $FREE_FILES  => 1_000_000;
  Readonly::Scalar my $MAX_BLOCKS  => 1_000_000;
  Readonly::Scalar my $FREE_BLOCKS => 500_000;

  Readonly::Scalar my $FS_ROOT_KEY => 'FusePDF'; # track value from Fuse::PDF::FS

  # PDF node types whose value is exposed as the content of a file
  Readonly::Hash my %SCALARS => (
     string    => 1,
     hexstring => 1,
     number    => 1,
     boolean   => 1,
     label     => 1,
  );

  Readonly::Scalar my $IMAGE_CACHE_TIMEOUT => 15; # seconds
  39. # --------------------------------------------------
  # Constructor.  $options is a hashref that must hold a true 'pdf' entry;
  # returns nothing otherwise.  All options are copied into the instance and
  # 'pdf_mtime' defaults to the program start time.
  sub new {
     my ($pkg, $options) = @_;

     return if !$options || !$options->{pdf};

     my %fields = %{$options};
     $fields{pdf_mtime} ||= $BASETIME; # aka $^T
     return bless \%fields, $pkg;
  }
  # Accessor for the 'compact' flag: with no argument returns the current
  # value, otherwise stores 1 or undef and returns nothing.
  sub compact { ## no critic(ArgUnpacking)
     my ($self, $boolean) = @_;
     my $is_getter = 1 == scalar @_;
     if ($is_getter) {
        return $self->{compact};
     }
     $self->{compact} = $boolean ? 1 : undef;
     return;
  }
  # Accessor for the 'backup' flag: with no argument returns the current
  # value, otherwise stores 1 or undef and returns nothing.
  sub backup { ## no critic(ArgUnpacking)
     my ($self, $boolean) = @_;
     my $is_getter = 1 == scalar @_;
     if ($is_getter) {
        return $self->{backup};
     }
     $self->{backup} = $boolean ? 1 : undef;
     return;
  }
  # Accessor for 'autosave_filename': with no argument returns the stored
  # name, otherwise stores the given name verbatim and returns nothing.
  sub autosave_filename { ## no critic(ArgUnpacking)
     my ($self, $filename) = @_;
     my $is_getter = 1 == scalar @_;
     if ($is_getter) {
        return $self->{autosave_filename};
     }
     $self->{autosave_filename} = $filename;
     return;
  }
  # Return a new instance wrapping the previous revision of the PDF (sharing
  # this instance's pdf_mtime), or nothing when there is no older revision.
  sub previous_revision {
     my ($self) = @_;

     my $older_pdf = $self->{pdf}->previousRevision();
     if ($older_pdf) {
        my %options = (
           pdf       => $older_pdf,
           pdf_mtime => $self->{pdf_mtime},
        );
        return __PACKAGE__->new(\%options);
     }
     return;
  }
  # Return this instance followed by one instance per older revision,
  # newest first.
  sub all_revisions {
     my ($self) = @_;

     my @revs;
     my $fs = $self;
     while ($fs) {
        push @revs, $fs;
        $fs = $fs->previous_revision;
     }
     return @revs;
  }
  # Return a hashref of global figures about the document; currently only
  # the page count under the key 'pages'.
  sub statistics {
     my ($self) = @_;
     my $page_count = $self->{pdf}->numPages;
     return { pages => $page_count };
  }
  # Return a human-readable, newline-terminated summary: a 'Name:' row
  # followed by the revision number and page count of every revision,
  # newest first.
  sub to_string {
     my ($self) = @_;

     my @stats = ($self->statistics);
     my $fs = $self;
     while ($fs = $fs->previous_revision) {
        push @stats, $fs->statistics;
     }

     # statistics() in this class only reports 'pages'; treat a missing name
     # as empty so the row keeps its shape without an uninitialized warning.
     my $name = $stats[0]->{name};
     $name = q{} if !defined $name;

     my @rows = ('Name: ' . $name);
     for my $i (0 .. $#stats) {
        push @rows, 'Revision: ' . (@stats - $i);
        push @rows, ' Pages: ' . $stats[$i]->{pages};
     }
     return join "\n", @rows, q{};
  }
  106. # --------------------------------------------------
  # Return the 13-element stat list for $abspath, or a negative errno.
  # Paths that resolve into an embedded filesystem are delegated to it.
  sub fs_getattr {
     my ($self, $abspath) = @_;

     my ($f, $path) = $self->_file($abspath);
     if (defined $path) {
        return -EIO() if !$f->can('fs_getattr');
        return $f->fs_getattr($path);
     }
     return -$f if !ref $f; # _file returned a positive errno

     my $type   = $f->{type};
     my $is_dir = 'd' eq $type;

     # directories report size 0; a node without content counts as empty
     my $size = ($is_dir || !defined $f->{content}) ? 0 : length $f->{content};

     # number of $BLOCKSIZE blocks needed to hold $size bytes, rounded up
     # (integer division; a modulo here would give the wrong count)
     my $blocks = 0 == $size ? 0 : int(($size - 1) / $BLOCKSIZE) + 1;

     my $nlink = $is_dir ? 2 + scalar keys %{$f->{content}} : 1;

     return
        0,                      # dev
        0,                      # inode
        $PERMS{$type},          # mode
        $nlink,                 # nlink
        $EFFECTIVE_USER_ID,     # uid
        0+$EFFECTIVE_GROUP_ID,  # gid
        0,                      # rdev
        $size,                  # size
        $self->{pdf_mtime},     # atime
        $self->{pdf_mtime},     # mtime
        $self->{pdf_mtime},     # ctime
        $BLOCKSIZE,             # blksize
        $blocks;                # blocks
  }
  # Return the target of the symlink at $abspath, or a negative errno
  # (-EINVAL when the node is not a link).  Embedded filesystems are
  # delegated to.
  sub fs_readlink {
     my ($self, $abspath) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     if (defined $subpath) {
        return $node->can('fs_readlink') ? $node->fs_readlink($subpath) : -EIO();
     }
     if (!ref $node) {
        return -$node;
     }
     if ('l' ne $node->{type}) {
        return -EINVAL();
     }
     return $node->{content};
  }
  # Return the directory listing for $abspath: '.', '..', the entry names
  # and a trailing 0 status, or a negative errno.  Embedded filesystems are
  # delegated to.
  sub fs_getdir {
     my ($self, $abspath) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     if (defined $subpath) {
        return $node->can('fs_getdir') ? $node->fs_getdir($subpath) : -EIO();
     }
     if (!ref $node) {
        return -$node;
     }
     return q{.}, q{..}, (keys %{$node->{content}}), 0;
  }
  # Open $abspath.  Returns 0 for nodes served by this class, a negative
  # errno on failure, or whatever the embedded filesystem returns.
  sub fs_open {
     my ($self, $abspath, $flags) = @_;

     my ($f, $path) = $self->_file($abspath);
     if (defined $path) {
        return -EIO() if !$f->can('fs_open');
        # forward the open flags too; the delegate needs them to decide
        return $f->fs_open($path, $flags);
     }
     return -$f if !ref $f;
     # check flags?
     return 0;
  }
  # Read up to $size bytes starting at $offset from the file at $abspath.
  # Returns the data (an empty string at or past end of file) or a negative
  # errno.  Embedded filesystems are delegated to.
  sub fs_read {
     my ($self, $abspath, $size, $offset) = @_;

     my ($f, $path) = $self->_file($abspath);
     if (defined $path) {
        return -EIO() if !$f->can('fs_read');
        # the delegate needs the requested window, not just the path
        return $f->fs_read($path, $size, $offset);
     }
     return -$f if !ref $f;

     # a directory's content is a hash of entries, not readable data
     return -EISDIR() if 'd' eq $f->{type};

     my $content = defined $f->{content} ? $f->{content} : q{};
     # substr() yields undef (and warns) when the offset is past the end
     return q{} if $offset >= length $content;
     return substr $content, $offset, $size;
  }
  # Return the fixed filesystem statistics list built from the module
  # constants.
  sub fs_statfs {
     my ($self) = @_;
     return (
        $PATHLEN,
        $USED_FILES,
        $FREE_FILES,
        $MAX_BLOCKS,
        $FREE_BLOCKS,
        $BLOCKSIZE,
     );
  }
  # Create a node.  Only possible inside an embedded filesystem, which is
  # delegated to; everything else yields -EIO.
  sub fs_mknod {
     my ($self, $abspath, $perms, $dev) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_mknod');
     return $node->fs_mknod($subpath, $perms, $dev);
  }
  # Create a directory.  Only possible inside an embedded filesystem, which
  # is delegated to; everything else yields -EIO.
  sub fs_mkdir {
     my ($self, $abspath, $perm) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_mkdir');
     return $node->fs_mkdir($subpath, $perm);
  }
  # Remove a file.  Only possible inside an embedded filesystem, which is
  # delegated to; everything else yields -EIO.
  sub fs_unlink {
     my ($self, $abspath) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_unlink');
     return $node->fs_unlink($subpath);
  }
  # Remove a directory.  Only possible inside an embedded filesystem, which
  # is delegated to; everything else yields -EIO.
  sub fs_rmdir {
     my ($self, $abspath) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_rmdir');
     return $node->fs_rmdir($subpath);
  }
  # Create a symlink at $abspath pointing to $link.  Only possible inside an
  # embedded filesystem, which is delegated to; everything else yields -EIO.
  sub fs_symlink {
     my ($self, $link, $abspath) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_symlink');
     return $node->fs_symlink($link, $subpath);
  }
  # Rename within one embedded filesystem.  Yields -EXDEV when source and
  # destination resolve to different filesystem objects and -EIO when either
  # path is not inside an embedded filesystem.
  sub fs_rename {
     my ($self, $srcpath, $destpath) = @_;

     my ($src_fs, $src) = $self->_file($srcpath);
     return -EIO() if !defined $src;
     return -EIO() if !$src_fs->can('fs_rename');

     my ($dest_fs, $dest) = $self->_file($destpath);
     return -EIO() if !defined $dest;

     return -EXDEV() if $src_fs != $dest_fs;
     return $src_fs->fs_rename($src, $dest);
  }
  # Hard links are not supported: always yields -EIO.
  sub fs_link {
     my $unsupported = -EIO();
     return $unsupported;
  }
  # Change permissions.  Only possible inside an embedded filesystem, which
  # is delegated to; everything else yields -EIO.
  sub fs_chmod {
     my ($self, $abspath, $perms) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_chmod');
     return $node->fs_chmod($subpath, $perms);
  }
  # Change ownership.  Only possible inside an embedded filesystem, which is
  # delegated to; everything else yields -EIO.
  sub fs_chown {
     my ($self, $abspath, $uid, $gid) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_chown');
     return $node->fs_chown($subpath, $uid, $gid);
  }
  # Truncate a file.  Only possible inside an embedded filesystem, which is
  # delegated to; everything else yields -EIO.
  sub fs_truncate {
     my ($self, $abspath, $length) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_truncate');
     return $node->fs_truncate($subpath, $length);
  }
  # Set access and modification times.  Only possible inside an embedded
  # filesystem, which is delegated to; everything else yields -EIO.
  sub fs_utime {
     my ($self, $abspath, $atime, $mtime) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_utime');
     return $node->fs_utime($subpath, $atime, $mtime);
  }
  # Write $str at $offset.  Only possible inside an embedded filesystem,
  # which is delegated to; everything else yields -EIO.
  sub fs_write {
     my ($self, $abspath, $str, $offset) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_write');
     return $node->fs_write($subpath, $str, $offset);
  }
  # Flush a file.  A no-op returning 0 for nodes served by this class;
  # embedded filesystems are delegated to.
  sub fs_flush {
     my ($self, $abspath) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return 0 if !defined $subpath;
     return $node->can('fs_flush') ? $node->fs_flush($subpath) : -EIO();
  }
  # Release (close) a file.  A no-op returning 0 for nodes served by this
  # class; embedded filesystems are delegated to.
  sub fs_release {
     my ($self, $abspath, $flags) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return 0 if !defined $subpath;
     return $node->can('fs_release') ? $node->fs_release($subpath, $flags) : -EIO();
  }
  # Sync a file.  A no-op returning 0 for nodes served by this class;
  # embedded filesystems are delegated to.
  sub fs_fsync {
     my ($self, $abspath, $flags) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return 0 if !defined $subpath;
     return $node->can('fs_fsync') ? $node->fs_fsync($subpath, $flags) : -EIO();
  }
  # Set an extended attribute.  Only possible inside an embedded filesystem,
  # which is delegated to; everything else yields -EIO.
  sub fs_setxattr {
     my ($self, $abspath, $key, $value, $flags) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_setxattr');
     return $node->fs_setxattr($subpath, $key, $value, $flags);
  }
  # Read an extended attribute.  Returns 0 for nodes served by this class;
  # embedded filesystems are delegated to.
  sub fs_getxattr {
     my ($self, $abspath, $key) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return 0 if !defined $subpath;
     return $node->can('fs_getxattr') ? $node->fs_getxattr($subpath, $key) : -EIO();
  }
  # List extended attributes.  Returns 0 for nodes served by this class;
  # embedded filesystems are delegated to (any extra argument is passed on).
  sub fs_listxattr {
     my ($self, $abspath, $key) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return 0 if !defined $subpath;
     return $node->can('fs_listxattr') ? $node->fs_listxattr($subpath, $key) : -EIO();
  }
  # Remove an extended attribute.  Only possible inside an embedded
  # filesystem, which is delegated to; everything else yields -EIO.
  sub fs_removexattr {
     my ($self, $abspath, $key) = @_;

     my ($node, $subpath) = $self->_file($abspath);
     return -EIO() if !defined $subpath || !$node->can('fs_removexattr');
     return $node->fs_removexattr($subpath, $key);
  }
  349. # --------------------------------------------------
  # Build the /filesystems directory: one Fuse::PDF::FS object per name
  # found under the $FS_ROOT_KEY entry of the PDF root dictionary.  Objects
  # created on an earlier call are reused; names no longer present in the
  # PDF are dropped from the cache.
  sub _filesystems {
     my ($self) = @_;

     my $cached = $self->{filesystems} ||= {};
     my %mounted;

     # lookup fs object in PDF
     my $root_dict = $self->{pdf}->getRootDict();
     if ($root_dict->{$FS_ROOT_KEY}) {
        my $holder = $root_dict->{$FS_ROOT_KEY}->{value};
        for my $name (keys %{$holder}) {
           my $fs = $cached->{$name};
           if (!$fs) {
              $fs = Fuse::PDF::FS->new({
                 pdf               => $self->{pdf},
                 pdf_mtime         => $self->{pdf_mtime},
                 fs_name           => $name,
                 autosave_filename => $self->{autosave_filename},
                 compact           => $self->{compact},
                 backup            => $self->{backup},
              });
           }
           $mounted{$name} = $fs;
        }
     }
     $self->{filesystems} = \%mounted;

     # hand out a copy so the cache itself is not shared with the caller
     my %listing = %mounted;
     return {
        type    => 'd',
        content => \%listing,
     };
  }
  # Node builder for /pages/<num>/layout.txt: a file holding the page's raw
  # content.  $path->[$i] is the file name, so the page number is one
  # component before it.
  sub _page_content {
     my ($self, $i, $path) = @_;

     my $pagenum = $path->[$i - 1];

     # Fetch in scalar context: inside the hash constructor an empty-list
     # return would leave an odd-sized list and an undef 'content'.
     my $markup = $self->{pdf}->getPageContent($pagenum);
     return {
        type    => 'f',
        content => defined $markup ? $markup : q{},
     };
  }
  # Node builder for /pages/<num>/text/plain_text.txt: a file holding the
  # text extracted from the page.  $path->[$i] is the file name, so the page
  # number is two components before it.
  sub _page_text {
     my ($self, $i, $path) = @_;

     my $pagenum = $path->[$i - 2];

     # Fetch in scalar context: inside the hash constructor an empty-list
     # return would leave an odd-sized list and an undef 'content'.
     my $text = $self->{pdf}->getPageText($pagenum);
     return {
        type    => 'f',
        content => defined $text ? $text : q{},
     };
  }
  # Node builder for /pages/<num>/text/formatted_text.txt: the page content
  # tree rendered with CAM::PDF::Renderer::Text.  Returns nothing when the
  # page has no usable content tree, which the path walker reports as a
  # missing entry.
  sub _page_textfb {
     my ($self, $i, $path) = @_;

     my $pagenum = $path->[$i - 2];

     my $tree = $self->{pdf}->getPageContentTree($pagenum);
     return if !$tree; # avoid calling render() on undef

     my $gs = $tree->render('CAM::PDF::Renderer::Text');
     return if !$gs;

     my $text = $gs->toString();
     return {
        type    => 'f',
        content => defined $text ? $text : q{},
     };
  }
  # Node builder for /pages/<num>/fonts/<ID>: a directory with one file per
  # font dictionary entry whose node type is listed in %SCALARS.
  sub _page_font {
     my ($self, $i, $path) = @_;

     my $pagenum  = $path->[$i - 2];
     my $fontname = $path->[$i];
     my $font     = $self->{pdf}->getFont($pagenum, $fontname);
     my %meta     = %{$font};

     my %files;
     for my $key (keys %meta) {
        next if !$SCALARS{$meta{$key}->{type}};
        $files{$key} = { type => 'f', content => $meta{$key}->{value} };
     }
     return {
        type    => 'd',
        content => \%files,
     };
  }
  # Node builder for /pages/<num>/fonts: a directory with one lazily built
  # entry per font name reported for the page.
  sub _page_fonts {
     my ($self, $i, $path) = @_;

     my $pagenum = $path->[$i - 1];

     my %font_dirs;
     for my $fontname ($self->{pdf}->getFontNames($pagenum)) {
        $font_dirs{$fontname} = \&_page_font;
     }
     return {
        type    => 'd',
        content => \%font_dirs,
     };
  }
  # Node builder for /pages/<num>/images/<n>.pdf: wraps image number <n> of
  # the page in a copy of the template PDF and returns it as a file.  The
  # generated bytes are cached per page and image and rebuilt once they are
  # older than $IMAGE_CACHE_TIMEOUT seconds.  Returns nothing when the image
  # cannot be found.
  sub _page_image {
     my ($self, $i, $path) = @_;

     my $pagenum = $path->[$i - 2];
     my ($imagenum) = $path->[$i] =~ m/\A(\d+)/xms;

     # the per-page and per-image cache levels are created on demand
     my $entry = $self->{image_cache}->{$pagenum}->{$imagenum} ||= {};

     my $now = time;
     my $is_fresh = $entry->{timestamp}
        && $now - $entry->{timestamp} <= $IMAGE_CACHE_TIMEOUT;

     if (!$is_fresh) {
        my $found = $self->{pdf}->getPageContentTree($pagenum)->findImages();
        my $image_node = $found->{images}->[$imagenum - 1];
        return if !$image_node;

        # 'Do' nodes name an object that must be looked up on the page;
        # 'BI' nodes carry the image inline
        my $image;
        if ('Do' eq $image_node->{type}) {
           my $label = $image_node->{value}->[0];
           $image = $self->{pdf}->dereference(q{/} . $label, $pagenum);
           if ($image) {
              $image = $image->{value};
           }
        } elsif ('BI' eq $image_node->{type}) {
           $image = $image_node->{value}->[0];
        }
        return if !$image;

        # long or abbreviated dimension keys, resolved to plain values
        my $w = $image->{value}->{Width} || $image->{value}->{W} || 0;
        $w = $self->{pdf}->getValue($w) if $w;
        my $h = $image->{value}->{Height} || $image->{value}->{H} || 0;
        $h = $self->{pdf}->getValue($h) if $h;

        # size the template page to the image and fill in its placeholders
        my $tmpl = Fuse::PDF::ImageTemplate->get_template_pdf();
        my $media_array = $tmpl->getValue($tmpl->getPage(1)->{MediaBox});
        $media_array->[2]->{value} = $w;
        $media_array->[3]->{value} = $h; ## no critic(MagicNumber)
        my $page = $tmpl->getPageContent(1);
        $page =~ s/xxx/$w/igxms;
        $page =~ s/yyy/$h/igxms;
        $tmpl->setPageContent(1, $page);

        # swap the template's placeholder image for the real one
        my $tmpl_im_objnum = $tmpl->dereference('/Im0', 1)->{objnum};
        if ($image->{objnum}) {
           $tmpl->replaceObject($tmpl_im_objnum, $self->{pdf}, $image->{objnum}, 1);
        } else {
           $tmpl->replaceObject($tmpl_im_objnum, undef, CAM::PDF::Node->new('object', $image), 1);
        }
        $tmpl->cleanse();
        $tmpl->cleansave(); # writes to RAM, not disk

        $entry->{timestamp} = $now;
        $entry->{content}   = $tmpl->{content};
     }

     return {
        type    => 'f',
        content => $entry->{content},
     };
  }
  # Node builder for /pages/<num>/images: a directory with one lazily built
  # '<n>.pdf' entry per image found on the page, counting from 1.  Returns
  # nothing when the page has no usable content tree, which the path walker
  # reports as a missing entry.
  sub _page_images {
     my ($self, $i, $path) = @_;

     my $pagenum = $path->[$i - 1];

     my $content_tree = $self->{pdf}->getPageContentTree($pagenum);
     return if !$content_tree; # avoid calling findImages() on undef

     my $gs = $content_tree->findImages();
     my $images = ($gs && $gs->{images}) || []; # no list means no images

     my %files;
     for my $number (1 .. @{$images}) {
        $files{$number . '.pdf'} = \&_page_image;
     }
     return {
        type    => 'd',
        content => \%files,
     };
  }
  # Node builder for /pages/<num>: the fixed layout of a page directory.
  # The entries that depend on the PDF are built lazily by the referenced
  # subs.
  sub _page {
     my ($self, $i, $path) = @_;

     my %text_dir = (
        type    => 'd',
        content => {
           'plain_text.txt'     => \&_page_text,
           'formatted_text.txt' => \&_page_textfb,
        },
     );
     my %entries = (
        'layout.txt' => \&_page_content,
        'fonts'      => \&_page_fonts,
        'images'     => \&_page_images,
        'text'       => \%text_dir,
     );
     return {
        type    => 'd',
        content => \%entries,
     };
  }
  # Node builder for /pages: a directory with one lazily built entry per
  # page, named by page number counting from 1.
  sub _pages {
     my ($self) = @_;

     my $last_page = $self->{pdf}->numPages;
     my %page_dirs;
     for my $pagenum (1 .. $last_page) {
        $page_dirs{$pagenum} = \&_page;
     }
     return {
        type    => 'd',
        content => \%page_dirs,
     };
  }
  # Node builder for /revisions: one file per revision holding that
  # revision's PDF bytes.  The newest revision gets the highest number and
  # the oldest is named '1'.
  sub _revisions {
     my ($self) = @_;

     my @documents = map { $_->{pdf}->{content} } $self->all_revisions;
     my $total = @documents;

     my %files;
     for my $idx (0 .. $#documents) {
        $files{$total - $idx} = { type => 'f', content => $documents[$idx] };
     }
     return {
        type    => 'd',
        content => \%files,
     };
  }
  # Node builder for /metadata: one file per entry of the trailer's Info
  # dictionary whose node type is listed in %SCALARS, plus an 'ID' file
  # holding the serialized trailer ID array when there is one.
  sub _metadata {
     my ($self) = @_;

     my $pdf     = $self->{pdf};
     my $trailer = $pdf->{trailer};

     my %meta;
     if ($trailer->{Info}) {
        %meta = %{$pdf->getValue($trailer->{Info})};
     }
     my $id = $trailer->{ID};
     if ($id && 'array' eq $id->{type}) {
        $meta{ID} = CAM::PDF::Node->new('string', $pdf->writeAny($id));
     }

     my %files;
     for my $key (keys %meta) {
        my $node = $meta{$key};
        next if !$SCALARS{$node->{type}};
        $files{$key} = { type => 'f', content => $node->{value} };
     }
     return {
        type    => 'd',
        content => \%files,
     };
  }
  # The root directory: four top-level folders, each built lazily by the
  # referenced sub.
  sub _root {
     my ($self) = @_;

     my %top_level = (
        metadata    => \&_metadata,
        revisions   => \&_revisions,
        pages       => \&_pages,
        filesystems => \&_filesystems,
     );
     return {
        type    => 'd',
        content => \%top_level,
     };
  }
  # Resolve an absolute path against the virtual tree.
  #
  # Returns one of:
  #   * a hashref { type => 'd'|'f'|'l', content => ... } for a node served
  #     by this class;
  #   * a two-element list (object, remaining path) when the walk reaches a
  #     value that is not a plain hash, so the caller can delegate to it;
  #   * a positive errno number on failure (callers negate it).
  #
  # Code references found in a directory are called as
  # $self->$code($i, \@path), where $i is the index of the current path
  # component, and must return the node to use (or nothing).
  sub _file {
     my ($self, $path) = @_;

     my $nsymlinks = 0;
     my @dirs = ($self->_root);
     my @path = split m{/}xms, $path;
     for (my $i = 0; $i < @path; ++$i) { ##no critic(ProhibitCStyleForLoops)
        my $entry = $path[$i];
        next if q{} eq $entry;

        my $type = $dirs[-1]->{type};
        return ENOTDIR() if 'd' ne $type;
        next if q{.} eq $entry;

        if (q{..} eq $entry) {
           pop @dirs;
           return EACCES() if !@dirs; # tried to get parent of root
           # the parent is now on top of the stack; '..' is never the name
           # of a child, so it must not fall through to the lookup below
           next;
        }

        my $next = $dirs[-1]->{content}->{$entry};
        return ENOENT() if !$next;
        if ('CODE' eq ref $next) {
           $next = $self->$next($i, \@path);
        }
        return ENOENT() if !$next;

        if ('HASH' ne ref $next) {
           my $rest_of_path = join q{/}, q{}, @path[$i+1 .. $#path];
           return ($next, $rest_of_path);
        }

        # A symlink that is not the last component is expanded in place.
        # As the last component it is returned itself, e.g. for readlink.
        if ('l' eq $next->{type} && $i != $#path) {
           return ELOOP() if ++$nsymlinks >= $ELOOP_LIMIT;
           my $linkpath = $next->{content};
           # cannot leave the filesystem; must be relative
           return EACCES() if $linkpath =~ m{\A /}xms;
           splice @path, $i + 1, 0, split m{/}xms, $linkpath;
           # resolve the target from the link's directory; pushing the link
           # itself would make the next component fail with ENOTDIR
           next;
        }

        push @dirs, $next;
     }
     return $dirs[-1] || ENOENT();
  }
  611. 1;
  612. __END__
  613. =pod
  614. =for stopwords pdf runtime EIO
  615. =head1 NAME
  616. Fuse::PDF::ContentFS - Represent actual PDF document properties as files
  617. =head1 SYNOPSIS
  618. use Fuse::PDF::ContentFS;
  619. my $fs = Fuse::PDF::ContentFS->new({pdf => CAM::PDF->new('my_doc.pdf')});
  620. $fs->fs_read('/');
  621. or
  622. % mount_pdf --all my_doc.pdf /Volumes/my_doc_pdf
  623. % cd /Volumes/my_doc_pdf
  624. % ls
  625. filesystems metadata pages revisions
  626. % ls metadata/
  627. CreationDate Creator ID ModDate Producer
  628. % cat metadata/Producer
  629. Adobe PDF library 5.00
  630. % ls pages
  631. 1
  632. % ls pages/1
  633. fonts images layout.txt text
  634. % ls pages/1/text
  635. formatted_text.txt plain_text.txt
  636. % cat pages/1/text/plain_text.txt
  637. F u s e : : P D F - E m b e d a f i l e s y s t e m i n a P D F d o c u
  638. m e n t
  639. C h r i s D o l a n < c d o l a n @ c p a n . o r g >
  640. T o g e t s o f t w a r e t h a t c a n i n t e r a c t w i t h t h i s
  641. f i l e s y s t e m , s e e
  642. h t t p : / / s e a r c h . c p a n . o r g / d i s t / F u s e - P D F /
  643. % cat pages/1/fonts/TT0/BaseFont
  644. HISDQN+Helvetica
  645. % ls pages/1/images/
  646. 1.pdf 2.pdf 3.pdf 4.pdf
  647. % open pages/1/images/1.pdf
  648. % cd /
  649. % umount /Volumes/my_doc_pdf
  650. =head1 LICENSE
  651. Copyright 2007-2008 Chris Dolan, I<cdolan@cpan.org>
  652. This library is free software; you can redistribute it and/or modify it
  653. under the same terms as Perl itself.
  654. =head1 DESCRIPTION
  655. This is a read-only filesystem that represents the metadata of a PDF document
  656. as a filesystem. The metadata that are available are the ones that I've
  657. explicitly coded for. Much more is possible.
  658. =head1 FILESYSTEM STRUCTURE
  659. /pages/<num> - one folder per page of the document; count from 1
  660. /pages/<num>/fonts/<ID> - one folder per referenced font, e.g. 'TT0'
  661. /pages/<num>/fonts/<ID>/Type - always 'Font'
  662. /pages/<num>/fonts/<ID>/Subtype - e.g. 'TrueType'
  663. /pages/<num>/fonts/<ID>/BaseFont - name of the font, e.g. 'Helvetica'
  664. /pages/<num>/fonts/<ID>/FirstChar - ordinal of the first available glyph
  665. /pages/<num>/fonts/<ID>/LastChar - ordinal of the last available glyph
  666. /pages/<num>/layout.txt - raw PDF markup for a page
  667. /pages/<num>/text/plain_text.txt - strings extracted from the page (rough!)
  668. /pages/<num>/text/formatted_text.txt - very rough text rendering of the page
  669. /pages/<num>/images/<num>.pdf - images used in the page, wrapped in a minimal PDF
  670. /metadata/ - one file for every scalar key/value in the document's Info dictionary
  671. /metadata/ID - hexadecimal ID, hopefully unique
  672. /metadata/Author - usually the author's username; depends on authoring tool
  673. /metadata/Creator - name of generating application
  674. /metadata/Producer - name of generating application
  675. /metadata/CreationDate - e.g. D:20080104091746-06'00'
  676. /metadata/ModDate - date last modified (usually the same as the CreationDate)
  677. /filesystems/<name>/ - any embedded filesystems created by Fuse::PDF
  678. /revisions/<num> - look at older versions of annotated PDFs
  679. =head1 METHODS
  680. =over
  681. =item $pkg->new($hash_of_options)
  682. Create a new filesystem instance. The only
  683. required option is the C<pdf> key, like so:
  684. my $fs = Fuse::PDF::ContentFS->new({pdf => CAM::PDF->new('file.pdf')});
  685. All other options are currently unused, although they are passed to
  686. L<Fuse::PDF::FS> instances created for the F</filesystems> folder.
  687. =item $self->all_revisions()
  688. Return a list of one instance for each revision of the PDF. The first item on
  689. the list is this instance (the newest) and the last item on the list is the
  690. first revision of the PDF (the oldest). Unedited PDFs (the most common) will
  691. return just a one-element list.
  692. =item $self->previous_revision()
  693. If there is an older version of the PDF, extract that and return a new
  694. C<Fuse::PDF::ContentFS> instance which applies to that revision. Multiple
  695. versions are a feature supported by the PDF specification, so this action
  696. is consistent with other PDF revision editing tools.
  697. If there are no previous revisions, this will return C<undef>.
  698. =item $self->statistics()
  699. Return a hashref with some global information about the filesystem.
  700. =item $self->to_string()
  701. Return a human-readable representation of the statistics for each
  702. revision of the filesystem.
  703. =back
  704. =head1 FUSE-COMPATIBLE METHODS
  705. The following methods are independent of L<Fuse>, but use almost the
  706. exact same API expected by that package (except for fs_setxattr), so
  707. they can easily be converted to a FUSE implementation.
  708. =over
  709. =item $self->fs_getattr($file)
  710. =item $self->fs_readlink($file)
  711. =item $self->fs_getdir($file)
  712. =item $self->fs_mknod($file, $modes, $dev)
  713. =item $self->fs_mkdir($file, $perms)
  714. =item $self->fs_unlink($file)
  715. =item $self->fs_rmdir($file)
  716. =item $self->fs_symlink($link, $file)
  717. =item $self->fs_rename($oldfile, $file)
  718. =item $self->fs_link($srcfile, $file)
  719. =item $self->fs_chmod($file, $perms)
  720. =item $self->fs_chown($file, $uid, $gid)
  721. =item $self->fs_truncate($file, $length)
  722. =item $self->fs_utime($file, $atime, $mtime)
  723. =item $self->fs_open($file, $mode)
  724. =item $self->fs_read($file, $size, $offset)
  725. =item $self->fs_write($file, $str, $offset)
  726. =item $self->fs_statfs()
  727. =item $self->fs_flush($file)
  728. =item $self->fs_release($file, $mode)
  729. =item $self->fs_fsync($file, $flags)
  730. =item $self->fs_setxattr($file, $key, $value, \%flags)
  731. =item $self->fs_getxattr($file, $key)
  732. =item $self->fs_listxattr($file)
  733. =item $self->fs_removexattr($file, $key)
  734. =back
  735. =head1 PASS-THROUGH METHODS
  736. These methods exist only to pass parameters through to L<Fuse::PDF::FS> via
  737. the F</filesystems/*> sub-filesystems. See the methods of the same name in
  738. that module.
  739. =over
  740. =item $self->autosave_filename()
  741. =item $self->autosave_filename($filename)
  742. =item $self->compact()
  743. =item $self->compact($boolean)
  744. =item $self->backup()
  745. =item $self->backup($boolean)
  746. =back
  747. =head1 SEE ALSO
  748. L<Fuse::PDF>
  749. L<CAM::PDF>
  750. =head1 AUTHOR
  751. Chris Dolan, I<cdolan@cpan.org>
  752. =cut
  753. # Local Variables:
  754. # mode: perl
  755. # perl-indent-level: 3
  756. # cperl-indent-level: 3
  757. # fill-column: 78
  758. # indent-tabs-mode: nil
  759. # c-indentation-style: bsd
  760. # End:
  761. # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab :