Text-PDF-0.31/0000755000175000017500000000000012757431131011402 5ustar bobhbobhText-PDF-0.31/t/0000755000175000017500000000000012757431131011645 5ustar bobhbobhText-PDF-0.31/t/hello.t0000644000175000017500000000336312752511217013141 0ustar bobhbobh#!/usr/bin/perl use strict; use Test::Simple tests => 7; use Text::PDF::File; use Text::PDF::Page; # pulls in Pages use Text::PDF::Utils; # not strictly needed use Text::PDF::SFont; my ($testpdf) = 't/temp.pdf'; my ($testdata) = 'BT 1 0 0 1 250 600 Tm /F0 14 Tf (Hello World!) Tj ET'; unlink($testpdf); ok(!(-f $testpdf), 'verify pdf does not pre-exist'); # Create a Hello world PDF my ($pdf, $root, $page, $font); $pdf = Text::PDF::File->new; # Make up a new document $root = Text::PDF::Pages->new($pdf); # Make a page tree in the document $root->proc_set("PDF", "Text"); # Say that all pages have PDF and Text instructions $root->bbox(0, 0, 595, 840); # hardwired page size A4 (for this app.) for all pages $page = Text::PDF::Page->new($pdf, $root); # Make a new page in the tree $font = Text::PDF::SFont->new($pdf, 'Helvetica', 'F0'); # Make a new font in the document $root->add_font($font); # Tell all pages about the font $page->add($testdata); # put some content on the page $pdf->out_file($testpdf); # output the document to a file $pdf->release; ok(-f $testpdf, "write temporary file $testpdf"); # Now try to read the PDF my ($file, $offset, $res, $str); $file = Text::PDF::File->open($testpdf); ok($file, 'open pdf'); $offset = $file->locate_obj(5, 0); ok($offset, 'find object'); seek($file->{' INFILE'}, $offset, 0); ($res, $str) = $file->readval(""); ok(defined($res->{' stream'}), 'got stream'); my ($data) = $res->read_stream(1)->{' stream'}; $file->release; $data =~ s/\s+$//; ok( $data eq $testdata, 'correct content'); # Finally make sure we can delete the PDF unlink($testpdf); ok(!(-f $testpdf), "delete temporary file $testpdf"); # all done! Text-PDF-0.31/MANIFEST0000644000175000017500000000165612757431132012544 0ustar bobhbobhChanges examples/boon_graph.pdf examples/call_conf.txt examples/CD.CFG examples/graph.pl examples/hello.pl examples/pdfaddobj.pl examples/pdfaddpg.pl examples/pdfcrop.pl examples/pdfgetobj.pl examples/pdflines.pl examples/squ.pdf examples/squares.pdf examples/test.pdf lib/Text/PDF.pm lib/Text/PDF/Array.pm lib/Text/PDF/Bool.pm lib/Text/PDF/Dict.pm lib/Text/PDF/File.pm lib/Text/PDF/Filter.pm lib/Text/PDF/Name.pm lib/Text/PDF/Null.pm lib/Text/PDF/Number.pm lib/Text/PDF/Objind.pm lib/Text/PDF/Page.pm lib/Text/PDF/Pages.pm lib/Text/PDF/SFont.pm lib/Text/PDF/String.pm lib/Text/PDF/TTFont.pm lib/Text/PDF/TTFont0.pm lib/Text/PDF/Utils.pm LICENSE Makefile.PL MANIFEST This list of files MANIFEST.SKIP readme.txt scripts/pdfbklt scripts/pdfrevert scripts/pdfstamp t/hello.t META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) Text-PDF-0.31/readme.txt0000755000175000017500000000464412752511217013412 0ustar bobhbobh Text::PDF There seem to be a growing plethora of Perl modules for creating and manipulating PDF files. This module is no exception. Beyond the standard features you would expect from a PDF manipulation module there are: FEATURES . Works with more than one PDF file open at once . Supports TrueType fonts as well as the base 14 (requires Font::TTF module) including Type0 glyph based fonts (for Unicode), and subsetting UN-FEATURES (which may one day be fixed) . No nice higher level interface for rendering and Page description insertion . No support for Type1 or Type3 fonts . No higher level support of annotations, bookmarks, hot-links, etc. . This is beta code although new features should be considered alpha In summary, this module provides a strong (IMO) base for working with PDF files but lacks some finesse. Users should know their way around the PDF specification. Included in the examples directory are some example programs starting from very simple PDF creation programs and working up. More extensive samples are included as scripts. SCRIPTS Installed with this package are the following scripts: pdfbklt Turns documents into booklets pdfrevert Removes one layer of edits from a PDF file pdfstamp Adds the given text in a given font, size to all pages at given location EXAMPLES Included in the examples directory are some smaller utilities which are also useful, so don't throw them away! graph Makes graph paper - not very complex hello The "Hello World" program pdfaddobj Debug aid to insert data as an object in a file pdfaddpg Adds a blank page to a PDF file at any location pdfcrop Adds crop marks to a page (see cd.cfg) pdfgetobj Extracts a particular object from a PDF file (debugging aid) REQUIREMENTS This module set requires Compress::Zlib. It is used for compressed streams and within the Standard Fonts. INSTALLATION If you want to have TrueType support in your application, then you will need to install the Font::TTF module (available from CPAN) as well. Installation is as per the standard module installation approach: perl Makefile.PL make make test make install If working on Win32 platform, then try: perl Makefile.PL dmake dmake test dmake install Your mileage may vary CONTACT Bugs, comments and offers of collaboration to: Martin_Hosken@sil.org Text-PDF-0.31/lib/0000755000175000017500000000000012757431131012150 5ustar bobhbobhText-PDF-0.31/lib/Text/0000755000175000017500000000000012757431131013074 5ustar bobhbobhText-PDF-0.31/lib/Text/PDF/0000755000175000017500000000000012757431131013505 5ustar bobhbobhText-PDF-0.31/lib/Text/PDF/Filter.pm0000755000175000017500000002372312754625334015311 0ustar bobhbobhpackage Text::PDF::Filter; =head1 NAME PDF::Filter - Abstract superclass for PDF stream filters =head1 SYNOPSIS $f = Text::PDF::Filter->new; $str = $f->outfilt($str, 1); print OUTFILE $str; while (read(INFILE, $dat, 4096)) { $store .= $f->infilt($dat, 0); } $store .= $f->infilt("", 1); =head1 DESCRIPTION A Filter object contains state information for the process of outputting and inputting data through the filter. The precise state information stored is up to the particular filter and may range from nothing to whole objects created and destroyed. Each filter stores different state information for input and output and thus may handle one input filtering process and one output filtering process at the same time. =head1 METHODS =head2 Text::PDF::Filter->new Creates a new filter object with empty state information ready for processing data both input and output. =head2 $dat = $f->infilt($str, $isend) Filters from output to input the data. Notice that $isend == 0 implies that there is more data to come and so following it $f may contain state information (usually due to the break-off point of $str not being tidy). Subsequent calls will incorporate this stored state information. $isend == 1 implies that there is no more data to follow. The final state of $f will be that the state information is empty. Error messages are most likely to occur here since if there is required state information to be stored following this data, then that would imply an error in the data. =head2 $str = $f->outfilt($dat, $isend) Filter stored data ready for output. Parallels C. =cut sub new { my ($class) = @_; my ($self) = {}; bless $self, $class; } sub release { my ($self) = @_; # delete stuff that we know we can, here my @tofree = map { delete $self->{$_} } keys %{$self}; while (my $item = shift @tofree) { my $ref = ref($item); if (UNIVERSAL::can($item, 'release')) { $item->release(); } elsif ($ref eq 'ARRAY') { push( @tofree, @{$item} ); } elsif (UNIVERSAL::isa($ref, 'HASH')) { release($item); } } # check that everything has gone - it better had! foreach my $key (keys %{$self}) { warn ref($self) . " still has '$key' key left after release.\n"; } } package Text::PDF::ASCII85Decode; use strict; use vars qw(@ISA); @ISA = qw(Text::PDF::Filter); # no warnings qw(uninitialized); =head1 NAME Text::PDF::ASCII85Decode - Ascii85 filter for PDF streams. Inherits from L =cut sub outfilt { my ($self, $str, $isend) = @_; my ($res, $i, $j, $b, @c); if ($self->{'outcache'} ne "") { $str = $self->{'outcache'} . $str; $self->{'outcache'} = ""; } for ($i = 0; $i < length($str); $i += 4) { $b = unpack("N", substr($str, $i, 4)); if ($b == 0) { $res .= "z"; next; } for ($j = 3; $j >= 0; $j--) { $c[$j] = $b - int($b / 85) * 85 + 33; $b /= 85; } $res .= pack("C5", $b + 33, @c); $res .= "\n" if ($i % 60 == 56); } if ($isend && $i > length($str)) { $b = unpack("N", substr($str, $i - 4) . "\000\000\000"); for ($j = 0; $j < 4; $j++) { $c[$j] = $b - int($b / 85) * 85 + 33; $b /= 85; } $res .= substr(pack("C5", @c, $b), 0, $i - length($str) + 1) . "->"; } elsif ($i > length($str)) { $self->{'outcache'} = substr($str, $i - 4); } $res; } sub infilt { my ($self, $str, $isend) = @_; my ($res, $i, $j, @c, $b, $num); if ($self->{'incache'} ne "") { $str = $self->{'incache'} . $str; $self->{'incache'} = ""; } $str =~ s/(\r|\n)\n?//og; for ($i = 0; $i < length($str); $i += 5) { $b = 0; if (substr($str, $i, 1) eq "z") { $i -= 4; $res .= pack("N", 0); next; } elsif ($isend && substr($str, $i, 6) =~ m/^(.{2,4})\~\>$/o) { $num = 5 - length($1); @c = unpack("C5", $1 . ("u" x (4 - $num))); # pad with 84 to sort out rounding $i = length($str); } else { @c = unpack("C5", substr($str, $i, 5)); } for ($j = 0; $j < 5; $j++) { $b *= 85; $b += $c[$j] - 33; } $res .= substr(pack("N", $b), 0, 4 - $num); } if (!$isend && $i > length($str)) { $self->{'incache'} = substr($str, $i - 5); } $res; } package Text::PDF::RunLengthDecode; use strict; use vars qw(@ISA); @ISA = qw(Text::PDF::Filter); # no warnings qw(uninitialized); =head1 NAME Text::PDF::RunLengthDecode - Run Length encoding filter for PDF streams. Inherits from L =cut sub outfilt { my ($self, $str, $isend) = @_; my ($res, $s, $r); # no state information, just slight inefficiency at block boundaries while ($str ne "") { if ($str =~ m/^(.*?)((.)\2{2,127})(.*?)$/so) { $s = $1; $r = $2; $str = $3; } else { $s = $str; $r = ''; $str = ''; } while (length($s) > 127) { $res .= pack("C", 127) . substr($s, 0, 127); substr($s, 0, 127) = ''; } $res .= pack("C", length($s)) . $s if length($s) > 0; $res .= pack("C", 257 - length($r)); } $res .= "\x80" if ($isend); $res; } sub infilt { my ($self, $str, $isend) = @_; my ($res, $l, $d); if ($self->{'incache'} ne "") { $str = $self->{'incache'} . $str; $self->{'incache'} = ""; } while ($str ne "") { $l = unpack("C", $str); if ($l == 128) { $isend = 1; return $res; } if ($l > 128) { if (length($str) < 2) { warn "Premature end to data in RunLengthEncoded data" if $isend; $self->{'incache'} = $str; return $res; } $res .= substr($str, 1, 1) x (257 - $l); substr($str, 0, 2) = ""; } else { if (length($str) < $l + 1) { warn "Premature end to data in RunLengthEncoded data" if $isend; $self->{'incache'} = $str; return $res; } $res .= substr($str, 1, $l); substr($str, 0, $l + 1) = ""; } } $res; } package Text::PDF::ASCIIHexDecode; use strict; use vars qw(@ISA); @ISA = qw(Text::PDF::Filter); # no warnings qw(uninitialized); =head1 NAME Text::PDF::ASCIIHexDecode - Ascii Hex encoding (very inefficient) for PDF streams. Inherits from L =cut sub outfilt { my ($self, $str, $isend) = @_; $str =~ s/(.)/sprintf("%02x", ord($1))/oge; $str .= ">" if $isend; $str; } sub infilt { my ($self, $str, $isend) = @_; $isend = ($str =~ s/>$//og); $str =~ s/\s//oig; $str =~ s/([0-9a-z])/pack("C", hex($1 . "0"))/oige if ($isend && length($str) & 1); $str =~ s/([0-9a-z]{2})/pack("C", hex($1))/oige; $str; } package Text::PDF::FlateDecode; use strict; use vars qw(@ISA $havezlib); @ISA = qw(Text::PDF::Filter); BEGIN { eval {require "Compress/Zlib.pm";}; $havezlib = !$@; } sub new { return undef unless $havezlib; my ($class) = @_; my ($self) = {}; $self->{'outfilt'} = Compress::Zlib::deflateInit(); $self->{'infilt'} = Compress::Zlib::inflateInit(); bless $self, $class; } sub outfilt { my ($self, $str, $isend) = @_; my ($res); $res = $self->{'outfilt'}->deflate($str); $res .= $self->{'outfilt'}->flush() if ($isend); $res; } sub infilt { my ($self, $dat, $last) = @_; my ($res, $status) = $self->{'infilt'}->inflate("$dat"); $res; } package Text::PDF::LZWDecode; use vars qw(@ISA @basedict); @ISA = qw(Text::PDF::FlateDecode); @basedict = map {pack("C", $_)} (0 .. 255, 0, 0); # no warnings qw(uninitialized); sub new { my ($class) = @_; my ($self) = {}; $self->{'indict'} = [@basedict]; $self->{'count'} = 258; $self->{'insize'} = 9; $self->{'cache'} = 0; $self->{'cache_size'} = 0; # $self->{'outfilt'} = Compress::Zlib::deflateInit(); # patent precludes LZW encoding bless $self, $class; } sub infilt { my ($self, $dat, $last) = @_; my ($num, $res); $res = ''; while ($dat ne '' || $self->{'cache_size'} >= $self->{'insize'}) { $num = $self->read_dat(\$dat); last if $num < 0; return $res if ($num == 257); # End of Data if ($num == 256) # Clear table { $self->{'indict'} = [@basedict]; $self->{'insize'} = 9; $self->{'count'} = 258; next; } if ($self->{'count'} > 258) { ($self->{'indict'}[$self->{'count'}-1]) .= substr($self->{'indict'}[$num], 0, 1); } if ($self->{'count'} < 4096) { $self->{'indict'}[$self->{'count'}] = $self->{'indict'}[$num]; $self->{'count'}++; } $res .= $self->{'indict'}[$num]; if ($self->{'count'} >= 4096) { # don't do anything on table full, the encoder tells us when to clear } elsif ($self->{'count'} == 512) { $self->{'insize'} = 10; } elsif ($self->{'count'} == 1024) { $self->{'insize'} = 11; } elsif ($self->{'count'} == 2048) { $self->{'insize'} = 12; } } return $res; } sub read_dat { my ($self, $rdat) = @_; my ($res); while ($self->{'cache_size'} < $self->{'insize'}) { return -1 if $$rdat eq ''; # oops -- not enough data in this chunk $self->{'cache'} = ($self->{'cache'} << 8) + unpack("C", $$rdat); substr($$rdat, 0, 1) = ''; $self->{'cache_size'} += 8; } $res = $self->{'cache'} >> ($self->{'cache_size'} - $self->{'insize'}); $self->{'cache'} &= (1 << ($self->{'cache_size'} - $self->{'insize'})) - 1; $self->{'cache_size'} -= $self->{'insize'}; return $res; } 1; Text-PDF-0.31/lib/Text/PDF/Pages.pm0000755000175000017500000002374712750671061015123 0ustar bobhbobhpackage Text::PDF::Pages; use strict; use vars qw(@ISA %inst); @ISA = qw(Text::PDF::Dict); # no warnings qw(uninitialized); use Text::PDF::Dict; use Text::PDF::Utils; %inst = map {$_ => 1} qw(Parent Type); =head1 NAME Text::PDF::Pages - a PDF pages hierarchical element. Inherits from L =head1 DESCRIPTION A Pages object is the parent to other pages objects or to page objects themselves. =head1 METHODS =head2 Text::PDF::Pages->new($pdfs,$parent) This creates a new Pages object. Notice that $parent here is not the file context for the object but the parent pages object for this pages. If we are using this class to create a root node, then $parent should point to the file context, which is identified by not having a Type of Pages. $pdfs is the file object (or objects) in which to create the new Pages object. =cut sub new { my ($class, $pdfs, $parent) = @_; my ($self); $class = ref $class if ref $class; $self = $class->SUPER::new($pdfs, $parent); $self->{'Type'} = PDFName("Pages"); $self->{'Parent'} = $parent if defined $parent; $self->{'Count'} = PDFNum(0); $self->{'Kids'} = Text::PDF::Array->new; $self->{' outto'} = ref $pdfs eq 'ARRAY' ? $pdfs : [$pdfs]; $self->out_obj(1); $self; } sub init { my ($self, $pdf) = @_; $self->{' outto'} = [$pdf]; $self; } =head2 $p->out_obj($isnew) Tells all the files that this thing is destined for that they should output this object come time to output. If this object has no parent, then it must be the root. So set as the root for the files in question and tell it to be output too. If $isnew is set, then call new_obj rather than out_obj to create as a new object in the file. =cut sub out_obj { my ($self, $isnew) = @_; foreach (@{$self->{' outto'}}) { if ($isnew) { $_->new_obj($self); } else { $_->out_obj($self); } unless (defined $self->{'Parent'}) { $_->{'Root'}{'Pages'} = $self; $_->out_obj($_->{'Root'}); } } $self; } =head2 $p->find_page($pnum) Returns the given page, using the page count values in the pages tree. Pages start at 0. =cut sub find_page { my ($self, $pnum) = @_; my ($top) = $self->get_top; $top->find_page_recurse(\$pnum); } sub find_page_recurse { my ($self, $rpnum) = @_; my ($res, $k); if ($self->{'Count'}->realise->val <= $$rpnum) { $$rpnum -= $self->{'Count'}->val; return undef; } foreach $k ($self->{'Kids'}->realise->elementsof) { if ($k->{'Type'}->realise->val eq 'Page') { return $k if ($$rpnum == 0); $$rpnum--; } elsif ($res = $k->realise->find_page_recurse($rpnum)) { return $res; } } return undef; } =head2 $p->add_page($page, $pnum) Inserts the page before the given $pnum. $pnum can be -ve to count from the END of the document. -1 is after the last page. Likewise $pnum can be greater than the number of pages currently in the document, to append. This method only guarantees to provide a reasonable pages tree if pages are appended or prepended to the document. Pages inserted in the middle of the document may simply be inserted in the appropriate leaf in the pages tree without adding any new branches or leaves. To tidy up such a mess, it is best to call $p->rebuild_tree to rebuild the pages tree into something efficient. =cut sub add_page { my ($self, $page, $pnum) = @_; my ($top) = $self->get_top; my ($ppage, $ppages, $pindex, $ppnum); $pnum = -1 unless (defined $pnum && $pnum <= $top->{'Count'}->val); if ($pnum == -1) { $ppage = $top->find_page($top->{'Count'}->val - 1); } else { $pnum = $top->{'Count'}->val + $pnum + 1 if ($pnum < 0); $ppage = $top->find_page($pnum); } if (defined $ppage->{'Parent'}) { $ppages = $ppage->{'Parent'}->realise; } else { $ppages = $self; } $ppnum = scalar $ppages->{'Kids'}->realise->elementsof; if ($pnum == -1) { $pindex = -1; } else { for ($pindex = 0; $pindex < $ppnum; $pindex++) { last if ($ppages->{'Kids'}{' val'}[$pindex] eq $ppage); } $pindex = -1 if ($pindex == $ppnum); } $ppages->add_page_recurse($page->realise, $pindex); for ($ppages = $page->{'Parent'}; defined $ppages->{'Parent'}; $ppages = $ppages->{'Parent'}->realise) { $ppages->out_obj->{'Count'}->realise->{'val'}++; } $ppages->out_obj->{'Count'}->realise->{'val'}++; $page; } sub add_page_recurse { my ($self, $page, $index) = @_; my ($newpages, $ppages, $pindex, $ppnum); if (scalar $self->{'Kids'}->elementsof >= 8 && $self->{'Parent'} && $index < 1) { $ppages = $self->{'Parent'}->realise; $newpages = $self->new($self->{' outto'}, $ppages); if ($ppages) { $ppnum = scalar $ppages->{'Kids'}->realise->elementsof; for ($pindex = 0; $pindex < $ppnum; $pindex++) { last if ($ppages->{'Kids'}{' val'}[$pindex] eq $self); } $pindex = -1 if ($pindex == $ppnum); $ppages->add_page_recurse($newpages, $pindex); } } else { $newpages = $self->out_obj; } if ($index < 0) { push (@{$newpages->{'Kids'}->realise->{' val'}}, $page); } else { splice (@{$newpages->{'Kids'}{' val'}}, $index, 0, $page); } $page->{'Parent'} = $newpages; } =head2 $root_pages = $p->rebuild_tree([@pglist]) Rebuilds the pages tree to make a nice balanced tree that conforms to Adobe recommendations. If passed a pglist then the tree is built for that list of pages. No check is made of whether the pglist contains pages. Returns the top of the tree for insertion in the root object. =cut sub rebuild_tree { my ($self, @pglist) = @_; } =head2 @pglist = $p->get_pages Returns a list of page objects in the document in page order =cut sub get_pages { my ($self) = @_; return $self->get_top->get_kids; } # only call this on the top level or anything you want pages below sub get_kids { my ($self) = @_; my ($pgref, @pglist); foreach $pgref ($self->{'Kids'}->elementsof) { $pgref->realise; if ($pgref->{'Type'}->val =~ m/^Pages$/oi) { push (@pglist, $pgref->get_kids()); } else { push (@pglist, $pgref); } } @pglist; } =head2 $p->find_prop($key) Searches up through the inheritance tree to find a property. =cut sub find_prop { my ($self, $prop) = @_; if (defined $self->{$prop}) { if (ref $self->{$prop} && $self->{$prop}->isa("Text::PDF::Objind")) { return $self->{$prop}->realise; } else { return $self->{$prop}; } } elsif (defined $self->{'Parent'}) { return $self->{'Parent'}->find_prop($prop); } } # defined $_[0]->{$_[1]} && $_[0]->{$_[1]}->realised or # defined $_[0]->{'Parent'} && $_[0]->{'Parent'}->find_prop($_[1]); } =head2 $p->add_font($pdf, $font) Creates or edits the resource dictionary at this level in the hierarchy. If the font is already supported even through the hierarchy, then it is not added. =cut sub add_font { my ($self, $font, $pdf) = @_; my ($name) = $font->{'Name'}->val; my ($dict) = $self->find_prop('Resources'); my ($rdict); return $self if ($dict ne "" && defined $dict->{'Font'} && defined $dict->{'Font'}{$name}); unless (defined $self->{'Resources'}) { $dict = $dict ne "" ? $dict->copy($pdf) : PDFDict(); $self->{'Resources'} = $dict; } else { $dict = $self->{'Resources'}; } $dict->{'Font'} = PDFDict() unless defined $self->{'Resources'}{'Font'}; $rdict = $dict->{'Font'}->val; $rdict->{$name} = $font unless ($rdict->{$name}); if (ref $dict ne 'HASH' && $dict->is_obj($pdf)) { $pdf->out_obj($dict); } if (ref $rdict ne 'HASH' && $rdict->is_obj($pdf)) { $pdf->out_obj($rdict); } $self; } =head2 $p->bbox($xmin, $ymin, $xmax, $ymax, [$param]) Specifies the bounding box for this and all child pages. If the values are identical to those inherited then no change is made. $param specifies the attribute name so that other 'bounding box'es can be set with this method. =cut sub bbox { my ($self, @bbox) = @_; my ($str) = $bbox[4] || 'MediaBox'; my ($inh) = $self->find_prop($str); my ($test, $i, $e); if ($inh ne "") { $test = 1; $i = 0; foreach $e ($inh->elementsof) { $test &&= $e->val == $bbox[$i++]; } return $self if $test && $i == 4; } $inh = Text::PDF::Array->new; foreach $e (@bbox[0..3]) { $inh->add_elements(PDFNum($e)); } $self->{$str} = $inh; $self; } =head2 $p->proc_set(@entries) Ensures that the current resource contains all the entries in the proc_sets listed. If necessary it creates a local resource dictionary to achieve this. =cut sub proc_set { my ($self, @entries) = @_; my (@temp) = @entries; my ($dict, $e); $dict = $self->find_prop('Resource'); if ($dict ne "" && defined $dict->{'ProcSet'}) { foreach $e ($dict->{'ProcSet'}->elementsof) { @temp = grep($_ ne $e, @temp); } return $self if (scalar @temp == 0); @entries = @temp if defined $self->{'Resources'}; } unless (defined $self->{'Resources'}) { $self->{'Resources'} = $dict ne "" ? $dict->copy : PDFDict(); } $self->{'Resources'}{'ProcSet'} = PDFArray() unless defined $self->{'ProcSet'}; foreach $e (@entries) { $self->{'Resources'}{'ProcSet'}->add_elements(PDFName($e)); } $self; } sub empty { my ($self) = @_; my ($parent) = $self->{'Parent'} if defined ($self->{'Parent'}); $self->SUPER::empty; $self->{'Parent'} = $parent if defined $parent; $self; } sub dont_copy { return $inst{$_[1]} || $_[0]->SUPER::dont_copy($_[1]); } =head2 $p->get_top Returns the top of the pages tree =cut sub get_top { my ($self) = @_; my ($p); for ($p = $self; defined $p->{'Parent'}; $p = $p->{'Parent'}) { } $p->realise; } 1; Text-PDF-0.31/lib/Text/PDF/File.pm0000755000175000017500000011160712754625334014742 0ustar bobhbobhpackage Text::PDF::File; =head1 NAME Text::PDF::File - Holds the trailers and cross-reference tables for a PDF file =head1 SYNOPSIS $p = Text::PDF::File->open("filename.pdf", 1); $p->new_obj($obj_ref); $p->free_obj($obj_ref); $p->append_file; $p->close_file; $p->release; # IMPORTANT! =head1 DESCRIPTION This class keeps track of the directory aspects of a PDF file. There are two parts to the directory: the main directory object which is the parent to all other objects and a chain of cross-reference tables and corresponding trailer dictionaries starting with the main directory object. =head1 INSTANCE VARIABLES Within this class hierarchy, rather than making everything visible via methods, which would be a lot of work, there are various instance variables which are accessible via associative array referencing. To distinguish instance variables from content variables (which may come from the PDF content itself), each such variable will start with a space. Variables which do not start with a space directly reflect elements in a PDF dictionary. In the case of a Text::PDF::File, the elements reflect those in the trailer dictionary. Since some variables are not designed for class users to access, variables are marked in the documentation with (R) to indicate that such an entry should only be used as read-only information. (P) indicates that the information is private and not designed for user use at all, but is included in the documentation for completeness and to ensure that nobody else tries to use it. =over =item newroot This variable allows the user to create a new root entry to occur in the trailer dictionary which is output when the file is written or appended. If you wish to over-ride the root element in the dictionary you have, use this entry to indicate that without losing the current Root entry. Notice that newroot should point to a PDF level object and not just to a dictionary which does not have object status. =item INFILE (R) Contains the filehandle used to read this information into this PDF directory. Is an IO object. =item fname (R) This is the filename which is reflected by INFILE, or the original IO object passed in. =item update (R) This indicates that the read file has been opened for update and that at some point, $p->appendfile() can be called to update the file with the changes that have been made to the memory representation. =item maxobj (R) Contains the first useable object number above any that have already appeared in the file so far. =item outlist (P) This is a list of Objind which are to be output when the next appendfile or outfile occurs. =item firstfree (P) Contains the first free object in the free object list. Free objects are removed from the front of the list and added to the end. =item lastfree (P) Contains the last free object in the free list. It may be the same as the firstfree if there is only one free object. =item objcache (P) All objects are held in the cache to ensure that a system only has one occurrence of each object. In effect, the objind class acts as a container type class to hold the PDF object structure and it would be unfortunate if there were two identical place-holders floating around a system. =item epos (P) The end location of the read-file. =back Each trailer dictionary contains a number of private instance variables which hold the chain together. =over =item loc (P) Contains the location of the start of the cross-reference table preceding the trailer. =item xref (P) Contains an anonymous array of each cross-reference table entry. =item prev (P) A reference to the previous table. Note this differs from the Prev entry which is in PDF which contains the location of the previous cross-reference table. =back =head1 METHODS =cut use strict; no strict "refs"; use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types $VERSION); # no warnings qw(uninitialized); use IO::File; # Now for the basic PDF types use Text::PDF::Utils; use Text::PDF::Array; use Text::PDF::Bool; use Text::PDF::Dict; use Text::PDF::Name; use Text::PDF::Number; use Text::PDF::Objind; use Text::PDF::String; use Text::PDF::Page; use Text::PDF::Pages; use Text::PDF::Null; # VERSION now taken from Text::PDF.pm #$VERSION = "0.27"; # MJPH 15-MAY-2006 Fix minor bug in Pages.pm #$VERSION = "0.26"; # MJPH 19-MAY-2005 Get a release out! #$VERSION = "0.25"; # MJPH 20-JAN-2003 fix realised in read_obj x y R, fix Text::PDF::Pages::add_page #$VERSION = "0.24"; # MJPH 28-AUG-2002 out_obj may call new_obj #$VERSION = "0.23"; # MJPH 14-AUG-2002 Fix MANIFEST #$VERSION = "0.22"; # MJPH 26-JUL-2002 Add Text::PDF::File::copy, tidy up update(), sort out out_trailer # Fix to not remove string final CRs when reading dictionaries #$VERSION = "0.21"; # GJ 8-JUN-2002 Tidy up regexps, add Text::PDF::Null #$VERSION = "0.20"; # MJPH 27-APR-2002 $trailer->{'Size'} becomes max num objects, fix line end problem, # remove warnings, update release code #$VERSION = "0.19"; # MJPH 5-FEB-2002 fix hex keys and ASCII85 filter #$VERSION = "0.18"; # MJPH 1-DEC-2001 add encryption hooks #$VERSION = "0.17"; # GST 18-JUL-2001 Handle \) in strings and tidy up endobj handling, no uninitialized warnings #$VERSION = "0.16"; # GST 18-JUL-2001 Major performance tweaks #$VERSION = "0.15"; # GST 30-MAY-2001 Memory leaks fixed #$VERSION = "0.14"; # MJPH 2-MAY-2001 More little bug fixes, added read_objnum #$VERSION = "0.13"; # MJPH 23-MAR-2001 General bug fix release #$VERSION = "0.12"; # MJPH 29-JUL-2000 Add font subsetting, random page insertion #$VERSION = "0.11"; # MJPH 18-JUL-2000 Add pdfstamp.plx and more debugging #$VERSION = "0.10"; # MJPH 27-JUN-2000 Tidy up some bugs - names #$VERSION = "0.09"; # MJPH 31-MAR-2000 Copy trailer dictionary properly #$VERSION = "0.08"; # MJPH 07-FEB-2000 Add null element #$VERSION = "0.07"; # MJPH 01-DEC-1999 Debug for pdfbklt #$VERSION = "0.06"; # MJPH 11-SEP-1999 Sort out unixisms #$VERSION = "0.05"; # MJPH 9-SEP-1999 Add ship_out #$VERSION = "0.04"; # MJPH 14-JUL-1999 Correct paths for tarball release #$VERSION = "0.03"; # MJPH 14-JUL-1999 Correct paths for tarball release #$VERSION = "0.02"; # MJPH 30-JUN-1999 Transfer from old library BEGIN { my ($t, $type); $ws_char = '[ \t\r\n\f\0]'; $delim_char = '[][<>{}()/%]'; $reg_char = '[^][<>{}()/% \t\r\n\f\0]'; $irreg_char = '[][<>{}()/% \t\r\n\f\0]'; $cr = "$ws_char*(?:\015|\012|(?:\015\012))"; %types = ( 'Page' => 'Text::PDF::Page', 'Pages' => 'Text::PDF::Pages' ); foreach $type (keys %types) { $t = $types{$type}; $t =~ s|::|/|og; require "$t.pm"; } } =head2 Text::PDF::File->new Creates a new, empty file object which can act as the host to other PDF objects. Since there is no file associated with this object, it is assumed that the object is created in readiness for creating a new PDF file. =cut sub new { my ($class, $root) = @_; my ($self) = $class->_new; unless ($root) { $root = PDFDict(); $root->{'Type'} = PDFName("Catalog"); } $self->new_obj($root); $self->{'Root'} = $root; $self; } =head2 $p = Text::PDF::File->open($filename, $update) Opens the file and reads all the trailers and cross reference tables to build a complete directory of objects. $update specifies whether this file is being opened for updating and editing, or simply to be read. $filename may be an IO object =cut sub open { my ($class, $fname, $update) = @_; my ($self, $buf, $xpos, $end, $tdict, $k); my ($fh); $self = $class->_new; if (ref $fname) { $self->{' INFILE'} = $fname; $fh = $fname; } else { $fh = IO::File->new(($update ? "+" : "") . "<$fname") || return undef; $self->{' INFILE'} = $fh; } binmode $fh; if ($update) { $self->{' update'} = 1; $self->{' OUTFILE'} = $fh; $self->{' fname'} = $fname; } $fh->read($buf, 255); if ($buf !~ m/^\%pdf\-1\.(\d)\s*$cr/moi) { die "$fname not a PDF file version 1.x"; } else { $self->{' Version'} = $1; } $fh->seek(0, 2); # go to end of file $end = $fh->tell(); $self->{' epos'} = $end; if (!$fh->seek(($end > 1024 ? $end - 1024 : 0, 0))) { die "Seek failed when reading PDF file $fname"; } $fh->read($buf, 1024); if ($buf !~ m/startxref$cr([0-9]+)$cr\%\%eof.*?$/oi) { die "Malformed PDF file $fname"; } $xpos = $1; $tdict = $self->readxrtr($xpos, $self); foreach $k (keys %{$tdict}) { $self->{$k} = $tdict->{$k}; } return $self; } =head2 $p->release() Releases ALL of the memory used by the PDF document and all of its component objects. After calling this method, do B expect to have anything left in the C object (so if you need to save, be sure to do it before calling this method). B, that it is important that you call this method on any C object when you wish to destruct it and free up its memory. Internally, PDF files have an enormous number of cross-references and this causes circular references within the internal data structures. Calling 'C' forces a brute-force cleanup of the data structures, freeing up all of the memory. Once you've called this method, though, don't expect to be able to do anything else with the C object; it'll have B internal state whatsoever. B As part of the brute-force cleanup done here, this method will throw a warning message whenever unexpected key values are found within the C object. This is done to help ensure that any unexpected and unfreed values are brought to your attention so that you can bug us to keep the module updated properly; otherwise the potential for memory leaks due to dangling circular references will exist. =cut sub release { my ($self, $force) = @_; my (@tofree); # first, close the input file if it is still open close($self->{' INFILE'}); # delete stuff that we know we can, here if ($force) { foreach my $key (keys %{$self}) { push(@tofree,$self->{$key}); $self->{$key}=undef; delete($self->{$key}); } } else { @tofree = map { delete $self->{$_} } keys %{$self}; } while (my $item = shift @tofree) { my $ref = ref($item); if (UNIVERSAL::can($item, 'release')) { $item->release($force); } elsif ($ref eq 'ARRAY') { push( @tofree, @{$item} ); } elsif (UNIVERSAL::isa($ref, 'HASH')) { release($item, $force); } } # check that everything has gone - it better had! foreach my $key (keys %{$self}) { warn ref($self) . " still has '$key' key left after release.\n"; } } =head2 $p->append_file() Appends the objects for output to the read file and then appends the appropriate tale. =cut sub append_file { my ($self) = @_; my ($tdict, $fh, $t); return undef unless ($self->{' update'}); $fh = $self->{' INFILE'}; if ($self->{' version'} > $self->{' Version'}) { $fh->seek(0,0); $fh->print("%PDF-1.$self->{' version'}\n"); } $tdict = PDFDict(); $tdict->{'Prev'} = PDFNum($self->{' loc'}); $tdict->{'Info'} = $self->{'Info'}; if (defined $self->{' newroot'}) { $tdict->{'Root'} = $self->{' newroot'}; } else { $tdict->{'Root'} = $self->{'Root'}; } $tdict->{'Size'} = $self->{'Size'}; # added v0.09 foreach $t (grep ($_ !~ m/^[\s\-]/o, keys %$self)) { $tdict->{$t} = $self->{$t} unless defined $tdict->{$t}; } $fh->seek($self->{' epos'}, 0); $self->out_trailer($tdict, $self->{' update'}); close($self->{' OUTFILE'}); } =head2 $p->out_file($fname) Writes a PDF file to a file of the given filename based on the current list of objects to be output. It creates the trailer dictionary based on information in $self. $fname may be an IO object; =cut sub out_file { my ($self, $fname) = @_; $self->create_file($fname); $self->close_file; $self; } =head2 $p->create_file($fname) Creates a new output file (no check is made of an existing open file) of the given filename or IO object. Note, make sure that $p->{' version'} is set correctly before calling this function. =cut sub create_file { my ($self, $fname) = @_; my ($fh); $self->{' fname'} = $fname; if (ref $fname) { $fh = $fname; } else { $fh = IO::File->new(">$fname") || die "Unable to open $fname for writing"; binmode $fh; } $self->{' OUTFILE'} = $fh; $fh->print('%PDF-1.' . ($self->{' version'} || '2') . "\n"); $fh->print("%쏢\n"); # and some binary stuff in a comment $self; } =head2 $p->close_file Closes up the open file for output by outputting the trailer etc. =cut sub close_file { my ($self) = @_; my ($fh, $tdict, $t); $tdict = PDFDict(); $tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'}; $tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne "") ? $self->{' newroot'} : $self->{'Root'}; # remove all freed objects from the outlist, AND the outlist_cache if not updating # NO! Don't do that thing! In fact, let out_trailer do the opposite! $tdict->{'Size'} = $self->{'Size'} || PDFNum(1); $tdict->{'Prev'} = PDFNum($self->{' loc'}) if ($self->{' loc'}); if ($self->{' update'}) { foreach $t (grep ($_ !~ m/^[\s\-]/o, keys %$self)) { $tdict->{$t} = $self->{$t} unless defined $tdict->{$t}; } $fh = $self->{' INFILE'}; $fh->seek($self->{' epos'}, 0); } $self->out_trailer($tdict, $self->{' update'}); close($self->{' OUTFILE'}); MacPerl::SetFileInfo("CARO", "TEXT", $self->{' fname'}) if ($^O eq "MacOS" && !ref($self->{' fname'})); $self; } =head2 ($value, $str) = $p->readval($str, %opts) Reads a PDF value from the current position in the file. If $str is too short then read some more from the current location in the file until the whole object is read. This is a recursive call which may slurp in a whole big stream (unprocessed). Returns the recursive data structure read and also the current $str that has been read from the file. =cut sub readval { my ($self, $str, %opts) = @_; my ($fh) = $self->{' INFILE'}; my ($res, $key, $value, $k); $str = update($fh, $str); if ($str =~ m/^<>/o) { if ($str =~ s|^/($reg_char+)||o) { $k = Text::PDF::Name::name_to_string ($1, $self); ($value, $str) = $self->readval($str, %opts); $res->{$k} = $value; } $str = update($fh, $str); # thanks gareth.jones@stud.man.ac.uk } $str =~ s/^>>//o; $str = update($fh, $str); # streams can't be followed by a lone carriage-return. if (($str =~ s/^stream(?:(?:\015\012)|\012)//o) && ($res->{'Length'}->val != 0)) # stream { $k = $res->{'Length'}->val; $res->{' streamsrc'} = $fh; $res->{' streamloc'} = $fh->tell - length($str); unless ($opts{'nostreams'}) { if ($k > length($str)) { $value = $str; $k -= length($str); read ($fh, $str, $k + 11); # slurp the whole stream! } else { $value = ''; } $value .= substr($str, 0, $k); $res->{' stream'} = $value; $res->{' nofilt'} = 1; $str = update($fh, $str); $str =~ s/^endstream//o; } } if (defined $res->{'Type'} && defined $types{$res->{'Type'}->val}) { bless $res, $types{$res->{'Type'}->val}; $res->init($self); } # gdj: FIXME: if any of the ws chars were crs, then the whole # string might not have been read. } elsif ($str =~ m/^([0-9]+)$ws_char+([0-9]+)$ws_char+R/so) # objind { $k = $1; $value = $2; $str =~ s/^([0-9]+)$ws_char+([0-9]+)$ws_char+R//so; unless ($res = $self->test_obj($k, $value)) { $res = Text::PDF::Objind->new(); $res->{' objnum'} = $k; $res->{' objgen'} = $value; $res->{' realised'} = 0; $res->{' parent'} = $self; $self->add_obj($res, $k, $value); } # gdj: FIXME: if any of the ws chars were crs, then the whole # string might not have been read. } elsif ($str =~ m/^([0-9]+)$ws_char+([0-9]+)$ws_char+obj/so) # object data { my ($obj); $k = $1; $value = $2; $str =~ s/^([0-9]+)$ws_char+([0-9]+)$ws_char+obj//so; ($obj, $str) = $self->readval($str, %opts, 'objnum' => $k, 'objgen' => $value); if ($res = $self->test_obj($k, $value)) { $res->merge($obj); } else { $res = $obj; $self->add_obj($res, $k, $value); $res->{' realised'} = 1; } $str = update($fh, $str); # thanks to kundrat@kundrat.sk $str =~ s/^endobj//o; } elsif ($str =~ m|^/($reg_char+)|so) # name { # " <- Fix colourization $value = $1; $str =~ s|^/($reg_char+)||so; $res = Text::PDF::Name->from_pdf($value, $self); } elsif ($str =~ m/^\(/o) # literal string { $str =~ s/^\(//o; # We now need to find an unbalanced, unescaped right-paren. # This can't be done with regexps. my ($value) = ""; # The current level of nesting, when this reaches 0 we have finished. my ($nested) = 1; while (1) { # Remove everything up to the first (possibly escaped) paren. $str =~ /^((?:[^\\()]|\\[^()])*)(.*)/so; $value .= $1; $str = $2; if ($str =~ /^(\\[()])/o) { # An escaped paren. This would be tricky to do with # the regexp above (it's very difficult to be certain # that all cases are covered so I think it's better to # deal with them explicitly). $str = substr ($str, 2); $value = $value . $1; } elsif ($str =~ /^\)/o) { # Right paren $nested--; $str = substr ($str, 1); if ($nested == 0) { last; } $value = $value . ')'; } elsif ($str =~ /^\(/o) { # Left paren $nested++; $str = substr ($str, 1); $value = $value . '('; } else { # No parens, we must read more. We don't use update # because we don't want to remove whitespace or # comments. $fh->read($str, 255, length($str)) or die "Unterminated string."; } } $res = Text::PDF::String->from_pdf($value); } elsif ($str =~ m/^read($str, 255, length($str)) while (0 > index( $str, '>' )); ($value, $str) = ($str =~ /^(.*?)>(.*?)$/so); $res = Text::PDF::String->from_pdf("<" . $value . ">"); } elsif ($str =~ m/^\[/o) # array { $str =~ s/^\[//o; $str = update($fh, $str); $res = PDFArray(); while ($str !~ m/^\]/o) { ($value, $str) = $self->readval($str, %opts); $res->add_elements($value); $str = update($fh, $str); } $str =~ s/^\]//o; } elsif ($str =~ m/^(true|false)$irreg_char/o) # boolean { $value = $1; $str =~ s/^(?:true|false)//o; $res = Text::PDF::Bool->from_pdf($value); } elsif ($str =~ m/^([+-.0-9]+)$irreg_char/o) # number { $value = $1; $str =~ s/^([+-.0-9]+)//o; $res = Text::PDF::Number->from_pdf($value); } elsif ($str =~ m/^null$irreg_char/o) { $str =~ s/^null//o; $res = Text::PDF::Null->new; } else { die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . "."; } $str =~ s/^$ws_char*//os; return ($res, $str); } =head2 $ref = $p->read_obj($objind, %opts) Given an indirect object reference, locate it and read the object returning the read in object. =cut sub read_obj { my ($self, $objind, %opts) = @_; my ($loc, $res, $str, $oldloc); # return ($objind) if $self->{' objects'}{$objind->uid}; $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return undef; $objind->merge($res) unless ($objind eq $res); return $objind; } =head2 $ref = $p->read_objnum($num, $gen, %opts) Returns a fully read object of given number and generation in this file =cut sub read_objnum { my ($self, $num, $gen, %opts) = @_; my ($res, $loc, $str, $oldloc); $loc = $self->locate_obj($num, $gen) || return undef; $oldloc = $self->{' INFILE'}->tell; $self->{' INFILE'}->seek($loc, 0); ($res, $str) = $self->readval('', %opts, 'objnum' => $num, 'objgen' => $gen); $self->{' INFILE'}->seek($oldloc, 0); $res; } =head2 $objind = $p->new_obj($obj) Creates a new, free object reference based on free space in the cross reference chain. If nothing free then thinks up a new number. If $obj then turns that object into this new object rather than returning a new object. =cut sub new_obj { my ($self, $base) = @_; my ($res); my ($tdict, $i, $ni, $ng); return $base if ($base->is_obj($self)); if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0) { $res = shift(@{$self->{' free'}}); if (defined $base) { my ($num, $gen) = @{$self->{' objects'}{$res->uid}}; $self->remove_obj($res); $self->add_obj($base, $num, $gen); return $self->out_obj($base); } else { $self->{' objects'}{$res->uid}[2] = 0; return $res; } } $tdict = $self; while (defined $tdict) { $i = $tdict->{' xref'}{defined($i)?$i:''}[0]; while (defined $i and $i != 0) { ($ni, $ng) = @{$tdict->{' xref'}{$i}}; if (!defined $self->locate_obj($i, $ng)) { if (defined $base) { $self->add_obj($base, $i, $ng); return $base; } else { $res = $self->test_obj($i, $ng) || $self->add_obj(Text::PDF::Objind->new(), $i, $ng); $tdict->{' xref'}{$i}[0] = $tdict->{' xref'}{$i}[0]; $self->out_obj($res); return $res; } } $i = $ni; } $tdict = $tdict->{' prev'}; } $i = $self->{' maxobj'}++; if (defined $base) { $self->add_obj($base, $i, 0); $self->out_obj($base); return $base; } else { $res = $self->add_obj(Text::PDF::Objind->new(), $i, 0); $self->out_obj($res); return $res; } } =head2 $p->out_obj($objind) Indicates that the given object reference should appear in the output xref table whether with data or freed. =cut sub out_obj { my ($self, $obj) = @_; # don't add objects that aren't real objects! if (!defined $self->{' objects'}{$obj->uid}) { return $self->new_obj($obj); } # This is why we've been keeping the outlist CACHE around; to speed # up this method by orders of magnitude (it saves up from having to # grep the full outlist each time through as we'll just do a lookup # in the hash) (which is super-fast). elsif (!exists $self->{' outlist_cache'}{$obj->uid}) { push( @{$self->{' outlist'}}, $obj ); $self->{' outlist_cache'}{$obj->uid}++; } $obj; } =head2 $p->free_obj($objind) Marks an object reference for output as being freed. =cut sub free_obj { my ($self, $obj) = @_; push(@{$self->{' free'}}, $obj); $self->{' objects'}{$obj->uid}[2] = 1; $self->out_obj($obj); } =head2 $p->remove_obj($objind) Removes the object from all places where we might remember it =cut sub remove_obj { my ($self, $objind) = @_; # who says it has to be fast delete $self->{' objects'}{$objind->uid}; delete $self->{' outlist_cache'}{$objind->uid}; delete $self->{' printed_cache'}{$objind}; @{$self->{' outlist'}} = grep($_ ne $objind, @{$self->{' outlist'}}); @{$self->{' printed'}} = grep($_ ne $objind, @{$self->{' printed'}}); $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef if ($self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind); $self; } =head2 $p->ship_out(@objects) Ships the given objects (or all objects for output if @objects is empty) to the currently open output file (assuming there is one). Freed objects are not shipped, and once an object is shipped it is switched such that this file becomes its source and it will not be shipped again unless out_obj is called again. Notice that a shipped out object can be re-output or even freed, but that it will not cause the data already output to be changed. =cut sub ship_out { my ($self, @objs) = @_; my ($n, $fh, $objind, $i, $j); my ($objnum, $objgen); return unless defined($fh = $self->{' OUTFILE'}); seek($fh, 0, 2); # go to the end of the file @objs = @{$self->{' outlist'}} unless (scalar @objs > 0); foreach $objind (@objs) { next unless $objind->is_obj($self); $j = -1; for ($i = 0; $i < scalar @{$self->{' outlist'}}; $i++) { if ($self->{' outlist'}[$i] eq $objind) { $j = $i; last; } } next if ($j < 0); splice(@{$self->{' outlist'}}, $j, 1); delete $self->{' outlist_cache'}{$objind->uid}; next if grep {$_ eq $objind} @{$self->{' free'}}; $self->{' locs'}{$objind->uid} = $fh->tell; ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid}}[0..1]; $fh->printf("%d %d obj\n", $objnum, $objgen); $objind->outobjdeep($fh, $self, 'objnum' => $objnum, 'objgen' => $objgen); $fh->print("\nendobj\n"); # Note that we've output this obj, not forgetting to update the cache # of whats printed. unless (exists $self->{' printed_cache'}{$objind}) { push( @{$self->{' printed'}}, $objind ); $self->{' printed_cache'}{$objind}++; } } $self; } =head2 $p->copy($outpdf, \&filter) Iterates over every object in the file reading the object, calling filter with the object and outputting the result. if filter is not defined, then just copies input to output. =cut sub copy { my ($self, $out, $filt) = @_; my ($tdict, $i, $nl, $ng, $nt, $res, $obj, $minl, $mini, $ming); foreach $i (grep (!m/^[\s\-]/o, keys %{$self})) { $out->{$i} = $self->{$i} unless defined $out->{$i}; } $tdict = $self; while (defined $tdict) { foreach $i (sort {$a <=> $b} keys %{$tdict->{' xref'}}) { ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}}; next unless $nt eq 'n'; if ($nl < $minl || $mini == 0) { $mini = $i; $ming = $ng; $minl = $nl; } unless ($obj = $self->test_obj($i, $ng)) { $obj = Text::PDF::Objind->new(); $obj->{' objnum'} = $i; $obj->{' objgen'} = $ng; $self->add_obj($obj, $i, $ng); $obj->{' parent'} = $self; $obj->{' realised'} = 0; } $obj->realise; $res = defined $filt ? &{$filt}($obj) : $obj; $out->new_obj($res) unless (!$res || $res->is_obj($out)); } $tdict = $tdict->{' prev'}; } # test for linearized and remove it from output $obj = $self->test_obj($mini, $ming); if ($obj->isa('Text::PDF::Dict') && $obj->{'Linearized'}) { $out->free_obj($obj); } $self; } =head1 PRIVATE METHODS & FUNCTIONS The following methods and functions are considered private to this class. This does not mean you cannot use them if you have a need, just that they aren't really designed for users of this class. =head2 $offset = $p->locate_obj($num, $gen) Returns a file offset to the object asked for by following the chain of cross reference tables until it finds the one you want. =cut sub locate_obj { my ($self, $num, $gen) = @_; my ($tdict, $ref); $tdict = $self; while (defined $tdict) { if (ref $tdict->{' xref'}{$num}) { $ref = $tdict->{' xref'}{$num}; if ($ref->[1] == $gen) { return $ref->[0] if ($ref->[2] eq "n"); return undef; # if $ref->[2] eq "f" } } $tdict = $tdict->{' prev'} } return undef; } =head2 update($fh, $str) Keeps reading $fh for more data to ensure that $str has at least a line full for C to work on. At this point we also take the opportunity to ignore comments. =cut sub update { my ($fh, $str) = @_; $str =~ s/^$ws_char*//o; while ($str !~ m/$cr/o && !$fh->eof) { $fh->read($str, 255, length($str)); $str =~ s/^$ws_char*//so; while ($str =~ m/^\%/o) { $fh->read($str, 255, length($str)) while ($str !~ m/$cr/o && !$fh->eof); $str =~ s/^\%(.*)$cr$ws_char*//so; } } return $str; } =head2 $objind = $p->test_obj($num, $gen) Tests the cache to see whether an object reference (which may or may not have been getobj()ed) has been cached. Returns it if it has. =cut sub test_obj { $_[0]->{' objcache'}{$_[1], $_[2]}; } =head2 $p->add_obj($objind) Adds the given object to the internal object cache. =cut sub add_obj { my ($self, $obj, $num, $gen) = @_; $self->{' objcache'}{$num, $gen} = $obj; $self->{' objects'}{$obj->uid} = [$num, $gen]; return $obj; } =head2 $tdict = $p->readxrtr($xpos) Recursive function which reads each of the cross-reference and trailer tables in turn until there are no more. Returns a dictionary corresponding to the trailer chain. Each trailer also includes the corresponding cross-reference table. The structure of the xref private element in a trailer dictionary is of an anonymous hash of cross reference elements by object number. Each element consists of an array of 3 elements corresponding to the three elements read in [location, generation number, free or used]. See the PDF Specification for details. =cut sub readxrtr { my ($self, $xpos) = @_; my ($tdict, $xlist, $buf, $xmin, $xnum, $fh, $xdiff); $fh = $self->{' INFILE'}; $fh->seek($xpos, 0); $fh->read($buf, 22); if ($buf !~ m/^xref$cr/oi) { die "Malformed xref in PDF file $self->{' fname'}"; } $buf =~ s/^xref$cr//oi; $xlist = {}; while ($buf =~ m/^([0-9]+)$ws_char+([0-9]+)$cr(.*?)$/so) { $xmin = $1; $xnum = $2; $buf = $3; $xdiff = length($buf); $fh->read($buf, 20 * $xnum - $xdiff + 15, $xdiff); while ($xnum-- > 0 && $buf =~ s/^0*([0-9]*)$ws_char+0*([0-9]+)$ws_char+([nf])$cr//o) { $xlist->{$xmin++} = [$1, $2, $3]; } } if ($buf !~ /^trailer$cr/oi) { die "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell - length($buf)); } $buf =~ s/^trailer$cr//oi; ($tdict, $buf) = $self->readval($buf); $tdict->{' loc'} = $xpos; $tdict->{' xref'} = $xlist; $self->{' maxobj'} = $xmin if $xmin > $self->{' maxobj'}; $tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val) if (defined $tdict->{'Prev'} && $tdict->{'Prev'}->val != 0); return $tdict; } =head2 $p->out_trailer($tdict) Outputs the body and trailer for a PDF file by outputting all the objects in the ' outlist' and then outputting a xref table for those objects and any freed ones. It then outputs the trailing dictionary and the trailer code. =cut sub out_trailer { my ($self, $tdict, $update) = @_; my ($objind, $j, $i, $iend, @xreflist, $first, $k, $xref, $tloc, @freelist); my (%locs, $size); my ($fh) = $self->{' OUTFILE'}; while (@{$self->{' outlist'}}) { $self->ship_out; } # foreach $objind (@{$self->{' outlist'}}) # { # next if ($self->{' objects'}{$objind->uid}[2]); # $locs{$objind->uid} = $fh->tell; # $fh->printf("%d %d obj\n", @{$self->{' objects'}{$objind->uid}}[0..1]); # $objind->outobjdeep($fh, $self); # $fh->print("\nendobj\n"); # } # $size = @{$self->{' printed'}} + @{$self->{' free'}}; # $tdict->{'Size'} = PDFNum($tdict->{'Size'}->val + $size); # PDFSpec 1.3 says for /Size: (Required) Total number of entries in the files # cross-reference table, including the original table and all updates. Which # is what the previous two lines implement. # But this seems to make Acrobat croak on saving so we try the following from # basil.duval@epfl.ch $tdict->{'Size'} = PDFNum($self->{' maxobj'}); $tloc = $fh->tell; $fh->print("xref\n"); @xreflist = sort {$self->{' objects'}{$a->uid}[0] <=> $self->{' objects'}{$b->uid}[0]} (@{$self->{' printed'}}, @{$self->{' free'}}); unless ($update) { $i = 1; for ($j = 0; $j < @xreflist; $j++) { my (@inserts); $k = $xreflist[$j]; while ($i < $self->{' objects'}{$k->uid}[0]) { my ($n) = Text::PDF::Objind->new(); $self->add_obj($n, $i, 0); $self->free_obj($n); push(@inserts, $n); $i++; } splice(@xreflist, $j, 0, @inserts); $j += @inserts; $i++; } } @freelist = sort {$self->{' objects'}{$a->uid}[0] <=> $self->{' objects'}{$b->uid}[0]} @{$self->{' free'}}; $j = 0; $first = -1; $k = 0; for ($i = 0; $i <= $#xreflist + 1; $i++) { # if ($i == 0) # { # $first = $i; $j = $xreflist[0]->{' objnum'}; # $fh->printf("0 1\n%010d 65535 f \n", $ff); # } if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid}[0] != $j + 1) { $fh->print(($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid}[0] ") . ($i - $first) . "\n"); if ($first == -1) { $fh->printf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid}[0] : 0); $first = 0; } for ($j = $first; $j < $i; $j++) { $xref = $xreflist[$j]; if (defined $freelist[$k] && defined $xref && "$freelist[$k]" eq "$xref") { $k++; $fh->print(pack("A10AA5A4", sprintf("%010d", (defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid}[0] : 0)), " ", sprintf("%05d", $self->{' objects'}{$xref->uid}[1] + 1), " f \n")); } else { $fh->print(pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid}), " ", sprintf("%05d", $self->{' objects'}{$xref->uid}[1]), " n \n")); } } $first = $i; $j = $self->{' objects'}{$xreflist[$i]->uid}[0] if ($i < scalar @xreflist); } else { $j++; } } $fh->print("trailer\n"); $tdict->outobjdeep($fh, $self); $fh->print("\nstartxref\n$tloc\n" . '%%EOF' . "\n"); } =head2 Text::PDF::File->_new Creates a very empty PDF file object (used by new and open) =cut sub _new { my ($class) = @_; my ($self) = {}; bless $self, $class; $self->{' outlist'} = []; $self->{' outlist_cache'} = {}; # A cache of whats in the 'outlist' $self->{' maxobj'} = 1; $self->{' objcache'} = {}; $self->{' objects'} = {}; $self; } 1; =head1 AUTHOR Martin Hosken Martin_Hosken@sil.org Copyright Martin Hosken 1999 and onwards No warranty or expression of effectiveness, least of all regarding anyone's safety, is implied in this software or documentation. =head2 Licensing This Perl Text::PDF module is licensed under the Perl Artistic License. Text-PDF-0.31/lib/Text/PDF/TTFont.pm0000755000175000017500000002656212757400410015233 0ustar bobhbobhpackage Text::PDF::TTFont; =head1 NAME Text::PDF::TTFont - Inherits from L and represents a TrueType font within a PDF file. =head1 DESCRIPTION A font consists of two primary parts in a PDF file: the header and the font descriptor. Whilst two fonts may share font descriptors, they will have their own header dictionaries including encoding and widhth information. =head1 INSTANCE VARIABLES There are no instance variables beyond the variables which directly correspond to entries in the appropriate PDF dictionaries. =head1 METHODS =cut use strict; use vars qw(@ISA @cp1252 $subcount); # no warnings qw(uninitialized); use Text::PDF::Dict; use Text::PDF::Utils; @ISA = qw(Text::PDF::Dict); use Font::TTF::Font 0.23; @cp1252 = (0 .. 127, 0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, 0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, 0xA0 .. 0xFF); $subcount = "BXCJIM"; =head2 Text::PDF::TTFont->new($parent, $fontfname, $pdfname, %opts) Creates a new font resource for the given fontfile. This includes the font descriptor and the font stream. The $pdfname is the name by which this font resource will be known throughtout a particular PDF file. All font resources are full PDF objects. =cut sub new { my ($class, $parent, $fontname, $pdfname, %opts) = @_; my ($self) = $class->SUPER::new; my ($f, $flags, $name, $subf, $s, $upem); my ($font, $w); foreach $f (keys %opts) { $f =~ s/^\-//o || next; $self->{" $f"} = $opts{"-$f"}; } $self->{' outto'} = $parent; # only one host for a font if (ref($fontname)) # $fontname is a font object { $font = $fontname; } else { $font = Font::TTF::Font->open($fontname) || return undef; } $self->{' font'} = $font; $Font::TTF::Name::utf8 = 1; $name = $font->{'name'}->read->find_name(4) || return undef; $subf = $font->{'name'}->find_name(2); $name =~ s/\s//og; $name .= $subf if ($subf =~ m/^Regular$/oi); $self->{'Type'} = PDFName("Font"); $self->{'Subtype'} = PDFName("TrueType"); if ($self->{' subset'}) { $self->{' subname'} = "$subcount+" . $name; $subcount++; } else { $self->{' subname'} = $name; } $self->{'BaseFont'} = PDFName($self->{' subname'}); $self->{'Name'} = PDFName($pdfname); $parent->new_obj($self); # leave the encoding & widths, etc. until we know the glyph list $f = PDFDict(); $parent->new_obj($f); # make this thing a true object $self->{'FontDescriptor'} = $f; $f->{'Type'} = PDFName("FontDescriptor"); $upem = $font->{'head'}->read->{'unitsPerEm'}; $f->{'Ascent'} = PDFNum(int($font->{'hhea'}->read->{'Ascender'} * 1000 / $upem)); $f->{'Descent'} = PDFNum(int($font->{'hhea'}{'Descender'} * 1000 / $upem)); # find the top of an H or the null box! Or maybe we should just duck and say 0? $f->{'CapHeight'} = PDFNum(0); # int($font->{'loca'}->read->{'glyphs'}[$font->{'post'}{'STRINGS'}{"H"}]->read->{'yMax'} # * 1000 / $upem)); $f->{'StemV'} = PDFNum(0); # no way! $f->{'FontName'} = $self->{'BaseFont'}; $f->{'ItalicAngle'} = PDFNum($font->{'post'}->read->{'italicAngle'}); $f->{'FontBBox'} = PDFArray( PDFNum(int($font->{'head'}{'xMin'} * 1000 / $upem)), PDFNum(int($font->{'head'}{'yMin'} * 1000 / $upem)), PDFNum(int($font->{'head'}{'xMax'} * 1000 / $upem)), PDFNum(int($font->{'head'}{'yMax'} * 1000 / $upem))); $flags = 4; $flags = 0; $flags |= 1 if ($font->{'OS/2'}->read->{'bProportion'} == 9); $flags |= 2 unless ($font->{'OS/2'}{'bSerifStyle'} > 10 && $font->{'OS/2'}{'bSerifStyle'} < 14); $flags |= 32; # if ($font->{'OS/2'}{'bFamilyType'} > 3); $flags |= 8 if ($font->{'OS/2'}{'bFamilyType'} == 2); $flags |= 64 if ($font->{'OS/2'}{'bLetterform'} > 8); $f->{'Flags'} = PDFNum($flags); # $f->{'MaxWidth'} = PDFNum(int($font->{'hhea'}{'advanceWidthMax'} * 1000 / $upem)); $f->{'MissingWidth'} = PDFNum(int($font->{'hhea'}{'advanceWidthMax'} * 1000 / $upem) + 2); $f->{' notdef'} = PDFNum(".notdef"); $s = PDFDict(); $parent->new_obj($s); $f->{'FontFile2'} = $s; $s->{'Length1'} = PDFNum(-s $font->{' fname'}); $s->{'Filter'} = PDFArray(PDFName("FlateDecode")); $s->{' streamfile'} = $fontname unless ($self->{' subset'}); $font->{'cmap'}->read->find_ms; $self->{' issymbol'} = $font->{'cmap'}{' mstable'}{'Platform'} == 3 && $font->{'cmap'}{' mstable'}{'Encoding'} == 0; $font->{'hmtx'}->read; unless ($opts{'-istype0'}) { $w = PDFArray(map {PDFNum(int($font->{'hmtx'}{'advance'}[$font->{'cmap'}->ms_lookup($_)] / $font->{'head'}{'unitsPerEm'} * 1000))} $self->{' issymbol'} ? (0xf000 .. 0xf0ff) : @cp1252); $parent->new_obj($w); $self->{'Widths'} = $w; } if ($self->{' subset'}) { $self->{' minCode'} = 255; $self->{' maxCode'} = 32; } else { $self->{' minCode'} = 32; $self->{' maxCode'} = 255; } $self; } =head2 $t->width($text) Measures the width of the given text according to the widths in the font =cut sub width { my ($self, $text) = @_; my (@unis, $width); if ($self->{' issymbol'}) { @unis = map {$_ + 0xf000} unpack("C*", $text); } else { @unis = map {$cp1252[$_]} unpack("C*", $text); } foreach (@unis) { $width += $self->{' font'}{'hmtx'}{'advance'}[$self->{' font'}{'cmap'}->ms_lookup($_)]; } $width / $self->{' font'}{'head'}{'unitsPerEm'}; } =head2 $t->trim($text, $len) Trims the given text to the given length (in per mille em) returning the trimmed text =cut sub trim { my ($self, $text, $len) = @_; my ($i, $width); $len *= $self->{' font'}{'head'}{'unitsPerEm'}; foreach (unpack("C*", $text)) { $width += $self->{' font'}{'hmtx'}{'advance'}[$self->{' font'}{'cmap'}->ms_lookup( $self->{' issymbol'} ? $_ + 0xf000 : $cp1252[$_])]; last if ($width > $len); $i++; } return substr($text, 0, $i); } =head2 $t->out_text($text) Indicates to the font that the text is to be output and returns the text to be output =cut sub out_text { my ($self, $text) = @_; if ($self->{' subset'}) { foreach (unpack("C*", $text)) { vec($self->{' subvec'}, $_, 1) = 1; $self->{' minCode'} = $_ if $_ < $self->{' minCode'}; $self->{' maxCode'} = $_ if $_ > $self->{' maxCode'}; } } return asPDFStr($text); } =head2 $f->copy Copies the font object excluding the name, widths and encoding, etc. =cut sub copy { my ($self, $pdf) = @_; my ($res) = {}; my ($k); bless $res, ref($self); foreach $k ('Name', 'FirstChar', 'LastChar') { $res->{$k} = ""; } return $self->SUPER::copy($pdf, $res); } sub outobjdeep { my ($self, $fh, $pdf, %opts) = @_; return $self->SUPER::outobjdeep($fh, $pdf) if defined $opts{'passthru'}; my ($f) = $self->{' font'}; my ($d) = $self->{'FontDescriptor'}; my ($s) = $d->{'FontFile2'}; my ($vec, $ffh, $i, $t, $k, $maxuni, $minuni); $self->{'FirstChar'} = PDFNum($self->{' minCode'}); $self->{'LastChar'} = PDFNum($self->{' maxCode'}); splice(@{$self->{'Widths'}{' val'}}, 0, $self->{' minCode'}); splice(@{$self->{'Widths'}{' val'}}, $self->{' maxCode'} - $self->{' minCode'} + 1, $#{$self->{'Widths'}{' val'}}); if ($self->{' subset'}) { $maxuni = 0; $minuni = 0xffff; for ($i = 0; $i < 256; $i++) { if (vec($self->{' subvec'}, $i, 1)) { $t = $self->{' issymbol'} ? $i + 0xf000 : $cp1252[$i]; $maxuni = $t if $t > $maxuni; $minuni = $t if $t < $minuni; vec($vec, $f->{'cmap'}->ms_lookup($t), 1) = 1; } elsif ($i >= $self->{' minCode'} && $i <= $self->{' maxCode'}) { $self->{'Widths'}{' val'}[$i - $self->{' minCode'}] = $d->{'MissingWidth'}; } } $f->{'glyf'}->read; for ($i = 0; $i < scalar @{$f->{'loca'}{'glyphs'}}; $i++) { next if vec($vec, $i, 1); $f->{'loca'}{'glyphs'}[$i] = undef; } $s->{' stream'} = ""; $ffh = Text::PDF::TTIOString->new(\$s->{' stream'}); $f->out($ffh, 'cmap', 'cvt ', 'fpgm', 'glyf', 'head', 'hhea', 'hmtx', 'loca', 'maxp', 'prep'); $s->{'Length1'} = PDFNum(length($s->{' stream'})); } $self->SUPER::outobjdeep($fh, $pdf, %opts); } 1; package Text::PDF::TTIOString; =head1 TITLE Text::PDF::TTIOString - internal IO type handle for string output for font embedding. This code is ripped out of IO::Scalar, to save the direct dependence for so little. See IO::Scalar for details =cut sub new { my $self = bless {}, shift; $self->open(@_) if @_; $self; } sub DESTROY { shift->close; } sub open { my ($self, $sref) = @_; # Sanity: defined($sref) or do {my $s = ''; $sref = \$s}; (ref($sref) eq "SCALAR") or die "open() needs a ref to a scalar"; # Setup: $self->{Pos} = 0; $self->{SR} = $sref; $self; } sub close { my $self = shift; %$self = (); 1; } sub getc { my $self = shift; # Return undef right away if at EOF; else, move pos forward: return undef if $self->eof; substr(${$self->{SR}}, $self->{Pos}++, 1); } if(0) { sub getline { my $self = shift; # Return undef right away if at EOF: return undef if $self->eof; # Get next line: pos(${$self->{SR}}) = $self->{Pos}; # start matching at this point ${$self->{SR}} =~ m/(.*?)(\n|\Z)/g; # match up to newline or EOS my $line = $1.$2; # save it $self->{Pos} += length($line); # everybody remember where we parked! return $line; } sub getlines { my $self = shift; wantarray or croak("Can't call getlines in scalar context!"); my ($line, @lines); push @lines, $line while (defined($line = $self->getline)); @lines; } } sub print { my $self = shift; my $eofpos = length(${$self->{SR}}); my $str = join('', @_); if ($self->{'Pos'} == $eofpos) { ${$self->{SR}} .= $str; $self->{Pos} = length(${$self->{SR}}); } else { substr(${$self->{SR}}, $self->{Pos}, length($str)) = $str; $self->{Pos} += length($str); } 1; } sub read { my ($self, $buf, $n, $off) = @_; die "OFFSET not yet supported" if defined($off); my $read = substr(${$self->{SR}}, $self->{Pos}, $n); $self->{Pos} += length($read); $_[1] = $read; return length($read); } sub eof { my $self = shift; ($self->{Pos} >= length(${$self->{SR}})); } sub seek { my ($self, $pos, $whence) = @_; my $eofpos = length(${$self->{SR}}); # Seek: if ($whence == 0) { $self->{Pos} = $pos } # SEEK_SET elsif ($whence == 1) { $self->{Pos} += $pos } # SEEK_CUR elsif ($whence == 2) { $self->{Pos} = $eofpos + $pos} # SEEK_END else { die "bad seek whence ($whence)" } # Fixup: if ($self->{Pos} < 0) { $self->{Pos} = 0 } if ($self->{Pos} > $eofpos) { $self->{Pos} = $eofpos } 1; } sub tell { shift->{Pos} } 1; Text-PDF-0.31/lib/Text/PDF/Utils.pm0000755000175000017500000000602412750671061015151 0ustar bobhbobhpackage Text::PDF::Utils; =head1 NAME Text::PDF::Utils - Utility functions for PDF library =head1 DESCRIPTION A set of utility functions to save the fingers of the PDF library users! =head1 FUNCTIONS =cut use strict; use Text::PDF::Array; use Text::PDF::Bool; use Text::PDF::Dict; use Text::PDF::Name; use Text::PDF::Number; use Text::PDF::String; use Exporter; use vars qw(@EXPORT @ISA); @ISA = qw(Exporter); @EXPORT = qw(PDFBool PDFArray PDFDict PDFName PDFNum PDFStr asPDFBool asPDFName asPDFNum asPDFStr); # no warnings qw(uninitialized); =head2 PDFBool Creates a Bool via Text::PDF::Bool->new =cut sub PDFBool { Text::PDF::Bool->new(@_); } =head2 PDFArray Creates an array via Text::PDF::Array->new =cut sub PDFArray { Text::PDF::Array->new(@_); } =head2 PDFDict Creates a dict via Text::PDF::Dict->new =cut sub PDFDict { Text::PDF::Dict->new(@_); } =head2 PDFName Creates a name via Text::PDF::Name->new =cut sub PDFName { Text::PDF::Name->new(@_); } =head2 PDFNum Creates a number via Text::PDF::Number->new =cut sub PDFNum { Text::PDF::Number->new(@_); } =head2 PDFStr Creates a string via Text::PDF::String->new =cut sub PDFStr { Text::PDF::String->new(@_); } =head2 asPDFBool Returns a boolean value in PDF output form =cut sub asPDFBool { Text::PDF::Bool->new(@_)->as_pdf; } =head2 asPDFStr Returns a string in PDF output form (including () or <>) =cut sub asPDFStr { Text::PDF::String->new(@_)->as_pdf; } =head2 asPDFName Returns a Name in PDF Output form (including /) =cut sub asPDFName { Text::PDF::Name->new(@_)->as_pdf (@_); } =head2 asPDFNum Returns a number in PDF output form =cut sub asPDFNum { $_[0]; } # no translation needed =head2 unpacku($str) Returns a list of unicode values for the given UTF8 string =cut sub unpacku { my ($str) = @_; my (@res); # return (unpack("U*", $str)) if ($^V && $^V ge v5.6.0); return (unpack("U*", $str)) if ($] >= 5.006); # so much for $^V! $str = "$str"; # copy $str while (length($str)) # Thanks to Gisle Aas for some of his old code { $str =~ s/^[\x80-\xBF]+//o; if ($str =~ s/^([\x00-\x7F]+)//o) { push(@res, unpack("C*", $1)); } elsif ($str =~ s/^([\xC0-\xDF])([\x80-\xBF])//o) { push(@res, ((ord($1) & 0x1F) << 6) | (ord($2) & 0x3F)); } elsif ($str =~ s/^([\0xE0-\xEF])([\x80-\xBF])([\x80-\xBF])//o) { push(@res, ((ord($1) & 0x0F) << 12) | ((ord($2) & 0x3F) << 6) | (ord($3) & 0x3F)); } elsif ($str =~ s/^([\xF0-\xF7])([\x80-\xBF])([\x80-\xBF])([\x80-\xBF])//o) { my ($b1, $b2, $b3, $b4) = (ord($1), ord($2), ord($3), ord($4)); push(@res, ((($b1 & 0x07) << 8) | (($b2 & 0x3F) << 2) | (($b3 & 0x30) >> 4)) + 0xD600); # account for offset push(@res, ((($b3 & 0x0F) << 6) | ($b4 & 0x3F)) + 0xDC00); } elsif ($str =~ s/^[\xF8-\xFF][\x80-\xBF]*//o) { } } @res; } 1; Text-PDF-0.31/lib/Text/PDF/Null.pm0000755000175000017500000000171512750671061014765 0ustar bobhbobhpackage Text::PDF::Null; =head1 NAME Text::PDF::Null - PDF Null type object. This is a subclass of Text::PDF::Objind and cannot be subclassed. =head1 METHODS =cut use strict; use vars qw(@ISA); @ISA = qw(Text::PDF::Objind); # There is only one null object (section 3.2.8). my ($null_obj) = {}; bless $null_obj, "Text::PDF::Null"; =head2 Text::PDF::Null->new Returns the null object. There is only one null object. =cut sub new { return $null_obj; } =head2 $s->realise Pretends to finish reading the object. =cut sub realise { return $null_obj; } =head2 $s->outobjdeep Output the object in PDF format. =cut sub outobjdeep { my ($self, $fh, $pdf) = @_; $fh->print ("null"); } =head2 $s->is_obj Returns false because null is not a full object. =cut sub is_obj { return 0; } =head2 $s->copy Another no-op. =cut sub copy { return $null_obj; } =head2 $s->val Return undef. =cut sub val { return undef; } 1; Text-PDF-0.31/lib/Text/PDF/Name.pm0000755000175000017500000000462312750671061014734 0ustar bobhbobhpackage Text::PDF::Name; use strict; use vars qw(@ISA); # no warnings qw(uninitialized); use Text::PDF::String; @ISA = qw(Text::PDF::String); =head1 NAME Text::PDF::Name - Inherits from L and stores PDF names (things beginning with /) =head1 METHODS =head2 Text::PDF::Name->from_pdf($string) Creates a new string object (not a full object yet) from a given string. The string is parsed according to input criteria with escaping working, particular to Names. =cut sub from_pdf { my ($class, $str, $pdf) = @_; my ($self) = $class->SUPER::from_pdf($str); $self->{'val'} = name_to_string ($self->{'val'}, $pdf); $self; } =head2 $n->convert ($str, $pdf) Converts a name into a string by removing the / and converting any hex munging unless $pdf is supplied and its version is less than 1.2. =cut sub convert { my ($self, $str, $pdf) = @_; $str = name_to_string ($str, $pdf); return $str; } =head2 $s->as_pdf ($pdf) Returns a name formatted as PDF. $pdf is optional but should be the PDF File object for which the name is intended if supplied. =cut sub as_pdf { my ($self, $pdf) = @_; my ($str) = $self->{'val'}; $str = string_to_name ($str, $pdf); return ("/" . $str); } # Prior to PDF version 1.2, `#' was a literal character. Embedded # spaces were implicitly allowed in names as well but it would be best # to ignore that (PDF reference 2nd edition, Appendix H, section 3.2.4.3). =head2 Text::PDF::Name->string_to_name ($str, $pdf) Suitably encode the string $str for output in the File object $pdf (the exact format may depend on the version of $pdf). Prinicipally, encode certain characters in hex if the version is greater than 1.1. =cut sub string_to_name ($;$) { my ($str, $pdf) = @_; if (!defined($pdf) || (defined $pdf->{' version'} && $pdf->{' version'} >= 2)) { $str =~ s|([\001-\040\177-\377%()\[\]{}<>#/])|"#".sprintf("%02X", ord($1))|oge; } return $str; } =head2 Text::PDF::Name->name_to_string ($str, $pdf) Suitably decode the string $str as read from the File object $pdf (the exact decoding may depend on the version of $pdf). Principally, undo the hex encoding for PDF versions > 1.1. =cut sub name_to_string ($;$) { my ($str, $pdf) = @_; $str =~ s|^/||o; if (!defined($pdf) || (defined $pdf->{' version'} && $pdf->{' version'} >= 2)) { $str =~ s/#([0-9a-f]{2})/chr(hex($1))/oige; } return $str; } Text-PDF-0.31/lib/Text/PDF/TTFont0.pm0000755000175000017500000002143312750671061015310 0ustar bobhbobhpackage Text::PDF::TTFont0; =head1 NAME Text::PDF::TTFont0 - Inherits from L and represents a TrueType Type 0 font within a PDF file. =head1 DESCRIPTION A font consists of two primary parts in a PDF file: the header and the font descriptor. Whilst two fonts may share font descriptors, they will have their own header dictionaries including encoding and widhth information. =head1 INSTANCE VARIABLES There are no instance variables beyond the variables which directly correspond to entries in the appropriate PDF dictionaries. =head1 METHODS =cut use strict; use vars qw(@ISA); # no warnings qw(uninitialized); use Text::PDF::TTFont; use Text::PDF::Dict; @ISA = qw(Text::PDF::TTFont); use Font::TTF::Font; use Text::PDF::Utils; =head2 Text::PDF::TTFont->new($parent, $fontfname. $pdfname) Creates a new font resource for the given fontfile. This includes the font descriptor and the font stream. The $pdfname is the name by which this font resource will be known throughout a particular PDF file. All font resources are full PDF objects. =cut sub new { my ($class, $parent, $fontname, $pdfname, %opt) = @_; my ($desc, $sinfo, $unistr, $touni, @rev); my ($i, $first, $num, $upem, @wid, $name, $ff2, $ffh); my ($self) = $class->SUPER::new($parent, $fontname, $pdfname, -istype0 => 1, %opt); my ($font) = $self->{' font'}; $self->{'Subtype'} = PDFName('Type0'); $self->{'Encoding'} = PDFName('Identity-H'); $parent->{' version'} = 3 unless (defined $parent->{' version'} && $parent->{' version'} > 3); $desc = PDFDict(); $parent->new_obj($desc); $desc->{'Type'} = $self->{'Type'}; $desc->{'Subtype'} = PDFName('CIDFontType2'); $desc->{'BaseFont'} = $self->{'BaseFont'}; # $name = $self->{'BaseFont'}->val; # $name =~ s/^.*\+//oi; # $self->{'BaseFont'} = PDF::Name->new($parent, $name . "-Identity-H"); $desc->{'FontDescriptor'} = $self->{'FontDescriptor'}; delete $self->{'FontDescriptor'}; $num = $font->{'maxp'}{'numGlyphs'}; $upem = $font->{'head'}{'unitsPerEm'}; $desc->{'DW'} = $desc->{'FontDescriptor'}{'MissingWidth'}; $desc->{'W'} = PDFArray(); $parent->new_obj($desc->{'W'}); $font->{'hmtx'}->read; unless ($opt{-subset}) { $first = 1; for ($i = 1; $i < $num; $i++) { push(@wid, PDFNum(int($font->{'hmtx'}{'advance'}[$i] * 1000 / $upem))); if ($i % 20 == 19 || $i + 1 >= $num) { $desc->{'W'}->add_elements(PDFNum($first), PDFArray(@wid)); @wid = (); $first = $i + 1; } } } $self->{'DescendantFonts'} = PDFArray($desc); $sinfo = PDFDict(); # $parent->new_obj($sinfo); $sinfo->{'Registry'} = PDFStr('Adobe'); $sinfo->{'Ordering'} = PDFStr('Identity'); $sinfo->{'Supplement'} = PDFNum(0); $desc->{'CIDSystemInfo'} = $sinfo; $ff2 = $desc->{'FontDescriptor'}{'FontFile2'}; delete $ff2->{' streamfile'}; # $ff2->{' stream'} = ""; # $ffh = Text::PDF::TTIOString->new(\$ff2->{' stream'}); # $font->out($ffh, 'cvt ', 'fpgm', 'glyf', 'head', 'hhea', 'hmtx', 'loca', 'maxp', 'prep'); # $ff2->{'Filter'} = PDFArray(PDFName("FlateDecode")); # $ff2->{'Length1'} = PDFNum(length($ff2->{' stream'})); if ($opt{'ToUnicode'}) { @rev = $font->{'cmap'}->read->reverse; $unistr = '/CIDInit /ProcSet findresource being 12 dict begin begincmap /CIDSystemInfo << /Registry (' . $self->{'BaseFont'}->val . '+0) /Ordering (XYZ) /Supplement 0 >> def /CMapName /' . $self->{'BaseFont'}->val . '+0 def 1 begincodespacerange <'; $unistr .= sprintf("%04x> <%04x> endcodespacerange\n", 1, $num - 1); for ($i = 1; $i < $num; $i++) { if ($i % 100 == 0) { $unistr .= "endbfrange\n"; $unistr .= $num - $i > 100 ? 100 : $num - $i; $unistr .= " beginbfrange\n"; } $unistr .= sprintf("<%04x> <%04x> <%04x>\n", $i, $i, $rev[$i]); } $unistr .= "endbfrange\nendcmap CMapName currendict /CMap defineresource pop end end"; $touni = PDFDict(); $parent->new_obj($touni); $touni->{' stream'} = $unistr; $touni->{'Filter'} = PDFArray(PDFName("FlateDecode")); $self->{'ToUnicode'} = $touni; } $self; } =head2 out_text($text) Returns the string to be put into a content stream for text to be output in this font. The text is assumed to be UTF8 encoded and the return string is a glyph sequence for the text. If subsetting is enabled, then all the glyphs returned are also marked for output. =cut sub out_text { my ($self, $text) = @_; my (@clist) = Text::PDF::Utils::unpacku($text); my ($f) = $self->{' font'}; my ($g, $res); foreach $g (map {$f->{'cmap'}->ms_lookup($_)} (@clist)) { vec($self->{' subvec'}, $g, 1) = 1 if ($self->{' subset'}); $res .= sprintf("%04X", $g); } "<$res>"; } =head2 out_glyphs(@n) Marks the glyphs as being needed in the output font when subsetting. Returns a string to render the glyphs as specified. =cut sub out_glyphs { my ($self, @list) = @_; my ($g, $res); foreach $g (@list) { vec($self->{' subvec'}, $g, 1) = 1 if ($self->{' subset'}); $res .= sprintf("%04X", $g); } "<$res>"; } =head2 width($text) Returns the width of the string, assuming it to be UTF8 encoded. =cut sub width { my ($self, $text) = @_; my (@clist) = Text::PDF::Utils::unpacku($text); my ($f) = $self->{' font'}; my ($width, $g); foreach $g (map {$f->{'cmap'}->ms_lookup($_)} (@clist)) { $width += $f->{'hmtx'}{'advance'}[$g]; } $width / $f->{'head'}{'unitsPerEm'}; } =head2 outobjdeep($fh, $pdf, %opts) Handles the creation of the font stream including subsetting at this point. So if you get this far, that's it for subsetting. =cut sub outobjdeep { my ($self, $fh, $pdf, %opts) = @_; my ($d) = $self->{'DescendantFonts'}->val->[0]; my ($f) = $self->{' font'}; my ($s) = $d->{'FontDescriptor'}{'FontFile2'}; my ($ffh); if ($self->{' subset'}) { my ($max) = length($self->{' subvec'}) * 8; my ($upem) = $f->{'head'}{'unitsPerEm'}; my ($mode, $miniArr, $i, $j, $first, @minilist); $f->{'glyf'}->read; for ($i = 0; $i <= $max; $i++) { next unless(vec($self->{' subvec'},$i,1)); next unless($f->{'loca'}{glyphs}[$i]); map { vec($self->{' subvec'},$_,1)=1; } $f->{loca}{glyphs}[$i]->get_refs; } $max = length($self->{' subvec'}) * 8; for ($i = 0; $i <= $max; $i++) { if (!$mode && vec($self->{' subvec'}, $i, 1)) { $first = $i; $mode = 1; @minilist = (); } elsif ($mode && !vec($self->{' subvec'}, $i, 1)) { for ($j = 0; $j < scalar @minilist; $j++) { if ($j % 20 == 0) { $miniArr = PDFArray(); $d->{'W'}->add_elements(PDFNum($first + $j), $miniArr) } $miniArr->add_elements(PDFNum($minilist[$j])); } $mode = 0; } if ($mode) { push(@minilist, int($f->{'hmtx'}{'advance'}[$i] / $upem * 1000)); } else { $f->{'loca'}{glyphs}[$i] = undef; } } for ( ; $i < $f->{'maxp'}{'numGlyphs'}; $i++) { $f->{'loca'}{'glyphs'}[$i] = undef; } } $s->{' stream'} = ""; $ffh = Text::PDF::TTIOString->new(\$s->{' stream'}); $f->out($ffh, 'cvt ', 'fpgm', 'glyf', 'head', 'hhea', 'hmtx', 'loca', 'maxp', 'prep'); $s->{'Filter'} = PDFArray(PDFName("FlateDecode")); $s->{'Length1'} = PDFNum(length($s->{' stream'})); $self->SUPER::outobjdeep($fh, $pdf, %opts, 'passthru' => 1); $self; } =head2 ship_out($pdf) Ship this font out to the given $pdf file context =cut sub ship_out { my ($self, $pdf) = @_; my ($d); foreach $d ($self->{'DescendantFonts'}->elementsof) { $pdf->ship_out($self, $d, $d->{'FontDescriptor'}, $d->{'FontDescriptor'}{'FontFile2'}); } $pdf->ship_out($self->{'ToUnicode'}) if (defined $self->{'ToUnicode'}); $self; } =head2 empty Empty the font of as much as possible in order to save memory =cut sub empty { my ($self) = @_; my ($d); if (defined $self->{'DescendantFonts'}) { foreach $d ($self->{'DescendantFonts'}->elementsof) { $d->{'FontDescriptor'}{'FontFile2'}->empty; $d->{'FontDescriptor'}->empty; $d->empty; } } $self->{'ToUnicode'}->empty if (defined $self->{'ToUnicode'}); $self->SUPER::empty; } 1; Text-PDF-0.31/lib/Text/PDF/Bool.pm0000755000175000017500000000100012750671061014731 0ustar bobhbobhpackage Text::PDF::Bool; use strict; use vars qw(@ISA); # no warnings qw(uninitialized); use Text::PDF::String; @ISA = qw(Text::PDF::String); =head1 NAME PDF::Bool - A special form of L which holds the strings B or B =head1 METHODS =head2 $b->convert($str) Converts a string into the string which will be stored. =cut sub convert { return $_[1] eq "true"; } =head2 as_pdf Converts the value to a PDF output form =cut sub as_pdf { $_[0]->{'val'} ? "true" : "false"; } 1; Text-PDF-0.31/lib/Text/PDF/String.pm0000755000175000017500000000560412750671061015322 0ustar bobhbobhpackage Text::PDF::String; =head1 NAME Text::PDF::String - PDF String type objects and superclass for simple objects that are basically stringlike (Number, Name, etc.) =head1 METHODS =cut use strict; use vars qw(@ISA %trans %out_trans); # no warnings qw(uninitialized); use Text::PDF::Objind; @ISA = qw(Text::PDF::Objind); %trans = ( "n" => "\n", "r" => "\r", "t" => "\t", "b" => "\b", "f" => "\f", "\\" => "\\", "(" => "(", ")" => ")" ); %out_trans = ( "\n" => "n", "\r" => "r", "\t" => "t", "\b" => "b", "\f" => "f", "\\" => "\\", "(" => "(", ")" => ")" ); =head2 Text::PDF::String->from_pdf($string) Creates a new string object (not a full object yet) from a given string. The string is parsed according to input criteria with escaping working. =cut sub from_pdf { my ($class, $str) = @_; my ($self) = {}; bless $self, $class; $self->{'val'} = $self->convert($str); $self->{' realised'} = 1; return $self; } =head2 Text::PDF::String->new($string) Creates a new string object (not a full object yet) from a given string. The string is parsed according to input criteria with escaping working. =cut sub new { my ($class, $str) = @_; my ($self) = {}; bless $self, $class; $self->{'val'} = $str; $self->{' realised'} = 1; return $self; } =head2 $s->convert($str) Returns $str converted as per criteria for input from PDF file =cut sub convert { my ($self, $str) = @_; $str =~ s/\\([nrtbf\\()]|[0-7]+)/defined $trans{$1} ? $trans{$1} : chr(oct($1))/oegi; # $str =~ s/\\([0-7]+)/chr(oct($1))/oeg; # thanks to kundrat@kundrat.sk 1 while $str =~ s/\<([0-9a-f]{2})[\r\n]*/chr(hex($1))."\<"/oige; $str =~ s/\<([0-9a-f]?)\>/chr(hex($1."0"))/oige; $str =~ s/\<\>//og; return $str; } =head2 $s->val Returns the value of this string (the string itself). =cut sub val { $_[0]->{'val'}; } =head2 $->as_pdf Returns the string formatted for output as PDF for PDF File object $pdf. =cut sub as_pdf { my ($self) = @_; my ($str) = $self->{'val'}; if ($str =~ m/[^\n\r\t\b\f\040-\176\200-\377]/oi) { $str =~ s/(.)/sprintf("%02X", ord($1))/oge; return "<$str>"; } else { $str =~ s/([\n\r\t\b\f\\()])/\\$out_trans{$1}/ogi; return "($str)"; } } =head2 $s->outobjdeep Outputs the string in PDF format, complete with necessary conversions =cut sub outobjdeep { my ($self, $fh, $pdf, %opts) = @_; $fh->print($self->as_pdf ($pdf)); } =head2 $s->copy($inpdf, $res, $unique, $outpdf, %opts) Copies an object. See Text::PDF::Objind::Copy() for details =cut sub copy { my ($self, $inpdf, $res, $unique, $outpdf, %opts) = @_; my ($i); $res = $self->SUPER::copy($inpdf, $res, $unique, $outpdf, %opts); $res->{'val'} = $self->{'val'}; $res->{' realised'} = 1; $res; } Text-PDF-0.31/lib/Text/PDF/SFont.pm0000755000175000017500000002065312750671061015106 0ustar bobhbobhpackage Text::PDF::SFont; # use strict; use vars qw(@ISA %widths @encodings); @ISA = qw(Text::PDF::Dict); use Text::PDF::Utils; use Compress::Zlib; # no warnings qw(uninitialized); =head1 NAME Text::PDF::SFont - PDF Standard inbuilt font resource object. Inherits from L =head1 METHODS =head2 Text::PDF::SFont->new($parent, $name, $pdfname) Creates a new font object with given parent and name. The name must be from one of the core 14 base fonts included with PDF. These are: Courier, Courier-Bold, Courier-Oblique, Courier-BoldOblique Times-Roman, Times-Bold, Times-Italic, Times-BoldItalic Helvetica, Helvetica-Bold, Helvetica-Oblique, Helvetica-BoldOblique Symbol, ZapfDingbats The $pdfname is the name that this particular font object will be referenced by throughout the PDF file. If you want to play silly games with naming, then you can write the code to do it! All fonts in this system are full PDF objects. =head1 BUGS Currently no width support for Symbol or ZapfDingbats, I haven't got my head around the AFMs yet. MacExpertEncoding not supported yet (I don't have the width info for any of the fonts) =cut BEGIN { @encodings = ('WinAnsiEncoding', 'MacRomanEncoding'); %enc_map = ( 'WinAnsiEncoding' => [0 .. 126, 128, 160, 128, 145,134, 140, 131, 129, 130, 26, 139, 151, 136, 150, 128, 153, 128, 128, 143, 144, 141, 142, 128, 133, 132, 31, 146, 157, 137, 156, 128, 158, 152, 32, 161 .. 255], 'MacRomanEncoding' => [0 .. 127, 196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233, 232, 234, 235, 237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249, 251, 252, 129, 176, 162, 163, 128, 182, 223, 174, 169, 146, 180, 168, 0, 198, 216, 0, 177, 0, 0, 165, 181, 0, 0, 0, 0, 0, 186, 169, 0, 230, 248, 192, 161, 172, 0, 134, 0, 0, 171, 187, 131, 32, 192, 195, 213, 150, 156, 133, 132, 141 .. 144, 247, 0, 255, 152, 135, 164, 136, 137, 147, 148, 130, 183, 145, 140, 139, 194, 202, 200, 205, 206, 207, 204, 211, 212, 0, 210, 218, 219, 217, 154, 26, 31, 175, 24, 27, 30, 184, 28, 29, 25], 'PDFDocEncoding' => [0 .. 255], 'AdobeStandardEncoding' => [ 0 .. 38, 144, 40 .. 95, 143, 97 .. 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 161, 162, 163, 135, 165, 134, 167, 164, 39, 141, 171, 136, 137, 147, 148, 0, 133, 129, 130, 183, 0, 182, 128, 145, 140, 142, 187, 131, 139, 0, 191, 0, 96, 180, 26, 31,175, 24, 27, 168, 0, 30, 231, 0, 28, , 25, 29, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 198, 0, 170, 0, 0, 0, 0, 149, 216, 150, 186, 0, 0, 0, 0, 0, 230, 0, 0, 0, 154, 0, 0, 155, 248, 156, 223, 0, 0, 0, 0], ); } sub new { my ($class, $parent, $name, $pdfname, $encoding) = @_; my ($self) = $class->SUPER::new; return undef unless exists $width_data{$name}; $self->{'Type'} = PDFName("Font"); $self->{'Subtype'} = PDFName("Type1"); $self->{'BaseFont'} = PDFName($name); $self->{'Name'} = PDFName($pdfname); $self->{'Encoding'} = PDFName($encodings[$encoding-1]) if ($encoding); $parent->new_obj($self); $self; } =head2 $f->width($text) Returns the width of the text in em. =cut sub getBase { my ($self) = @_; unless (defined $widths{$self->{'BaseFont'}->val}) { @{$widths{$self->{'BaseFont'}->val}} = unpack("n*", uncompress(unpack("u", $width_data{$self->{'BaseFont'}->val}))); } $self; } sub width { my ($self, $text) = @_; my ($width); my ($str) = $self->{'BaseFont'}->val; my ($enc); $enc = $self->{'Encoding'}->val if defined $self->{'Encoding'}; $self->getBase; foreach (unpack("C*", $text)) { $width += $widths{$str}[(defined $enc and $enc ne "") ? $enc_map{$enc}[$_] : $_]; } $width / 1000; } =head2 $f->trim($text, $len) Trims the given text to the given length (in em) returning the trimmed text =cut sub trim { my ($self, $text, $len) = @_; my ($width, $i); my ($str) = $self->{'BaseFont'}->val; my ($enc); $enc = $self->{'Encoding'}->val if defined $self->{'Encoding'}; $self->getBase; $len *= 1000; foreach (unpack("C*", $text)) { $width += $widths{$str}[$enc ne "" ? $enc_map{$enc}[$_] : $_]; last if ($width > $len); $i++; } return substr($text, 0, $i); } =head2 $f->out_text($text) Acknowledges the text to be output for subsetting purposes, etc. =cut sub out_text { my ($self, $text) = @_; return PDFStr($text)->as_pdf; } BEGIN { %width_data = ( 'Courier' => <<'EOT', 8>)QC8"`-,$4,)TB9?_"%Q]`(*0`U#D_/ EOT 'Courier-Bold' => <<'EOT', 8>)QC8"`-,$4,)TB9?_"%Q]`(*0`U#D_/ EOT 'Courier-BoldOblique' => <<'EOT', 8>)QC8"`-,$4,)TB9?_"%Q]`(*0`U#D_/ EOT 'Courier-Oblique' => <<'EOT', 8>)QC8"`-,$4,)TB9?_"%Q]`(*0`U#D_/ EOT 'Helvetica' => <<'EOT', M>)R54#$.PC`,C.U.?4`E_I!/P)2E?0)+9Q:V/H"=/0]@[0_X0R362FQ]0"16 MSFY+4<5"3HJN@P$YQ!?33$L_[5>J+E`OPK]*<2G-E^^VJ=),+B\,_W`]K )A]92?@,9KULP EOT 'Helvetica-Bold' => <<'EOT', M>)R54"L6PC`0S&Y<#Y#70^02H&IZ!4PT!LPZ]]\G_08!YZY1HS]K=D_Z!NWH#Q(LLH7E:$AG_2P9I327F.5#[ M[&>[W;5H\S-R#.B%"+O@+MSFFZQ+V4>=JB7+0TYXH3.L7B[8!/=$?8'V$7Q[ G`V>J0/],!L`X.(?YFP70U:RV-7+N"LS[W?!2GW[0K6QN[`T)B%=1 EOT 'Helvetica-BoldOblique' => <<'EOT', M>)R54"L6PC`0S&Y<#Y#70^02H&IZ!4PT!LPZ]]\G_08!YZY1HS]K=D_Z!NWH#Q(LLH7E:$AG_2P9I327F.5#[ M[&>[W;5H\S-R#.B%"+O@+MSFFZQ+V4>=JB7+0TYXH3.L7B[8!/=$?8'V$7Q[ G`V>J0/],!L`X.(?YFP70U:RV-7+N"LS[W?!2GW[0K6QN[`T)B%=1 EOT 'Helvetica-Oblique' => <<'EOT', M>)R54#$.PC`,C.U.?4`E_I!/P)2E?0)+9Q:V/H"=/0]@[0_X0R362FQ]0"16 MSFY+4<5"3HJN@P$YQ!?33$L_[5>J+E`OPK]*<2G-E^^VJ=),+B\,_W`]K )A]92?@,9KULP EOT 'Times-Bold' => <<'EOT', M>)R54#$.PC`,C)T%Y0&9^`%Y0;<.+$C]`DO?DJ43>S<65AZ05W3F`960F#(Q M<78"+6(BI]B.[=@^&_/?H>X;YDD=[RA3MK-MR:LW95+T#-)?M?3VY%$G4JK[J*Q0$?E\IZ#\#:R. M3N`8`'3GK>P$]@'QAT#F6:# <<'EOT', M>)R54+L1@S`,E>2.`;Q$F,`5&8`-.!I/P0#NTS-`V@S`%-09@+O8D:#`K4YFPUJ^ MX$BX/TMT$VJK1.NTY40-'8E@\,IA7,JXV*N;6W1TJW<-XK(O`1[9(K/-)1[H MR@-4$H^F?7.;GG0V;6/)BE*W83OVNXNIB,HOJW*:'^G]5V#$O-RX@8J1"%'/ M)VAL@5XZ3,`3R[+<%?:?%>"OL2D&S($/PE>L2G:N0M-2X5_(U0.=")H7%,>> $ZZB'L0`` EOT 'Times-Italic' => <<'EOT', M>)R54#L.PC`,C9T-1E`W;N"9O0?H%5AZBAX@>T1$B5?^H6[639QU#Q.H94?X`U'!?[:5'I$+6T MHI8WV&=GLS>^4>O.PVS^KEW0!Y&_:@7S7&P*47[P@-/T")\N*YV&G[2V_1V\ M"CTB"8EILC2MA+;P'PK8^@O?8-_LC^IB9]5@1)SXHJI,D7<;4&3,7\1)9(J] %`8H]BK$` EOT 'Times-Roman' => <<'EOT', M>)R542L2PC`0S6Y<-1/'#2(S>`[0*V!Z"@Z`1\+D`%@$,I89=`=\916J@D'Q M=IMV6E#T3;9]N]NWGQCSWT/E'.8->Z"..KNVA;FHM^,@?APGD1\X%/M.H-YI M9F-NU/`2\VRT]]:V\C8GZ9\#6%*`V3NJ>^W':VWIOH9BROO(5:"(2ORBALF %/HNICN<` EOT 'Symbol' => <<'EOT', M>)RED*%/PV`0Q;_W#C-D1+(F5W;)1J40E4X>2#' M\1]RY)PY>%/N6"ENN>(/(SOYW:NN[4&YL\`/7G$I]16-JF>;+(/Z'H 3]4S^3R/20`@HD+%V_NJ_?N9DIP`` EOT 'ZapfDingbats' => <<'EOT', M>)RMD#M+`T$4A>>>TZ3TD4V,51K!3CO!!R$B@LTV%M:2*BFUL1!+"19:F2)9 MB"AIM)`$B6D$14%,D11:^@,DJ(46NT8$#_X"0>?CFPYOS1+L\$H^ MH$N/"2;1X"5/,,85[K$FZ^S@G3[Z^,`,:OA$A!!O]HIQI)#%&8<8%PFF.,I) M3BG.DOJLYAE6\(1[O&AW!.>,T6D,<8T;'.."TYS0:4_K4[105Z1AW#')09G7 M>WT1(N2FB'/`FF['/*M:T8H($"C_;3$GXN)+E*WL>JYGOOF_JP`>D4::NZCH M7Q%C6$83:S\_^D^VN,C(VGAF($NL\D#,,\]9!F*?ASRR+M>Y('+,J1XM%IQC 405DU2*[*#=[*C&Y5V6;I&TT<5J(` EOT ); } 1; Text-PDF-0.31/lib/Text/PDF/Dict.pm0000755000175000017500000002137112757400410014731 0ustar bobhbobhpackage Text::PDF::Dict; use strict; use vars qw(@ISA $mincache $tempbase $cr); # no warnings qw(uninitialized); use Text::PDF::Objind; @ISA = qw(Text::PDF::Objind); $cr = '(?:\015|\012|(?:\015\012))'; use Text::PDF::Filter; BEGIN { my $temp_dir = -d '/tmp' ? '/tmp' : $ENV{TMP} || $ENV{TEMP}; $tempbase = sprintf("%s/%d-%d-0000", $temp_dir, $$, time()); $mincache = 32768; } =head1 NAME Text::PDF::Dict - PDF Dictionaries and Streams. Inherits from L =head1 INSTANCE VARIABLES There are various special instance variables which are used to look after, particularly, streams. Each begins with a space: =over =item stream Holds the stream contents for output =item streamfile Holds the stream contents in an external file rather than in memory. This is not the same as a PDF file stream. The data is stored in its unfiltered form. =item streamloc If both ' stream' and ' streamfile' are empty, this indicates where in the source PDF the stream starts. =back =head1 METHODS =cut sub new { my ($class, @opts) = @_; my ($self); $class = ref $class if ref $class; $self = $class->SUPER::new(@_); $self->{' realised'} = 1; return $self; } =head2 $d->outobjdeep($fh) Outputs the contents of the dictionary to a PDF file. This is a recursive call. It also outputs a stream if the dictionary has a stream element. If this occurs then this method will calculate the length of the stream and insert it into the stream's dictionary. =cut sub outobjdeep { my ($self, $fh, $pdf, %opts) = @_; my ($key, $val, $f, @filts); my ($loc, $str, %specs, $len); if (defined $self->{' stream'} or defined $self->{' streamfile'} or defined $self->{' streamloc'}) { if ($self->{'Filter'} || !defined $self->{' stream'}) { $self->{'Length'} = Text::PDF::Number->new(0) unless (defined $self->{'Length'}); $pdf->new_obj($self->{'Length'}) unless ($self->{'Length'}->is_obj($pdf)); # $pdf->out_obj($self->{'Length'}); } else { $self->{'Length'} = Text::PDF::Number->new(length($self->{' stream'}) + ($self->{' stream'} =~ m/$cr$/o ? 0 : 1)); } } $fh->print("<<\n"); foreach ('Type', 'Subtype') { $specs{$_} = 1; if (defined $self->{$_}) { $fh->print("/$_ "); $self->{$_}->outobj($fh, $pdf, %opts); $fh->print("\n"); } } foreach $key (sort {$a cmp $b} keys %{$self}) { next if ($key =~ m/^[\s\-]/o || $specs{$key}); $val = $self->{$key}; next if ($val eq ''); $key = Text::PDF::Name::string_to_name ($key, $pdf); $fh->print("/$key "); $val->outobj($fh, $pdf, %opts); $fh->print("\n"); } $fh->print('>>'); #now handle the stream (if any) if (defined $self->{' streamloc'} && !defined $self->{' stream'}) { # read a stream if infile $loc = $fh->tell; $self->read_stream; $fh->seek($loc, 0); } if (!$self->{' nofilt'} && (defined $self->{' stream'} || defined $self->{' streamfile'}) && defined $self->{'Filter'}) { my ($hasflate) = -1; my ($temp, $i, $temp1, @filtlist); if (ref($self->{'Filter'}) eq 'Text::PDF::Name') { push(@filtlist, $self->{'Filter'}->val); } else { for ($i = 0; $i < scalar @{$self->{'Filter'}{' val'}}; $i++) { push (@filtlist, $self->{'Filter'}{' val'}[$i]->val); } } foreach $temp (@filtlist) { if ($temp eq 'LZWDecode') # hack to get around LZW patent { if ($hasflate < -1) { $hasflate = $i; next; } $temp = 'FlateDecode'; $self->{'Filter'}{' val'}[$i]{'val'} = $temp; # !!! } elsif ($temp eq 'FlateDecode') { $hasflate = -2; } $temp1 = "Text::PDF::$temp"; push (@filts, $temp1->new); } splice(@{$self->{'Filter'}{' val'}}, $hasflate, 1) if ($hasflate > -1); } if (defined $self->{' stream'}) { $fh->print("\nstream\n"); $loc = $fh->tell; $str = $self->{' stream'}; unless ($self->{' nofilt'}) { foreach $f (reverse @filts) { $str = $f->outfilt($str, 1); } } $fh->print($str); if (@filts > 0) { $len = $fh->tell - $loc + 1; if ($self->{'Length'}{'val'} != $len) { $self->{'Length'}{'val'} = $len; $pdf->out_obj($self->{'Length'}) if ($self->{'Length'}->is_obj($pdf)); } } $fh->print("\n") unless ($str =~ m/$cr$/o); $fh->print("endstream"); # $self->{'Length'}->outobjdeep($fh); } elsif (defined $self->{' streamfile'}) { open(DICTFH, $self->{' streamfile'}) || die "Unable to open $self->{' streamfile'}"; binmode DICTFH; $fh->print("\nstream\n"); $loc = $fh->tell; while (read(DICTFH, $str, 4096)) { unless ($self->{' nofilt'}) { foreach $f (reverse @filts) { $str = $f->outfilt($str, 0); } } $fh->print($str); } close(DICTFH); unless ($self->{' nofilt'}) { $str = ''; foreach $f (reverse @filts) { $str = $f->outfilt($str, 1); } $fh->print($str); } $len = $fh->tell - $loc + 1; if ($self->{'Length'}{'val'} != $len) { $self->{'Length'}{'val'} = $len; $pdf->out_obj($self->{'Length'}) if ($self->{'Length'}->is_obj($pdf)); } $fh->print("\n") unless ($str =~ m/$cr$/o); $fh->print("endstream\n"); # $self->{'Length'}->outobjdeep($fh); } } =head2 $d->read_stream($force_memory) Reads in a stream from a PDF file. If the stream is greater than C (defaults to 32768) bytes to be stored, then the default action is to create a file for it somewhere and to use that file as a data cache. If $force_memory is set, this caching will not occur and the data will all be stored in the $self->{' stream'} variable. =cut sub read_stream { my ($self, $force_memory) = @_; my ($fh) = $self->{' streamsrc'}; my (@filts, $f, $last, $i, $dat); my ($len) = $self->{'Length'}->val; $self->{' stream'} = ''; if (defined $self->{'Filter'}) { foreach $f ($self->{'Filter'}->elementsof) { my ($temp) = "Text::PDF::" . $f->val; push(@filts, $temp->new()); } } $last = 0; if (defined $self->{' streamfile'}) { unlink ($self->{' streamfile'}); $self->{' streamfile'} = undef; } seek ($fh, $self->{' streamloc'}, 0); for ($i = 0; $i < $len; $i += 4096) { if ($i + 4096 > $len) { $last = 1; read($fh, $dat, $len - $i); } else { read($fh, $dat, 4096); } foreach $f (@filts) { $dat = $f->infilt($dat, $last); } if (!$force_memory && !defined $self->{' streamfile'} && ((length($self->{' stream'}) * 2) > $mincache)) { open (DICTFH, ">$tempbase") || next; binmode DICTFH; $self->{' streamfile'} = $tempbase; $tempbase =~ s/-(\d+)$/"-" . ($1 + 1)/oe; # prepare for next use print DICTFH $self->{' stream'}; undef $self->{' stream'}; } if (defined $self->{' streamfile'}) { print DICTFH $dat; } else { $self->{' stream'} .= $dat; } } close DICTFH if (defined $self->{' streamfile'}); $self->{' nofilt'} = 0; $self; } =head2 $d->val Returns the dictionary, which is itself. =cut sub val { $_[0]; } =head2 $d->copy($inpdf, $res, $unique, $outpdf, %opts) Copies an object. See Text::PDF::Objind::Copy() for details =cut sub copy { my ($self, $inpdf, $res, $unique, $outpdf, %opts) = @_; my ($k, $path); $res = $self->SUPER::copy($inpdf, $res, $unique, $outpdf, %opts); $path = delete $opts{'path'}; foreach $k (keys %$self) { next if $self->dont_copy($k); next if defined $res->{$k}; if (UNIVERSAL::can($self->{$k}, "is_obj")) { if (grep {"$path/$k" =~ m|$_|} @{$opts{'clip'}}) { $res->{$k} = $self->{$k}; } else { $res->{$k} = $self->{$k}->realise->copy($inpdf, undef, $unique ? $unique + 1 : 0, $outpdf, %opts, 'path' => "$path/$k"); } } else { $res->{$k} = $self->{$k}; } } $res; } 1;Text-PDF-0.31/lib/Text/PDF/Number.pm0000755000175000017500000000072412750671061015302 0ustar bobhbobhpackage Text::PDF::Number; =head1 NAME Text::PDF::Number - Numbers in PDF. Inherits from L =head1 METHODS =cut use strict; use vars qw(@ISA); # no warnings qw(uninitialized); use Text::PDF::String; @ISA = qw(Text::PDF::String); =head2 $n->convert($str) Converts a string from PDF to internal, by doing nothing =cut sub convert { return $_[1]; } =head2 $n->as_pdf Converts a number to PDF format =cut sub as_pdf { $_[0]->{'val'}; } Text-PDF-0.31/lib/Text/PDF/Page.pm0000755000175000017500000000513212750671061014724 0ustar bobhbobhpackage Text::PDF::Page; use strict; use vars qw(@ISA); @ISA = qw(Text::PDF::Pages); # no warnings qw(uninitialized); use Text::PDF::Pages; use Text::PDF::Utils; =head1 NAME Text::PDF::Page - Represents a PDF page, inherits from L =head1 DESCRIPTION Represents a page of output in PDF. It also keeps track of the content stream, any resources (such as fonts) being switched, etc. Page inherits from Pages due to a number of shared methods. They are really structurally quite different. =head1 INSTANCE VARIABLES A page has various working variables: =over =item curstrm The currently open stream =back =head1 METHODS =head2 Text::PDF::Page->new($pdf, $parent, $index) Creates a new page based on a pages object (perhaps the root object). The page is also added to the parent at this point, so pages are ordered in a PDF document in the order in which they are created rather than in the order they are closed. Only the essential elements in the page dictionary are created here, all others are either optional or can be inherited. The optional index value indicates the index in the parent list that this page should be inserted (so that new pages need not be appended) =cut sub new { my ($class, $pdf, $parent, $index) = @_; my ($self) = {}; $class = ref $class if ref $class; $self = $class->SUPER::new($pdf, $parent); $self->{'Type'} = PDFName('Page'); delete $self->{'Count'}; delete $self->{'Kids'}; $parent->add_page($self, $index); $self; } =head2 $p->add($str) Adds the string to the currently active stream for this page. If no stream exists, then one is created and added to the list of streams for this page. The slightly cryptic name is an aim to keep it short given the number of times people are likely to have to type it. =cut sub add { my ($self, $str) = @_; my ($strm) = $self->{' curstrm'}; if (!defined $strm) { $strm = Text::PDF::Dict->new; foreach (@{$self->{' outto'}}) { $_->new_obj($strm); } $self->{'Contents'} = PDFArray() unless defined $self->{'Contents'}; unless (ref $self->{'Contents'} eq "Text::PDF::Array") { $self->{'Contents'} = PDFArray($self->{'Contents'}); } $self->{'Contents'}->add_elements($strm); $self->{' curstrm'} = $strm; } $strm->{' stream'} .= $str; $self; } =head2 $p->ship_out($pdf) Ships the page out to the given output file context =cut sub ship_out { my ($self, $pdf) = @_; $pdf->ship_out($self); if (defined $self->{'Contents'}) { $pdf->ship_out($self->{'Contents'}->elementsof); } $self; } Text-PDF-0.31/lib/Text/PDF/Objind.pm0000755000175000017500000001741612750671061015265 0ustar bobhbobhpackage Text::PDF::Objind; =head1 NAME Text::PDF::Objind - PDF indirect object reference. Also acts as an abstract superclass for all elements in a PDF file. =head1 INSTANCE VARIABLES Instance variables differ from content variables in that they all start with a space. =over =item parent For an object which is a reference to an object in some source, this holds the reference to the source object, so that should the reference have to be de-referenced, then we know where to go and get the info. =item objnum (R) The object number in the source (only for object references) =item objgen (R) The object generation in the source There are other instance variables which are used by the parent for file control. =item isfree This marks whether the object is in the free list and available for re-use as another object elsewhere in the file. =item nextfree Holds a direct reference to the next free object in the free list. =back =head1 METHODS =cut use strict; use vars qw(@inst %inst $uidc); # no warnings qw(uninitialized); # protected keys during emptying and copying, etc. @inst = qw(parent objnum objgen isfree nextfree uid); map {$inst{" $_"} = 1} @inst; $uidc = "pdfuid000"; =head2 Text::PDF::Objind->new() Creates a new indirect object =cut sub new { my ($class) = @_; my ($self) = {}; bless $self, ref $class || $class; } =head2 uid Returns a Unique id for this object, creating one if it didn't have one before =cut sub uid { $_[0]->{' uid'} || ($_[0]->{' uid'} = $uidc++); } =head2 $r->release Releases ALL of the memory used by this indirect object, and all of its component/child objects. This method is called automatically by 'Crelease>' (so you don't have to call it yourself). B, that it is important that this method get called at some point prior to the actual destruction of the object. Internally, PDF files have an enormous amount of cross-references and this causes circular references within our own internal data structures. Calling 'C' forces these circular references to be cleaned up and the entire internal data structure purged. B As part of the brute-force cleanup done here, this method will throw a warning message whenever unexpected key values are found within the C object. This is done to help ensure that unexpected and unfreed values are brought to your attention, so you can bug us to keep the module updated properly; otherwise the potential for memory leaks due to dangling circular references will exist. =cut sub release { my ($self, $force) = @_; my (@tofree); # delete stuff that we know we can, here if ($force) { foreach my $key (keys %{$self}) { push(@tofree,$self->{$key}); $self->{$key}=undef; delete($self->{$key}); } } else { @tofree = map { delete $self->{$_} } keys %{$self}; } while (my $item = shift @tofree) { my $ref = ref($item); if (UNIVERSAL::can($ref, 'release')) # $ref was $item { $item->release($force); } elsif ($ref eq 'ARRAY') { push( @tofree, @{$item} ); } elsif (UNIVERSAL::isa($ref, 'HASH')) { release($item, $force); } } # check that everything has gone - it better had! foreach my $key (keys %{$self}) { warn ref($self) . " still has '$key' key left after release.\n"; } } =head2 $r->val Returns the val of this object or reads the object and then returns its value. Note that all direct subclasses *must* make their own versions of this subroutine otherwise we could be in for a very deep loop! =cut sub val { my ($self) = @_; $self->{' parent'}->read_obj(@_)->val unless ($self->{' realised'}); } =head2 $r->realise Makes sure that the object is fully read in, etc. =cut sub realise { $_[0]->{' realised'} ? $_[0] : $_[0]->{' parent'}->read_obj(@_); } =head2 $r->outobjdeep($fh, $pdf) If you really want to output this object, then you must need to read it first. This also means that all direct subclasses must subclass this method or loop forever! =cut sub outobjdeep { my ($self, $fh, $pdf, %opts) = @_; $self->{' parent'}->read_obj($self)->outobjdeep($fh, $pdf) unless ($self->{' realised'}); } =head2 $r->outobj($fh) If this is a full object then outputs a reference to the object, otherwise calls outobjdeep to output the contents of the object at this point. =cut sub outobj { my ($self, $fh, $pdf, %opts) = @_; if (defined $pdf->{' objects'}{$self->uid}) { $fh->printf("%d %d R", @{$pdf->{' objects'}{$self->uid}}[0..1]); } else { $self->outobjdeep($fh, $pdf, %opts); } } =head2 $r->elementsof Abstract superclass function filler. Returns self here but should return something more useful if an array. =cut sub elementsof { my ($self) = @_; if ($self->{' realised'}) { return ($self); } else { return $self->{' parent'}->read_obj($self)->elementsof; } } =head2 $r->empty Empties all content from this object to free up memory or to be read to pass the object into the free list. Simplistically undefs all instance variables other than object number and generation. =cut sub empty { my ($self) = @_; my ($k); for $k (keys %$self) { undef $self->{$k} unless $self->dont_copy($k); } $self; } =head2 $r->merge($objind) This merges content information into an object reference place-holder. This occurs when an object reference is read before the object definition and the information in the read data needs to be merged into the object place-holder =cut sub merge { my ($self, $other) = @_; my ($k); for $k (keys %$other) { $self->{$k} = $other->{$k} unless $self->dont_copy($k); } $self->{' realised'} = 1; bless $self, ref($other); } =head2 $r->is_obj($pdf) Returns whether this object is a full object with its own object number or whether it is purely a sub-object. $pdf indicates which output file we are concerned that the object is an object in. =cut sub is_obj { defined $_[1]->{' objects'}{$_[0]->uid}; } =head2 $r->copy($inpdf, $res, $unique, $outpdf, %opts) Returns a new copy of this object. $inpdf gives the source pdf object for the object to be copied. $outpdf gives the target pdf for the object to be copied into. $outpdf may be undefined. $res may be defined in which case the object is copied into that object. $unique controls recursion. if $unique is non zero then new objects are always created and recursion always occurs. But each time recursion occurs, $unique is incremented. Thus is $unique starts with a negative value it is possible to stop the recursion at a certain depth. Of course for a positive value of $unique, recursion always occurs. If $unique is 0 then recursion only occurs if $outpdf is not the same as $inpdf. In this case, a cache is held in $outpdf to see whether a previous copy of the same object has been made. If so, then that previous copy is returned otherwise a new object is made and added to the cache and recursed into. Objects that are full objects with their own id numbers are correspondingly full objects in the output pdf. =cut sub copy { my ($self, $inpdf, $res, $unique, $outpdf, %opts) = @_; my ($k, $o); $outpdf = $inpdf unless $outpdf; $self->realise; unless (defined $res) { if ($outpdf eq $inpdf && !$unique) { return $self; } elsif (!$unique && defined $outpdf->{' copies'}{$self->uid}) { return $outpdf->{' copies'}{$self->uid}; } $res = {}; bless $res, ref($self); } if ($self->is_obj($inpdf) && ($unique || ($outpdf ne $inpdf && !defined $outpdf->{' copies'}{$self->uid}))) { $outpdf->new_obj($res); # $outpdf->{' copies'}{$self->uid} = $res unless ($unique); } $res; } sub dont_copy { return $inst{$_[1]}; } 1; Text-PDF-0.31/lib/Text/PDF/Array.pm0000755000175000017500000000540312750671061015127 0ustar bobhbobhpackage Text::PDF::Array; use strict; use vars qw(@ISA); # no warnings qw(uninitialized); use Text::PDF::Objind; @ISA = qw(Text::PDF::Objind); =head1 NAME Text::PDF::Array - Corresponds to a PDF array. Inherits from L =head1 INSTANCE VARIABLES This object is not an array but an associative array containing the array of elements. Thus, there are special instance variables for an array object, beginning with a space =over =item var Contains the actual array of elements =back =head1 METHODS =head2 PDF::Array->new($parent, @vals) Creates an array with the given storage parent and an optional list of values to initialise the array with. =cut sub new { my ($class, @vals) = @_; my ($self); $self->{' val'} = [@vals]; $self->{' realised'} = 1; bless $self, $class; } =head2 $a->outobjdeep($fh, $pdf) Outputs an array as a PDF array to the given filehandle. =cut sub outobjdeep { my ($self, $fh, $pdf, %opts) = @_; my ($obj); $fh->print("[ "); foreach $obj (@{$self->{' val'}}) { $obj->outobj($fh, $pdf, %opts); $fh->print(" "); } $fh->print("]"); } =head2 $a->removeobj($elem) Removes all occurrences of an element from an array. =cut sub removeobj { my ($self, $elem) = @_; $self->{' val'} = [grep($_ ne $elem, @{$self->{' val'}})]; } =head2 $a->elementsof Returns a list of all the elements in the array. Notice that this is not the array itself but the elements in the array. =cut sub elementsof { wantarray ? @{$_[0]->{' val'}} : scalar @{$_[0]->{' val'}}; } =head2 $a->add_elements Appends the given elements to the array. An element is only added if it is defined. =cut sub add_elements { my ($self) = shift; my ($e); foreach $e (@_) { push (@{$self->{' val'}}, $e) if defined $e; } $self; } =head2 $a->val Returns the value of the array, this is a reference to the actual array containing the elements. =cut sub val { $_[0]->{' val'}; } =head2 $d->copy($inpdf, $res, $unique, $outpdf, %opts) Copies an object. See Text::PDF::Objind::Copy() for details =cut sub copy { my ($self, $inpdf, $res, $unique, $outpdf, %opts) = @_; my ($i, $path); $res = $self->SUPER::copy($inpdf, $res, $unique, $outpdf, %opts); $res->{' val'} = []; $path = delete $opts{'path'}; for ($i = 0; $i < scalar @{$self->{' val'}}; $i++) { if (UNIVERSAL::can($self->{'val'}[$i], "is_obj") && !grep {"$path\[$i\]" =~ m|$_|} @{$opts{'clip'}}) { push (@{$res->{' val'}}, $self->{' val'}[$i]->realise->copy($inpdf, undef, $unique ? $unique + 1 : 0, $outpdf, %opts, 'path' => "$path\[$i\]")); } else { push (@{$res->{' val'}}, $self->{' val'}[$i]); } } $res->{' realised'} = 1; $res; } 1; Text-PDF-0.31/lib/Text/PDF.pm0000644000175000017500000000134312757400410014040 0ustar bobhbobhpackage Text::PDF; $VERSION = '0.31'; 1; =head1 NAME Text::PDF - Module for manipulating PDF files =head1 DESCRIPTION This module allows interaction with existing PDF files directly. It includes various tools including: pdfbklt - make booklets out of existing PDF files pdfrevert - remove edits from a PDF file pdfstamp - stamp text on each page of a PDF file various example programs are also included =head1 AUTHOR Martin Hosken Martin_Hosken@sil.org Copyright Martin Hosken 1999 and onwards No warranty or expression of effectiveness, least of all regarding anyone's safety, is implied in this software or documentation. =head2 COPYRIGHT This Perl Text::PDF module is licensed under the same terms as Perl itself. Text-PDF-0.31/scripts/0000755000175000017500000000000012757431131013071 5ustar bobhbobhText-PDF-0.31/scripts/pdfstamp0000755000175000017500000000542612752511217014643 0ustar bobhbobh#!/usr/bin/perl use 5.006; use strict; use Text::PDF::File; use Text::PDF::SFont; use Text::PDF::Utils; use Getopt::Std; our ($opt_t,$opt_f,$opt_p,$opt_s,$opt_l); getopts('f:l:ps:t:'); our $VERSION = 0.02; # MJPH 23-JUL-2001 Re-order to stamp on the top unless ((defined $ARGV[1] || $opt_p) && -f $ARGV[0]) { die <<'EOT'; pdfstamp [-f font] [-l locx,locy] [-s size] infile string Adds the given string to the infile .pdf file at the given location, font and size. -f font Font name from the standard fonts [Helvetica] -l locx,locy Location in points from bottom left of page [0,0] -p Use the page number as the string -s size Font size to print at [11] -t ttfile TrueType font file to use (instead of -f) EOT } require Text::PDF::TTFont if ($opt_t); $opt_f = 'Helvetica' unless $opt_f; $opt_s = 11 unless $opt_s; $opt_l =~ s/,\s*/ /o; $opt_l = "0 0" unless $opt_l; my ($pdf, $root, $pgs, $fpgins, $spgins, @pglist); $pdf = Text::PDF::File->open($ARGV[0], 1); $root = $pdf->{'Root'}->realise; $pgs = $root->{'Pages'}->realise; $fpgins = PDFDict(); $pdf->new_obj($fpgins); $spgins = PDFDict(); $pdf->new_obj($spgins); $fpgins->{' stream'} = "q"; $spgins->{' stream'} = "Q"; @pglist = proc_pages($pdf, $pgs); my $max = 0; foreach my $p (@pglist) { my $dict = $p->find_prop('Resources'); if (defined $dict && defined $dict->{'Font'}) { foreach my $k (keys %{$dict->{'Font'}}) { next unless $k =~ m/^ap([0-9]+)/o; my $val = $1; $max = $val if $val > $max; } } } $max++; my $font; if ($opt_t) { $font = Text::PDF::TTFont->new($pdf, $opt_t, "ap$max", -subset => 1) || die "Can't work with font $opt_t"; } else { $font = Text::PDF::SFont->new($pdf, $opt_f, "ap$max") || die "Can't create font $opt_f"; } my $stream = PDFDict(); $stream->{' stream'} = "BT 1 0 0 1 $opt_l Tm /ap$max $opt_s Tf " . $font->out_text($ARGV[1]) . " Tj ET"; $pdf->new_obj($stream); my $count = 1; foreach my $p (@pglist) { my ($s) = $stream; if ($opt_p) { $s = PDFDict(); $s->{' stream'} = "BT 1 0 0 1 $opt_l Tm /ap$max $opt_s Tf " . $font->out_text($count++) . " Tj ET"; $pdf->new_obj($s); } $p->add_font($font, $pdf); $p->{Contents} = PDFArray($fpgins, $p->{Contents}->elementsof, $spgins, $s); $pdf->out_obj($p); } $pdf->close_file; sub proc_pages { my ($pdf, $pgs) = @_; my ($pg, $pgref, @pglist, $pcount); foreach $pgref ($pgs->{'Kids'}->elementsof) { $pg = $pdf->read_obj($pgref); if ($pg->{'Type'}->val =~ m/^Pages$/oi) { push(@pglist, proc_pages($pdf, $pg)); } else { $pgref->{' pnum'} = $pcount++; push (@pglist, $pgref); } } (@pglist); } Text-PDF-0.31/scripts/pdfrevert0000755000175000017500000000174612750671061015031 0ustar bobhbobh#!/usr/bin/perl use Text::PDF::File; unless (defined $ARGV[0]) { die <<'EOT'; PDFREVERT infile Removes one layer of changes to a PDF file, trying to maximise the size of the output file (to account for linearised PDF). EOT } $cr = '\s*(?:\r|\n|(?:\r\n))'; $VERSION = "1.000"; # MJPH 6-NOV-1998 Original $f = Text::PDF::File->open($ARGV[0], 1); exit unless defined $f->{'Prev'}; # account for linearised pdf, maximise output for ($t = $f; defined $t->{' prev'}; $t = $t->{' prev'}) { $loc1 = $t->{'Prev'}->val; $loc = $loc1 if ($loc1 > $loc); } $fd = $f->{' INFILE'}; seek($fd, $loc, 0); $rest = ""; while ($len = read($fd, $dat, 1024)) { $len += length($rest); $_ = $rest . $dat; if (m/(?:\r|\n|(?:\r\n))%%EOF$cr/oi) { $loc += length($` . $&); last; } elsif (m/$cr(.*?)$/oi) { $rest = $1; $loc += $len - length($rest); } } if ($len != 0) { truncate($fd, $loc) || die "Can't truncate"; } Text-PDF-0.31/scripts/pdfbklt0000755000175000017500000004503612750671061014456 0ustar bobhbobh#!/usr/bin/perl use Text::PDF::File; use Text::PDF::Utils; use Getopt::Std; $version = "1.8"; # MJPH 17-AUG-2009 add -u gutter for -p 2 # $version = "1.7"; # MJPH 29-FEB-2008 add -g -p r, fix contents problem for empty pages # $version = "1.6"; # MJPH 13-JAN-2003 -s2b option supported # $version = "1.505"; # MJPH 3-AUG-2002 -s comma separated. Allow -ve values in -s # merge errors store something, at least! # $version = "1.504"; # MJPH 27-JUN-2002 Use CropBox over MediaBox # $version = "1.503"; # MJPH 19-FEB-2002 Fix -p1 positioning (again!) # $version = "1.502"; # MJPH 18-JUN-2001 Add support for -p4s # $version = "1.501"; # MJPH 2-MAY-2001 Correct positioning of -s;;; type pages # $version = "1.500"; # MJPH 26-JUL-2000 Correct positioning in some cases and add landscape sizes # $version = "1.401"; # MJPH 28-JUN-2000 Debug content lists and -r # $version = "1.4"; # MJPH 1-FEB-2000 Add -l and preset paper sizes # $version = "1.302"; # MJPH 11-DEC-1999 Make sure updated root output # $version = "1.302"; # MJPH 30-NOV-1999 Update to use Text::PDF # $version = "1.300"; # MJPH 15-JUN-1999 Remove outlines, etc. when making booklet # $version = "1.201"; # MJPH 29-DEC-1998 Add test in merge_dict for $v2 not present # $version = "1.200"; # MJPH 6-NOV-1998 Merging external resources and change -r # $version = "1.101"; # MJPH 13-OCT-1998 Debug resource merging not being output # $version = "1.100"; # MJPH 3-AUG-1998 Support new PDF library getopts("b:g:h:lp:qrs:u:"); if (!defined $ARGV[0]) { die <<"EOT"; PDFBKLT [-b num/size] [-l] [-p num] [-g num] [-q] [-r] [-s size] pdffile (c) M. Hosken. Version: $version Converts a PDF file into a booklet. It edits the pdffile to add the modifications at the end. -b num/size Specifies which page contains the output page size details [1] or gives the dimensions of the page in pts (x;y) or A4,ltr,lgl,A5 -g num[;offset] Makes up page groups, or signatures with num pages in each and offset pages in the first signature. num and offset must be multiples of 4 -l Flag to indicate linear scaling -p num Specifies the number of pages on the output page (1, 2, 4) [2] If 4, can be 4s to flip on short edge. Append r if right to left. -q Quiet (no on screen messages) -r Rotates the output (-p 2 rotates automatically, -r rotates back) -s size Specifies the the location of the actual text on a page: 2 half page right and left (big gutter) 2r half page always on right 2l half page always on left 2b dual half pages on right and left assumes last page opposite first 4 1/4 page right and left bottom (very big gutter) 4t 1/4 page right and left top (very big gutter) 4r/4l/4rt/4lt 1/4 page always on right or left bottom or top location as a single string: minx,miny,maxx,maxy in pts -u width Specify the gutter size (for 2 pages per output page only) EOT } %sizes=( 'a4' => '595;842', 'a4l' => '842;595', 'ltr' => '612;792', 'ltrl' => '792;612', 'lgl' => '612;1008', 'lgll' => '1008;612', ); $opt_u = 0 unless defined $opt_u; $opt_b = $sizes{lc($opt_b)} if defined $sizes{lc($opt_b)}; print "Reading file\n" unless $opt_q; $p = Text::PDF::File->open($ARGV[0], 1) || die "Can't open $ARGV[0]"; $r = $p->{'Root'}->realise; $pgs = $r->{'Pages'}->realise; $pgcount = $pgs->{'Count'}->val; foreach (qw(Outlines Dests Threads AcroForm PageLabels StructTreeRoot OpenAction PageMode)) { if (defined $r->{$_}) { delete $r->{$_}; $p->out_obj($r); } } $pgcount = 0; $rtl = ($opt_p =~ s/r$//o); proc_pages($pgs); if ($opt_s =~ m/b$/o) { for ($i = 0; $i < $pgcount; $i++) { my ($pnum) = ($i == 0) ? 0 : 2 * $i - 1; my ($ref) = $pglist[$pnum]->copy($p, undef, 1, $p, 'clip' => ["^/Contents", '^/Resources/[^/]+/.*']); my ($npnum) = ($i == 0) ? -1 : $pnum + 1; $r->{'Pages'}->add_page($ref, $npnum); if ($npnum == -1) { push (@pglist, $ref); $pglist[$npnum]{' pnum'} = $pgcount * 2; } else { splice(@pglist, $npnum, 0, $ref); $pglist[$npnum]{' pnum'} = $npnum; } $ref->{' pnum'} = $pnum; print STDERR '.' unless $opt_q; } $pgcount *= 2; # $p->append_file; # exit(1); } $opt_p = 2 unless defined $opt_p; $opt_r = !$opt_r if (($opt_p == 2 && $opt_s !~ /^2[rltb]*$/oi) || ($opt_p != 2 && $opt_s =~ /^2[rlt]*$/oi)); if ($opt_b =~ m/^([0-9]+)\;([0-9]+)/oi) { @pbox = (0, 0, $1, $2); } else { $opt_b = 1 unless defined $opt_b; $opt_b--; foreach $n ($pglist[$opt_b]->find_prop('MediaBox')->elementsof) { push(@pbox, $n->val); } } $fpgins = PDFDict(); $p->new_obj($fpgins); $spgins = PDFDict(); $p->new_obj($spgins); $fpgins->{' stream'} = "q"; $spgins->{' stream'} = "Q"; unless ($opt_q) { print "\nThere are $pgcount pages\n"; print "Page box (pt) = $pbox[0], $pbox[1], $pbox[2], $pbox[3]\n"; } if ($opt_g) { my ($num, $offset) = split(/[\s;,:]/, $opt_g); my ($i); $offset ||= $num; for ($i = $offset; $i < $pgcount; $i += $num) { push (@jobs, $i); } } push(@jobs, ($opt_p == 1) ? $pgcount : int(($pgcount + 3) / 4) * 4); if ($opt_p > 1) { for ($i = $pgcount; $i < int(($pgcount + 3) /4) * 4; $i++) { if ($rtl) { unshift (@pglist, undef); } else { push (@pglist, undef); } } } for ($jn = 0; $jn < scalar @jobs; $jn++) { my ($first) = $jn ? $jobs[$jn - 1] : 0; my ($last) = $jobs[$jn]; for ($i = 0; $i < ($last - $first) / $opt_p; $i++) { if ($opt_p == 1) { next if $i >= $pgcount; @pl = ($pglist[$i + $first]); $m = $i + 1; } elsif ($opt_p == 2) { @pl = ($pglist[$i + $first], $pglist[$last - $i - 1]); @pl = ($pl[1], $pl[0]) if ($i & 1); $m = ($first + $i + 1) . ", " . ($last - $i); } else # $opt_p == 4 { @pl = ($pglist[2 * $i + $first]); # do these in a special order with increasing difficulty of passing if() requirement push (@pl, $pglist[2 * $i + 1 + $first]) if (2 * $i + 1 + $first < $pgcount); push (@pl, $pglist[$last - 2 * $i - 2]) if ($last - 2 * $i - 2 < $pgcount); push (@pl, $pglist[$last - 2 * $i - 1]) if ($last - 2 * $i <= $pgcount); $m = (2 * $i + 1 + $first) . ", " . (2 * $i + 2 + $first) . ", " . ($last - 2 * $i - 1) . ", " . ($last - 2 * $i); } print "Merging " . $m . "\n" unless $opt_q; merge_pages(@pl); } } $p->append_file; sub proc_pages { my ($pgs) = @_; my ($pgref); foreach $pgref ($pgs->{'Kids'}->elementsof) { print STDERR "." unless $opt_q; $pgref->realise; if ($pgref->{'Type'}->val =~ m/^Pages$/oi) { proc_pages($pgref); } else { if ($rtl) { unshift (@pglist, $pgref); } else { push (@pglist, $pgref); } $pgref->{' pnum'} = $pgcount++; } } } sub merge_pages { my (@pr) = @_; my ($s, $j, $bp); $s = undef; for ($j = 0; $j <= $#pr; $j++) { my ($n, $is, $rl, $bt, $xs, $ys, $scale, $scalestr, $scalestrr, $id, $i); my (@slist, @s, $s1, $s2, $p2, $min, $k); next unless defined $pr[$j]; @prbox = (); @clipbox = (); foreach $n (($pr[$j]->find_prop('CropBox') || $pr[$j]->find_prop('MediaBox'))->elementsof) { push(@prbox, $n->val); } $is = 1; if ($opt_s =~ m/^(-?[0-9]+),(-?[0-9]+),(-?[0-9]+),(-?[0-9]+)/o) { @prbox = ($1, $2, $3, $4); } elsif ($opt_s =~ m/^([0-9])(.?)(.?)$/o) { $is = $1; $rl = lc($2); $bt = lc($3); # $rl = ($pr[$j]->{' pnum'} & 1) ? "l" : "r" unless ($rl =~ m/[rl]/o); $rl = ($j & 1) ? "l" : "r" unless ($rl =~ m/[rl]/o); if ($rl eq "r") { $prbox[1] = $prbox[3] - (($prbox[3] - $prbox[1]) / $is); } elsif ($rl eq "l") { $prbox[3] = $prbox[1] + (($prbox[3] - $prbox[1]) / $is); } if ($bt eq "t") { $prbox[2] = $prbox[0] + (($prbox[2] - $prbox[0]) * 2 / $is); } elsif ($is == 4) { $prbox[0] = $prbox[2] - (($prbox[2] - $prbox[0]) * 2 / $is); } elsif ($opt_s =~ m/b$/o) { @clipbox = ($prbox[0], $prbox[1], $prbox[2] - $prbox[0], $prbox[3] - $prbox[1]); } } elsif ($opt_s) { die "Illegal -s value of $opt_s"; } $id = join(',', @prbox) . ",$opt_p"; if (!defined $scache{$id}) { @slist = (); $xs = ($pbox[2] - $pbox[0]) / ($prbox[2] - $prbox[0]); # $ys = ($pbox[3] - $pbox[1]) / ($prbox[3] - $prbox[1]); $ys = ($pbox[3] - $pbox[1] - $opt_u*2) / ($prbox[3] - $prbox[1]); $scale = ($prbox[3] - $prbox[1]) / ($prbox[2] - $prbox[0]); $rot = (($opt_p == 1 || $opt_p == 4) && $is == 2) || ($opt_p == 2 && $is != 2); if ($opt_l) { if ($xs < ($opt_p == 2 ? .5 : 1) * ($opt_r ? $scale * $scale : 1) * $ys) { $ys = $xs / ($opt_r ? $scale * $scale : 1) / ($opt_p == 2 ? .5 : 1); } else { $xs = $ys * ($opt_r ? $scale * $scale : 1) * ($opt_p == 2 ? .5 : 1); } } if ($opt_p == 1 && $is != 2) # portrait to portrait { $slist[0] = cm($xs, 0, 0, $ys, # $pbox[0] - ($xs * $prbox[0]), $pbox[1] - ($ys * $prbox[1])); .5 * ($pbox[2] + $pbox[0] - $xs * ($prbox[2] + $prbox[0])), .5 * ($pbox[3] + $pbox[1] - $ys * ($prbox[3] + $prbox[1])), @clipbox); } elsif ($opt_p == 1) # landscape on portrait to portrait { $slist[0] = cm(0, -$scale * $ys, $xs / $scale, 0, $pbox[0] - $xs * $prbox[1] / $scale, $pbox[1] + $scale * $ys * $prbox[2], @clipbox); } elsif ($opt_p == 2 && $is != 2) # portrait source on portrait { @scalestr = (0, 0.5 * $ys * $scale, -$xs / $scale, 0, 0.5 * ($xs * ($prbox[3] + $prbox[1]) / $scale + $pbox[2])); $slist[0] = cm(@scalestr, # 0.5 * (-$ys * $scale * $prbox[0] + $pbox[1] + $pbox[3])); .25 * (3 * ($pbox[1] + $pbox[3]) - $ys * $scale * ($prbox[2] + $prbox[0])) + $opt_u*0.5, @clipbox); $slist[1] = cm(@scalestr, # -0.5 * $ys * $scale * $prbox[0] + $pbox[1]); .25 * (3 * $pbox[1] + $pbox[3] - $ys * $scale * ($prbox[2] + $prbox[0])) - $opt_u*0.5, @clipbox); } elsif ($opt_p == 2) # double page landscape on portrait { @scalestr = ($xs, 0, 0, 0.5 * $ys, .5 * ($pbox[2] - $xs * ($prbox[2] - $prbox[0]))); # -$xs * $prbox[0] + $pbox[0]); # $slist[0] = cm(@scalestr, 0.5 * (-$ys * $prbox[1] + $pbox[1] + $pbox[3]), @clipbox); # $slist[1] = cm(@scalestr, -0.5 * $ys * $prbox[1] + $pbox[1], @clipbox); $slist[0] = cm(@scalestr, .25 * (3 * $pbox[3] + $pbox[1] - $ys * ($prbox[3] - $prbox[1])) - $prbox[1], @clipbox); $slist[1] = cm(@scalestr, .25 * (3 * $pbox[1] + $pbox[3] - $ys * ($prbox[3] - $prbox[1])) - $prbox[1], @clipbox); } elsif ($opt_p == 4 && $is == 1) # true portrait { $a = .5 * $xs; $b = .5 * $ys; $slist[0] = cm($a, 0, 0, $b, -$a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), -$b * $prbox[1] + $pbox[1], @clipbox); $slist[2] = cm(-$a, 0, 0, -$b, $a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), $b * $prbox[1] + $pbox[3], @clipbox); if ($opt_p =~ /s/o) { $slist[1] = cm($a, 0, 0, $b, -$a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), -$b * $prbox[1] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); $slist[3] = cm(-$a, 0, 0, -$b, $a * $prbox[0] + 0.5 * ($pbox[0] + $pbox[2]), $b * $prbox[1] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); } else { $slist[1] = cm(-$a, 0, 0, -$b, $a * $prbox[0] + $pbox[2], $b * $prbox[1] + $pbox[3], @clipbox); $slist[3] = cm($a, 0, 0, $b, -$a * $prbox[0] + $pbox[0], -$b * $prbox[1] + $pbox[1], @clipbox); } } elsif ($opt_p == 4) { $a = .5 * $ys * $scale; $b = .5 * $xs / $scale; $slist[0] = cm(0, -$a, $b, 0, -$b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), $a * $prbox[2] + $pbox[1], @clipbox); $slist[2] = cm(0, $a, -$b, 0, $b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), -$a * $prbox[2] + $pbox[3], @clipbox); if ($opt_p =~ /s/o) { $slist[1] = cm(0, -$a, $b, 0, -$b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), $a * $prbox[2] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); $slist[3] = cm(0, $a, -$b, 0, $b * $prbox[1] + 0.5 * ($pbox[0] + $pbox[2]), -$a * $prbox[2] + 0.5 * ($pbox[1] + $pbox[3]), @clipbox); } else { $slist[1] = cm(0, $a, -$b, 0, $b * $prbox[1] + $pbox[2], -$a * $prbox[2] + $pbox[3], @clipbox); $slist[3] = cm(0, -$a, $b, 0, -$b * $prbox[1] + $pbox[0], $a * $prbox[2] + $pbox[1], @clipbox); } } $scache{$id} = [@slist]; } @s = $pr[$j]->{'Contents'}->elementsof if (defined $pr[$j]->{'Contents'}); if (!defined $s) { $min = 100000; for ($k = 0; $k <= $#pr; $k++) { next unless defined $pr[$k]; if ($pr[$k]->{' pnum'} < $min) { $bp = $pr[$k]; $min = $pr[$k]->{' pnum'}; } } $s = PDFArray(); $bp->{' Contents'} = $p->new_obj($s); $bp->{'Rotate'} = PDFNum(90) if ($opt_r); } next unless defined $pr[$j]; $s->add_elements($fpgins) unless $j == $#pr; $s->add_elements($scache{$id}[$j]) unless ($opt_h == 17 && $j == $#pr); $s->add_elements(@s); $s->add_elements($spgins) unless $j == $#pr; if ($pr[$j] ne $bp) { $pr2 = $pr[$j]; $s1 = $bp->find_prop('Resources'); $s2 = $pr2->find_prop('Resources'); $bp->{'Resources'} = merge_dict($s1, $s2) unless ($s1 eq $s2); $p->free_obj($pr2); while (defined $pr2->{'Parent'}) { $temp = $pr2->{'Parent'}; $temp->{'Kids'}->removeobj($pr2) if ($pr2->{'Type'}{'val'} eq 'Page' || $pr2->{'Kids'}->elementsof <= 0); $temp->{'Count'}{'val'}--; $pr2 = $temp; if ($pr2->{'Kids'}->elementsof <= 0) { print "Killing a tree! $pr2->{'num'}\n" unless $opt_q; $p->free_obj($pr2); } else { $p->out_obj($pr2); } } } } return 1 unless defined $bp; $bp->{'Contents'} = delete $bp->{' Contents'}; foreach (qw(Annots Thumb Beeds CropBox)) { delete $bp->{$_} if defined $bp->{$_}; } $bp->bbox(@pbox); $p->out_obj($bp); } sub merge_dict { my ($p1, $p2) = @_; return $p1 if (ref $p1 eq "Text::PDF::Objind" && ref $p2 eq "Text::PDF::Objind" && $p->{' objects'}{$p1->uid}[0] eq $p->{' objects'}{$p2->uid}[0]); # $p1 = $p->read_obj($p1) if (ref $p1 eq "Text::PDF::Objind"); # $p2 = $p->read_obj($p2) if (ref $p2 eq "Text::PDF::Objind"); $p1->realise; $p2->realise; my ($k, $v1, $v2); my (@a1, @a2, %a1); if ($p1->{' isvisited'}) { warn "circular reference!"; return $p1; } $p1->{' isvisited'} = 1; $p2->{' isvisited'} = 1; foreach $k (keys %{$p1}) { next if ($k =~ m/^\s/oi); $v1 = $p1->{$k}; $v2 = $p2->{$k}; next if $v1 eq $v2 || !defined $v2; # !defined added v1.201 if (ref $v1 eq "Text::PDF::Objind" || ref $v2 eq "Text::PDF::Objind") { $v1->realise if (ref $v1 eq "Text::PDF::Objind"); $v2->realise if (ref $v2 eq "Text::PDF::Objind"); } next unless defined $v2; # assume $v1 & $v2 are of the same type if (ref $v1 eq "Text::PDF::Array") { # merge contents of array uniquely (the array is a set) @a1 = $v1->elementsof; @a2 = $v2->elementsof; map { $a1{$_} = 1; } @a1; push (@a1, grep (!$a1{$_}, @a2)); $p1->{$k} = PDFArray(@a1); } elsif (ref $v1 eq "Text::PDF::Dict") # { $p1->{$k} = merge_dict($v1->val, $v2->val); } { $p1->{$k} = merge_dict($v1, $v2); } elsif ($v1->val ne $v2->val) { warn "Inconsistent dictionaries at $k with " . $v1->val . " and " . $v2->val; $p1->{$k} = $v1; } } foreach $k (grep (!defined $p1->{$_} && $_ !~ m/^\s/oi, keys %{$p2})) { $p1->{$k} = $p2->{$k}; } $p->out_obj($p1) if $p1->is_obj($p); delete $p1->{' isvisited'}; delete $p2->{' isvisited'}; return $p1; } sub cm { my (@a) = @_; my ($res, $r, $str); foreach $r (@a) { $r = int($r) if (abs($r - int($r)) < 1e-6); } $str = "$a[6] $a[7] $a[8] $a[9] re W n" if (defined $a[6]); return undef if ($a[0] == 1 && $a[1] == 0 && $a[2] == 0 && $a[3] == 1 && $a[4] == 0 && $a[5] == 0 && $str eq ''); $res = PDFDict(); $p->new_obj($res); $res->{' stream'} = "$a[0] $a[1] $a[2] $a[3] $a[4] $a[5] cm $str"; $res; } sub copy_page { my ($page) = @_; return undef unless $page; my ($res) = $page->copy; $p->new_obj($res); $res; } Text-PDF-0.31/Changes0000644000175000017500000000255012757430742012706 0ustar bobhbobh0.31 2016-08-24 * Sort PDF dictionary keys for consistent output * Bug fixes * TTF Font objects had null BaseFont (broken at commit afd5b9a) * [rt.cpan.org] #110854: Fix spelling error in manpage 0.30 2016-08-17 * Source repo moved from Subversion to Github * Add -p to pdfstamp * Add -g, -p, -u to pdfbklt * Bug fixes * Wasn't installing on Windows Perl 5.22 and up * [rt.cpan.org] #116492: make install fails under Strawberry Perl * [rt.cpan.org] #110855: Fix pod2man errors * [rt.cpan.org] #86452: Parse error reading array * [rt.cpan.org] #78351: Invalid version format (non-numeric data) * [rt.cpan.org] #41085: ASCII85 decode broken * [rt.cpan.org] #35871: lzw compression fixup * [rt.cpan.org] #32210: pdfstamp patch for use strict * [rt.cpan.org] #31353: adding first test * fix Dict::read_stream() to write long streams to file as intended * fix LZWDecode to handle streams longer than 4096 bytes * close INFILE at start of release() * Fix Dict to allow single stream filters rather than requiring an array 0.29a 2006-09-07 * Bug fixes * [rt.cpan.org] #18574: "make install" doesn't install code 0.29 2006-06-23 * Add Text::PDF.pm; take version from this file rather than from Text::PDF::File.pm * Add debian packaging instructions Some previous changes documented in lib/Text/PDF/File.pm and lib/Text/PDF/changes.old Text-PDF-0.31/Makefile.PL0000755000175000017500000000377512750677544013410 0ustar bobhbobhuse ExtUtils::MakeMaker; use Getopt::Std; getopts('d:rv:'); $opt_v ||= 1; @scripts = grep {-f } glob("scripts/*"); # incantation to enable MY::pm_to_blib later on if ($^O eq 'MSWin32') { @extras = (dist => { 'TO_UNIX' => 'perl -Mtounix -e "tounix(\"$(DISTVNAME)\")"' }); } if ($^O eq 'linux') { *MY::postamble = sub { my ($self) = @_; my ($res); my ($package) = lc($self->{'NAME'}); my ($pversion) = $self->{'VERSION'}; my ($sha) = `git rev-parse --short HEAD`; my ($sign) = '--auto-debsign' if ($opt_r); my ($fpackage); $sha =~ s/\s//g; $package =~ s/::/-/g; $package = "lib${package}-perl"; $pversion .= "+$sha" unless ($opt_r); $fpackage = "$package-$pversion"; $res = <<"EOT"; deb-base: dist rm -fr $self->{'DISTVNAME'} rm -fr $fpackage tar xvzf $self->{'DISTVNAME'}.tar.gz mv $self->{'DISTVNAME'} $fpackage tar cfz "${package}_$pversion.orig.tar.gz" $fpackage cp -a debian $fpackage cd $fpackage && find . -name .svn | xargs rm -rf # make deb builds an interim deb from svn source for release deb: deb-base EOT foreach $d (split(' ', $opt_d)) { $res .= <<"EOT"; mkdir -p dists/$d dch -D $d -v $pversion-$opt_v -m -b -c $fpackage/debian/changelog "Auto build from perl for $d" cd $fpackage && pdebuild --buildresult ../dists/$d -- --basetgz /var/cache/pbuilder/base-$d.tgz $pbuilderopts{$d} EOT } return $res; } } WriteMakefile ( NAME => "Text::PDF", VERSION_FROM => "lib/Text/PDF.pm", # VERSION => "0.30", EXE_FILES => \@scripts, AUTHOR => "martin_hosken\@sil.org", ABSTRACT => "PDF Manipulation and generation", PREREQ_PM => {'Compress::Zlib' => 0}, META_MERGE => { 'meta-spec' => { version => 2 }, resources => { repository => { type => 'git', url => 'https://github.com/silnrsi/text-pdf.git', web => 'https://github.com/silnrsi/text-pdf', } } }, @extras ); Text-PDF-0.31/LICENSE0000644000175000017500000002070612752511217012413 0ustar bobhbobh Artistic License 2.0 Copyright (c) 2000-2006, The Perl Foundation. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble This license establishes the terms under which a given free software Package may be copied, modified, distributed, and/or redistributed. The intent is that the Copyright Holder maintains some artistic control over the development of that Package while still keeping the Package available as open source and free software. You are always permitted to make arrangements wholly outside of this license directly with the Copyright Holder of a given Package. If the terms of this license do not permit the full use that you propose to make of the Package, you should contact the Copyright Holder and seek a different licensing arrangement. Definitions "Copyright Holder" means the individual(s) or organization(s) named in the copyright notice for the entire Package. "Contributor" means any party that has contributed code or other material to the Package, in accordance with the Copyright Holder's procedures. "You" and "your" means any person who would like to copy, distribute, or modify the Package. "Package" means the collection of files distributed by the Copyright Holder, and derivatives of that collection and/or of those files. A given Package may consist of either the Standard Version, or a Modified Version. "Distribute" means providing a copy of the Package or making it accessible to anyone else, or in the case of a company or organization, to others outside of your company or organization. "Distributor Fee" means any fee that you charge for Distributing this Package or providing support for this Package to another party. It does not mean licensing fees. "Standard Version" refers to the Package if it has not been modified, or has been modified only in ways explicitly requested by the Copyright Holder. "Modified Version" means the Package, if it has been changed, and such changes were not explicitly requested by the Copyright Holder. "Original License" means this Artistic License as Distributed with the Standard Version of the Package, in its current version or as it may be modified by The Perl Foundation in the future. "Source" form means the source code, documentation source, and configuration files for the Package. "Compiled" form means the compiled bytecode, object code, binary, or any other form resulting from mechanical transformation or translation of the Source form. Permission for Use and Modification Without Distribution (1) You are permitted to use the Standard Version and create and use Modified Versions for any purpose without restriction, provided that you do not Distribute the Modified Version. Permissions for Redistribution of the Standard Version (2) You may Distribute verbatim copies of the Source form of the Standard Version of this Package in any medium without restriction, either gratis or for a Distributor Fee, provided that you duplicate all of the original copyright notices and associated disclaimers. At your discretion, such verbatim copies may or may not include a Compiled form of the Package. (3) You may apply any bug fixes, portability changes, and other modifications made available from the Copyright Holder. The resulting Package will still be considered the Standard Version, and as such will be subject to the Original License. Distribution of Modified Versions of the Package as Source (4) You may Distribute your Modified Version as Source (either gratis or for a Distributor Fee, and with or without a Compiled form of the Modified Version) provided that you clearly document how it differs from the Standard Version, including, but not limited to, documenting any non-standard features, executables, or modules, and provided that you do at least ONE of the following: (a) make the Modified Version available to the Copyright Holder of the Standard Version, under the Original License, so that the Copyright Holder may include your modifications in the Standard Version. (b) ensure that installation of your Modified Version does not prevent the user installing or running the Standard Version. In addition, the Modified Version must bear a name that is different from the name of the Standard Version. (c) allow anyone who receives a copy of the Modified Version to make the Source form of the Modified Version available to others under (i) the Original License or (ii) a license that permits the licensee to freely copy, modify and redistribute the Modified Version using the same licensing terms that apply to the copy that the licensee received, and requires that the Source form of the Modified Version, and of any works derived from it, be made freely available in that license fees are prohibited but Distributor Fees are allowed. Distribution of Compiled Forms of the Standard Version or Modified Versions without the Source (5) You may Distribute Compiled forms of the Standard Version without the Source, provided that you include complete instructions on how to get the Source of the Standard Version. Such instructions must be valid at the time of your distribution. If these instructions, at any time while you are carrying out such distribution, become invalid, you must provide new instructions on demand or cease further distribution. If you provide valid instructions or cease distribution within thirty days after you become aware that the instructions are invalid, then you do not forfeit any of your rights under this license. (6) You may Distribute a Modified Version in Compiled form without the Source, provided that you comply with Section 4 with respect to the Source of the Modified Version. Aggregating or Linking the Package (7) You may aggregate the Package (either the Standard Version or Modified Version) with other packages and Distribute the resulting aggregation provided that you do not charge a licensing fee for the Package. Distributor Fees are permitted, and licensing fees for other components in the aggregation are permitted. The terms of this license apply to the use and Distribution of the Standard or Modified Versions as included in the aggregation. (8) You are permitted to link Modified and Standard Versions with other works, to embed the Package in a larger work of your own, or to build stand-alone binary or bytecode versions of applications that include the Package, and Distribute the result without restriction, provided the result does not expose a direct interface to the Package. Items That are Not Considered Part of a Modified Version (9) Works (including, but not limited to, modules and scripts) that merely extend or make use of the Package, do not, by themselves, cause the Package to be a Modified Version. In addition, such works are not considered parts of the Package itself, and are not subject to the terms of this license. General Provisions (10) Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license. (11) If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license. (12) This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder. (13) This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic License to you shall terminate on the date that such litigation is filed. (14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Text-PDF-0.31/META.json0000664000175000017500000000134012757431131013023 0ustar bobhbobh{ "abstract" : "PDF Manipulation and generation", "author" : [ "martin_hosken@sil.org" ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921", "license" : [ "unknown" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Text-PDF", "no_index" : { "directory" : [ "t", "inc" ] }, "release_status" : "stable", "resources" : { "repository" : { "type" : "git", "url" : "https://github.com/silnrsi/text-pdf.git", "web" : "https://github.com/silnrsi/text-pdf" } }, "version" : "0.31" } Text-PDF-0.31/examples/0000755000175000017500000000000012757431131013220 5ustar bobhbobhText-PDF-0.31/examples/squ.pdf0000755000175000017500000000131012754625441014527 0ustar bobhbobh%PDF-1.2 %쏢 1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [ 0 0 595 840 ] /Count 1 /Resources << /ProcSet [ /PDF ] >> /Kids [ 3 0 R ] >> endobj 3 0 obj << /Type /Page /Contents [ 4 0 R ] /Parent 2 0 R >> endobj 4 0 obj << /Length 5 0 R /Filter [ /FlateDecode ] >> stream xe 1 D{pl6sڏI!|xA[)R.SDUVMj}YlSd[l[dۊ-2iyv"$,nfEf rCiNA[4víPd+*wv-f-(2-V-( 2e endstream endobj 5 0 obj 182 endobj xref 0 6 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000184 00000 n 0000000251 00000 n 0000000510 00000 n trailer << /Size 6 /Root 1 0 R >> startxref 529 %%EOF Text-PDF-0.31/examples/pdfgetobj.pl0000755000175000017500000000164112754625441015534 0ustar bobhbobhuse strict; use Text::PDF::File; use Getopt::Std; our ($opt_g, $opt_n, $opt_o); getopt("g:n:o:"); unless (defined $ARGV[0]) { die <<'EOT'; PDFGETOBJ [-g gen] -n num [-o outfile] pdffile Gets the given object from the pdf file and unpacks it to either stdout or outfile. -g gen Generation number [0] -n num Object number -o outfile Output file EOT } my ($file, $offset, $res, $str); $file = Text::PDF::File->open("$ARGV[0]") || die "Unable to open $ARGV[0]"; $offset = $file->locate_obj($opt_n, $opt_g) || die "Can't find obj $opt_n $opt_g"; seek($file->{' INFILE'}, $offset, 0); ($res, $str) = $file->readval(""); if (defined $opt_o) { open(OUTFILE, ">$opt_o") || die "Unable to open $opt_o"; binmode OUTFILE; select OUTFILE; } if (defined $res->{' stream'}) { print $res->read_stream(1)->{' stream'}; } else { print $res->val; } close(OUTFILE) if defined $opt_o; Text-PDF-0.31/examples/pdflines.pl0000755000175000017500000000637612754625441015406 0ustar bobhbobh# # An example program which creates graph paper. Very simple, but shows the basics # page creation, etc. use Text::PDF::File; use Text::PDF::Page; use Text::PDF::Utils; use IO::File; use Getopt::Std; getopts('c:m:n:p:s:'); unless(defined $ARGV[0] && $opt_c) { die <<'EOT'; GRAPH [-M left,bottom,right,top] [-n num] [-p num] [-s size] -c config.dat outfile Generates graph paper as a PDF file to outfile. -c data file Configuration file -M num,num,num,num Margins in points [56,56,56,56] -n num Number of line blocks to fit or 0 for no flexibility -p num Only generate page number num -s size either one of (A4,ltr,lgl,A3,A5) or width,height Config file: \width pts Height in points of one line block \line pts [string] relative to bottom of block, PDF string (optional) \line ... EOT } %sizes = ( 'a3' => [840, 1190], 'a4' => [595, 840], 'a5' => [420, 595], 'ltr' => [612, 792], 'lgl' => [792, 1008], 'a3l' => [1190, 840], 'a4l' => [840, 595], 'a5l' => [595, 420], 'ltrl' => [792, 612], 'lgll' => [1008, 792] ); $opt_m = "56,56,56,56" unless $opt_m; $opt_s = 'A4' unless $opt_s; process($opt_c) || die "Can't process $opt_c"; if (defined $sizes{lc($opt_s)}) { @opt_s = @{$sizes{lc($opt_s)}}; } else { @opt_s = split(/,\s*/, $opt_s); } @opt_m = split(/,\s*/, $opt_m); $pdf = Text::PDF::File->new; $root = Text::PDF::Pages->new($pdf); $root->proc_set("PDF"); $root->bbox(0, 0, @opt_s); # Now pretend to make a simple font: # $font = Text::PDF::SFont->new($pdf, 'Helvetica', 'F0'); # $root->add_font($font); # Use same principle for other fonts. Could use $page->add_font($font) just as well. # OK Now put something on this exciting page! $height = $opt_s[1] - $opt_m[1] - $opt_m[3]; $farr = $opt_s[0] - $opt_m[2]; if (defined $opt_p) { $first = $opt_p; $last = $opt_p; } else { $first = 0; $last = scalar @widths; } for ($pcount = $first; $pcount < $last; $pcount++) { $page = Text::PDF::Page->new($pdf, $root); $width = $widths[$pcount]; if ($opt_n eq '0') { $gap = 0; } else { $opt_n = int($height / $width) unless $opt_n; $gap = ($height - $opt_n * $width) / ($opt_n - 1); } for ($y = $opt_m[1]; $y <= $height + $opt_m[3]; $y += $width + $gap) { foreach $l (@{$lines[$pcount]}) { $offy = $l->[0] + $y; $page->add(sprintf("%s %d %.2f m %d %.2f l S\n", $l->[1], $opt_m[0], $offy, $farr, $offy)); } } # $page->{' curstrm'}{'Filter'} = PDFArray(PDFName('FlateDecode')); } # Only now that something has been added can we mess with the content stream $pdf->out_file($ARGV[0]); sub process { my ($fname) = @_; my ($fh) = IO::File->new("< $fname") || return undef; my ($width, $pcount); $pcount = -1; while (<$fh>) { if (m/^\\width\s+([0-9.]+)/o) { $pcount++; $widths[$pcount] = $1; } elsif (m/^\\line\s+([0-9.]+)(?:\s+(.*?)\s*$)?/o) { my ($pos) = $1; my ($str) = $2 || '[] 0 d .25 w 0 g'; push (@{$lines[$pcount]}, [$pos, $str]); } } 1; } Text-PDF-0.31/examples/pdfaddpg.pl0000755000175000017500000000272012754625441015340 0ustar bobhbobh use Text::PDF::File; use Text::PDF::Utils; use Text::PDF::Page; use Getopt::Std; $version = "1.002"; # MJPH 10-DEC-1999 Fix page counts for pages objs. # $version = "1.001"; # MJPH 30-NOV-1999 Original getopts("b:h:p:qrs:"); if (!defined $ARGV[0]) { die <<"EOT"; PDFADDPG [-b num/size] [-p num] [-q] pdffile (c) M. Hosken. Version: $version Inserts a blank page of given or calculated size after the given page. The new information is appended to pdffile and can be reverted. -b num/size Specifies which page contains the output page size details or gives the dimensions of the page in pts (x,y). [inherited or 1] -p num Specifies the page number after which to insert [last page] -q Quiet (no on screen messages) EOT } $p = Text::PDF::File->open($ARGV[0], 1); # open file for appending $r = $p->read_obj($p->{'Root'}); # read the page root $pgs = $p->read_obj($r->{'Pages'}); # Get the pages tree $pgcount = $pgs->{'Count'}->val; # how many pages if ($opt_b =~ m/^([0-9]+)\;([0-9]+)/oi) # parse $opt_b making @pbox { @pbox = (0, 0, $1, $2); $opt_b = 0; } else { $opt_b = -1 unless defined $opt_b; } $newpage = Text::PDF::Page->new($p, $pgs, $opt_p); # now set the page's bounding box if it needs setting if ($opt_b != -1 || $newpage->find_prop('MediaBox') eq "") { $newpage->bbox(@pbox); } $p->append_file; # update appended file Text-PDF-0.31/examples/hello.pl0000755000175000017500000000220412754625441014667 0ustar bobhbobh# # An example program which prints Hello World! on a page # use Text::PDF::File; use Text::PDF::Page; # pulls in Pages use Text::PDF::Utils; # not strictly needed use Text::PDF::SFont; $pdf = Text::PDF::File->new; # Make up a new document $root = Text::PDF::Pages->new($pdf); # Make a page tree in the document $root->proc_set("PDF", "Text"); # Say that all pages have PDF and Text instructions $root->bbox(0, 0, 595, 840); # hardwired page size A4 (for this app.) for all pages $page = Text::PDF::Page->new($pdf, $root); # Make a new page in the tree $font = Text::PDF::SFont->new($pdf, 'Helvetica', 'F0'); # Make a new font in the document $root->add_font($font); # Tell all pages about the font $page->add("BT 1 0 0 1 250 600 Tm /F0 14 Tf (Hello World!) Tj ET"); # put some content on the page # $page->add(" BT 1 0 0 1 250 700 Tm /F0 14 Tf (Hello World line two!) Tj ET"); # $page->{' curstrm'}{'Filter'} = PDFArray(PDFName('FlateDecode')); # compress the page content $pdf->out_file($ARGV[0]); # output the document to a file # all done! Text-PDF-0.31/examples/CD.CFG0000755000175000017500000000016012754625441014035 0ustar bobhbobhlength = 20 thickness = .5 40, 66 w 40, 776 w 341, 66 sw 341, 776 nw 341, 421 nse Text-PDF-0.31/examples/squares.pdf0000755000175000017500000000203712754625441015411 0ustar bobhbobh%PDF-1.2 %쏢 1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [ 0 0 595 840 ] /Count 1 /Resources << /ProcSet [ /PDF ] >> /Kids [ 3 0 R ] >> endobj 3 0 obj << /Type /Page /Contents [ 4 0 R ] /Parent 2 0 R >> endobj 4 0 obj << /Length 5 0 R /Filter [ /FlateDecode ] >> stream xei` Iv)`>o𯁫+>⃳_o7_}˼.X]ϱxs,T܎Qle17:+EdrK˹:v:v:fjcn----W\rw--v:v:fa1Fk֚[krEۅEۅEۅEۅEۅEۅEۅEۅjۮ]ov^ysy},σy\.u:CwJ]c $[oUoݳ(V؎nklg56떺R؂-!-ɖ-lgKQޭRج[Z#[F&rtMv!vA]]..hb څB:uubiŮӮ]]N.vv]: b7h7nnݠA!vvCMMb7i7nnݤI)v3_7? endstream endobj 5 0 obj 525 endobj xref 0 6 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000184 00000 n 0000000251 00000 n 0000000853 00000 n trailer << /Size 6 /Root 1 0 R >> startxref 872 %%EOF Text-PDF-0.31/examples/call_conf.txt0000755000175000017500000000010012755223124015672 0ustar bobhbobh\width 60 \line 0 \line 18 [2] 0 d \line 29.3 [2] 0 d \line 47.3Text-PDF-0.31/examples/boon_graph.pdf0000755000175000017500000000250212754625441016041 0ustar bobhbobh%PDF-1.2 %쏢 1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [ 0 0 595 840 ] /Count 1 /Resources << /ProcSet [ /PDF ] >> /Kids [ 3 0 R ] >> endobj 3 0 obj << /Type /Page /Contents [ 4 0 R ] /Parent 2 0 R >> endobj 4 0 obj << /Length 5 0 R /Filter [ /FlateDecode ] >> stream xemH b"XH"6KP!)dSSHd!)mDe RPGӡ \BRRJJUQ R% JJJ5QM*1U0Ubj`jGa0j؆ARR#M܆AMFJ J-Q#ԠHAMvDRR'NJJ]QԉSSaIPԡICMGhIPSi@iҀҐ#@4 ( RPSi@ix_\F@iҀ q7A'ii$ )MGhΓ44IiBi҄#A4$M(ͩ4sS?{@? endstream endobj 5 0 obj 815 endobj xref 0 6 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000184 00000 n 0000000251 00000 n 0000001143 00000 n trailer << /Size 6 /Root 1 0 R >> startxref 1162 %%EOF Text-PDF-0.31/examples/pdfaddobj.pl0000755000175000017500000000165112754625441015506 0ustar bobhbobhuse Text::PDF::File; use Getopt::Std; getopts("g:m:n:"); unless(defined $opt_n && defined $ARGV[1]) { die <<'EOT'; pdfaddobj [-g gen] [-m num] -n num pdf_file data_file Adds the given file as object number given by -n to pdf_file. -g gen Generation number of -n to insert -m num Font hack. Lookup object -m and add a reference to -n as FontFile2 in that dictionary -n num Object number to insert/replace as EOT } $f = Text::PDF::File->open($ARGV[0], 1) || "Can't open $ARGV[0]"; $res = $f->read_objnum($opt_n, $opt_g); open(INFILE, $ARGV[1]) || die "Can't read $ARGV[1]"; binmode(INFILE); $res->{' stream'} = ""; while (read(INFILE, $dat, 4096)) { $res->{' stream'} .= $dat; } delete $res->{' nofilt'}; $res->{'Length1'} $f->out_obj($res); if (defined $opt_m) { $mres = $f->read_objnum($opt_m, 0); $mres->{'FontFile2'} = $res; $f->out_obj($mres); } $f->append_file; Text-PDF-0.31/examples/graph.pl0000755000175000017500000000413312754625441014670 0ustar bobhbobh# # An example program which creates graph paper. Very simple, but shows the basics # page creation, etc. use Text::PDF::File; use Text::PDF::Page; use Text::PDF::Utils; use Getopt::Std; getopts('d:g:p:s:'); unless(defined $ARGV[0]) { die <<'EOT'; GRAPH [-d size] [-g num] [-p num] [-s num] outfile Generates graph paper as a PDF file to outfile. -d size grid size in pts [8] -g percent percentage black [100] -p num primary (thick) lines every num lines [10] -s num secondary (somewhat thick) lines every num lines [5] EOT } $opt_d = 8 unless $opt_d; $opt_g = 100 unless $opt_g; $opt_g = 1. - $opt_g / 100.; $opt_p = 10 unless defined $opt_p; $opt_s = 5 unless defined $opt_s; $pdf = Text::PDF::File->new; $root = Text::PDF::Pages->new($pdf); $root->proc_set("PDF"); $root->bbox(0, 0, 595, 840); # hardwired page size A4 (for this app.) $page = Text::PDF::Page->new($pdf, $root); # Now pretend to make a simple font: # $font = Text::PDF::SFont->new($pdf, 'Helvetica', 'F0'); # $root->add_font($font); # Use same principle for other fonts. Could use $page->add_font($font) just as well. # OK Now put something on this exciting page! # assume 58 pt margin $max_x = int(479 / $opt_d) * $opt_d + 58; $max_y = int(724 / $opt_d) * $opt_d + 58; $page->add("$opt_g G "); $i = 0; $curx = 58; while ($curx <= 537) { if ($opt_p and $i % $opt_p == 0) { $width = 1; } elsif ($opt_s and $i % $opt_s == 0) { $width = .5; } else { $width = .25; } # No fancy interface for drawing. You create your own PDF code! $page->add("$width w $curx 58 m $curx $max_y l S\n"); $curx += $opt_d; $i++; } $i = 0; $cury = 58; while ($cury <= 782) { if ($opt_p and $i % $opt_p == 0) { $width = 1; } elsif ($opt_s and $i % $opt_s == 0) { $width = .5; } else { $width = .25; } $page->add("$width w 58 $cury m $max_x $cury l S\n"); $cury += $opt_d; $i++; } # Only now that something has been added can we mess with the content stream $page->{' curstrm'}{'Filter'} = PDFArray(PDFName('FlateDecode')); $pdf->out_file($ARGV[0]); Text-PDF-0.31/examples/pdfcrop.pl0000755000175000017500000000601512754625441015225 0ustar bobhbobhuse Text::PDF::File; use Text::PDF::Utils; use IO::File; use Getopt::Std; getopts('c:l:p:t:'); unless (-f $ARGV[0] && $opt_c) { die <<'EOT'; pdfcrop -c config.dat [-l length] [-t thickness] [-p num[,num...]] file Adds crop marks to a PDF file as specified in the configuartion file. Default values of arm length and thickness can be overridden on the command line and crop marks only added to certain pages. -c config.dat Configuration file [required] -l length Arm length in pts [default from config file] -p pagelist List of page numbers, comma separated -t thickness Arm width in pts [default from config file] The config file takes the following format: length = 36 thickness = .5 ;# or anything unrecognised is a comment 100, 100 sw 485, 100 se 765, 100 nw 765, 485 ne The n, s, e, w indicate which arms to display for each crop mark. Locations are in points. EOT } @opt_p = split(/\D\s*/, $opt_p) if ($opt_p); $content = make_content($opt_c, $opt_l, $opt_t); $pdf = Text::PDF::File->open($ARGV[0], 1); $root = $pdf->{'Root'}->realise; $pgs = $root->{'Pages'}->realise; $stream = PDFDict(); $stream->{' stream'} = $content; $stream->{'Filter'} = PDFArray(PDFName('FlateDecode')); $pdf->new_obj($stream); @pglist = proc_pages($pdf, $pgs); $j = 0; for ($i = 0; $i <= $#pglist; $i++) { next unless ($i == $opt_p[$j] || !defined $opt_p); $j++; $p = $pglist[$i]; $p->{'Contents'} = PDFArray($stream, $p->{'Contents'}->elementsof); $pdf->out_obj($p); } $pdf->close_file; sub proc_pages { my ($pdf, $pgs) = @_; my ($pg, $pgref, @pglist); foreach $pgref ($pgs->{'Kids'}->elementsof) { $pg = $pdf->read_obj($pgref); if ($pg->{'Type'}->val =~ m/^Pages$/oi) { push(@pglist, proc_pages($pdf, $pg)); } else { $pgref->{' pnum'} = $pcount++; push (@pglist, $pgref); } } (@pglist); } sub make_content { my ($config, $length, $thick) = @_; my ($fh) = IO::File->new("< $config") || die "Can't open $config"; my ($res); while (<$fh>) { if (m/^\s*length\s*=\s*([0-9.]+)/oi) { $length = $1 unless defined $length; } elsif (m/^\s*thickness\s*=\s*([0-9.]+)/oi) { $thick = $1 unless defined $thick; } if (s/\s*([0-9.]+)\s*,\s*([0-9.]+)\s+//o) { my ($x, $y) = ($1, $2); while (s/^([nsewtblr])\s*//oi) { my ($l, $t, $d) = ($length, $thick, lc($1)); my ($xn, $yn) = ($x, $y); # insert code for arm properties here if ($d eq 'n' || $d eq 't') { $yn += $l; } elsif ($d eq 's' || $d eq 'b') { $yn -= $l; } elsif ($d eq 'e' || $d eq 'r') { $xn += $l; } elsif ($d eq 'w' || $d eq 'l') { $xn -= $l; } $res .= "$t w $x $y m $xn $yn l S\n"; } } } $res; } __END__ :endofperl Text-PDF-0.31/examples/test.pdf0000755000175000017500000000130312754625441014700 0ustar bobhbobh%PDF-1.2 %쏢 1 0 obj << /Type /Catalog /Pages 2 0 R >> endobj 2 0 obj << /Type /Pages /MediaBox [ 0 0 595 840 ] /Count 1 /Resources << /ProcSet [ /PDF /Text ] /Font << /F0 4 0 R >> >> /Kids [ 3 0 R ] >> endobj 3 0 obj << /Type /Page /Contents [ 5 0 R ] /Parent 2 0 R >> endobj 4 0 obj << /Type /Font /Subtype /Type1 /Name /F0 /BaseFont /Helvetica >> endobj 5 0 obj << /Length 115 >> stream BT 1 0 0 1 250 600 Tm /F0 14 Tf (Hello World!) Tj ET BT 1 0 0 1 250 700 Tm /F0 14 Tf (Hello World line two!) Tj ET endstream endobj xref 0 6 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000212 00000 n 0000000279 00000 n 0000000359 00000 n trailer << /Size 6 /Root 1 0 R >> startxref 524 %%EOF Text-PDF-0.31/META.yml0000664000175000017500000000070112757431131012653 0ustar bobhbobh--- abstract: 'PDF Manipulation and generation' author: - martin_hosken@sil.org build_requires: {} dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.120921' license: unknown meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: Text-PDF no_index: directory: - t - inc resources: repository: https://github.com/silnrsi/text-pdf.git version: 0.31 Text-PDF-0.31/MANIFEST.SKIP0000755000175000017500000000026312752511217013303 0ustar bobhbobh\.\$\$\$ \.tmp \.bak CVS/ \.tar \.tgz \.old test/ misc/ \.cvsignore Makefile$ \.zip blib/ (^|/)\.git \.svn/ ~$ Makefile$ pm_to_blib debian/ \.tar\.gz$ \.par$ -stamp$ ^MYMETA\..*$