class Origami::PDF

Main class representing a PDF file and its inner contents. A PDF file contains a set of Revision.

Constants

AuthEvent
BaseVersion
CF
CFM
Catalog
Cert
ContactInfo
Count
Data

Class representing the Catalog Dictionary of a PDF file.

EncryptMetadata
ExtensionLevel
ID
Length
Location
Metadata
OpenAction
P
Pages
Parent
Prev
R
Reason
Rect
Reference
Root
SigFlags
Size
StmF
TransformParams
UR3
V
W
WC
WP
XFA
XRefStm

Attributes

header[RW]
revisions[RW]

Public Class Methods

create(output, options = {}) { |pdf| ... } click to toggle source

Creates a new PDF and saves it. If a block is passed, the PDF instance can be processed before saving.

# File lib/origami/pdf.rb, line 130
# Builds a new PDF, lets the caller's block (if any) customize it, then saves
# it.
#
# output::  forwarded to PDF#save as the destination.
# options:: forwarded unchanged to PDF#save.
#
# Returns the value of PDF#save.
def create(output, options = {})
  document = PDF.new
  yield document if block_given?
  document.save(output, options)
end
Also aliased as: write
deserialize(filename) click to toggle source

Deserializes a PDF dump.

# File lib/origami/pdf.rb, line 140
# Restores an object from a gzip-compressed Marshal dump.
#
# filename:: path to the gzip-compressed dump.
#
# Returns the object produced by Marshal.load.
#
# SECURITY: Marshal.load can instantiate arbitrary objects; only use this on
# dumps that come from a trusted source.
def deserialize(filename)
  # GzipReader.open returns the value of its block. The previous version
  # assigned the result to a block-local variable and then referenced it
  # outside the block, which raised NameError.
  Zlib::GzipReader.open(filename) { |gz| Marshal.load(gz.read) }
end
new(parser = nil) click to toggle source

Creates a new PDF instance.

parser

The Parser object creating the document. If none is specified, some default structures are automatically created to get a minimal working document.

# File lib/origami/pdf.rb, line 153
# Sets up a new PDF instance with a header and one initial revision.
#
# parser:: the parser creating the document. When it is nil/false, +init+ is
#          called instead of storing a parser.
def initialize(parser = nil)
  @header = PDF::Header.new
  @revisions = []

  # Start the first revision, then give it a brand new trailer.
  add_new_revision
  @revisions.first.trailer = Trailer.new

  return init unless parser

  @parser = parser
end
read(filename, options = {}) click to toggle source

Reads and parses a PDF file from disk.

# File lib/origami/pdf.rb, line 121
# Parses a PDF with a PDF::LinearParser.
#
# filename:: the source to parse; String values are expanded to an absolute
#            path first, anything else is handed to the parser untouched.
# options::  passed to PDF::LinearParser.new.
#
# Returns the result of the parser's +parse+ call.
def read(filename, options = {})
  source = filename.is_a?(::String) ? File.expand_path(filename) : filename
  linear_parser = PDF::LinearParser.new(options)
  linear_parser.parse(source)
end
write(output, options = {})
Alias for: create

Public Instance Methods

<<(object) click to toggle source

Adds a new object to the PDF file. If this object has no object number, a new one is automatically computed and assigned to it. Returns a Reference to this object.

object

The object to add.

# File lib/origami/pdf.rb, line 449
# Inserts _object_ into this document.
#
# object:: the object to add.
#
# An object already owned by a different PDF is imported; any other object is
# added to the last revision. Returns the result of +import+ or
# +add_to_revision+ respectively.
def <<(object)
  current_owner = object.pdf
  belongs_elsewhere = current_owner && !current_owner.equal?(self)

  return import(object) if belongs_elsewhere

  add_to_revision(object, @revisions.last)
end
Also aliased as: insert
Catalog() click to toggle source

Returns the current Catalog Dictionary.

# File lib/origami/catalog.rb, line 46
# Returns the document Catalog, read from the :Root document attribute.
#
# A Catalog is returned as is. A plain Dictionary is wrapped in a new Catalog
# that takes over the dictionary's object number and generation, is marked
# indirect and attached to this document.
#
# Raises InvalidPDFError when :Root is neither a Catalog nor a Dictionary.
def Catalog
  root = get_doc_attr(:Root)

  return root if root.is_a?(Catalog)
  raise InvalidPDFError, "Broken catalog" unless root.is_a?(Dictionary)

  promoted = Catalog.new(root)
  promoted.no, promoted.generation = root.no, root.generation
  promoted.set_indirect(true)
  promoted.set_pdf(self)
  promoted
end
Catalog=(cat) click to toggle source

Sets the current Catalog Dictionary.

# File lib/origami/catalog.rb, line 67
# Installs _cat_ as the Catalog of the last revision.
#
# cat:: a Catalog, or any value accepted by Catalog.new.
#
# The object currently referenced by the trailer's Root (if any) is deleted
# before the new catalog is inserted into the document.
def Catalog=(cat)
  catalog = cat.is_a?(Catalog) ? cat : Catalog.new(cat)
  trailer = @revisions.last.trailer

  delete_object(trailer[:Root]) if trailer.Root

  trailer.Root = self << catalog
end
add_fields(*fields) click to toggle source

Adds one or more fields to the Acrobat form.

field

The Field to add.

# File lib/origami/acroform.rb, line 51
# Appends the given fields to the Catalog's AcroForm, creating the AcroForm
# and its Fields array when they are missing.
#
# fields:: Field instances to add; each one is marked indirect.
#
# Raises TypeError if any argument is not a Field. Returns self.
def add_fields(*fields)
  if fields.any? { |candidate| !candidate.is_a?(Field) }
    raise TypeError, "Expected Field arguments"
  end

  self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true)
  self.Catalog.AcroForm.Fields ||= []
  self.Catalog.AcroForm.Fields.concat(fields)

  fields.each { |field| field.set_indirect(true) }

  self
end
add_new_revision() click to toggle source

Ends the current Revision, and starts a new one.

# File lib/origami/pdf.rb, line 495
# Appends a new Revision to the document.
#
# The new revision gets a fresh Trailer whose Root is copied from the :Root
# entry of the previously last revision's trailer (nil when there was no
# revision yet). Returns self.
def add_new_revision
  inherited_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  fresh = Revision.new(self)
  @revisions << fresh
  fresh.trailer = Trailer.new
  fresh.trailer.Root = inherited_root

  self
end
add_to_revision(object, revision) click to toggle source

Adds a new object to a specific revision. If this object has no object number, a new one is automatically computed and assigned to it. Returns a Reference to this object.

object

The object to add.

revision

The revision to add the object to.

# File lib/origami/pdf.rb, line 480
# Stores _object_ in the body of _revision_.
#
# object::   the object to add; it is marked indirect and attached to this
#            document. An object whose number is 0 receives a newly allocated
#            number/generation pair.
# revision:: the revision whose body receives the object.
#
# Returns the object's reference.
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_pdf(self)

  if object.no == 0
    number, generation = alloc_new_object_number
    object.no = number
    object.generation = generation
  end

  revision.body[object.reference] = object

  object.reference
end
alloc_new_object_number() click to toggle source

Returns a new number/generation for future object.

# File lib/origami/pdf.rb, line 631
# Computes the number/generation pair for the next object.
#
# The number is one more than the highest object number found among the
# indirect objects and the objects contained in ObjectStreams, or 1 when no
# numbered object exists. The generation is always 0.
#
# Returns a two-element array [ number, generation ].
def alloc_new_object_number
  candidates = self.indirect_objects

  # Objects stored inside object streams count as allocated as well.
  streams = self.indirect_objects.select { |obj| obj.is_a?(ObjectStream) }
  streams.each do |stream|
    stream.each { |inner| candidates << inner }
  end

  numbers = candidates.map { |obj| obj.no }.compact
  next_number = numbers.empty? ? 1 : numbers.max + 1

  [ next_number, 0 ]
end
append_page(page = Page.new, *more) click to toggle source

Appends a page or list of pages to the end of the page tree.

# File lib/origami/page.rb, line 33
# Appends one or more pages to the Kids of the Catalog's root page tree node.
#
# page:: first page to append (defaults to a new Page).
# more:: additional pages to append.
#
# Pages attached to another document are exported first. The root node's
# Count is set to the resulting number of Kids and every appended page gets
# the root node as Parent.
#
# Raises InvalidPDFError when the Catalog or its Pages entry is missing or
# Pages is not a PageTreeNode. Returns self.
def append_page(page = Page.new, *more)
  tree_ok = self.Catalog && self.Catalog.Pages && self.Catalog.Pages.is_a?(PageTreeNode)
  raise InvalidPDFError, "Invalid page tree" unless tree_ok

  incoming = [ page, *more ].map do |pg|
    # Page from another document must be exported.
    (pg.pdf && pg.pdf != self) ? pg.export : pg
  end

  treeroot = self.Catalog.Pages

  treeroot.Kids ||= [] #:nodoc:
  treeroot.Kids.concat(incoming)
  treeroot.Count = treeroot.Kids.length

  incoming.each { |added| added.Parent = treeroot }

  self
end
append_subobj(root, objset, opts) click to toggle source
# File lib/origami/pdf.rb, line 397
# Recursively collects _root_ and the objects it contains into _objset_.
#
# root::   the object to walk.
# objset:: the collection being filled; an object already present (by
#          identity) is not walked again.
# opts::   :only_keys collects dictionary keys instead of objects,
#          :include_keys also walks dictionary keys,
#          :include_objectstreams also walks into ObjectStreams.
def append_subobj(root, objset, opts)
  return unless objset.find { |seen| root.equal?(seen) }.nil?

  objset << root unless opts[:only_keys]

  if root.is_a?(Dictionary)
    root.each_pair do |name, value|
      objset << name if opts[:only_keys]

      append_subobj(name, objset, opts) if opts[:include_keys] and not opts[:only_keys]
      append_subobj(value, objset, opts)
    end
  elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
    root.each do |subobj|
      append_subobj(subobj, objset, opts)
    end
  end
end
attach_file(path, options = {}) click to toggle source

Attaches an embedded file to the PDF.

path

The path to the file to attach.

options

A set of options to configure the attachment.

# File lib/origami/file.rb, line 35
# Attaches an embedded file to the PDF.
#
# path::    a FileSpec, an object responding to +read+, or a path to a file on
#           disk.
# options:: overrides for the defaults below (:Register, :EmbeddedName,
#           :Filter).
#
# Note that a readable object passed as _path_ is closed after being read.
#
# Returns the FileSpec built for the attachment.
def attach_file(path, options = {})

  #
  # Default options.
  #
  params =
  {
    :Register => true,                      # Shall the file be registered in the name directory ?
    :EmbeddedName => nil,                   # The inner filename of the attachment.
    :Filter => :FlateDecode,                # The stream filter used to store data.
  }.update(options)

  if path.is_a? FileSpec
    filespec = path
    params[:EmbeddedName] ||= ''
  else
    if path.respond_to?(:read)
      fd = path
      params[:EmbeddedName] ||= ''
    else
      fd = File.open(File.expand_path(path), 'r').binmode
      params[:EmbeddedName] ||= File.basename(path)
    end

    # Always release the handle, even when reading fails; previously an
    # exception raised by +read+ left the file open.
    begin
      contents = fd.read
    ensure
      fd.close
    end

    fstream = EmbeddedFileStream.new

    if ''.respond_to? :force_encoding
      fstream.data = contents.force_encoding('binary') # 1.9
    else
      fstream.data = contents
    end

    fstream.setFilter(params[:Filter])
    filespec = FileSpec.new(:F => fstream)
  end

  name = params[:EmbeddedName]
  fspec = FileSpec.new.setType(:Filespec).setF(name.dup).setEF(
    filespec
  )

  register(
    Names::Root::EMBEDDEDFILES,
    name.dup,
    fspec
  ) if params[:Register] == true

  fspec
end
author() click to toggle source
# File lib/origami/metadata.rb, line 54
# Looks up the :Author field through get_document_info_field.
def author
  get_document_info_field(:Author)
end
create_acroform(*fields) click to toggle source

Creates a new AcroForm with specified fields.

# File lib/origami/acroform.rb, line 40
# Makes sure the Catalog has an AcroForm and adds the given fields to it.
#
# fields:: Field instances forwarded to add_fields.
#
# Returns the AcroForm (the existing one, or the newly created
# InteractiveForm).
def create_acroform(*fields)
  form = (self.Catalog.AcroForm ||= InteractiveForm.new.set_indirect(true))
  self.add_fields(*fields)

  form
end
create_metadata(info = {}) click to toggle source

Modifies or creates a metadata stream.

# File lib/origami/metadata.rb, line 96
    # Modifies or creates the XMP metadata stream of the document.
    #
    # info:: Hash of name => value pairs; each pair is appended as a
    #        <pdf:name> element to the rdf:Description node.
    #
    # When the Catalog already holds a Metadata Stream its XML is extended;
    # otherwise a new Stream is built from an empty XMP skeleton.
    #
    # Returns the Catalog's Metadata entry.
    def create_metadata(info = {})
      # The XMP packet wrapper must open with an "xpacket" processing
      # instruction so that it pairs with the closing <?xpacket end="w"?>.
      skeleton = <<-XMP
<?xpacket begin="#{"\xef\xbb\xbf"}" id="W5M0MpCehiHzreSzNTczkc9d"?>
  <x:xmpmeta xmlns:x="adobe:ns:meta/">
    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
      <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
      </rdf:Description>
    </rdf:RDF>
  </x:xmpmeta>
<?xpacket end="w"?>
      XMP

      # Start from the existing metadata when there is some.
      xml =
        if self.Catalog.Metadata.is_a?(Stream)
          self.Catalog.Metadata.data
        else
          skeleton
        end

      doc = REXML::Document.new(xml)
      desc = doc.elements['*/*/rdf:Description']
  
      info.each do |name, value|
        elt = REXML::Element.new "pdf:#{name}"
        elt.text = value

        desc.elements << elt
      end

      # Serialize the document back to a string (indentation of 3).
      xml = ""; doc.write(xml, 3)

      if self.Catalog.Metadata.is_a?(Stream)
        self.Catalog.Metadata.data = xml
      else
        self.Catalog.Metadata = Stream.new(xml)
      end

      self.Catalog.Metadata
    end
create_xfa_form(xdp, *fields) click to toggle source
# File lib/origami/xfa.rb, line 32
# Creates an AcroForm (see create_acroform) and stores _xdp_ in its XFA entry
# as a FlateDecode-filtered Stream.
#
# xdp::    content of the XFA stream.
# fields:: Field instances forwarded to create_acroform.
#
# Returns the AcroForm.
def create_xfa_form(xdp, *fields)
  form = create_acroform(*fields)
  xfa_stream = Stream.new(xdp, :Filter => :FlateDecode)
  form.XFA = xfa_stream

  form
end
creation_date() click to toggle source
# File lib/origami/metadata.rb, line 59
# Looks up the :CreationDate field through get_document_info_field.
def creation_date
  get_document_info_field(:CreationDate)
end
creator() click to toggle source
# File lib/origami/metadata.rb, line 57
# Looks up the :Creator field through get_document_info_field.
def creator
  get_document_info_field(:Creator)
end
decrypt(passwd = "") click to toggle source

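Decrypts the current document using the Standard security handler (RC4 for handler versions 1 and 2; RC4, AES or identity crypt filters for versions 4 and 5).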

passwd

The password to decrypt the document.

# File lib/origami/encryption.rb, line 59
# Decrypts the current document using the Standard security handler.
#
# passwd:: the user or owner password (defaults to the empty string).
#
# Raises EncryptionError when the document is not encrypted, or when the
# document ID is not an Array and the handler version is not 5;
# EncryptionNotSupportedError for an unknown security handler, handler
# version or crypt filter method; EncryptionInvalidPasswordError when
# _passwd_ is neither the user nor the owner password.
#
# Returns self.
def decrypt(passwd = "")

  unless self.is_encrypted?
    raise EncryptionError, "PDF is not encrypted"
  end
 
  # Work on a copy of the encryption dictionary wrapped as a Standard handler.
  encrypt_dict = get_doc_attr(:Encrypt)
  handler = Encryption::Standard::Dictionary.new(encrypt_dict.dup)

  unless handler.Filter == :Standard
    raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
  end

  #
  # Select the string and stream algorithms from the handler version.
  # Versions 1 and 2 use ARC4 for both; versions 4 and 5 look the algorithm
  # up in the crypt filter dictionaries named by StrF / StmF, falling back
  # to Identity when no usable crypt filter is found.
  #
  case handler.V.to_i
    when 1,2 then str_algo = stm_algo = Encryption::ARC4
    when 4,5
      if handler[:CF].is_a?(Dictionary)
        cfs = handler[:CF]
        
        # Crypt filter applied to strings.
        if handler[:StrF].is_a?(Name) and cfs[handler[:StrF]].is_a?(Dictionary)
          cfdict = cfs[handler[:StrF]]
          
          str_algo =
            if cfdict[:CFM] == :V2 then Encryption::ARC4
            elsif cfdict[:CFM] == :AESV2 then Encryption::AES
            elsif cfdict[:CFM] == :None then Encryption::Identity
            elsif cfdict[:CFM] == :AESV3 and handler.V.to_i == 5 then Encryption::AES
            else
              raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
            end
        else
          str_algo = Encryption::Identity
        end

        # Crypt filter applied to streams.
        if handler[:StmF].is_a?(Name) and cfs[handler[:StmF]].is_a?(Dictionary)
          cfdict = cfs[handler[:StmF]]

          stm_algo =
            if cfdict[:CFM] == :V2 then Encryption::ARC4
            elsif cfdict[:CFM] == :AESV2 then Encryption::AES
            elsif cfdict[:CFM] == :None then Encryption::Identity
            elsif cfdict[:CFM] == :AESV3 and handler.V.to_i == 5 then Encryption::AES
            else
              raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
            end
        else
          stm_algo = Encryption::Identity
        end

      else
        str_algo = stm_algo = Encryption::Identity
      end

    else
      raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
  end
  
  # The first element of the document ID is used for key computation; a
  # missing or invalid ID is only tolerated for handler version 5.
  doc_id = get_doc_attr(:ID)
  unless doc_id.is_a?(Array)
    raise EncryptionError, "Document ID was not found or is invalid" unless handler.V.to_i == 5
  else
    doc_id = doc_id.first
  end

  #
  # Derive the encryption key: try the password as user password first,
  # then as owner password.
  #
  if handler.is_user_password?(passwd, doc_id)
    encryption_key = handler.compute_user_encryption_key(passwd, doc_id)
  elsif handler.is_owner_password?(passwd, doc_id)
    if handler.V.to_i < 5
      # Below version 5 the owner password is first turned back into the
      # user password, from which the key is computed.
      user_passwd = handler.retrieve_user_password(passwd)
      encryption_key = handler.compute_user_encryption_key(user_passwd, doc_id)
    else
      encryption_key = handler.compute_owner_encryption_key(passwd)
    end
  else
    raise EncryptionInvalidPasswordError
  end


  #self.extend(Encryption::EncryptedDocument)
  #self.encryption_dict = encrypt_dict
  #self.encryption_key = encryption_key
  #self.stm_algo = self.str_algo = algorithm

  # Metadata is treated as encrypted unless EncryptMetadata is exactly false.
  encrypt_metadata = (handler.EncryptMetadata != false)

  # Record the encryption state on the document itself.
  self.extend(Encryption::EncryptedDocument)
  self.encryption_dict = handler
  self.encryption_key = encryption_key
  self.stm_algo,self.str_algo = stm_algo,str_algo
  
  #
  # Should be fixed to exclude only the active XRefStream
  #
  metadata = self.Catalog.Metadata

  self.indirect_objects.each do |indobj|
    # Collect what has to be processed for this indirect object: the object
    # itself for strings and streams, the cached strings for containers.
    encrypted_objects = []
    case indobj
      when String,Stream then encrypted_objects << indobj
      when Dictionary,Array then encrypted_objects |= indobj.strings_cache
    end

    encrypted_objects.each do |obj|

      case obj
        when String
          # Skip the password/permission entries of the encryption dictionary
          # and the Contents of digital signatures.
          next if obj.equal?(encrypt_dict[:U]) or 
                  obj.equal?(encrypt_dict[:O]) or
                  obj.equal?(encrypt_dict[:UE]) or
                  obj.equal?(encrypt_dict[:OE]) or
                  obj.equal?(encrypt_dict[:Perms]) or
                  (obj.parent.is_a?(Signature::DigitalSignature) and obj.equal?(obj.parent[:Contents]))

          # Strings are decrypted immediately.
          obj.extend(Encryption::EncryptedString) unless obj.is_a?(Encryption::EncryptedString)
          obj.encryption_handler = handler
          obj.encryption_key = encryption_key
          obj.algorithm = str_algo
          obj.decrypt!

        when Stream
          # Skip cross-reference streams, and the metadata stream when
          # metadata is not encrypted. Streams only receive the handler, key
          # and algorithm here; decrypt! is not called on them in this method.
          next if obj.is_a?(XRefStream) or (not encrypt_metadata and obj.equal?(metadata))
          obj.extend(Encryption::EncryptedStream) unless obj.is_a?(Encryption::EncryptedStream)
          obj.encryption_handler = handler
          obj.encryption_key = encryption_key
          obj.algorithm = stm_algo
      end
    end
  end

  self
end
delete_object(no, generation = 0) click to toggle source

Remove an object.

# File lib/origami/pdf.rb, line 533
# Removes an object from the body of every revision.
#
# no::         a Reference, or the object number as an Integer.
# generation:: generation used to build the Reference when _no_ is an
#              Integer (defaults to 0).
#
# Raises TypeError for any other type of _no_.
def delete_object(no, generation = 0)
  target =
    case no
    when Reference then no
    when ::Integer then Reference.new(no, generation)
    else raise TypeError, "Invalid parameter type : #{no.class}"
    end

  @revisions.each { |rev| rev.body.delete(target) }
end
delete_xrefstm(xrefstm) click to toggle source
# File lib/origami/xreftable.rb, line 34
# Deletes _xrefstm_ and then follows its Prev entry: when Prev is an Integer
# and the object found at that offset is an XRefStream, that stream is
# deleted the same way.
#
# xrefstm:: the cross-reference stream to remove.
def delete_xrefstm(xrefstm)
  previous_offset = xrefstm.Prev
  delete_object(xrefstm.reference)

  return unless previous_offset.is_a?(Integer)

  previous_stream = get_object_by_offset(previous_offset)
  delete_xrefstm(previous_stream) if previous_stream.is_a?(XRefStream)
end
delinearize!() click to toggle source

Tries to delinearize the document if it has been linearized. This operation destroys the existing xref tables; it should eventually be changed to merge them instead.

# File lib/origami/linearization.rb, line 50
# Removes the linearization data of a linearized document.
#
# The hint streams referenced by the linearization dictionary are deleted,
# trailer information is copied into the last revision's trailer, all xrefs
# are removed and the first revision is dropped.
#
# Raises LinearizationError when the document is not linearized, or when no
# trailer information can be found. Returns self.
def delinearize!
  raise LinearizationError, 'Not a linearized document' unless is_linearized?
  
  #
  # Saves the first trailer.
  #
  prev_trailer = @revisions.first.trailer

  # The first object of the first revision is used as the linearization
  # dictionary; its :H entry lists the hint stream locations.
  lin_dict = @revisions.first.objects.first
  hints = lin_dict[:H]
  
  #
  # Removes hint streams used by linearization.
  #
  if hints.is_a?(::Array)
    # hints[0] is looked up as the offset of the hint stream.
    if hints.length > 0 and hints[0].is_a?(Integer)
      hint_stream = get_object_by_offset(hints[0])
      delete_object(hint_stream.reference) if hint_stream.is_a?(Stream)
    end

    # hints[2], when present, is looked up as the offset of an overflow
    # hint stream.
    if hints.length > 2 and hints[2].is_a?(Integer)
      overflow_stream = get_object_by_offset(hints[2])
      delete_object(overflow_stream.reference) if overflow_stream.is_a?(Stream)
    end
  end

  #
  # Update the trailer.
  #
  last_trailer = (@revisions.last.trailer ||= Trailer.new)

  last_trailer.dictionary ||= Dictionary.new
   
  if prev_trailer.has_dictionary?
    # Merge the first trailer's dictionary into the last one.
    last_trailer.dictionary =
      last_trailer.dictionary.merge(prev_trailer.dictionary)
  else
    # No trailer dictionary: take the entries from the cross-reference
    # stream located at the last trailer's startxref offset.
    xrefstm = get_object_by_offset(last_trailer.startxref)
    raise LinearizationError, 
      'Cannot find trailer info while delinearizing document' unless xrefstm.is_a?(XRefStream)

    last_trailer.dictionary[:Root] = xrefstm[:Root]
    last_trailer.dictionary[:Encrypt] = xrefstm[:Encrypt]
    last_trailer.dictionary[:Info] = xrefstm[:Info]
    last_trailer.dictionary[:ID] = xrefstm[:ID]
  end

  #
  # Remove all xrefs.
  # Fix: Should be merged instead.
  #
  remove_xrefs

  #
  # Remove the linearization revision.
  #
  remove_revision(0)

  self
end
each_field(&b) click to toggle source

Iterates over each Acroform Field.

# File lib/origami/acroform.rb, line 77
# Calls the block with each solved entry of the AcroForm's :Fields.
#
# Nothing is yielded (and nil is returned) when the document has no form or
# the AcroForm has no :Fields key.
def each_field(&b)
  return unless self.has_form?
  return unless self.Catalog.AcroForm.has_key?(:Fields)

  self.Catalog.AcroForm[:Fields].each { |field| b.call(field.solve) }
end
each_name(root, &b) click to toggle source
# File lib/origami/catalog.rb, line 159
# Walks the names found under the names root identified by _root_, passing
# the block on to each_name_from_node.
#
# root:: key handed to get_names_root.
#
# Returns nil when get_names_root yields nil, self otherwise.
def each_name(root, &b)
  names_tree = get_names_root(root)

  unless names_tree.nil?
    each_name_from_node(names_tree, [], &b)
    self
  end
end
each_named_dest(&b) click to toggle source

Calls block for each named destination.

# File lib/origami/destinations.rb, line 40
# Runs each_name over the Names::Root::DESTS names root with the given block.
def each_named_dest(&b)
  names_key = Names::Root::DESTS
  each_name(names_key, &b)
end
each_named_embedded_file(&b) click to toggle source

Calls block for each named embedded file.

# File lib/origami/file.rb, line 97
# Runs each_name over the Names::Root::EMBEDDEDFILES names root with the
# given block.
def each_named_embedded_file(&b)
  names_key = Names::Root::EMBEDDEDFILES
  each_name(names_key, &b)
end
each_named_page(&b) click to toggle source

Calls block for each named page.

# File lib/origami/page.rb, line 109
# Runs each_name over the Names::Root::PAGES names root with the given block.
def each_named_page(&b)
  names_key = Names::Root::PAGES
  each_name(names_key, &b)
end
each_named_script(&b) click to toggle source

Calls block for each named JavaScript script.

# File lib/origami/actions.rb, line 40
# Runs each_name over the Names::Root::JAVASCRIPT names root with the given
# block.
def each_named_script(&b)
  names_key = Names::Root::JAVASCRIPT
  each_name(names_key, &b)
end
each_page(&b) click to toggle source

Iterate through each page, returns self.

# File lib/origami/page.rb, line 83
# Passes the block to each_page of the Catalog's root page tree node.
#
# Raises InvalidPDFError when the Catalog or its Pages entry is missing or
# Pages is not a PageTreeNode. Returns self.
def each_page(&b)
  tree_ok = self.Catalog && self.Catalog.Pages && self.Catalog.Pages.is_a?(PageTreeNode)
  raise InvalidPDFError, "Invalid page tree" unless tree_ok

  self.Catalog.Pages.each_page(&b)
  self
end
enable_usage_rights(cert, pkey, *rights) click to toggle source

Enable the document Usage Rights.

rights

list of rights defined in UsageRights::Rights

# File lib/origami/signature.rb, line 287
# Enables the document Usage Rights by adding a UR3 signature to the
# Catalog's Perms dictionary.
#
# cert::   an OpenSSL::X509::Certificate, or data accepted by
#          OpenSSL::X509::Certificate.new.
# pkey::   an OpenSSL::PKey::RSA key, or data accepted by
#          OpenSSL::PKey::RSA.new.
# rights:: list of rights; for each one, its first element is used as the
#          TransformParams key and the remaining elements are appended to
#          that entry.
#
# Fails with a RuntimeError when Origami::OPTIONS[:use_openssl] is not set.
# The document is frozen once signed.
def enable_usage_rights(cert, pkey, *rights)
  
  unless Origami::OPTIONS[:use_openssl]
    fail "OpenSSL is not present or has been disabled."
  end
  
  # Estimates the room needed for the signature: the DER size of a detached
  # PKCS7 signature over sample data, plus 128 bytes.
  signfield_size = lambda{|crt, key, ca|
    datatest = "abcdefghijklmnopqrstuvwxyz"
    OpenSSL::PKCS7.sign(crt, key, datatest, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der.size + 128
  }
  
  #
  # Load key pair
  #
  key = pkey.is_a?(OpenSSL::PKey::RSA) ? pkey : OpenSSL::PKey::RSA.new(pkey)
  certificate = cert.is_a?(OpenSSL::X509::Certificate) ? cert : OpenSSL::X509::Certificate.new(cert)
  
  #
  # Forge digital signature dictionary
  #
  digsig = Signature::DigitalSignature.new.set_indirect(true)
  
  self.Catalog.AcroForm ||= InteractiveForm.new
  #self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::APPENDONLY
  
  # Contents is a zero-filled placeholder, overwritten with the real
  # signature at the end of this method.
  digsig.Type = :Sig #:nodoc:
  digsig.Contents = HexaString.new("\x00" * signfield_size[certificate, key, []]) #:nodoc:
  digsig.Filter = Name.new("Adobe.PPKLite") #:nodoc:
  digsig.Name = "ARE Acrobat Product v8.0 P23 0002337" #:nodoc:
  digsig.SubFilter = Name.new("adbe.pkcs7.detached") #:nodoc:
  digsig.ByteRange = [0, 0, 0, 0] #:nodoc:
  
  # Signature reference using the UR3 transform method on the Catalog.
  sigref = Signature::Reference.new #:nodoc:
  sigref.Type = :SigRef #:nodoc:
  sigref.TransformMethod = :UR3 #:nodoc:
  sigref.Data = self.Catalog
  
  sigref.TransformParams = UsageRights::TransformParams.new
  sigref.TransformParams.P = true #:nodoc:
  sigref.TransformParams.Type = :TransformParams #:nodoc:
  sigref.TransformParams.V = UsageRights::TransformParams::VERSION
  
  # Group the requested rights under their key in TransformParams.
  rights.each do |right|
    sigref.TransformParams[right.first] ||= []
    sigref.TransformParams[right.first].concat(right[1..-1])
  end
  
  digsig.Reference = [ sigref ]
  
  self.Catalog.Perms ||= Perms.new
  self.Catalog.Perms.UR3 = digsig
  
  #
  #  Flattening the PDF to get file view.
  #
  compile
  
  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs
  
  # Offset of the signature contents within the file.
  sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset
  
  # ByteRange describes two (offset, length) spans: everything before the
  # signature contents and everything after them.
  digsig.ByteRange[0] = 0 
  digsig.ByteRange[1] = sigoffset
  digsig.ByteRange[2] = sigoffset + digsig.Contents.size
  
  # Re-assign the last length until it agrees with the file size
  # (presumably because writing ByteRange values can itself change the
  # file size).
  digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
  
  # From that point the file size remains constant
  
  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuildxrefs
  
  # Sign the two spans of the serialized file and store the signature at
  # the start of the placeholder.
  filedata = output()
  signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]]
  
  signature = OpenSSL::PKCS7.sign(certificate, key, signable_data, [], OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
  digsig.Contents[0, signature.size] = signature
  
  #
  # No more modification are allowed after signing.
  #
  self.freeze
  
end
encrypt(options = {}) click to toggle source

Encrypts the current document with the provided passwords. The document will be encrypted at writing-on-disk time.

userpasswd

The user password.

ownerpasswd

The owner password.

options

A set of options to configure encryption.

# File lib/origami/encryption.rb, line 198
# Prepares the document for encryption with the Standard security handler.
#
# options:: overrides for the defaults below (:user_passwd, :owner_passwd,
#           :cipher, :key_size, :hardened, :encrypt_metadata, :permissions).
#
# No object is encrypted by this method itself: it builds the encryption
# dictionary, registers it in the trailer info under :Encrypt and records the
# handler, key and algorithm on the document.
#
# Raises EncryptionError when the document is already encrypted or the key
# size is invalid for the cipher, and EncryptionNotSupportedError for an
# unknown cipher. Returns self.
def encrypt(options = {})

  if self.is_encrypted?
    raise EncryptionError, "PDF is already encrypted"
  end

  #
  # Default encryption options.
  #
  params = 
  {
    :user_passwd => '',
    :owner_passwd => '',
    :cipher => 'rc4',            # 'rc4' or 'aes' (a String, matched case-insensitively below)
    :key_size => 128,            # Key size in bits
    :hardened => false,          # Use newer password validation (since Reader X)
    :encrypt_metadata => true,   # Metadata shall be encrypted?
    :permissions => Encryption::Standard::Permissions::ALL    # Document permissions
  }.update(options)

  userpasswd, ownerpasswd = params[:user_passwd], params[:owner_passwd]

  #
  # Map cipher and key size to the handler version (V) and revision (R).
  #
  case params[:cipher].upcase
    when 'RC4'
      algorithm = Encryption::ARC4
      # RC4 keys must be a multiple of 8 bits between 40 and 128.
      if (40..128) === params[:key_size] and params[:key_size] % 8 == 0
        if params[:key_size] > 40
          version = 2
          revision = 3
        else
          version = 1
          revision = 2
        end
      else
        raise EncryptionError, "Invalid RC4 key length"
      end
    when 'AES'
      algorithm = Encryption::AES
      if params[:key_size] == 128 
        version = revision = 4
      elsif params[:key_size] == 256
        version = 5
        # :hardened selects revision 6 instead of 5.
        if params[:hardened]
          revision = 6
        else
          revision = 5
        end
      else
        raise EncryptionError, "Invalid AES key length (Only 128 and 256 bits keys are supported)"
      end
    else
      raise EncryptionNotSupportedError, "Cipher not supported : #{params[:cipher]}"
  end
 
  # Use the first element of the existing document ID, or of a generated one.
  doc_id = (get_doc_attr(:ID) || gen_id).first

  handler = Encryption::Standard::Dictionary.new
  handler.Filter = :Standard #:nodoc:
  handler.V = version
  handler.R = revision
  handler.Length = params[:key_size]
  # NOTE(review): the :permissions option is merged into params but never
  # read; P is always set to -1 - confirm whether this is intended.
  handler.P = -1 # params[:Permissions] 
  
  # Revisions 4 and above declare a single crypt filter (StdCF) used for
  # both strings and streams.
  if revision >= 4
    handler.EncryptMetadata = params[:encrypt_metadata]
    handler.CF = Dictionary.new
    cryptfilter = Encryption::CryptFilterDictionary.new
    cryptfilter.AuthEvent = :DocOpen
    
    if revision == 4
      cryptfilter.CFM = :AESV2
    else
      cryptfilter.CFM = :AESV3
    end

    # Crypt filter length is the key size in bytes (bits / 8).
    cryptfilter.Length = params[:key_size] >> 3

    handler.CF[:StdCF] = cryptfilter
    handler.StmF = handler.StrF = :StdCF
  end
 
  handler.set_passwords(ownerpasswd, userpasswd, doc_id)
  encryption_key = handler.compute_user_encryption_key(userpasswd, doc_id)

  # Insert the handler into the document and reference it from the trailer
  # info.
  fileInfo = get_trailer_info
  fileInfo[:Encrypt] = self << handler

  # Record the encryption state on the document itself.
  self.extend(Encryption::EncryptedDocument)
  self.encryption_dict = handler
  self.encryption_key = encryption_key
  self.stm_algo = self.str_algo = algorithm

  self
end
eval_js(code) click to toggle source

Executes a JavaScript script in the current document context.

# File lib/origami/javascript.rb, line 679
# Hands _code_ to the +exec+ method of the object returned by js_engine and
# returns its result.
def eval_js(code)
  engine = js_engine
  engine.exec(code)
end
export_to_graph(filename) click to toggle source

Exports the document to a Graphviz dot file.

filename

The path where to save the file.

# File lib/origami/export.rb, line 34
# Writes the document's object graph to _filename_ as a "digraph" in dot
# syntax: one node statement per object and one edge per contained object.
#
# filename:: the path where to save the file.
#
# Note: the nested +def+ statements below define +appearance+ and +add_edges+
# as methods each time this method runs.
def export_to_graph(filename)
  
  # Returns the label, font color, fill color and shape used to draw
  # _object_, chosen by the object's class.
  def appearance(object) #:nodoc:
  
    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "ellipse"
      when Name, Number
        label = object.value 
        fontcolor = "brown"
        color = "lightgoldenrodyellow"
        shape = "polygon"
       when String
        # Only short, non-binary strings are shown verbatim.
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "darkgreen"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end
  
    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end
  
  # Writes to _fd_ one edge from _object_ to each object it contains.
  # References are solved first; nil targets are skipped. The _pdf_
  # parameter is not used in the body.
  def add_edges(pdf, fd, object) #:nodoc:
    
    if object.is_a?(Array) or object.is_a?(ObjectStream)
      
      object.each { |subobj|
        subobj = subobj.solve if subobj.is_a?(Reference) 
        fd << "\t#{object.object_id} -> #{subobj.object_id}\n" unless subobj.nil?
      }
      
    elsif object.is_a?(Dictionary)
      
      # Dictionary edges are labelled with the entry name.
      object.each_pair { |name, subobj|
        subobj = subobj.solve if subobj.is_a?(Reference) 
        fd << "\t#{object.object_id} -> #{subobj.object_id} [label=\"#{name.value}\",fontsize=9];\n" unless subobj.nil?
      }
      
    end
    
    # Streams additionally link to the entries of their dictionary.
    if object.is_a?(Stream)
      
      object.dictionary.each_pair { |key, value|
        value = value.solve if value.is_a?(Reference)
        fd << "\t#{object.object_id} -> #{value.object_id} [label=\"#{key.value}\",fontsize=9];\n" unless value.nil?
      }
      
    end
    
  end
  
  # graphname is a local that is never assigned before this line, so it is
  # nil here and the graph is always named "PDF".
  graphname = "PDF" if graphname.nil? or graphname.empty?
  fd = File.open(filename, "w")

  begin
    fd << "digraph #{graphname} {\n\n"
    
    # Every object except References becomes a node.
    objects = self.objects(:include_keys => false).find_all{ |obj| not obj.is_a?(Reference) }
    
    objects.each { |object|
      attr = appearance(object)
      
      fd << "\t#{object.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]},fontsize=16];\n"
      
      if object.is_a?(Stream)
        
        # NOTE(review): +each+ is called with a single block parameter; if
        # the dictionary iterates like a Hash, +value+ is a [key, value]
        # pair rather than the value - confirm.
        object.dictionary.each { |value|
          unless value.is_a?(Reference)
            attr = appearance(value)
            fd << "\t#{value.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]},fontsize=16];\n"
          end
        }
        
      end
      
      add_edges(self, fd, object)
    }
    fd << "\n}"
  ensure
    # Close the output file even if writing failed.
    fd.close
  end
  
end
export_to_graphml(filename) click to toggle source

Exports the document to a GraphML file.

filename

The path where to save the file.

# File lib/origami/export.rb, line 133
# Writes the document's object graph to _filename_ as GraphML with yFiles
# node and edge graphics.
#
# filename:: the path where to save the file.
#
# Note: the nested +def+ statements below define +declare_node+,
# +declare_edge+, +appearance+ and +add_edges+ as methods each time this
# method runs.
def export_to_graphml(filename)
  
  # Returns the GraphML <node> element for _id_, labelled with attr[:label].
  def declare_node(id, attr) #:nodoc:
    " <node id=\"#{id}\">\n" <<
    "  <data key=\"d0\">\n" <<
    "    <y:ShapeNode>\n" <<
    "     <y:NodeLabel>#{attr[:label]}</y:NodeLabel>\n" <<
    #~ "     <y:Shape type=\"#{attr[:shape]}\"/>\n" <<
    "    </y:ShapeNode>\n" <<
    "  </data>\n" <<
    " </node>\n"
  end
  
  # Returns the GraphML <edge> element _id_ going from _src_ to _dest_, with
  # an optional _label_.
  def declare_edge(id, src, dest, label = nil) #:nodoc:
    " <edge id=\"#{id}\" source=\"#{src}\" target=\"#{dest}\">\n" << 
    "  <data key=\"d1\">\n" <<
    "   <y:PolyLineEdge>\n" <<
    "    <y:LineStyle type=\"line\" width=\"1.0\" color=\"#000000\"/>\n" <<
    "    <y:Arrows source=\"none\" target=\"standard\"/>\n" << 
    "    <y:EdgeLabel>#{label.to_s}</y:EdgeLabel>\n" <<
    "   </y:PolyLineEdge>\n" <<
    "  </data>\n" <<
    " </edge>\n"
  end
  
  # Returns the label, font color, fill color and shape associated with
  # _object_, chosen by the object's class. Only :label is consumed by
  # declare_node.
  def appearance(object) #:nodoc:
  
    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "doublecircle"
      when Name, Number
        label = object.value 
        fontcolor = "orange"
        color = "lightgoldenrodyellow"
        shape = "polygon"
      when String
        # Only short, non-binary strings are shown verbatim.
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "green"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end
  
    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end
  
 # Writes to _fd_ one edge from _object_ to each object it contains, numbering
 # the edges from _id_ upwards. References are solved first; nil targets are
 # skipped. The _pdf_ parameter is not used in the body.
 #
 # Returns the next unused edge id.
 def add_edges(pdf, fd, object, id) #:nodoc:
    
    if object.is_a?(Array) or object.is_a?(ObjectStream)
      
      object.each { |subobj|
        
        subobj = subobj.solve if subobj.is_a?(Reference)
        
        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}")
          id = id + 1
        end
      }
      
    elsif object.is_a?(Dictionary)
      
      # Dictionary edges are labelled with the entry name.
      object.each_pair { |name, subobj|
        
        subobj = subobj.solve if subobj.is_a?(Reference)
        
        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}", name.value)
          id = id + 1
        end
      }
      
    end
    
    # Streams additionally link to the entries of their dictionary.
    if object.is_a?(Stream)
      
      object.dictionary.each_pair { |key, value|
      
        value = value.solve if value.is_a?(Reference)
        
        unless value.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{value.object_id}", key.value)
          id = id + 1
        end
      }
      
    end
    
    id
  end
  
  # NOTE(review): this class variable is assigned but not read anywhere in
  # this method; the local edge_nb below is what tracks edge ids.
  @@edge_nb = 1
  
  # graphname is a local that is never assigned before this line, so it is
  # nil here and the graph id is always "PDF".
  graphname = "PDF" if graphname.nil? or graphname.empty?
  
  fd = File.open(filename, "w")
  
  edge_nb = 1
  begin
    
    # GraphML header and key declarations.
    fd << '<?xml version="1.0" encoding="UTF-8"?>' << "\n"
    fd << '<graphml xmlns="http://graphml.graphdrawing.org/xmlns/graphml"' << "\n"
    fd << ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' << "\n"
    fd << ' xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns/graphml ' << "\n"
    fd << ' http://www.yworks.com/xml/schema/graphml/1.0/ygraphml.xsd"' << "\n"
    fd << ' xmlns:y="http://www.yworks.com/xml/graphml">' << "\n"
    fd << '<key id="d0" for="node" yfiles.type="nodegraphics"/>' << "\n"
    fd << '<key id="d1" for="edge" yfiles.type="edgegraphics"/>' << "\n"
    fd << "<graph id=\"#{graphname}\" edgedefault=\"directed\">\n"
    
    # Every object except References becomes a node.
    objects = self.objects(:include_keys => false).find_all{ |obj| not obj.is_a?(Reference) }
    
    objects.each { |object|
      
      fd << declare_node("n#{object.object_id}", appearance(object))
      
      if object.is_a?(Stream)
        
        # NOTE(review): +each+ is called with a single block parameter; if
        # the dictionary iterates like a Hash, +value+ is a [key, value]
        # pair. The node id below also lacks the "n" prefix used for every
        # other node and by the edges - confirm both.
        object.dictionary.each { |value|
        
          unless value.is_a?(Reference)
            fd << declare_node(value.object_id, appearance(value))
          end
        }
      end
      
      edge_nb = add_edges(self, fd, object, edge_nb)
    }
    
    fd << '</graph>' << "\n"
    fd << '</graphml>'
    
  ensure
    # Close the output file even if writing failed.
    fd.close
  end
  
end
fields() click to toggle source

Returns an array of Acroform fields.

# File lib/origami/acroform.rb, line 66
# Collects the AcroForm fields of the document, resolving each entry
# through #solve. Evaluates to nil when has_form? is false or when the
# form dictionary has no :Fields key.
def fields
  return unless self.has_form?
  return unless self.Catalog.AcroForm.has_key?(:Fields)

  self.Catalog.AcroForm[:Fields].map { |entry| entry.solve }
end
find(params = {}, &b) click to toggle source

Returns an array of objects matching specified block.

# File lib/origami/pdf.rb, line 376
# Selects the objects for which the given block is truthy.
#
# params:: option Hash; :only_indirect (default false) restricts the search
#          to indirect_objects instead of the full objects list.
def find(params = {}, &b)
  options = { :only_indirect => false }.merge(params)

  candidates =
    if options[:only_indirect] == true
      self.indirect_objects
    else
      self.objects
    end

  candidates.find_all(&b)
end
get_destination_by_name(name) click to toggle source

Lookup destination in the destination name directory.

# File lib/origami/destinations.rb, line 33
# Resolves +name+ through resolve_name under the Names::Root::DESTS root.
def get_destination_by_name(name)
  resolve_name(Names::Root::DESTS, name)
end
get_document_info() click to toggle source

Returns the document information dictionary if present.

# File lib/origami/metadata.rb, line 49
# Fetches the :Info document attribute via get_doc_attr.
def get_document_info
  get_doc_attr(:Info)
end
get_embedded_file_by_name(name) click to toggle source

Lookup embedded file in the embedded files name directory.

# File lib/origami/file.rb, line 90
# Resolves +name+ through resolve_name under the Names::Root::EMBEDDEDFILES root.
def get_embedded_file_by_name(name)
  resolve_name(Names::Root::EMBEDDEDFILES, name)
end
get_field(name) click to toggle source

Returns the corresponding named Field.

# File lib/origami/acroform.rb, line 88
# Returns the first field yielded by each_field whose :T entry solves to
# +name+, or nil when no field matches.
#
# name:: the value compared (with ==) against the solved :T entry.
def get_field(name)
  self.each_field do |field|
    title = field[:T]

    # A field without a :T entry cannot match; skip it instead of calling
    # #solve on nil.
    next if title.nil?

    return field if title.solve == name
  end

  # Explicit nil: otherwise the method would evaluate to whatever
  # each_field itself returns, which callers would mistake for a match.
  nil
end
get_metadata() click to toggle source

Returns a Hash of the information found in the metadata stream

# File lib/origami/metadata.rb, line 65
# Parses the catalog Metadata stream as XML and flattens every
# rdf:Description found under '*/*/' into a Hash of name => text.
# Attributes are kept only when their prefix is 'pdf' or 'xap'; child
# elements contribute the text of their first nested rdf:li (or their own
# text). Evaluates to nil when the catalog Metadata is not a Stream.
def get_metadata
  stream = self.Catalog.Metadata
  return unless stream.is_a?(Stream)

  xml = REXML::Document.new(stream.data)
  properties = {}

  xml.elements.each('*/*/rdf:Description') do |description|
    description.attributes.each_attribute do |attribute|
      next unless ['pdf', 'xap'].include?(attribute.prefix)

      properties[attribute.name] = attribute.value
    end

    description.elements.each('*') do |child|
      source = child.elements['.//rdf:li'] || child
      properties[child.name] = source.text.to_s
    end
  end

  properties
end
get_page(n) click to toggle source

Get the n-th Page object.

# File lib/origami/page.rb, line 93
# Delegates to the root page tree node's get_page(n).
#
# Raises InvalidPDFError when the catalog, its Pages entry, or a Pages entry
# of type PageTreeNode is missing.
def get_page(n)
  unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree"
  end

  self.Catalog.Pages.get_page(n)
end
get_page_by_name(name) click to toggle source

Lookup page in the page name directory.

# File lib/origami/page.rb, line 102
# Resolves +name+ through resolve_name under the Names::Root::PAGES root.
def get_page_by_name(name)
  resolve_name(Names::Root::PAGES, name)
end
get_script_by_name(name) click to toggle source

Lookup script in the scripts name directory.

# File lib/origami/actions.rb, line 33
# Resolves +name+ through resolve_name under the Names::Root::JAVASCRIPT root.
def get_script_by_name(name)
  resolve_name(Names::Root::JAVASCRIPT, name)
end
has_document_info?() click to toggle source

Returns true if the document has a document information dictionary.

# File lib/origami/metadata.rb, line 35
# Reports the result of has_attr? for the :Info attribute.
def has_document_info?
  has_attr?(:Info)
end
has_form?() click to toggle source

Returns true if the document contains an acrobat form.

# File lib/origami/acroform.rb, line 33
# True when a catalog exists and it has an :AcroForm key; false when the
# catalog is nil.
def has_form?
  catalog_missing = self.Catalog.nil?
  return false if catalog_missing

  self.Catalog.has_key?(:AcroForm)
end
has_metadata?() click to toggle source

Returns true if the document has a catalog metadata stream.

# File lib/origami/metadata.rb, line 42
# True when the catalog's Metadata entry is a Stream.
def has_metadata?
  metadata = self.Catalog.Metadata
  metadata.is_a?(Stream)
end
has_usage_rights?() click to toggle source
# File lib/origami/signature.rb, line 377
# True when the catalog has a Perms dictionary containing a :UR3 or a :UR
# entry; false otherwise (including when Perms is nil).
#
# The previous condition tested for the *absence* of the keys, so an empty
# Perms dictionary reported usage rights while one holding both entries
# did not.
def has_usage_rights?
  perms = self.Catalog.Perms
  return false if perms.nil?

  perms.has_key?(:UR3) or perms.has_key?(:UR)
end
import(object) click to toggle source

Similar to #insert or #<<, but for an object belonging to another document. Object will be recursively copied and new version numbers will be assigned. Returns the new reference to the imported object.

object

The object to import.

# File lib/origami/pdf.rb, line 469
# Inserts the result of object.export into this document and returns
# whatever insert returns.
def import(object)
  exported = object.export
  self.insert(exported)
end
indirect_objects() click to toggle source

Return an array of indirect objects.

# File lib/origami/pdf.rb, line 438
# Concatenates the objects of every revision, in revision order, into one
# new array.
def indirect_objects
  @revisions.each_with_object([]) { |revision, all| all.concat(revision.objects) }
end
Also aliased as: root_objects
insert(object)
Alias for: <<
insert_page(index, page) click to toggle source

Inserts a page at position index into the document.

# File lib/origami/page.rb, line 61
# Inserts +page+ at +index+ through the root page tree node and returns self.
#
# Raises InvalidPDFError when the catalog, its Pages entry, or a Pages entry
# of type PageTreeNode is missing.
def insert_page(index, page)
  unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree"
  end

  # A page owned by a different document is exported before insertion.
  owned_elsewhere = (page.pdf and page.pdf != self)
  page = page.export if owned_elsewhere

  self.Catalog.Pages.insert_page(index, page)
  self
end
is_a_pdfa1?() click to toggle source
# File lib/origami/outputintents.rb, line 48
# True when all of the following hold:
# * the catalog OutputIntents entry is an Array with at least one intent
#   whose solved S equals OutputIntent::Intent::PDFA1;
# * has_metadata? is true;
# * the metadata XML has an rdf:Description carrying an xmlns:pdfaid
#   attribute whose pdfaid:conformance text is "A" and pdfaid:part text is "1".
def is_a_pdfa1?
  return false unless self.Catalog.OutputIntents.is_a?(Array)

  declares_pdfa1 = self.Catalog.OutputIntents.any? { |intent|
    intent.solve.S == OutputIntent::Intent::PDFA1
  }
  return false unless declares_pdfa1
  return false unless self.has_metadata?

  xmp = REXML::Document.new self.Catalog.Metadata.data
  REXML::XPath.match(xmp, "*/*/rdf:Description[@xmlns:pdfaid]").any? { |description|
    description.elements["pdfaid:conformance"].text == "A" and
      description.elements["pdfaid:part"].text == "1"
  }
end
is_encrypted?() click to toggle source

Returns whether the PDF file is encrypted.

# File lib/origami/encryption.rb, line 51
# Reports the result of has_attr? for the :Encrypt attribute.
def is_encrypted?
  has_attr?(:Encrypt)
end
is_linearized?() click to toggle source

Returns whether the current document is linearized.

# File lib/origami/linearization.rb, line 36
# Looks at the object with the lowest file_offset in the first revision
# and reports whether it is a Dictionary holding a :Linearized key.
# Any StandardError raised while locating that object yields false.
def is_linearized?
  leading_object =
    begin
      @revisions.first.objects.sort_by { |candidate| candidate.file_offset }.first
    rescue
      return false
    end

  leading_object.is_a?(Dictionary) and leading_object.has_key?(:Linearized)
end
is_signed?() click to toggle source

Returns whether the document contains a digital signature.

# File lib/origami/signature.rb, line 273
# True when the catalog AcroForm is a Dictionary with a :SigFlags key whose
# value has the InteractiveForm::SigFlags::SIGNATURESEXIST bit set.
# An InvalidReferenceError raised along the way yields false.
def is_signed?
  return false unless self.Catalog.AcroForm.is_a?(Dictionary)
  return false unless self.Catalog.AcroForm.has_key?(:SigFlags)

  flags = self.Catalog.AcroForm.SigFlags
  (flags & InteractiveForm::SigFlags::SIGNATURESEXIST != 0)
rescue InvalidReferenceError
  false
end
js_engine() click to toggle source

Returns the JavaScript engine (if JavaScript support is present).

# File lib/origami/javascript.rb, line 686
# Returns the memoized PDF::JavaScript::Engine for this document, building
# it on the first call.
def js_engine
  @js_engine = PDF::JavaScript::Engine.new(self) unless @js_engine
  @js_engine
end
keywords() click to toggle source
# File lib/origami/metadata.rb, line 56
# Delegates to get_document_info_field with :Keywords.
def keywords
  get_document_info_field(:Keywords)
end
ls(*patterns) click to toggle source

Returns an array of Objects whose name (in a Dictionary) is matching pattern.

# File lib/origami/pdf.rb, line 331
# Lists the values stored under dictionary keys whose name matches any of
# +patterns+; Reference values are replaced by their solved target.
#
# patterns:: Strings (matched literally, as a substring) or other pattern
#            objects accepted by String#match. With no pattern, returns
#            objects(:include_keys => false).
def ls(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map { |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  }

  objects(:only_keys => true).each_with_object([]) do |key, matches|
    next unless matchers.any? { |matcher| key.value.to_s.match(matcher) }

    entry = key.parent[key]
    matches << ( entry.is_a?(Reference) ? entry.solve : entry )
  end
end
ls_names(root) click to toggle source

Returns a Hash of all names under the specified root name directory. Returns an empty Hash if the directory does not exist.

# File lib/origami/catalog.rb, line 183
# Builds a Hash from the flat [name, value, name, value, ...] list found
# under the +root+ name directory.
#
# root:: the names root passed to get_names_root.
#
# Returns an empty Hash when get_names_root yields nil.
# Raises InvalidNameTreeError when the flat list has an odd number of
# elements (previously the error class and message were *returned* as an
# Array instead of being raised).
def ls_names(root)
  namesroot = get_names_root(root)
  return {} if namesroot.nil?

  names = names_from_node(namesroot)
  if names.length % 2 != 0
    raise InvalidNameTreeError, "Odd number of elements"
  end

  Hash[*names]
end
ls_no_follow(*patterns) click to toggle source

Returns an array of Objects whose name (in a Dictionary) is matching pattern. Do not follow references.

# File lib/origami/pdf.rb, line 354
# Lists the values stored under dictionary keys whose name matches any of
# +patterns+. Values are returned exactly as stored; nothing is solved.
#
# patterns:: Strings (matched literally, as a substring) or other pattern
#            objects accepted by String#match. With no pattern, returns
#            objects(:include_keys => false).
def ls_no_follow(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map { |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  }

  objects(:only_keys => true).each_with_object([]) do |key, matches|
    next unless matchers.any? { |matcher| key.value.to_s.match(matcher) }

    matches << key.parent[key]
  end
end
mod_date() click to toggle source
# File lib/origami/metadata.rb, line 60
# Delegates to get_document_info_field with :ModDate.
def mod_date
  get_document_info_field(:ModDate)
end
objects(params = {}) click to toggle source

Returns an array of objects embedded in the PDF body.

include_objectstreams

Whether it shall return objects embedded in object streams.

Note: this should eventually return an iterator, for Ruby 1.9 compatibility.

# File lib/origami/pdf.rb, line 395
# Walks every object of every revision and returns them in one flat array.
#
# params:: option Hash merged over these defaults:
#          :include_objectstreams (true) - descend into ObjectStream contents;
#          :include_keys (true)          - also collect Dictionary keys;
#          :only_keys (false)            - collect Dictionary keys only
#                                          (forces :include_keys on).
def objects(params = {})

  # Depth-first collection of +root+ and everything nested in it.
  # An object already present in +objset+ (by identity) is not visited again.
  def append_subobj(root, objset, opts)
    return unless objset.find { |seen| root.equal?(seen) }.nil?

    objset << root unless opts[:only_keys]

    if root.is_a?(Dictionary)
      walk_keys = (opts[:include_keys] and not opts[:only_keys])

      root.each_pair do |name, value|
        objset << name if opts[:only_keys]

        append_subobj(name, objset, opts) if walk_keys
        append_subobj(value, objset, opts)
      end
    elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
      root.each { |subobj| append_subobj(subobj, objset, opts) }
    end
  end

  options = {
    :include_objectstreams => true,
    :include_keys => true,
    :only_keys => false
  }.merge(params)

  options[:include_keys] |= options[:only_keys]

  @revisions.each_with_object([]) do |revision, collected|
    revision.objects.each { |object| append_subobj(object, collected, options) }
  end
end
onDocumentClose(action) click to toggle source

Sets an action to run on document closing.

action

A JavaScript Action Object.

# File lib/origami/catalog.rb, line 103
# Stores +action+ as the WC entry of the catalog's additional-actions
# dictionary (creating that dictionary when absent) and returns self.
#
# action:: an Action::JavaScript or a Reference.
#
# Raises TypeError for any other argument type and InvalidPDFError when the
# document has no catalog.
def onDocumentClose(action)
  acceptable = (action.is_a?(Action::JavaScript) or action.is_a?(Reference))
  raise TypeError, "An Action::JavaScript object must be passed." unless acceptable

  raise InvalidPDFError, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WC = action

  self
end
onDocumentOpen(action) click to toggle source

Sets an action to run on document opening.

action

An Action Object.

# File lib/origami/catalog.rb, line 84
# Stores +action+ as the catalog's OpenAction and returns self.
#
# action:: an Action, a Destination or a Reference.
#
# Raises TypeError for any other argument type and InvalidPDFError when the
# document has no catalog.
def onDocumentOpen(action)
  acceptable = [Action, Destination, Reference].any? { |type| action.is_a?(type) }
  raise TypeError, "An Action object must be passed." unless acceptable

  raise InvalidPDFError, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.OpenAction = action

  self
end
onDocumentPrint(action) click to toggle source

Sets an action to run on document printing.

action

A JavaScript Action Object.

# File lib/origami/catalog.rb, line 123
# Stores +action+ as the WP entry of the catalog's additional-actions
# dictionary (creating that dictionary when absent) and returns self, so it
# chains like onDocumentOpen and onDocumentClose.
#
# action:: an Action::JavaScript or a Reference.
#
# Raises TypeError for any other argument type and InvalidPDFError when the
# document has no catalog.
def onDocumentPrint(action)

  unless action.is_a?(Action::JavaScript) or action.is_a?(Reference)
    raise TypeError, "An Action::JavaScript object must be passed."
  end

  unless self.Catalog
    raise InvalidPDFError, "A catalog object must exist to add this action."
  end

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WP = action

  # Previously the method fell off the end and evaluated to +action+.
  self
end
original_data() click to toggle source

Original data parsed to create this document, nil if created from scratch.

# File lib/origami/pdf.rb, line 184
# The parser's target_data, or nil when the document has no parser.
def original_data
  return nil unless @parser

  @parser.target_data
end
original_filename() click to toggle source

Original file name if parsed from disk, nil otherwise.

# File lib/origami/pdf.rb, line 170
# The parser's target_filename, or nil when the document has no parser.
def original_filename
  return nil unless @parser

  @parser.target_filename
end
original_filesize() click to toggle source

Original file size if parsed from a data stream, nil otherwise.

# File lib/origami/pdf.rb, line 177
# The parser's target_filesize, or nil when the document has no parser.
def original_filesize
  return nil unless @parser

  @parser.target_filesize
end
pages() click to toggle source

Returns an array of Page

# File lib/origami/page.rb, line 74
# Returns the children of the root page tree node.
#
# Raises InvalidPDFError when the catalog, its Pages entry, or a Pages entry
# of type PageTreeNode is missing.
def pages
  unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
    raise InvalidPDFError, "Invalid page tree"
  end

  self.Catalog.Pages.children
end
producer() click to toggle source
# File lib/origami/metadata.rb, line 58
# Delegates to get_document_info_field with :Producer.
def producer
  get_document_info_field(:Producer)
end
register(root, name, value) click to toggle source

Registers an object into a specific Names root dictionary.

root

The root dictionary (see Names::Root)

name

The value name.

value

The value to associate with this name.

# File lib/origami/catalog.rb, line 144
# Appends the pair (+name+, +value+) to the :Names list kept under +root+ in
# the catalog Names dictionary, creating the Names dictionary and the
# name-tree node on first use.
#
# root::  key of the names root inside the catalog Names dictionary.
# name::  the name to register.
# value:: the associated value; made indirect unless it is a Reference.
def register(root, name, value)
  self.Catalog.Names ||= Names.new

  value.set_indirect(true) unless value.is_a?(Reference)

  existing = self.Catalog.Names[root]
  return existing.solve[:Names] << name << value unless existing.nil?

  node = NameTreeNode.new(:Names => []).set_indirect(true)
  self.Catalog.Names[root] = node
  node.Names << name << value
end
remove_revision(index) click to toggle source

Removes a whole document revision.

index

Revision index, first is 0.

# File lib/origami/pdf.rb, line 510
# Deletes the revision at +index+ and returns self.
#
# index:: zero-based revision index; valid values are 0...@revisions.size.
#
# Raises IndexError when +index+ is out of range and InvalidPDFError when
# only one revision is left.
def remove_revision(index)
  # >= rather than >: an index equal to the size is already one past the
  # last revision, and Array#delete_at would silently do nothing for it.
  if index < 0 or index >= @revisions.size
    raise IndexError, "Not a valid revision index"
  end

  if @revisions.size == 1
    raise InvalidPDFError, "Cannot remove last revision"
  end

  @revisions.delete_at(index)
  self
end
remove_xrefs() click to toggle source

Tries to strip any xrefs information off the document.

# File lib/origami/xreftable.rb, line 33
# For every revision, newest first: deletes its cross-reference stream (and
# the chain of earlier streams reachable through Prev), deletes the stream
# designated by the trailer's XRefStm offset when there is one, then clears
# the revision's xrefstm and xreftable.
def remove_xrefs
  # Deletes +xrefstm+, then recurses into the XRefStream found at its Prev
  # offset, if any.
  def delete_xrefstm(xrefstm)
    previous_offset = xrefstm.Prev
    delete_object(xrefstm.reference)

    return unless previous_offset.is_a?(Integer)

    previous_stream = get_object_by_offset(previous_offset)
    delete_xrefstm(previous_stream) if previous_stream.is_a?(XRefStream)
  end

  @revisions.reverse_each do |revision|
    delete_xrefstm(revision.xrefstm) if revision.has_xrefstm?

    if revision.trailer.has_dictionary? and revision.trailer.XRefStm.is_a?(Integer)
      designated = get_object_by_offset(revision.trailer.XRefStm)

      delete_xrefstm(designated) if designated.is_a?(XRefStream)
    end

    revision.xrefstm = revision.xreftable = nil
  end
end
resolve_name(root, name) click to toggle source

Retrieve the corresponding value associated with name in the specified root name directory, or nil if the value does not exist.

# File lib/origami/catalog.rb, line 172
# Looks +name+ up in the +root+ name directory via resolve_name_from_node.
# Returns nil when get_names_root finds no directory for +root+.
def resolve_name(root, name)
  directory = get_names_root(root)

  directory.nil? ? nil : resolve_name_from_node(directory, name)
end
root_objects()
Alias for: indirect_objects
save(path, params = {}) click to toggle source

Saves the current document.

path

The path where to save this PDF.

# File lib/origami/pdf.rb, line 207
# Writes the document to +path+ and returns self.
#
# path::   an object responding to #write (used as is), or a file path that
#          is expanded and opened for binary writing.
# params:: option Hash merged over the defaults :delinearize => true,
#          :recompile => true, :decrypt => false. Also read here:
#          :intent (a value matching /pdf[\/-]?A1?/i triggers
#          intents_as_pdfa1). The merged Hash is forwarded to compile and
#          output.
#
# When the document is frozen, :recompile, :rebuildxrefs, :noindent and
# :obfuscate are forced to false.
#
# The destination is closed after a successful write, as before. A file
# opened by this method is now also closed when compilation or output
# raises, instead of being leaked; an IO supplied by the caller is left
# open in that case.
def save(path, params = {})

  options =
  {
    :delinearize => true,
    :recompile => true,
    :decrypt => false
  }
  options.update(params)

  if self.frozen? # incompatible flags with frozen doc (signed)
    options[:recompile] =
    options[:rebuildxrefs] =
    options[:noindent] =
    options[:obfuscate] = false
  end

  if path.respond_to?(:write)
    fd = path
    opened_here = false
  else
    path = File.expand_path(path)
    fd = File.open(path, 'w').binmode
    opened_here = true
  end

  written = false
  begin
    intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
    self.delinearize! if options[:delinearize] and self.is_linearized?
    compile(options) if options[:recompile]

    fd.write output(options)
    written = true
  ensure
    fd.close if written or opened_here
  end

  self
end
Also aliased as: write
save_upto(revision, filename) click to toggle source

Saves the file up to given revision number. This can be useful to visualize the modifications over different incremental updates.

revision

The revision number to save.

filename

The path where to save this PDF.

# File lib/origami/pdf.rb, line 248
# Calls save on +filename+ with the :up_to_revision option set to +revision+.
def save_upto(revision, filename)
  save(filename, { :up_to_revision => revision })
end
serialize(filename) click to toggle source

Serializes the current PDF.

# File lib/origami/pdf.rb, line 191
# Marshals the document into a gzip-compressed file at +filename+ and
# returns self.
#
# The parser is detached while dumping so that it is not part of the dump.
# It is reattached in an ensure clause, so a failure while dumping or
# writing no longer leaves the document without its parser.
def serialize(filename)
  parser = @parser
  @parser = nil # do not serialize the parser

  begin
    Zlib::GzipWriter.open(filename) { |gz|
      gz.write Marshal.dump(self)
    }
  ensure
    @parser = parser
  end

  self
end
set_extension_level(version, level) click to toggle source

Sets PDF extension level and version. Only supported values are “1.7” and 3.

# File lib/origami/catalog.rb, line 33
# Installs a fresh DeveloperExtension under the :ADBE key of the catalog
# Extensions dictionary (created when absent), sets its BaseVersion to
# Name.new(version) and its ExtensionLevel to +level+, and returns self.
def set_extension_level(version, level)
  self.Catalog.Extensions ||= Extensions.new
  extensions = self.Catalog.Extensions

  extensions[:ADBE] = DeveloperExtension.new

  adobe = extensions[:ADBE]
  adobe.BaseVersion = Name.new(version)
  adobe.ExtensionLevel = level

  self
end
sign(certificate, key, options = {}) click to toggle source

Sign the document with the given key and x509 certificate.

certificate

The X509 certificate containing the public key.

key

The private key associated with the certificate.

ca

Optional CA certificates used to sign the user certificate.

# File lib/origami/signature.rb, line 108
# Signs the document in place, then freezes it.
#
# certificate:: an OpenSSL::X509::Certificate.
# key::         an OpenSSL::PKey::RSA private key.
# options::     Hash merged over these defaults:
#               :method     ("adbe.pkcs7.detached") signature sub-filter;
#               :ca         ([]) Array of additional certificates;
#               :annotation (nil) an Annotation::Widget::Signature to reuse;
#               :location, :contact, :reason (nil) optional strings stored
#               in the signature dictionary.
#
# Raises RuntimeError when OpenSSL support is disabled, TypeError when an
# argument has the wrong type, and NotImplementedError for an unsupported
# :method.
def sign(certificate, key, options = {})
  
  unless Origami::OPTIONS[:use_openssl]
    fail "OpenSSL is not present or has been disabled."
  end

  params =
  {
    :method => "adbe.pkcs7.detached",
    :ca => [],
    :annotation => nil,
    :location => nil,
    :contact => nil,
    :reason => nil
  }.update(options)
  
  unless certificate.is_a?(OpenSSL::X509::Certificate)
    raise TypeError, "A OpenSSL::X509::Certificate object must be passed."
  end
  
  unless key.is_a?(OpenSSL::PKey::RSA)
    raise TypeError, "A OpenSSL::PKey::RSA object must be passed."
  end
  
  ca = params[:ca]
  unless ca.is_a?(::Array)
    raise TypeError, "Expected an Array of CA certificate."
  end
  
  annotation = params[:annotation]
  unless annotation.nil? or annotation.is_a?(Annotation::Widget::Signature)
    raise TypeError, "Expected a Annotation::Widget::Signature object."
  end

  # Each branch defines a lambda that estimates the size to reserve for the
  # signature: it signs a fixed sample string the same way the real data
  # will be signed and adds 128 bytes of slack.
  case params[:method]
    when 'adbe.pkcs7.detached'
      signfield_size = lambda{|crt,key,ca|
        datatest = "abcdefghijklmnopqrstuvwxyz"
        OpenSSL::PKCS7.sign(
          crt, 
          key, 
          datatest, 
          ca, 
          OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
        ).to_der.size + 128
      }
    when 'adbe.pkcs7.sha1'
      signfield_size = lambda{|crt,key,ca|
        datatest = "abcdefghijklmnopqrstuvwxyz"
        OpenSSL::PKCS7.sign(
          crt, 
          key, 
          Digest::SHA1.digest(datatest), 
          ca, 
          OpenSSL::PKCS7::BINARY
        ).to_der.size + 128
      }

    when 'adbe.x509.rsa_sha1'
      signfield_size = lambda{|crt,key,ca|
        datatest = "abcdefghijklmnopqrstuvwxyz"
        key.private_encrypt(
          Digest::SHA1.digest(datatest)
        ).size + 128
      }
      # NOTE(review): this raise makes 'adbe.x509.rsa_sha1' always fail, so
      # the rsa_sha1 handling further down is never reached. It looks like
      # the method was disabled on purpose -- confirm before removing.
      raise NotImplementedError, "Unsupported method #{params[:method].inspect}"
      
  else
    raise NotImplementedError, "Unsupported method #{params[:method].inspect}"
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)
 
  # Without a caller-supplied widget, use a signature annotation with an
  # all-zero rectangle.
  if annotation.nil?
    annotation = Annotation::Widget::Signature.new
    annotation.Rect = Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0]        
  end
  
  annotation.V = digsig
  add_fields(annotation)
  self.Catalog.AcroForm.SigFlags = 
    InteractiveForm::SigFlags::SIGNATURESEXIST | InteractiveForm::SigFlags::APPENDONLY
  
  # Contents is filled with zero bytes of the estimated size and ByteRange
  # with zeros; both are overwritten below once offsets are known.
  digsig.Type = :Sig #:nodoc:
  digsig.Contents = HexaString.new("\x00" * signfield_size[certificate, key, ca]) #:nodoc:
  digsig.Filter = Name.new("Adobe.PPKMS") #:nodoc:
  digsig.SubFilter = Name.new(params[:method]) #:nodoc:
  digsig.ByteRange = [0, 0, 0, 0] #:nodoc:
  
  digsig.Location = HexaString.new(params[:location]) if params[:location]
  digsig.ContactInfo = HexaString.new(params[:contact]) if params[:contact]
  digsig.Reason = HexaString.new(params[:reason]) if params[:reason]
  
  if params[:method] == 'adbe.x509.rsa_sha1'
    digsig.Cert =
      if ca.empty?
        HexaString.new(certificate.to_der)
      else
        [ HexaString.new(certificate.to_der) ] + ca.map{ |crt| HexaString.new(crt.to_der) }
      end
  end

  #
  #  Flattening the PDF to get file view.
  #
  compile
  
  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs
  
  sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset
  
  # ByteRange is [start1, length1, start2, length2]: everything before the
  # Contents value and everything after it.
  digsig.ByteRange[0] = 0 
  digsig.ByteRange[1] = sigoffset
  digsig.ByteRange[2] = sigoffset + digsig.Contents.size
  
  # Writing the last length is repeated until the stored value agrees with
  # the value recomputed from filesize.
  digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]
  
  # From that point the file size remains constant
  
  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuildxrefs
  
  # The data actually signed is the serialized file minus the Contents value.
  filedata = output()
  signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]]
  
  signature = 
    case params[:method]
      when 'adbe.pkcs7.detached'
        OpenSSL::PKCS7.sign(
          certificate, 
          key, 
          signable_data, 
          ca, 
          OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY
        ).to_der

      when 'adbe.pkcs7.sha1'
        OpenSSL::PKCS7.sign(
          certificate,
          key,
          Digest::SHA1.digest(signable_data),
          ca,
          OpenSSL::PKCS7::BINARY
        ).to_der

      when 'adbe.x509.rsa_sha1'
        key.private_encrypt(Digest::SHA1.digest(signable_data))
    end

  # Overwrite the start of the zero-filled placeholder with the signature.
  digsig.Contents[0, signature.size] = signature
  
  #
  # No more modification are allowed after signing.
  #
  self.freeze
end
signature() click to toggle source
# File lib/origami/signature.rb, line 381
# Returns the V entry of the first field whose FT is :Sig and whose V is a
# Dictionary.
#
# Raises SignatureError when the document is not signed or when no such
# field is found.
def signature
  raise SignatureError, "Not a signed document" unless self.is_signed?

  self.each_field do |field|
    next unless field.FT == :Sig and field.V.is_a?(Dictionary)

    return field.V
  end

  raise SignatureError, "Cannot find digital signature"
end
subject() click to toggle source
# File lib/origami/metadata.rb, line 55
# Delegates to get_document_info_field with :Subject.
def subject
  get_document_info_field(:Subject)
end
title() click to toggle source
# File lib/origami/metadata.rb, line 53
# Delegates to get_document_info_field with :Title.
def title
  get_document_info_field(:Title)
end
verify(options = {}) click to toggle source

Verify a document signature.

Options:
  _:trusted_: an array of trusted X509 certificates.
If no argument is passed, embedded certificates are treated as trusted.
# File lib/origami/signature.rb, line 47
# Verifies the document signature against the originally parsed data.
#
# options:: Hash; :trusted (default []) is a list of certificates added to
#           the verification store. When it is empty the
#           OpenSSL::PKCS7::NOVERIFY flag is set.
#
# Returns the result of the PKCS7 verification (for 'adbe.pkcs7.sha1',
# additionally requires the signed payload to equal the SHA1 digest of the
# covered data).
#
# Raises RuntimeError when OpenSSL support is disabled, SignatureError for
# invalid signature contents, an invalid byte range or a non-detached
# signature where one is required, and NotImplementedError for any other
# SubFilter.
def verify(options = {})

  unless Origami::OPTIONS[:use_openssl]
    fail "OpenSSL is not present or has been disabled."
  end

  params =
  {
    :trusted => []
  }.update(options)

  digsig = self.signature

  unless digsig[:Contents].is_a?(String)
    raise SignatureError, "Invalid digital signature contents"
  end

  store = OpenSSL::X509::Store.new
  params[:trusted].each do |ca| store.add_cert(ca) end
  flags = 0
  flags |= OpenSSL::PKCS7::NOVERIFY if params[:trusted].empty?

  # Re-parse the Contents value from the original data to learn the offset
  # at which it ends.
  stream = StringScanner.new(self.original_data)
  stream.pos = digsig[:Contents].file_offset
  Object.typeof(stream).parse(stream)
  endofsig_offset = stream.pos
  stream.terminate

  # The two byte ranges must start at 0, end at the end of the original
  # data, and leave out exactly the Contents value.
  s1,l1,s2,l2 = digsig.ByteRange
  if s1.value != 0 or 
    (s2.value + l2.value) != self.original_data.size or
    (s1.value + l1.value) != digsig[:Contents].file_offset or
    s2.value != endofsig_offset

    raise SignatureError, "Invalid signature byte range"
  end

  data = self.original_data[s1,l1] + self.original_data[s2,l2]
  
  case digsig.SubFilter.value.to_s 
    when 'adbe.pkcs7.detached'
      flags |= OpenSSL::PKCS7::DETACHED 
      p7 = OpenSSL::PKCS7.new(digsig[:Contents].value)
      raise SignatureError, "Not a PKCS7 detached signature" unless p7.detached?
      p7.verify([], store, data, flags)

    when 'adbe.pkcs7.sha1'          
      p7 = OpenSSL::PKCS7.new(digsig[:Contents].value)
      p7.verify([], store, nil, flags) and p7.data == Digest::SHA1.digest(data)
      
  else
    raise NotImplementedError, "Unsupported method #{digsig.SubFilter}"
  end
end
write(path, params = {})
Alias for: save

Private Instance Methods

build_dummy_xrefs(objects) click to toggle source
# File lib/origami/pdf.rb, line 1035
# Builds an XRef::Section of placeholder entries for +objects+.
#
# Objects are visited in sorted order. One XRef.new(0, 0, XRef::FREE) entry
# is queued per object, after an initial XRef.new(0, XRef::FIRSTFREE,
# XRef::FREE) entry. Whenever two consecutive object numbers differ by more
# than 1, the queued entries are emitted as an XRef::Subsection and a new
# run starts at the current object number.
def build_dummy_xrefs(objects)
  previous_no = 0
  run_start = 0

  # The same array is handed to every Subsection and cleared between runs.
  pending = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
  section = XRef::Section.new

  objects.sort.each do |object|
    unless (object.no - previous_no).abs <= 1
      section << XRef::Subsection.new(run_start, pending)
      run_start = object.no
      pending.clear
    end

    pending << XRef.new(0, 0, XRef::FREE)
    previous_no = object.no
  end

  section << XRef::Subsection.new(run_start, pending)

  section
end
compile(options = {}) click to toggle source

This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.

  • Allocates objects references.

  • Sets some objects missing required values.

# File lib/origami/pdf.rb, line 684
# Prepares the document for output and returns self:
# * appends a page when the document has none;
# * runs physicalize (forwarding +options+ only when the document is an
#   Encryption::EncryptedDocument);
# * sets the header major/minor version from version_required;
# * records the extension level when it is greater than 0.
def compile(options = {})
  # A valid document must have at least one page.
  append_page if pages.empty?

  # Allocates object numbers and creates references;
  # invokes object finalization methods.
  encrypted = self.is_a?(Encryption::EncryptedDocument)
  encrypted ? physicalize(options) : physicalize

  # Sets the PDF version header: characters 0 and 2 of the version string
  # are the major and minor numbers.
  required_version, extension_level = version_required
  @header.majorversion = required_version[0, 1].to_i
  @header.minorversion = required_version[2, 1].to_i

  set_extension_level(required_version, extension_level) if extension_level > 0

  self
end
gen_id() click to toggle source
# File lib/origami/trailer.rb, line 71
# Sets the trailer ID to a pair holding twice the same MD5 hex digest of a
# random number, and returns that pair.
#
# Raises InvalidPDFError when get_trailer_info yields nil.
def gen_id
  trailer_info = get_trailer_info
  raise InvalidPDFError, "Cannot access trailer information" if trailer_info.nil?

  digest = Digest::MD5.hexdigest( rand.to_s )
  trailer_info.ID = [ digest, digest ]
end
init() click to toggle source

Instantiates the basic structures required for a valid PDF file.

# File lib/origami/pdf.rb, line 997
# Sets up the minimal document skeleton and returns self: the catalog
# (the existing :Root attribute, or a new Catalog), a fresh indirect
# PageTreeNode as its Pages entry, and the last revision's trailer Root
# pointing at the catalog.
def init
  root = get_doc_attr(:Root) || Catalog.new
  self.Catalog = root

  root.Pages = PageTreeNode.new.set_indirect(true)
  @revisions.last.trailer.Root = root.reference

  self
end
intents_as_pdfa1() click to toggle source
# File lib/origami/outputintents.rb, line 65
# Marks the document as PDF/A-1 unless is_a_pdfa1? already holds:
# appends an inserted OutputIntent (S = OutputIntent::Intent::PDFA1) to the
# catalog OutputIntents and adds an rdf:Description with
# pdfaid:conformance "A" and pdfaid:part "1" to the metadata XML.
def intents_as_pdfa1
  return if self.is_a_pdfa1?

  self.Catalog.OutputIntents ||= []
  intent = OutputIntent.new(
    :Type => :OutputIntent,
    :S => OutputIntent::Intent::PDFA1,
    :OutputConditionIdentifier => "RGB"
  )
  self.Catalog.OutputIntents << self.insert(intent)

  metadata = self.create_metadata
  doc = REXML::Document.new(metadata.data)

  description = REXML::Element.new 'rdf:Description'
  description.add_attribute 'rdf:about', ''
  description.add_attribute 'xmlns:pdfaid', 'http://www.aiim.org/pdfa/ns/id/'
  description.add REXML::Element.new('pdfaid:conformance').add_text('A')
  description.add REXML::Element.new('pdfaid:part').add_text('1')
  doc.elements["*/rdf:RDF"].add description

  # Serialize the updated XML (indentation 3) back into the metadata stream.
  xml = ""
  doc.write(xml, 3)
  metadata.data = xml
end
output(params = {}) click to toggle source

Returns the final binary representation of the current document.

# File lib/origami/pdf.rb, line 820
# Serializes the document and returns the resulting string.
#
# params:: option Hash merged over these defaults:
#          :rebuildxrefs   (true)  regenerate cross-reference data;
#          :noindent       (false) serialize dictionaries/streams with to_s(0);
#          :obfuscate      (false) emit trailers with to_obfuscated_str;
#          :use_xrefstm    (true when any indirect object is an ObjectStream);
#          :use_xreftable  (the opposite of :use_xrefstm);
#          :up_to_revision (number of revisions) how many revisions to emit.
#
# Raises InvalidPDFError when no trailer information is found.
def output(params = {})
   
  has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}

  options =
  {
    :rebuildxrefs => true,
    :noindent => false,
    :obfuscate => false,
    :use_xrefstm => has_objstm,
    :use_xreftable => (not has_objstm),
    :up_to_revision => @revisions.size
  }
  options.update(params)

  # Clamp the revision count to what the document actually has.
  options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size

  # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
  # (the same reset applies when both kinds are requested at once).
  if options[:use_xrefstm] == options[:use_xreftable]
    options[:use_xrefstm] = has_objstm
    options[:use_xreftable] = (not has_objstm)
  end

  # Get trailer dictionary
  trailer_info = get_trailer_info
  if trailer_info.nil?
    raise InvalidPDFError, "No trailer information found"
  end
  trailer_dict = trailer_info.dictionary
 
  prev_xref_offset = nil
  xrefstm_offset = nil
  xreftable_offset = nil

  # Header
  bin = ""
  bin << @header.to_s
  
  # For each revision
  @revisions[0, options[:up_to_revision]].each do |rev|
    
    # Create xref table/stream.
    if options[:rebuildxrefs] == true
      # Separate bookkeeping for the table and the stream: last object
      # number seen, and first object number of the current run.
      lastno_table, lastno_stm = 0, 0
      brange_table, brange_stm = 0, 0
      
      xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
      xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

      if options[:use_xreftable] == true
        xrefsection = XRef::Section.new
      end

      if options[:use_xrefstm] == true
        # Reuse (and empty) the revision's own stream when it has one;
        # otherwise add a new stream to the revision.
        xrefstm = rev.xrefstm || XRefStream.new
        if xrefstm == rev.xrefstm
          xrefstm.clear
        else
          add_to_revision(xrefstm, rev) 
        end
      end
    end
   
    objset = rev.objects
    
    # Objects held inside object streams are appended so that they receive
    # cross-reference entries too.
    objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
      objset.concat objstm.objects
    end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true

    # For each object, in number order
    objset.sort.each do |obj|
     
      # Create xref entry.
      if options[:rebuildxrefs] == true
       
        # Adding subsections if needed
        # (a gap of more than 1 in object numbers closes the current run).
        if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
          xrefsection << XRef::Subsection.new(brange_table, xrefs_table)

          xrefs_table.clear
          brange_table = obj.no
        end
        if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
          xrefs_stm.each do |xref| xrefstm << xref end
          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.length

          xrefs_stm.clear
          brange_stm = obj.no
        end

        # Process embedded objects
        if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
          index = obj.parent.index(obj.no)
         
          xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
          
          lastno_stm = obj.no
        else
          # Regular object: its entry points at the current output offset.
          xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
          xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)

          lastno_table = lastno_stm = obj.no
        end

      end
     
      # Only objects that are not embedded in an object stream are written
      # directly to the output.
      if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
       
        # Finalize XRefStm
        if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
          xrefstm_offset = bin.size
   
          xrefs_stm.each do |xref| xrefstm << xref end

          # Field widths; the middle one is the number of bytes needed to
          # hold the stream's own offset.
          xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
          if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
            xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
          end

          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.size
   
          # The stream dictionary absorbs the trailer entries and the
          # revision's trailer dictionary is dropped.
          xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict) 
          xrefstm.Prev = prev_xref_offset
          rev.trailer.dictionary = nil

          add_to_revision(xrefstm, rev)

          xrefstm.pre_build
          xrefstm.post_build
        end

        # Output object code
        if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
          bin << obj.to_s(0)
        else
          bin << obj.to_s
        end
      end
    end
  
    rev.trailer ||= Trailer.new
    
    # XRef table
    if options[:rebuildxrefs] == true
 
      if options[:use_xreftable] == true
        table_offset = bin.size
        
        xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
        rev.xreftable = xrefsection
 
        rev.trailer.dictionary = trailer_dict
        rev.trailer.Size = objset.size + 1
        rev.trailer.Prev = prev_xref_offset

        rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
      end

      # startxref points at the table when one is used, otherwise at the
      # stream; the next revision links back to it through Prev.
      startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
      rev.trailer.startxref = prev_xref_offset = startxref

    end # end if rebuildxrefs
    
    # Trailer
    bin << rev.xreftable.to_s if options[:use_xreftable] == true
    bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
    
  end
  
  bin
end
physicalize() click to toggle source

Converts a logical PDF view into a physical view ready for writing.

# File lib/origami/pdf.rb, line 764
def physicalize

  #
  # Recursively walks +obj+ within +revision+.
  # Inside dictionaries and arrays, every indirect child is swapped for its
  # reference; a child that get_object cannot find yet is first added to
  # +revision+ and built in turn.
  # pre_build is invoked on the object before its children are processed,
  # post_build once they are done.
  #
  def build(obj, revision) #:nodoc:

    # Objects embedded in an object stream are built before the stream itself.
    obj.each { |embedded| build(embedded, revision) } if obj.is_a?(ObjectStream)

    obj.pre_build

    if obj.is_a?(Dictionary) || obj.is_a?(Array)

      obj.map! { |child|
        # Direct children stay in place.
        next child unless child.is_indirect?

        # Already resolvable through get_object: only the reference is kept.
        next child.reference if get_object(child.reference)

        # Unknown indirect child: register it, build it, then keep its reference.
        new_reference = add_to_revision(child, revision)
        build(child, revision)
        new_reference
      }

      # Second pass over the (now substituted) children.
      obj.each { |child| build(child, revision) }

    elsif obj.is_a?(Stream)
      # For a stream, only its dictionary is walked.
      build(obj.dictionary, revision)
    end

    obj.post_build
  end

  # Every (object, revision) pair yielded by indirect_objects_by_rev is a root of the walk.
  indirect_objects_by_rev.each { |root, revision| build(root, revision) }

  self
end
rebuild_dummy_xrefs() click to toggle source

Computes and updates a dummy XRef::Section (made of placeholder entries marked as free) for each Revision.

# File lib/origami/pdf.rb, line 1033
def rebuild_dummy_xrefs #:nodoc

  #
  # Returns an XRef::Section holding one placeholder entry (marked FREE) per
  # object of +objects+, plus an initial entry in the subsection starting at 0.
  # A new subsection is opened whenever two consecutive object numbers
  # (in sorted order) differ by more than one.
  #
  def build_dummy_xrefs(objects)

    previous_no = 0
    first_no = 0

    entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

    section = XRef::Section.new
    objects.sort.each do |object|
      current_no = object.no

      # Gap in the numbering: flush the pending entries as a subsection
      # and start collecting a new one at the current object number.
      if (current_no - previous_no).abs > 1
        section << XRef::Subsection.new(first_no, entries)
        first_no = current_no
        entries.clear
      end

      entries << XRef.new(0, 0, XRef::FREE)

      previous_no = current_no
    end

    # Flush whatever is still pending.
    section << XRef::Subsection.new(first_no, entries)

    section
  end

  # Cumulative size of the revision bodies seen so far (feeds the trailer Size entry).
  body_total = 0

  # Running offset, starting right after the serialized header.
  offset = @header.to_s.size

  @revisions.each { |revision|

    # The serialized objects of the revision come before its xref data.
    revision.objects.each { |object| offset += object.to_s.size }

    body_total += revision.body.size
    revision.xreftable = build_dummy_xrefs(revision.objects)

    revision.trailer = Trailer.new unless revision.trailer
    revision.trailer.Size = body_total + 1
    revision.trailer.startxref = offset

    # Move past this revision's xref table and trailer.
    offset += revision.xreftable.to_s.size
    offset += revision.trailer.to_s.size
  }

  self
end
rebuildxrefs() click to toggle source

Compute and update XRef::Section for each Revision.

# File lib/origami/pdf.rb, line 655
def rebuildxrefs

  # Cumulative size of the revision bodies seen so far (feeds the trailer Size entry).
  body_total = 0

  # Running offset, starting right after the serialized header.
  offset = @header.to_s.size

  @revisions.each { |revision|

    # The serialized objects of the revision come before its xref data.
    revision.objects.each { |object| offset += object.to_s.size }

    body_total += revision.body.size
    revision.xreftable = buildxrefs(revision.objects)

    # Make sure the revision has a trailer, then record the totals in it.
    revision.trailer = Trailer.new unless revision.trailer
    revision.trailer.Size = body_total + 1
    revision.trailer.startxref = offset

    # Move past this revision's xref table and trailer before the next revision.
    offset += revision.xreftable.to_s.size
    offset += revision.trailer.to_s.size
  }

  self
end