init

2025-08-11 22:23:30 +02:00 · 2025-08-11 22:23:30 +02:00 · 72a26edcff
commit 72a26edcff
22092 changed files with 2101903 additions and 0 deletions
--- a/lib/PdfParser/Element/ElementArray.php
+++ b/lib/PdfParser/Element/ElementArray.php
@ -0,0 +1,161 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+use Smalot\PdfParser\Header;
+use Smalot\PdfParser\Object;
+
+/**
+ * Class ElementArray
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementArray extends Element
+{
+    /**
+     * Forwards both arguments unchanged to the parent constructor.
+     *
+     * @param mixed    $value    Entries of the array; parse() passes the result of Element::parse().
+     * @param Document $document Document used later to resolve cross references.
+     */
+    public function __construct($value, Document $document = null)
+    {
+        parent::__construct($value, $document);
+    }
+
+    /**
+     * Resolves every cross reference held by this array, then returns the
+     * content as provided by the parent class.
+     *
+     * @return mixed
+     */
+    public function getContent()
+    {
+        foreach ($this->value as $key => $ignored) {
+            $this->resolveXRef($key);
+        }
+
+        return parent::getContent();
+    }
+
+    /**
+     * Returns the stored entries as they are, without resolving cross references.
+     *
+     * @return array
+     */
+    public function getRawContent()
+    {
+        return $this->value;
+    }
+
+    /**
+     * Builds a plain array describing each entry.
+     *
+     * Header and Object entries and nested arrays are only expanded when
+     * $deep is truthy; otherwise they are left out of the result. Any other
+     * Element contributes its content.
+     *
+     * @param bool $deep
+     *
+     * @return array
+     */
+    public function getDetails($deep = true)
+    {
+        $details = array();
+
+        foreach ($this->getContent() as $index => $item) {
+            if ($deep && $item instanceof Header) {
+                $details[$index] = $item->getDetails($deep);
+                continue;
+            }
+
+            if ($deep && $item instanceof Object) {
+                // Objects are described without following their own references.
+                $details[$index] = $item->getDetails(false);
+                continue;
+            }
+
+            if ($item instanceof ElementArray) {
+                if ($deep) {
+                    $details[$index] = $item->getDetails();
+                }
+                continue;
+            }
+
+            if ($item instanceof Element) {
+                $details[$index] = $item->getContent();
+            }
+        }
+
+        return $details;
+    }
+
+    /**
+     * Joins the string form of every entry with a comma.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        $separator = ',';
+
+        return implode($separator, $this->value);
+    }
+
+    /**
+     * Replaces the entry stored under $name by the object it references when
+     * that entry is an ElementXRef, and returns the (possibly replaced) entry.
+     *
+     * @param string $name
+     *
+     * @return Element|Object
+     */
+    protected function resolveXRef($name)
+    {
+        $entry = $this->value[$name];
+
+        if ($entry instanceof ElementXRef) {
+            $this->value[$name] = $this->document->getObjectById($entry->getId());
+        }
+
+        return $this->value[$name];
+    }
+
+    /**
+     * Parses a bracket-delimited array found at the start of $content.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented by the number of characters consumed.
+     *
+     * @return bool|ElementArray false when $content does not start with '['.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*\[(?P<array>.*)/is', $content)) {
+            return false;
+        }
+
+        // Each chunk ends with exactly one bracket; walk them until the
+        // bracket opened first is closed again.
+        preg_match_all('/(.*?)(\[|\])/s', trim($content), $chunks);
+
+        $depth = 0;
+        $raw   = '';
+        foreach ($chunks[0] as $chunk) {
+            $raw .= $chunk;
+
+            if (strpos($chunk, '[') !== false) {
+                $depth++;
+            } else {
+                $depth--;
+            }
+
+            if ($depth <= 0) {
+                break;
+            }
+        }
+
+        // Strip the outermost '[' and ']'.
+        $inner        = substr(trim($raw), 1, -1);
+        $inner_offset = 0;
+        $items        = Element::parse($inner, $document, $inner_offset, true);
+
+        // Opening bracket, inner text, closing bracket.
+        $offset += strpos($content, '[') + 1;
+        $offset += strlen($inner) + 1;
+
+        return new self($items, $document);
+    }
+}
--- a/lib/PdfParser/Element/ElementBoolean.php
+++ b/lib/PdfParser/Element/ElementBoolean.php
@ -0,0 +1,88 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+
+/**
+ * Class ElementBoolean
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementBoolean extends Element
+{
+    /**
+     * Stores true when $value is boolean true or the text 'true' in any
+     * letter case, false otherwise. The document is not forwarded to the parent.
+     *
+     * @param string   $value
+     * @param Document $document
+     */
+    public function __construct($value, Document $document = null)
+    {
+        $flag = ($value === true) || (strtolower($value) == 'true');
+
+        parent::__construct($flag, null);
+    }
+
+    /**
+     * @return string 'true' or 'false'
+     */
+    public function __toString()
+    {
+        if ($this->value) {
+            return 'true';
+        }
+
+        return 'false';
+    }
+
+    /**
+     * Strict comparison between the content and $value.
+     *
+     * @param mixed $value
+     *
+     * @return bool
+     */
+    public function equals($value)
+    {
+        return $value === $this->getContent();
+    }
+
+    /**
+     * Parses a 'true' / 'false' keyword (case-insensitive) at the start of $content.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented up to the end of the keyword.
+     *
+     * @return bool|ElementBoolean false when no keyword is found.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*(?P<value>true|false)/is', $content, $found)) {
+            return false;
+        }
+
+        $keyword = $found['value'];
+        $offset += strpos($content, $keyword) + strlen($keyword);
+
+        return new self($keyword, $document);
+    }
+}
--- a/lib/PdfParser/Element/ElementDate.php
+++ b/lib/PdfParser/Element/ElementDate.php
@ -0,0 +1,155 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+
+/**
+ * Class ElementDate
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementDate extends ElementString
+{
+    /**
+     * DateTime::createFromFormat() patterns indexed by the length of the
+     * normalised date string (quotes removed, anything after 'Z' dropped).
+     *
+     * @var array
+     */
+    protected static $formats = array(
+        4  => 'Y',
+        6  => 'Ym',
+        8  => 'Ymd',
+        10 => 'YmdH',
+        12 => 'YmdHi',
+        14 => 'YmdHis',
+        15 => 'YmdHise',
+        17 => 'YmdHisO',
+        18 => 'YmdHisO',
+        19 => 'YmdHisO',
+    );
+
+    /**
+     * Format used by __toString().
+     *
+     * @var string
+     */
+    protected $format = 'c';
+
+    /**
+     * The document is not forwarded to the parent constructor.
+     *
+     * @param \DateTime $value
+     * @param Document  $document
+     *
+     * @throws \Exception When $value is not a \DateTime instance.
+     */
+    public function __construct($value, Document $document = null)
+    {
+        if (!($value instanceof \DateTime)) {
+            throw new \Exception('DateTime required.');
+        }
+
+        parent::__construct($value, null);
+    }
+
+    /**
+     * Sets the format used when the element is cast to a string.
+     *
+     * @param string $format
+     */
+    public function setFormat($format)
+    {
+        $this->format = $format;
+    }
+
+    /**
+     * Compares timestamps; $value may be a \DateTime or anything strtotime() understands.
+     *
+     * @param mixed $value
+     *
+     * @return bool
+     */
+    public function equals($value)
+    {
+        if ($value instanceof \DateTime) {
+            $timestamp = $value->getTimestamp();
+        } else {
+            $timestamp = strtotime($value);
+
+            // An unparseable value must not compare equal to the Unix epoch
+            // (false == 0 under loose comparison).
+            if ($timestamp === false) {
+                return false;
+            }
+        }
+
+        return ($timestamp == $this->value->getTimestamp());
+    }
+
+    /**
+     * @return string The date rendered with the current format.
+     */
+    public function __toString()
+    {
+        return (string)($this->value->format($this->format));
+    }
+
+    /**
+     * Parses a '(D:...)' date string found at the start of $content.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented past the closing ')' on success only.
+     *
+     * @return bool|ElementDate false when $content holds no date or the date cannot be decoded.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (preg_match('/^\s*\(D\:(?P<name>.*?)\)/s', $content, $match)) {
+            $name = $match['name'];
+            $name = str_replace("'", '', $name);
+            $date = false;
+
+            // Smallest format : Y
+            // Full format     : YmdHisP
+            if (preg_match('/^\d{4}(\d{2}(\d{2}(\d{2}(\d{2}(\d{2}(Z(\d{2,4})?|[\+-]?\d{2}(\d{2})?)?)?)?)?)?)?$/', $name)) {
+                if ($pos = strpos($name, 'Z')) {
+                    // Drop whatever follows the 'Z' marker.
+                    $name = substr($name, 0, $pos + 1);
+                } elseif (strlen($name) == 18 && preg_match('/[^\+-]0000$/', $name)) {
+                    // Unsigned '0000' offset: add the missing sign.
+                    $name = substr($name, 0, -4) . '+0000';
+                }
+
+                // The pattern above also accepts lengths with no known format
+                // (e.g. 16 characters); treat those as undecodable instead of
+                // reading an undefined index.
+                $length = strlen($name);
+                if (isset(self::$formats[$length])) {
+                    $date = \DateTime::createFromFormat(self::$formats[$length], $name);
+                }
+            } else {
+                // special cases
+                if (preg_match('/^\d{1,2}-\d{1,2}-\d{4},?\s+\d{2}:\d{2}:\d{2}[\+-]\d{4}$/', $name)) {
+                    $name   = str_replace(',', '', $name);
+                    $format = 'n-j-Y H:i:sO';
+                    $date   = \DateTime::createFromFormat($format, $name);
+                }
+            }
+
+            if (!$date) {
+                return false;
+            }
+
+            $offset += strpos($content, '(D:') + strlen($match['name']) + 4; // 4 for '(D:' and ')'
+            $element = new self($date, $document);
+
+            return $element;
+        }
+
+        return false;
+    }
+}
--- a/lib/PdfParser/Element/ElementHexa.php
+++ b/lib/PdfParser/Element/ElementHexa.php
@ -0,0 +1,93 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Document;
+
+/**
+ * Class ElementHexa
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementHexa extends ElementString
+{
+    /**
+     * Parses a '<...>' hexadecimal string found at the start of $content.
+     *
+     * The decoded text is wrapped in parentheses and handed to
+     * ElementDate::parse() first, then to ElementString::parse().
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented past the closing '>'.
+     *
+     * @return bool|ElementHexa false when $content does not start with a hexadecimal string.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*\<(?P<name>[A-F0-9]+)\>/is', $content, $match)) {
+            return false;
+        }
+
+        $hexa    = $match['name'];
+        $offset += strpos($content, '<' . $hexa) + strlen($hexa) + 2; // 2 for '<' and '>'
+
+        // Repackage the decoded text as a literal string.
+        $literal = '(' . self::decode($hexa, $document) . ')';
+
+        $element = ElementDate::parse($literal, $document);
+        if (!$element) {
+            $element = ElementString::parse($literal, $document);
+        }
+
+        return $element;
+    }
+
+    /**
+     * Decodes a string of hexadecimal digits into text.
+     *
+     * When the value starts with '00' it is read four digits at a time and
+     * each group becomes a numeric HTML entity; otherwise it is read two
+     * digits at a time and each group becomes a single byte. Entities are
+     * finally converted to UTF-8.
+     *
+     * @param string   $value
+     * @param Document $document
+     *
+     * @return string
+     */
+    public static function decode($value, Document $document = null)
+    {
+        $decoded = '';
+        $total   = strlen($value);
+        $wide    = (substr($value, 0, 2) == '00');
+        $step    = $wide ? 4 : 2;
+
+        for ($pos = 0; $pos < $total; $pos += $step) {
+            $code = hexdec(substr($value, $pos, $step));
+
+            if ($wide) {
+                $decoded .= '&#' . str_pad($code, 4, '0', STR_PAD_LEFT) . ';';
+            } else {
+                $decoded .= chr($code);
+            }
+        }
+
+        return html_entity_decode($decoded, ENT_NOQUOTES, 'UTF-8');
+    }
+}
--- a/lib/PdfParser/Element/ElementMissing.php
+++ b/lib/PdfParser/Element/ElementMissing.php
@ -0,0 +1,85 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+
+/**
+ * Class ElementMissing
+ */
+class ElementMissing extends Element
+{
+    /**
+     * Both arguments are ignored: the parent is always built with a null
+     * value and a null document.
+     *
+     * @param string   $value
+     * @param Document $document
+     */
+    public function __construct($value, Document $document = null)
+    {
+        parent::__construct(null, null);
+    }
+
+    /**
+     * Never equal to anything.
+     *
+     * @param mixed $value
+     *
+     * @return bool Always false.
+     */
+    public function equals($value)
+    {
+        return false;
+    }
+
+    /**
+     * Never contains anything.
+     *
+     * @param mixed $value
+     *
+     * @return bool Always false.
+     */
+    public function contains($value)
+    {
+        return false;
+    }
+
+    /**
+     * @return bool Always false.
+     */
+    public function getContent()
+    {
+        return false;
+    }
+
+    /**
+     * @return string Always the empty string.
+     */
+    public function __toString()
+    {
+        return '';
+    }
+}
--- a/lib/PdfParser/Element/ElementName.php
+++ b/lib/PdfParser/Element/ElementName.php
@ -0,0 +1,82 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+use Smalot\PdfParser\Font;
+
+/**
+ * Class ElementName
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementName extends Element
+{
+    /**
+     * Stores the name; the document is not forwarded to the parent.
+     *
+     * @param string   $value
+     * @param Document $document
+     */
+    public function __construct($value, Document $document = null)
+    {
+        parent::__construct($value, null);
+    }
+
+    /**
+     * Loose comparison between $value and the stored name.
+     *
+     * @param mixed $value
+     *
+     * @return bool
+     */
+    public function equals($value)
+    {
+        return $this->value == $value;
+    }
+
+    /**
+     * Parses a '/Name' token found at the start of $content.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented up to the end of the raw name.
+     *
+     * @return bool|ElementName false when $content does not start with a name.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*\/(?P<name>[A-Z0-9\-\+,#\.]+)/is', $content, $found)) {
+            return false;
+        }
+
+        $raw     = $found['name'];
+        $offset += strpos($content, $raw) + strlen($raw);
+
+        return new self(Font::decodeEntities($raw), $document);
+    }
+}
--- a/lib/PdfParser/Element/ElementNull.php
+++ b/lib/PdfParser/Element/ElementNull.php
@ -0,0 +1,87 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+
+/**
+ * Class ElementNull
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementNull extends Element
+{
+    /**
+     * Both arguments are ignored: the parent is always built with a null
+     * value and a null document.
+     *
+     * @param string   $value
+     * @param Document $document
+     */
+    public function __construct($value, Document $document = null)
+    {
+        parent::__construct(null, null);
+    }
+
+    /**
+     * @return string Always 'null'.
+     */
+    public function __toString()
+    {
+        return 'null';
+    }
+
+    /**
+     * Strict comparison between the content and $value.
+     *
+     * @param mixed $value
+     *
+     * @return bool
+     */
+    public function equals($value)
+    {
+        return $value === $this->getContent();
+    }
+
+    /**
+     * Parses the lowercase keyword 'null' found at the start of $content.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented up to the end of the keyword.
+     *
+     * @return bool|ElementNull false when the keyword is absent.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*(null)/s', $content)) {
+            return false;
+        }
+
+        $keyword = 'null';
+        $offset += strpos($content, $keyword) + strlen($keyword);
+
+        return new self(null, $document);
+    }
+}
--- a/lib/PdfParser/Element/ElementNumeric.php
+++ b/lib/PdfParser/Element/ElementNumeric.php
@ -0,0 +1,70 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+
+/**
+ * Class ElementNumeric
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementNumeric extends Element
+{
+    /**
+     * Stores $value converted to a float; the document is not forwarded to the parent.
+     *
+     * @param string   $value
+     * @param Document $document
+     */
+    public function __construct($value, Document $document = null)
+    {
+        $number = (float) $value;
+
+        parent::__construct($number, null);
+    }
+
+    /**
+     * Parses an optionally negative run of digits and dots found at the start of $content.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented up to the end of the matched text.
+     *
+     * @return bool|ElementNumeric false when $content does not start with a number.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*(?P<value>\-?[0-9\.]+)/s', $content, $found)) {
+            return false;
+        }
+
+        $literal = $found['value'];
+        $offset += strpos($content, $literal) + strlen($literal);
+
+        return new self($literal, $document);
+    }
+}
--- a/lib/PdfParser/Element/ElementString.php
+++ b/lib/PdfParser/Element/ElementString.php
@ -0,0 +1,106 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+use Smalot\PdfParser\Font;
+
+/**
+ * Class ElementString
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementString extends Element
+{
+    /**
+     * Stores the text; the document is not forwarded to the parent.
+     *
+     * @param string   $value
+     * @param Document $document
+     */
+    public function __construct($value, Document $document = null)
+    {
+        parent::__construct($value, null);
+    }
+
+    /**
+     * Loose comparison between $value and the stored text.
+     *
+     * @param mixed $value
+     *
+     * @return bool
+     */
+    public function equals($value)
+    {
+        return $this->value == $value;
+    }
+
+    /**
+     * Parses a '(...)' literal string found at the start of $content.
+     *
+     * The string ends at the first ')' that is not preceded by an odd number
+     * of backslashes. Escape sequences are then replaced and the text is run
+     * through the Font decoders.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented past the closing ')'.
+     *
+     * @return bool|ElementString false when $content does not start with '('.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*\((?P<name>.*)/s', $content, $found)) {
+            return false;
+        }
+
+        $text = $found['name'];
+
+        // Look for the first ')' that is not escaped.
+        $search_from = 0;
+        while (($close_pos = strpos($text, ')', $search_from)) !== false) {
+            $before = substr($text, 0, $close_pos);
+            preg_match('/(?P<escape>[\\\]*)$/s', $before, $tail);
+
+            // An even number of trailing backslashes leaves the ')' unescaped.
+            if (strlen($tail['escape']) % 2 == 0) {
+                break;
+            }
+
+            $search_from = $close_pos + 1;
+        }
+
+        // Keep what stands before the closing parenthesis.
+        $text    = substr($text, 0, $close_pos);
+        $offset += strpos($content, '(') + $close_pos + 2; // 2 for '(' and ')'
+
+        $escaped   = array('\\\\', '\\ ', '\\/', '\(', '\)', '\n', '\r', '\t');
+        $unescaped = array('\\',   ' ',   '/',   '(',  ')',  "\n", "\r", "\t");
+        $text      = str_replace($escaped, $unescaped, $text);
+
+        // Decoders are applied in this order: octal, entities, hexadecimal, unicode.
+        $text = Font::decodeUnicode(
+            Font::decodeHexadecimal(
+                Font::decodeEntities(Font::decodeOctal($text)),
+                false
+            )
+        );
+
+        return new self($text, $document);
+    }
+}
--- a/lib/PdfParser/Element/ElementStruct.php
+++ b/lib/PdfParser/Element/ElementStruct.php
@ -0,0 +1,80 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+use Smalot\PdfParser\Header;
+
+/**
+ * Class ElementStruct
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementStruct extends Element
+{
+    /**
+     * Parses a '<< ... >>' structure found at the start of $content and
+     * returns it as a Header built from the elements it contains.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented up to the end of the matching '>>'.
+     *
+     * @return bool|Header false when $content does not start with '<<'.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (!preg_match('/^\s*<<(?P<struct>.*)/is', $content)) {
+            return false;
+        }
+
+        // Each chunk ends with one '<<' or '>>'; walk them until the
+        // structure opened first is closed again.
+        preg_match_all('/(.*?)(<<|>>)/s', trim($content), $chunks);
+
+        $depth = 0;
+        $raw   = '';
+        foreach ($chunks[0] as $chunk) {
+            $raw .= $chunk;
+
+            if (strpos($chunk, '<<') !== false) {
+                $depth++;
+            } else {
+                $depth--;
+            }
+
+            if ($depth <= 0) {
+                break;
+            }
+        }
+
+        $offset += strpos($content, '<<') + strlen(rtrim($raw));
+
+        // Strip the outermost '<<' and '>>'.
+        $body = trim(preg_replace('/^\s*<<(.*)>>\s*$/s', '\\1', $raw));
+
+        $position = 0;
+
+        return new Header(Element::parse($body, $document, $position), $document);
+    }
+}
--- a/lib/PdfParser/Element/ElementXRef.php
+++ b/lib/PdfParser/Element/ElementXRef.php
@ -0,0 +1,98 @@
+<?php
+
+/**
+ * @file
+ *          This file is part of the PdfParser library.
+ *
+ * @author  Sébastien MALOT <sebastien@malot.fr>
+ * @date    2017-01-03
+ * @license LGPLv3
+ * @url     <https://github.com/smalot/pdfparser>
+ *
+ *  PdfParser is a pdf library written in PHP, extraction oriented.
+ *  Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
+ *
+ *  This program is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.
+ *  If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
+ *
+ */
+
+namespace Smalot\PdfParser\Element;
+
+use Smalot\PdfParser\Element;
+use Smalot\PdfParser\Document;
+
+/**
+ * Class ElementXRef
+ *
+ * @package Smalot\PdfParser\Element
+ */
+class ElementXRef extends Element
+{
+    /**
+     * Returns the identifier of the referenced object, as built by parse():
+     * '<object number>_<generation number>'.
+     *
+     * @return string
+     */
+    public function getId()
+    {
+        return $this->getContent();
+    }
+
+    /**
+     * Asks the document for the object this reference points to.
+     *
+     * @return mixed
+     */
+    public function getObject()
+    {
+        return $this->document->getObjectById($this->getId());
+    }
+
+    /**
+     * Loose comparison of identifiers; $value may be another ElementXRef or a raw id.
+     *
+     * @param mixed $value
+     *
+     * @return bool
+     */
+    public function equals($value)
+    {
+        $id = ($value instanceof ElementXRef) ? $value->getId() : $value;
+
+        return $this->getId() == $id;
+    }
+
+    /**
+     * @return string '#Obj#' followed by the identifier.
+     */
+    public function __toString()
+    {
+        return '#Obj#' . $this->getId();
+    }
+
+    /**
+     * Parses an indirect reference ('<number> <generation> R') found at the
+     * start of $content.
+     *
+     * @param string   $content
+     * @param Document $document
+     * @param int      $offset   Incremented up to the end of the 'R' keyword.
+     *
+     * @return bool|ElementXRef false when $content does not start with a reference.
+     */
+    public static function parse($content, Document $document = null, &$offset = 0)
+    {
+        if (preg_match('/^\s*(?P<id>(?P<obj>[0-9]+)\s+(?P<gen>[0-9]+)\s+R)/s', $content, $match)) {
+            $reference = $match['id'];
+            $offset   += strpos($content, $reference) + strlen($reference);
+
+            // Build the id from the two numbers so that any whitespace between
+            // them (several spaces, tab, line break) yields the same
+            // '<number>_<generation>' identifier.
+            $id = $match['obj'] . '_' . $match['gen'];
+
+            return new self($id, $document);
+        }
+
+        return false;
+    }
+}