init
This commit is contained in:
commit
72a26edcff
22092 changed files with 2101903 additions and 0 deletions
161
lib/PdfParser/Element/ElementArray.php
Normal file
161
lib/PdfParser/Element/ElementArray.php
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
use Smalot\PdfParser\Header;
|
||||
use Smalot\PdfParser\Object;
|
||||
|
||||
/**
|
||||
* Class ElementArray
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementArray extends Element
{
    /**
     * Builds an array element.
     *
     * @param array    $value    Child elements keyed by position/name (parse() passes the result
     *                           of Element::parse() here).
     * @param Document $document Document used to resolve cross references; may be null.
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct($value, $document);
    }

    /**
     * Returns the content after replacing every cross reference entry by the object it points to.
     *
     * @return mixed
     */
    public function getContent()
    {
        // Only the keys are needed: resolveXRef() rewrites $this->value in place.
        foreach ($this->value as $name => $element) {
            $this->resolveXRef($name);
        }

        return parent::getContent();
    }

    /**
     * Returns the stored entries untouched (cross references are NOT resolved).
     *
     * @return array
     */
    public function getRawContent()
    {
        return $this->value;
    }

    /**
     * Converts the array into plain PHP values.
     *
     * Headers, objects and nested arrays are only expanded when $deep is true; in shallow mode
     * they are left out of the result. Any other Element contributes its content.
     *
     * @param bool $deep
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $values = array();
        $elements = $this->getContent();

        foreach ($elements as $key => $element) {
            if ($element instanceof Header && $deep) {
                $values[$key] = $element->getDetails($deep);
            } elseif ($element instanceof Object && $deep) {
                // Objects are expanded one level only.
                $values[$key] = $element->getDetails(false);
            } elseif ($element instanceof ElementArray) {
                if ($deep) {
                    $values[$key] = $element->getDetails();
                }
            } elseif ($element instanceof Element) {
                // Nested arrays were handled by the previous branch, so this is a scalar-like element.
                $values[$key] = $element->getContent();
            }
        }

        return $values;
    }

    /**
     * @return string The entries joined with a comma.
     */
    public function __toString()
    {
        return implode(',', $this->value);
    }

    /**
     * Replaces the entry $name by the referenced object when it is a cross reference.
     *
     * Without a document there is nothing to look the reference up in, so the entry is
     * returned unresolved instead of calling a method on null.
     *
     * @param string $name
     *
     * @return Element|Object
     */
    protected function resolveXRef($name)
    {
        $entry = $this->value[$name];

        if ($entry instanceof ElementXRef && null !== $this->document) {
            $this->value[$name] = $this->document->getObjectById($entry->getId());
        }

        return $this->value[$name];
    }

    /**
     * Parses a "[ ... ]" array found at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented by the number of characters consumed.
     *
     * @return bool|ElementArray False when $content does not start with an array.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\[(?P<array>.*)/is', $content)) {
            return false;
        }

        // Split into chunks that each end with a single bracket.
        preg_match_all('/(.*?)(\[|\])/s', trim($content), $matches);

        // Accumulate chunks until the opening bracket is balanced.
        $level = 0;
        $sub = '';
        foreach ($matches[0] as $part) {
            $sub .= $part;
            $level += (strpos($part, '[') !== false ? 1 : -1);
            if ($level <= 0) {
                break;
            }
        }

        // Removes 1 level [ and ].
        $sub = substr(trim($sub), 1, -1);
        $sub_offset = 0;
        $values = Element::parse($sub, $document, $sub_offset, true);

        // Everything up to and including '[' ...
        $offset += strpos($content, '[') + 1;
        // ... then the inner text and the closing ']'.
        $offset += strlen($sub) + 1;

        return new self($values, $document);
    }
}
|
||||
88
lib/PdfParser/Element/ElementBoolean.php
Normal file
88
lib/PdfParser/Element/ElementBoolean.php
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
|
||||
/**
|
||||
* Class ElementBoolean
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementBoolean extends Element
{
    /**
     * Stores a PHP boolean: true for the string "true" (any letter case) or the boolean true.
     *
     * @param string|bool $value
     * @param Document    $document Accepted for signature compatibility; null is passed to the parent.
     */
    public function __construct($value, Document $document = null)
    {
        $flag = (strtolower($value) == 'true' || $value === true);

        parent::__construct($flag, null);
    }

    /**
     * @return string Either 'true' or 'false'.
     */
    public function __toString()
    {
        if ($this->value) {
            return 'true';
        }

        return 'false';
    }

    /**
     * Strict (type-sensitive) comparison with the stored content.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value === $this->getContent();
    }

    /**
     * Parses a "true"/"false" keyword (case-insensitive) at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented up to the end of the matched keyword.
     *
     * @return bool|ElementBoolean False when no boolean keyword is found.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*(?P<value>true|false)/is', $content, $found)) {
            return false;
        }

        $keyword = $found['value'];
        $offset += strpos($content, $keyword) + strlen($keyword);

        return new self($keyword, $document);
    }
}
|
||||
155
lib/PdfParser/Element/ElementDate.php
Normal file
155
lib/PdfParser/Element/ElementDate.php
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
|
||||
/**
|
||||
* Class ElementDate
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementDate extends ElementString
{
    /**
     * Maps the length of a normalized date string to the DateTime::createFromFormat() pattern
     * used to read it.
     *
     * @var array
     */
    protected static $formats = array(
        4  => 'Y',
        6  => 'Ym',
        8  => 'Ymd',
        10 => 'YmdH',
        12 => 'YmdHi',
        14 => 'YmdHis',
        15 => 'YmdHise',
        17 => 'YmdHisO',
        18 => 'YmdHisO',
        19 => 'YmdHisO',
    );

    /**
     * Output format used by __toString().
     *
     * @var string
     */
    protected $format = 'c';

    /**
     * @param \DateTime $value
     * @param Document  $document Accepted for signature compatibility; null is passed to the parent.
     *
     * @throws \Exception When $value is not a \DateTime instance.
     */
    public function __construct($value, Document $document = null)
    {
        if (!($value instanceof \DateTime)) {
            throw new \Exception('DateTime required.');
        }

        parent::__construct($value, null);
    }

    /**
     * @param string $format Format string used by __toString().
     */
    public function setFormat($format)
    {
        $this->format = $format;
    }

    /**
     * Compares timestamps with either a \DateTime or a date string understood by strtotime().
     *
     * @param mixed $value
     *
     * @return bool False when $value cannot be converted to a timestamp.
     */
    public function equals($value)
    {
        if ($value instanceof \DateTime) {
            $timestamp = $value->getTimeStamp();
        } else {
            $timestamp = strtotime($value);

            // strtotime() signals failure with false, which would loosely equal timestamp 0.
            if ($timestamp === false) {
                return false;
            }
        }

        return ($timestamp == $this->value->getTimeStamp());
    }

    /**
     * @return string The date rendered with the current format.
     */
    public function __toString()
    {
        return (string)($this->value->format($this->format));
    }

    /**
     * Parses a "(D:...)" date string found at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented past the closing ')' on success.
     *
     * @return bool|ElementDate False when $content holds no readable date.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\(D\:(?P<name>.*?)\)/s', $content, $match)) {
            return false;
        }

        // Drop the apostrophes that separate the timezone parts.
        $name = str_replace("'", '', $match['name']);
        $date = false;

        // Smallest format : Y
        // Full format     : YmdHisP
        if (preg_match('/^\d{4}(\d{2}(\d{2}(\d{2}(\d{2}(\d{2}(Z(\d{2,4})?|[\+-]?\d{2}(\d{2})?)?)?)?)?)?)?$/', $name)) {
            if ($pos = strpos($name, 'Z')) {
                // Ignore anything after the 'Z' marker.
                $name = substr($name, 0, $pos + 1);
            } elseif (strlen($name) == 18 && preg_match('/[^\+-]0000$/', $name)) {
                // Unsigned "0000" offset: make the sign explicit.
                $name = substr($name, 0, -4) . '+0000';
            }

            // The pattern above also accepts lengths without a known format (e.g. 16 chars:
            // an unsigned two digit offset); those are reported as unparsable.
            $length = strlen($name);
            if (isset(self::$formats[$length])) {
                $date = \DateTime::createFromFormat(self::$formats[$length], $name);
            }
        } elseif (preg_match('/^\d{1,2}-\d{1,2}-\d{4},?\s+\d{2}:\d{2}:\d{2}[\+-]\d{4}$/', $name)) {
            // special cases
            $name = str_replace(',', '', $name);
            $date = \DateTime::createFromFormat('n-j-Y H:i:sO', $name);
        }

        if (!$date) {
            return false;
        }

        // 4 = 3 characters for '(D:' plus 1 for ')'.
        $offset += strpos($content, '(D:') + strlen($match['name']) + 4;

        return new self($date, $document);
    }
}
|
||||
93
lib/PdfParser/Element/ElementHexa.php
Normal file
93
lib/PdfParser/Element/ElementHexa.php
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Document;
|
||||
|
||||
/**
|
||||
* Class ElementHexa
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementHexa extends ElementString
{
    /**
     * Parses a "<hex digits>" string found at the beginning of $content.
     *
     * The digits are decoded and re-wrapped in parentheses so that the regular date and
     * string parsers can build the resulting element.
     * NOTE(review): the decoded text is not re-escaped before being wrapped, so a decoded ')'
     * or backslash is presumably interpreted by ElementString::parse() - confirm.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented past the closing '>' on success.
     *
     * @return bool|ElementDate|ElementString False when $content holds no hexadecimal string.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\<(?P<name>[A-F0-9]+)\>/is', $content, $match)) {
            return false;
        }

        $name = $match['name'];
        // 2 = one character for '<' and one for '>'.
        $offset += strpos($content, '<' . $name) + strlen($name) + 2;

        // repackage string as standard
        $name = '(' . self::decode($name, $document) . ')';

        $element = ElementDate::parse($name, $document);
        if (!$element) {
            $element = ElementString::parse($name, $document);
        }

        return $element;
    }

    /**
     * Decodes a run of hexadecimal digits into text.
     *
     * Values starting with "00" are read four digits at a time as numeric character
     * references; other values are read two digits (one byte) at a time.
     *
     * @param string   $value    Hexadecimal digits without the '<' and '>' delimiters.
     * @param Document $document Unused.
     *
     * @return string
     */
    public static function decode($value, Document $document = null)
    {
        $text = '';

        if (substr($value, 0, 2) == '00') {
            $length = strlen($value);
            for ($i = 0; $i < $length; $i += 4) {
                $hex = substr($value, $i, 4);
                $text .= '&#' . str_pad(hexdec($hex), 4, '0', STR_PAD_LEFT) . ';';
            }
        } else {
            // PDF rule: when the final digit is missing it is assumed to be 0,
            // so "A" means 0xA0 and not 0x0A.
            if (strlen($value) % 2) {
                $value .= '0';
            }

            $length = strlen($value);
            for ($i = 0; $i < $length; $i += 2) {
                $hex = substr($value, $i, 2);
                $text .= chr(hexdec($hex));
            }
        }

        // Turns the numeric character references built above into UTF-8 characters.
        $text = html_entity_decode($text, ENT_NOQUOTES, 'UTF-8');

        return $text;
    }
}
|
||||
85
lib/PdfParser/Element/ElementMissing.php
Normal file
85
lib/PdfParser/Element/ElementMissing.php
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
|
||||
/**
|
||||
* Class ElementMissing
|
||||
*/
|
||||
class ElementMissing extends Element
{
    /**
     * Both arguments are ignored: the parent is always initialised with null value and document.
     *
     * @param string   $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct(null, null);
    }

    /**
     * A missing element is never equal to anything.
     *
     * @param mixed $value
     *
     * @return bool Always false.
     */
    public function equals($value)
    {
        return false;
    }

    /**
     * A missing element never contains anything.
     *
     * @param mixed $value
     *
     * @return bool Always false.
     */
    public function contains($value)
    {
        return false;
    }

    /**
     * @return bool Always false.
     */
    public function getContent()
    {
        return false;
    }

    /**
     * @return string Always the empty string.
     */
    public function __toString()
    {
        return '';
    }
}
|
||||
82
lib/PdfParser/Element/ElementName.php
Normal file
82
lib/PdfParser/Element/ElementName.php
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
use Smalot\PdfParser\Font;
|
||||
|
||||
/**
|
||||
* Class ElementName
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementName extends Element
{
    /**
     * @param string   $value    Name without its leading slash.
     * @param Document $document Accepted for signature compatibility; null is passed to the parent.
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison with the stored name.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value == $this->value;
    }

    /**
     * Parses a "/Name" token found at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented up to the end of the matched name.
     *
     * @return bool|ElementName False when $content does not start with a name.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\/(?P<name>[A-Z0-9\-\+,#\.]+)/is', $content, $found)) {
            return false;
        }

        $raw = $found['name'];
        // The offset is computed on the raw text, before entities are decoded.
        $offset += strpos($content, $raw) + strlen($raw);

        $decoded = Font::decodeEntities($raw);

        return new self($decoded, $document);
    }
}
|
||||
87
lib/PdfParser/Element/ElementNull.php
Normal file
87
lib/PdfParser/Element/ElementNull.php
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
|
||||
/**
|
||||
* Class ElementNull
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementNull extends Element
{
    /**
     * Both arguments are ignored: the parent is always initialised with null value and document.
     *
     * @param string   $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct(null, null);
    }

    /**
     * @return string Always 'null'.
     */
    public function __toString()
    {
        return 'null';
    }

    /**
     * Strict (type-sensitive) comparison with the stored content.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value === $this->getContent();
    }

    /**
     * Parses a lowercase "null" keyword found at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented up to the end of the keyword.
     *
     * @return bool|ElementNull False when $content does not start with "null".
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*(null)/s', $content, $found)) {
            return false;
        }

        $offset += strpos($content, 'null') + strlen('null');

        return new self(null, $document);
    }
}
|
||||
70
lib/PdfParser/Element/ElementNumeric.php
Normal file
70
lib/PdfParser/Element/ElementNumeric.php
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
|
||||
/**
|
||||
* Class ElementNumeric
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementNumeric extends Element
{
    /**
     * Stores the value converted to float.
     *
     * @param string   $value
     * @param Document $document Accepted for signature compatibility; null is passed to the parent.
     */
    public function __construct($value, Document $document = null)
    {
        $number = floatval($value);

        parent::__construct($number, null);
    }

    /**
     * Parses a number (optional minus sign, digits and dots) at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented up to the end of the matched number.
     *
     * @return bool|ElementNumeric False when $content does not start with a number.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*(?P<value>\-?[0-9\.]+)/s', $content, $found)) {
            return false;
        }

        $literal = $found['value'];
        $offset += strpos($content, $literal) + strlen($literal);

        return new self($literal, $document);
    }
}
|
||||
106
lib/PdfParser/Element/ElementString.php
Normal file
106
lib/PdfParser/Element/ElementString.php
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
use Smalot\PdfParser\Font;
|
||||
|
||||
/**
|
||||
* Class ElementString
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementString extends Element
{
    /**
     * @param string   $value
     * @param Document $document Accepted for signature compatibility; null is passed to the parent.
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison with the stored value.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value == $this->value;
    }

    /**
     * Parses a "(literal string)" found at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented past the closing ')'.
     *
     * @return bool|ElementString False when $content does not start with '('.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\((?P<name>.*)/s', $content, $match)) {
            return false;
        }

        $name = $match['name'];

        // Find next ')' not escaped: a ')' is escaped when it is preceded by an odd
        // number of backslashes.
        $cur_start_text = $start_search_end = 0;
        while (($cur_start_pos = strpos($name, ')', $start_search_end)) !== false) {
            $cur_extract = substr($name, $cur_start_text, $cur_start_pos - $cur_start_text);
            preg_match('/(?P<escape>[\\\]*)$/s', $cur_extract, $match);
            if (!(strlen($match['escape']) % 2)) {
                break;
            }
            $start_search_end = $cur_start_pos + 1;
        }

        // Extract string.
        // NOTE(review): when no unescaped ')' exists $cur_start_pos is false here, which
        // yields an empty string - confirm whether unterminated input should be rejected.
        $name = substr($name, 0, $cur_start_pos);
        $offset += strpos($content, '(') + $cur_start_pos + 2; // 2 for '(' and ')'

        // Resolve the escape sequences in a single pass: strtr() never re-scans text it has
        // already replaced, so the backslash produced by '\\' cannot combine with the next
        // character (e.g. an escaped backslash followed by 'n' stays backslash + 'n').
        $name = strtr(
            $name,
            array(
                '\\\\' => '\\',
                '\\ '  => ' ',
                '\\/'  => '/',
                '\('   => '(',
                '\)'   => ')',
                '\n'   => "\n",
                '\r'   => "\r",
                '\t'   => "\t",
            )
        );

        // Decode string.
        $name = Font::decodeOctal($name);
        $name = Font::decodeEntities($name);
        $name = Font::decodeHexadecimal($name, false);
        $name = Font::decodeUnicode($name);

        return new self($name, $document);
    }
}
|
||||
80
lib/PdfParser/Element/ElementStruct.php
Normal file
80
lib/PdfParser/Element/ElementStruct.php
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
use Smalot\PdfParser\Header;
|
||||
|
||||
/**
|
||||
* Class ElementStruct
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementStruct extends Element
{
    /**
     * Parses a "<< ... >>" structure found at the beginning of $content.
     *
     * The text between the outermost delimiters is handed to Element::parse() and the
     * resulting elements are wrapped in a Header.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented up to the end of the closing '>>'.
     *
     * @return bool|Header False when $content does not start with '<<'.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*<<(?P<struct>.*)/is', $content)) {
            return false;
        }

        // Each chunk ends with exactly one '<<' or '>>' delimiter.
        preg_match_all('/(.*?)(<<|>>)/s', trim($content), $matches);

        // Collect chunks until the first '<<' is balanced.
        $depth = 0;
        $body = '';
        foreach ($matches[0] as $chunk) {
            $body .= $chunk;

            if (strpos($chunk, '<<') !== false) {
                $depth += 1;
            } else {
                $depth += -1;
            }

            if ($depth <= 0) {
                break;
            }
        }

        $offset += strpos($content, '<<') + strlen(rtrim($body));

        // Removes '<<' and '>>'.
        $body = trim(preg_replace('/^\s*<<(.*)>>\s*$/s', '\\1', $body));

        $position = 0;
        $elements = Element::parse($body, $document, $position);

        return new Header($elements, $document);
    }
}
|
||||
98
lib/PdfParser/Element/ElementXRef.php
Normal file
98
lib/PdfParser/Element/ElementXRef.php
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This file is part of the PdfParser library.
|
||||
*
|
||||
* @author Sébastien MALOT <sebastien@malot.fr>
|
||||
* @date 2017-01-03
|
||||
* @license LGPLv3
|
||||
* @url <https://github.com/smalot/pdfparser>
|
||||
*
|
||||
* PdfParser is a pdf library written in PHP, extraction oriented.
|
||||
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public License
|
||||
* along with this program.
|
||||
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
|
||||
*
|
||||
*/
|
||||
|
||||
namespace Smalot\PdfParser\Element;
|
||||
|
||||
use Smalot\PdfParser\Element;
|
||||
use Smalot\PdfParser\Document;
|
||||
|
||||
/**
|
||||
* Class ElementXRef
|
||||
*
|
||||
* @package Smalot\PdfParser\Element
|
||||
*/
|
||||
class ElementXRef extends Element
{
    /**
     * @return string The reference identifier ("<object number>_<generation number>" when
     *                built by parse()).
     */
    public function getId()
    {
        return $this->getContent();
    }

    /**
     * Looks the referenced object up in the owning document.
     *
     * @return mixed Whatever the document returns for this identifier.
     */
    public function getObject()
    {
        return $this->document->getObjectById($this->getId());
    }

    /**
     * Loose (==) comparison of identifiers; $value may be another reference or a raw id.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        $id = ($value instanceof ElementXRef) ? $value->getId() : $value;

        return $this->getId() == $id;
    }

    /**
     * @return string
     */
    public function __toString()
    {
        return '#Obj#' . $this->getId();
    }

    /**
     * Parses an indirect reference ("<number> <generation> R") at the beginning of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset   Incremented up to the end of the 'R' keyword.
     *
     * @return bool|ElementXRef False when $content does not start with a reference.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        $pattern = '/^\s*(?P<id>(?P<number>[0-9]+)\s+(?P<generation>[0-9]+)\s+R)/s';

        if (!preg_match($pattern, $content, $match)) {
            return false;
        }

        $offset += strpos($content, $match['id']) + strlen($match['id']);

        // Build the id from the captured numbers so that any whitespace between them
        // (several spaces, tabs, line breaks) gives the same "<number>_<generation>" id.
        $id = $match['number'] . '_' . $match['generation'];

        return new self($id, $document);
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue