This commit is contained in:
steven 2025-08-11 22:23:30 +02:00
commit 72a26edcff
22092 changed files with 2101903 additions and 0 deletions

283
lib/PdfParser/Document.php Normal file
View file

@ -0,0 +1,283 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementDate;
/**
* Technical references :
* - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
* - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
* - http://www.php.net/manual/en/ref.pdf.php#74211
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
* - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
*
* Class Document
*
* @package Smalot\PdfParser
*/
class Document
{
    /**
     * Objects of the document, indexed by object id.
     *
     * @var Object[]
     */
    protected $objects = array();

    /**
     * Object ids grouped by the content of their "Type" header entry
     * ($dictionary[$type][$id] = $id). Rebuilt by buildDictionary().
     *
     * @var array
     */
    protected $dictionary = array();

    /**
     * Trailer header. Starts as an empty Header and can be replaced with
     * setTrailer().
     *
     * @var Header
     */
    protected $trailer = null;

    /**
     * Details collected by buildDetails(); null until init() has run.
     *
     * @var array
     */
    protected $details = null;

    /**
     * Creates a document with an empty trailer and no objects.
     */
    public function __construct()
    {
        $this->trailer = new Header(array(), $this);
    }

    /**
     * Rebuilds the type dictionary and the details array, then forwards
     * init() to every registered object.
     */
    public function init()
    {
        $this->buildDictionary();
        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     *
     * Objects whose "Type" content is empty are left out of the dictionary.
     */
    protected function buildDictionary()
    {
        $this->dictionary = array();

        foreach ($this->objects as $id => $object) {
            $type = $object->getHeader()->get('Type')->getContent();

            if (!empty($type)) {
                $this->dictionary[$type][$id] = $id;
            }
        }
    }

    /**
     * Build details array.
     *
     * Starts from the details of the trailer's "Info" entry (when there is
     * one) and adds a "Pages" entry holding the page count. The count is 0
     * when getPages() throws.
     */
    protected function buildDetails()
    {
        $details = array();

        // Extract document info.
        if ($this->trailer->has('Info')) {
            /** @var Object $info */
            $info = $this->trailer->get('Info');

            // Same capability check as getPages(): only entries that expose
            // a header can provide details; anything else is skipped instead
            // of triggering a fatal "undefined method" error.
            if ($info !== null && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count.
        try {
            $pages            = $this->getPages();
            $details['Pages'] = count($pages);
        } catch (\Exception $e) {
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * @return array Object ids grouped by type, see $dictionary.
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * Replaces the object list and re-runs init().
     *
     * @param Object[] $objects
     */
    public function setObjects($objects = array())
    {
        $this->objects = (array)$objects;

        $this->init();
    }

    /**
     * @return Object[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * @param string $id
     *
     * @return Object|null The object registered under $id, or null when there is none.
     */
    public function getObjectById($id)
    {
        return isset($this->objects[$id]) ? $this->objects[$id] : null;
    }

    /**
     * Returns the objects whose "Type" header entry loosely equals $type and,
     * when $subtype is given, whose "Subtype" entry loosely equals $subtype.
     * Object ids are preserved as keys.
     *
     * @param string $type
     * @param string $subtype
     *
     * @return Object[]
     */
    public function getObjectsByType($type, $subtype = null)
    {
        $objects = array();

        foreach ($this->objects as $id => $object) {
            if ($object->getHeader()->get('Type') == $type &&
                (is_null($subtype) || $object->getHeader()->get('Subtype') == $subtype)
            ) {
                $objects[$id] = $object;
            }
        }

        return $objects;
    }

    /**
     * @return Object[] All objects of type "Font".
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Lists the pages of the document. Three sources are tried in order:
     * the "Pages" entry of the first "Catalog" object, then every object of
     * type "Pages", then the plain "Page" objects (unordered).
     *
     * @return Page[]
     * @throws \Exception When none of the three sources is available.
     */
    public function getPages()
    {
        if (isset($this->dictionary['Catalog'])) {
            // Search for catalog to list pages.
            $id = reset($this->dictionary['Catalog']);

            /** @var Pages $object */
            $object = $this->objects[$id]->get('Pages');
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if (isset($this->dictionary['Pages'])) {
            // Search for pages to list kids.
            $pages = array();

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if (isset($this->dictionary['Page'])) {
            // Search for 'page' (unordered pages).
            $pages = $this->getObjectsByType('Page');

            return array_values($pages);
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the text of all pages, each page trimmed and separated from
     * the next one by a blank line. Pages without any text are skipped.
     *
     * @param Page $page Accepted for backward compatibility only; it is not
     *                   used, the text of every page is always returned.
     *
     * @return string
     * @throws \Exception When getPages() cannot find any page source.
     */
    public function getText(Page $page = null)
    {
        $texts = array();

        foreach ($this->getPages() as $current) {
            $text = trim($current->getText());

            // Compare against the empty string rather than relying on
            // truthiness: a page whose whole text is "0" must be kept.
            if ($text !== '') {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @param Header $trailer
     */
    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * @param bool $deep Currently unused.
     *
     * @return array Details built by the last init(); null before that.
     */
    public function getDetails($deep = true)
    {
        return $this->details;
    }
}

185
lib/PdfParser/Element.php Normal file
View file

@ -0,0 +1,185 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
* Class Element
*
* @package Smalot\PdfParser
*/
class Element
{
    /**
     * Document handed to the constructor (may be null).
     *
     * @var Document
     */
    protected $document = null;

    /**
     * Raw value wrapped by this element.
     *
     * @var mixed
     */
    protected $value = null;

    /**
     * @param mixed    $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        $this->document = $document;
        $this->value    = $value;
    }

    /**
     * Initialisation hook; the base element has nothing to do.
     */
    public function init()
    {
    }

    /**
     * Loose (==) comparison between the given value and the wrapped one.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return ($value == $this->value);
    }

    /**
     * When the wrapped value is an array, tells whether any of its items
     * equals() the given value; otherwise behaves like equals().
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function contains($value)
    {
        if (!is_array($this->value)) {
            return $this->equals($value);
        }

        /** @var Element $item */
        foreach ($this->value as $item) {
            if ($item->equals($value)) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return mixed The wrapped value, untouched.
     */
    public function getContent()
    {
        return $this->value;
    }

    /**
     * @return string The wrapped value cast to string.
     */
    public function __toString()
    {
        return (string)($this->value);
    }

    /**
     * Parses a sequence of elements out of $content, starting at $position.
     *
     * By default the content is read as "/Name value" pairs and the result
     * is keyed by name (without the leading slash). When a fourth, truthy
     * argument is passed (read through func_get_args()), the content is read
     * as a plain list of values keyed 0, 1, 2, ...
     *
     * Each value is offered to the concrete element parsers in a fixed
     * order; the first one that returns a truthy result wins. Parsing stops
     * at the first chunk nobody recognises, with $position restored to where
     * that chunk started.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $position Updated in place as elements are consumed.
     *
     * @return array
     * @throws \Exception
     */
    public static function parse($content, Document $document = null, &$position = 0)
    {
        $arguments   = func_get_args();
        $only_values = isset($arguments[3]) ? $arguments[3] : false;
        $content     = trim($content);
        $parsed      = array();

        do {
            $rollback = $position;

            if ($only_values) {
                $key = count($parsed);
                $raw = substr($content, $position);
            } else {
                $found = preg_match('/^\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', substr($content, $position), $match);
                if (!$found) {
                    break;
                }

                $key      = ltrim($match['name'], '/');
                $raw      = $match['value'];
                $position = strpos($content, $raw, $position + strlen($match['name']));
            }

            // Short-circuit evaluation keeps the original priority: a parser
            // is only tried when every parser before it has failed.
            if (($element = ElementName::parse($raw, $document, $position))
                || ($element = ElementXRef::parse($raw, $document, $position))
                || ($element = ElementNumeric::parse($raw, $document, $position))
                || ($element = ElementStruct::parse($raw, $document, $position))
                || ($element = ElementBoolean::parse($raw, $document, $position))
                || ($element = ElementNull::parse($raw, $document, $position))
                || ($element = ElementDate::parse($raw, $document, $position))
                || ($element = ElementString::parse($raw, $document, $position))
                || ($element = ElementHexa::parse($raw, $document, $position))
                || ($element = ElementArray::parse($raw, $document, $position))
            ) {
                $parsed[$key] = $element;
            } else {
                $position = $rollback;
                break;
            }
        } while ($position < strlen($content));

        return $parsed;
    }
}

View file

@ -0,0 +1,161 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Object;
/**
* Class ElementArray
*
* @package Smalot\PdfParser\Element
*/
class ElementArray extends Element
{
    /**
     * @param string   $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct($value, $document);
    }

    /**
     * Returns the items after replacing every cross reference by the object
     * it points to (the replacement is stored, so it happens once per item).
     *
     * @return mixed
     */
    public function getContent()
    {
        foreach ($this->value as $key => $unused) {
            $this->resolveXRef($key);
        }

        return parent::getContent();
    }

    /**
     * Returns the items as stored, without resolving cross references.
     *
     * @return array
     */
    public function getRawContent()
    {
        return $this->value;
    }

    /**
     * Flattens the items into plain values.
     *
     * Headers, objects and nested arrays are only expanded when $deep is
     * truthy; nested arrays are dropped otherwise. Every other Element
     * contributes its content.
     *
     * @param bool $deep
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $details = array();

        foreach ($this->getContent() as $key => $item) {
            if ($deep && $item instanceof Header) {
                $details[$key] = $item->getDetails($deep);
                continue;
            }

            if ($deep && $item instanceof Object) {
                $details[$key] = $item->getDetails(false);
                continue;
            }

            if ($item instanceof ElementArray) {
                if ($deep) {
                    $details[$key] = $item->getDetails();
                }
                continue;
            }

            // Nested arrays were handled above, so any Element reaching this
            // point is a scalar-like element.
            if ($item instanceof Element) {
                $details[$key] = $item->getContent();
            }
        }

        return $details;
    }

    /**
     * @return string The items joined with commas.
     */
    public function __toString()
    {
        return implode(',', $this->value);
    }

    /**
     * Replaces the item stored under $name by the document object it refers
     * to when that item is a cross reference, then returns the stored item.
     *
     * @param string $name
     *
     * @return Element|Object
     */
    protected function resolveXRef($name)
    {
        $candidate = $this->value[$name];

        if ($candidate instanceof ElementXRef) {
            /** @var Object $target */
            $target             = $this->document->getObjectById($candidate->getId());
            $this->value[$name] = $target;
        }

        return $this->value[$name];
    }

    /**
     * Parses a bracketed array at the start of $content.
     *
     * The matching closing bracket is found by counting '[' and ']' tokens;
     * the text between the outer brackets is then parsed as a list of values.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the closing bracket on success.
     *
     * @return bool|ElementArray False when $content does not start with '['.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\[(?P<array>.*)/is', $content)) {
            return false;
        }

        preg_match_all('/(.*?)(\[|\])/s', trim($content), $matches);

        $depth = 0;
        $sub   = '';
        foreach ($matches[0] as $part) {
            $sub .= $part;

            // Every part ends with exactly one bracket: '[' opens, ']' closes.
            if (strpos($part, '[') !== false) {
                $depth++;
            } else {
                $depth--;
            }

            if ($depth <= 0) {
                break;
            }
        }

        // Removes 1 level [ and ].
        $sub = substr(trim($sub), 1, -1);

        $sub_offset = 0;
        $values     = Element::parse($sub, $document, $sub_offset, true);

        // Opening bracket (and whatever precedes it), the inner text, then
        // the closing bracket.
        $offset += strpos($content, '[') + 1;
        $offset += strlen($sub) + 1;

        return new self($values, $document);
    }
}

View file

@ -0,0 +1,88 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementBoolean
*
* @package Smalot\PdfParser\Element
*/
class ElementBoolean extends Element
{
    /**
     * Stores true when $value is the boolean true or the word "true" in any
     * letter case, false otherwise. The document is not kept.
     *
     * @param string   $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        $flag = (strtolower($value) == 'true' || $value === true);

        parent::__construct($flag, null);
    }

    /**
     * @return string Either 'true' or 'false'.
     */
    public function __toString()
    {
        if ($this->value) {
            return 'true';
        }

        return 'false';
    }

    /**
     * Strict comparison: only the boolean with the same state is equal.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return ($value === $this->getContent());
    }

    /**
     * Parses a leading "true" or "false" keyword (case-insensitive).
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the keyword on success.
     *
     * @return bool|ElementBoolean False when no keyword starts $content.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*(?P<value>true|false)/is', $content, $match)) {
            return false;
        }

        $literal = $match['value'];
        $offset += strpos($content, $literal) + strlen($literal);

        return new self($literal, $document);
    }
}

View file

@ -0,0 +1,155 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementDate
*
* @package Smalot\PdfParser\Element
*/
class ElementDate extends ElementString
{
    /**
     * DateTime formats indexed by the length of the normalised date string
     * (the text after "D:", with apostrophes removed).
     *
     * @var array
     */
    protected static $formats = array(
        4  => 'Y',
        6  => 'Ym',
        8  => 'Ymd',
        10 => 'YmdH',
        12 => 'YmdHi',
        14 => 'YmdHis',
        15 => 'YmdHise',
        17 => 'YmdHisO',
        18 => 'YmdHisO',
        19 => 'YmdHisO',
    );

    /**
     * Format used by __toString().
     *
     * @var string
     */
    protected $format = 'c';

    /**
     * @param \DateTime $value
     * @param Document  $document Not kept.
     *
     * @throws \Exception When $value is not a \DateTime.
     */
    public function __construct($value, Document $document = null)
    {
        if (!($value instanceof \DateTime)) {
            throw new \Exception('DateTime required.');
        }

        parent::__construct($value, null);
    }

    /**
     * @param string $format Format used by __toString().
     */
    public function setFormat($format)
    {
        $this->format = $format;
    }

    /**
     * Compares timestamps. $value may be a \DateTime or anything strtotime()
     * understands.
     *
     * @param mixed $value
     *
     * @return bool False when $value cannot be interpreted as a date.
     */
    public function equals($value)
    {
        if ($value instanceof \DateTime) {
            $timestamp = $value->getTimeStamp();
        } else {
            $timestamp = strtotime($value);

            // strtotime() reports failure with false, which would loosely
            // equal timestamp 0 and make garbage "equal" to 1970-01-01.
            if ($timestamp === false) {
                return false;
            }
        }

        return ($timestamp == $this->value->getTimeStamp());
    }

    /**
     * @return string The date rendered with the current format.
     */
    public function __toString()
    {
        return (string)($this->value->format($this->format));
    }

    /**
     * Parses a "(D:...)" date string at the start of $content.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the closing parenthesis on success.
     *
     * @return bool|ElementDate False when $content does not start with a
     *                          date string or the date cannot be interpreted.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*\(D\:(?P<name>.*?)\)/s', $content, $match)) {
            $name = $match['name'];
            $name = str_replace("'", '', $name);
            $date = false;

            // Smallest format : Y
            // Full format     : YmdHisP
            if (preg_match('/^\d{4}(\d{2}(\d{2}(\d{2}(\d{2}(\d{2}(Z(\d{2,4})?|[\+-]?\d{2}(\d{2})?)?)?)?)?)?)?$/', $name)) {
                if ($pos = strpos($name, 'Z')) {
                    // Drop whatever follows the 'Z' marker.
                    $name = substr($name, 0, $pos + 1);
                } elseif (strlen($name) == 18 && preg_match('/[^\+-]0000$/', $name)) {
                    // Unsigned "0000" offset: make the sign explicit.
                    $name = substr($name, 0, -4) . '+0000';
                }

                // The pattern above also accepts lengths without a matching
                // format (e.g. 16 characters: an unsigned two-digit offset);
                // those are reported as unparsable instead of reading an
                // undefined array key.
                $length = strlen($name);
                if (isset(self::$formats[$length])) {
                    $date = \DateTime::createFromFormat(self::$formats[$length], $name);
                }
            } else {
                // special cases
                if (preg_match('/^\d{1,2}-\d{1,2}-\d{4},?\s+\d{2}:\d{2}:\d{2}[\+-]\d{4}$/', $name)) {
                    $name   = str_replace(',', '', $name);
                    $format = 'n-j-Y H:i:sO';
                    $date   = \DateTime::createFromFormat($format, $name);
                }
            }

            if (!$date) {
                return false;
            }

            // 4 = 3 characters for '(D:' plus 1 for ')'. The length of the
            // raw match is used because apostrophes were stripped from $name.
            $offset += strpos($content, '(D:') + strlen($match['name']) + 4;

            $element = new self($date, $document);

            return $element;
        }

        return false;
    }
}

View file

@ -0,0 +1,93 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementHexa
*
* @package Smalot\PdfParser\Element
*/
class ElementHexa extends ElementString
{
    /**
     * Parses a "<hex digits>" string at the start of $content.
     *
     * The digits are decoded, wrapped in parentheses and handed to the date
     * parser first, then to the plain string parser, so the returned element
     * is whatever those parsers build rather than an ElementHexa.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the closing '>' on success.
     *
     * @return bool|ElementDate|ElementString False when $content does not
     *                                        start with a hexadecimal string.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*\<(?P<name>[A-F0-9]+)\>/is', $content, $match)) {
            $name    = $match['name'];
            $offset += strpos($content, '<' . $name) + strlen($name) + 2; // 2 for '<' and '>'

            // repackage string as standard
            $name = '(' . self::decode($name, $document) . ')';

            if (!($element = ElementDate::parse($name, $document))) {
                $element = ElementString::parse($name, $document);
            }

            return $element;
        }

        return false;
    }

    /**
     * Decodes a run of hexadecimal digits into text.
     *
     * Input starting with "00" is read four digits at a time, each group
     * becoming a numeric character reference; any other input is read two
     * digits (one byte) at a time. The result then goes through
     * html_entity_decode().
     *
     * @param string   $value    Hexadecimal digits, without the enclosing '<' and '>'.
     * @param Document $document Unused.
     *
     * @return string
     */
    public static function decode($value, Document $document = null)
    {
        // PDF hexadecimal strings with an odd number of digits are completed
        // with a trailing 0 ("<901FA>" means 90 1F A0). Without the padding
        // the lone last digit would be decoded as a low nibble.
        if (strlen($value) % 2) {
            $value .= '0';
        }

        $text   = '';
        $length = strlen($value);

        if (substr($value, 0, 2) == '00') {
            for ($i = 0; $i < $length; $i += 4) {
                $hex   = substr($value, $i, 4);
                $text .= '&#' . str_pad(hexdec($hex), 4, '0', STR_PAD_LEFT) . ';';
            }
        } else {
            for ($i = 0; $i < $length; $i += 2) {
                $hex   = substr($value, $i, 2);
                $text .= chr(hexdec($hex));
            }
        }

        $text = html_entity_decode($text, ENT_NOQUOTES, 'UTF-8');

        return $text;
    }
}

View file

@ -0,0 +1,85 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementMissing
*/
class ElementMissing extends Element
{
    /**
     * Both arguments are ignored: the element always wraps null and keeps no
     * document.
     *
     * @param string   $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct(null, null);
    }

    /**
     * A missing element is never equal to anything.
     *
     * @param mixed $value
     *
     * @return bool Always false.
     */
    public function equals($value)
    {
        return false;
    }

    /**
     * A missing element never contains anything.
     *
     * @param mixed $value
     *
     * @return bool Always false.
     */
    public function contains($value)
    {
        return false;
    }

    /**
     * @return bool Always false.
     */
    public function getContent()
    {
        return false;
    }

    /**
     * @return string Always the empty string.
     */
    public function __toString()
    {
        return '';
    }
}

View file

@ -0,0 +1,82 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Font;
/**
* Class ElementName
*
* @package Smalot\PdfParser\Element
*/
class ElementName extends Element
{
    /**
     * @param string   $value
     * @param Document $document Not kept.
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison with the stored name.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value == $this->value;
    }

    /**
     * Parses a "/Name" token at the start of $content. The name is passed
     * through Font::decodeEntities() before being stored.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the name on success.
     *
     * @return bool|ElementName False when $content does not start with a name.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\/(?P<name>[A-Z0-9\-\+,#\.]+)/is', $content, $match)) {
            return false;
        }

        $raw     = $match['name'];
        $offset += strpos($content, $raw) + strlen($raw);

        return new self(Font::decodeEntities($raw), $document);
    }
}

View file

@ -0,0 +1,87 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementNull
*
* @package Smalot\PdfParser\Element
*/
class ElementNull extends Element
{
    /**
     * Both arguments are ignored: the element always wraps null and keeps no
     * document.
     *
     * @param string   $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct(null, null);
    }

    /**
     * @return string Always 'null'.
     */
    public function __toString()
    {
        return 'null';
    }

    /**
     * Strict comparison with the stored content.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return ($value === $this->getContent());
    }

    /**
     * Parses a leading lowercase "null" keyword.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the keyword on success.
     *
     * @return bool|ElementNull False when $content does not start with "null".
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*(null)/s', $content)) {
            return false;
        }

        $offset += strpos($content, 'null') + strlen('null');

        return new self(null, $document);
    }
}

View file

@ -0,0 +1,70 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
* Class ElementNumeric
*
* @package Smalot\PdfParser\Element
*/
class ElementNumeric extends Element
{
    /**
     * Stores the float value of $value. The document is not kept.
     *
     * @param string   $value
     * @param Document $document
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct(floatval($value), null);
    }

    /**
     * Parses a number at the start of $content: an optional sign followed by
     * digits and/or dots.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the number on success.
     *
     * @return bool|ElementNumeric False when $content does not start with a number.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        // PDF numeric objects may carry an explicit '+' as well as '-'
        // (e.g. "+17", "+123.6"); both signs are accepted here.
        if (preg_match('/^\s*(?P<value>[\-+]?[0-9\.]+)/s', $content, $match)) {
            $value   = $match['value'];
            $offset += strpos($content, $value) + strlen($value);

            return new self($value, $document);
        }

        return false;
    }
}

View file

@ -0,0 +1,106 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Font;
/**
* Class ElementString
*
* @package Smalot\PdfParser\Element
*/
class ElementString extends Element
{
    /**
     * @param string   $value
     * @param Document $document Not kept.
     */
    public function __construct($value, Document $document = null)
    {
        parent::__construct($value, null);
    }

    /**
     * Loose (==) comparison with the stored string.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value == $this->value;
    }

    /**
     * Parses a "(literal string)" at the start of $content.
     *
     * The string ends at the first ')' that is not escaped, i.e. that is
     * preceded by an even number of backslashes. Backslash sequences are
     * then replaced and the text goes through the Font decoders.
     *
     * @param string   $content
     * @param Document $document
     * @param int      $offset Advanced past the closing parenthesis on success.
     *
     * @return bool|ElementString False when $content does not start with '('.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\((?P<name>.*)/s', $content, $match)) {
            return false;
        }

        $name = $match['name'];

        // Find next ')' not escaped.
        $search_from = 0;
        while (true) {
            $cur_start_pos = strpos($name, ')', $search_from);
            if ($cur_start_pos === false) {
                break;
            }

            // Count the backslashes sitting right before the candidate.
            $before = substr($name, 0, $cur_start_pos);
            preg_match('/(?P<escape>[\\\]*)$/s', $before, $trailing);
            if (strlen($trailing['escape']) % 2 == 0) {
                break;
            }

            $search_from = $cur_start_pos + 1;
        }

        // Extract string.
        $name    = substr($name, 0, $cur_start_pos);
        $offset += strpos($content, '(') + $cur_start_pos + 2; // 2 for '(' and ')'

        $name = str_replace(
            array('\\\\', '\\ ', '\\/', '\(', '\)', '\n', '\r', '\t'),
            array('\\', ' ', '/', '(', ')', "\n", "\r", "\t"),
            $name
        );

        // Decode string.
        $name = Font::decodeOctal($name);
        $name = Font::decodeEntities($name);
        $name = Font::decodeHexadecimal($name, false);
        $name = Font::decodeUnicode($name);

        return new self($name, $document);
    }
}

View file

@ -0,0 +1,80 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Header;
/**
 * Class ElementStruct
 *
 * Recognises a PDF dictionary ("<< ... >>") at the start of a content string
 * and turns it into a Header built from the elements found inside it.
 *
 * @package Smalot\PdfParser\Element
 */
class ElementStruct extends Element
{
    /**
     * Parses a dictionary located at the beginning of $content.
     *
     * @param string   $content  Raw content, expected to start (after optional whitespace) with '<<'.
     * @param Document $document Document passed on to the nested elements and to the Header.
     * @param int      $offset   Incremented by the position just past the matched dictionary.
     *
     * @return bool|ElementStruct The Header built from the dictionary, or false when
     *                            $content does not start with '<<'.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*<<(?P<struct>.*)/is', $content)) {
            return false;
        }

        // Cut the content into chunks that each end with a '<<' or '>>' marker.
        preg_match_all('/(.*?)(<<|>>)/s', trim($content), $chunks);

        // Accumulate chunks until the outermost dictionary is closed again.
        $depth = 0;
        $body  = '';
        foreach ($chunks[0] as $chunk) {
            $body .= $chunk;
            if (strpos($chunk, '<<') !== false) {
                $depth++;
            } else {
                $depth--;
            }
            if ($depth <= 0) {
                break;
            }
        }

        $offset += strpos($content, '<<') + strlen(rtrim($body));

        // Drop the outer '<<' and '>>' delimiters before parsing the inner elements.
        $body = trim(preg_replace('/^\s*<<(.*)>>\s*$/s', '\\1', $body));

        $innerOffset = 0;
        $elements    = Element::parse($body, $document, $innerOffset);

        return new Header($elements, $document);
    }
}

View file

@ -0,0 +1,98 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
 * Class ElementXRef
 *
 * Represents an indirect reference ("<object number> <generation number> R").
 * The element content is the referenced object id in the form "<object>_<generation>".
 *
 * @package Smalot\PdfParser\Element
 */
class ElementXRef extends Element
{
    /**
     * Returns the id of the referenced object.
     *
     * @return string
     */
    public function getId()
    {
        return $this->getContent();
    }

    /**
     * Resolves the reference through the owning document.
     *
     * @return mixed Whatever the document returns for this id, or null when the
     *               element was created without a document.
     */
    public function getObject()
    {
        // parse() accepts a null document, so avoid a fatal call on null here.
        if ($this->document === null) {
            return null;
        }

        return $this->document->getObjectById($this->getId());
    }

    /**
     * Compares this reference with another reference or with a raw id.
     *
     * @param mixed $value An ElementXRef or an id value.
     *
     * @return bool
     */
    public function equals($value)
    {
        $id = ($value instanceof ElementXRef) ? $value->getId() : $value;

        return $this->getId() == $id;
    }

    /**
     * @return string
     */
    public function __toString()
    {
        return '#Obj#' . $this->getId();
    }

    /**
     * Parses an indirect reference located at the beginning of $content.
     *
     * @param string   $content  Raw content to inspect.
     * @param Document $document Document the reference belongs to.
     * @param int      $offset   Incremented by the position just past the matched reference.
     *
     * @return bool|ElementXRef The reference element, or false when $content does not
     *                          start with an indirect reference.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*(?P<id>(?P<obj>[0-9]+)\s+(?P<gen>[0-9]+)\s+R)/s', $content, $match)) {
            $offset += strpos($content, $match['id']) + strlen($match['id']);

            // Build the id from the captured numbers so that any whitespace run
            // (several spaces, tabs, line breaks) between them yields "<obj>_<gen>".
            $id = $match['obj'] . '_' . $match['gen'];

            return new self($id, $document);
        }

        return false;
    }
}

142
lib/PdfParser/Encoding.php Normal file
View file

@ -0,0 +1,142 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementNumeric;
/**
 * Class Encoding
 *
 * Font encoding object: combines a named base encoding table with the
 * "Differences" array to build a custom-code => standard-code mapping.
 *
 * @package Smalot\PdfParser
 */
class Encoding extends Object
{
    /**
     * Glyph names of the base encoding, indexed by character code.
     *
     * @var array
     */
    protected $encoding;

    /**
     * Glyph names overridden by the "Differences" entry, indexed by character code.
     *
     * @var array
     */
    protected $differences;

    /**
     * Custom character code => code in the base encoding (or Font::MISSING).
     *
     * @var array
     */
    protected $mapping;

    /**
     * Loads the base encoding table and builds the differences mapping.
     *
     * Nothing is built when the object has no "BaseEncoding" entry.
     *
     * @throws \Exception When no class exists for the requested base encoding.
     */
    public function init()
    {
        $this->mapping     = array();
        $this->differences = array();
        $this->encoding    = null;

        if (!$this->has('BaseEncoding')) {
            return;
        }

        // Load reference table charset.
        $baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());
        $className    = '\\Smalot\\PdfParser\\Encoding\\' . $baseEncoding;
        if (!class_exists($className)) {
            throw new \Exception('Missing encoding data for: "' . $baseEncoding . '".');
        }
        $translator     = new $className();
        $this->encoding = $translator->getTranslations();

        // Build table including differences.
        $differences = $this->get('Differences')->getContent();
        $code        = 0;
        if (!is_array($differences)) {
            return;
        }

        foreach ($differences as $difference) {
            // A numeric entry resets the code used for the names that follow it.
            if ($difference instanceof ElementNumeric) {
                $code = $difference->getContent();
                continue;
            }

            // Anything else is a glyph name, either wrapped in an element or raw.
            $this->differences[$code] = is_object($difference) ? $difference->getContent() : $difference;

            // For the next char.
            $code++;
        }

        // Build final mapping (custom => standard).
        $table = array_flip(array_reverse($this->encoding, true));
        foreach ($this->differences as $code => $glyphName) {
            $this->mapping[$code] = isset($table[$glyphName]) ? $table[$glyphName] : Font::MISSING;
        }
    }

    /**
     * Describes the encoding, followed by the details reported by the parent class.
     *
     * @param bool $deep Forwarded to the parent implementation.
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $details = array(
            'BaseEncoding' => ($this->has('BaseEncoding') ? (string)$this->get('BaseEncoding') : 'Ansi'),
            'Differences'  => ($this->has('Differences') ? (string)$this->get('Differences') : ''),
        );

        return $details + parent::getDetails($deep);
    }

    /**
     * Translates a character code through the differences mapping.
     *
     * @param int $dec Character code to translate.
     *
     * @return int The mapped value when one exists, otherwise $dec unchanged.
     */
    public function translateChar($dec)
    {
        return isset($this->mapping[$dec]) ? $this->mapping[$dec] : $dec;
    }
}

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class ISOLatin1Encoding
 *
 * Glyph-name table for the ISO Latin 1 encoding.
 *
 * @package Smalot\PdfParser\Encoding
 */
class ISOLatin1Encoding
{
    /**
     * Returns the glyph names of the encoding, one per character code.
     *
     * @return array List of 256 glyph names; the array index is the character code.
     */
    public function getTranslations()
    {
        // Each row is a space separated run of glyph names, in code order.
        $rows = array(
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            'space exclam quotedbl numbersign dollar percent ampersand quoteright',
            'parenleft parenright asterisk plus comma minus period slash zero one',
            'two three four five six seven eight nine colon semicolon less equal',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X',
            'Y Z bracketleft backslash bracketright asciicircum underscore',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring',
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent',
            'sterling currency yen brokenbar section dieresis copyright',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree',
            'plusminus twosuperior threesuperior acute mu paragraph',
            'periodcentered cedilla onesuperior ordmasculine guillemotright',
            'onequarter onehalf threequarters questiondown Agrave Aacute',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        );

        return explode(' ', implode(' ', $rows));
    }
}

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class ISOLatin9Encoding
 *
 * Glyph-name table for the ISO Latin 9 encoding.
 *
 * @package Smalot\PdfParser\Encoding
 */
class ISOLatin9Encoding
{
    /**
     * Returns the glyph names of the encoding, one per character code.
     *
     * @return array List of 256 glyph names; the array index is the character code.
     */
    public function getTranslations()
    {
        // Each row is a space separated run of glyph names, in code order.
        $rows = array(
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            'space exclam quotedbl numbersign dollar percent ampersand quoteright',
            'parenleft parenright asterisk plus comma minus period slash zero one',
            'two three four five six seven eight nine colon semicolon less equal',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X',
            'Y Z bracketleft backslash bracketright asciicircum underscore',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring',
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent',
            'sterling Euro yen Scaron section scaron copyright',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree',
            'plusminus twosuperior threesuperior Zcaron mu paragraph',
            'periodcentered zcaron onesuperior ordmasculine guillemotright',
            'OE oe Ydieresis questiondown Agrave Aacute',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        );

        return explode(' ', implode(' ', $rows));
    }
}

View file

@ -0,0 +1,80 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
// Source : http://www.opensource.apple.com/source/vim/vim-34/vim/runtime/print/mac-roman.ps
namespace Smalot\PdfParser\Encoding;
/**
 * Class MacRomanEncoding
 *
 * Glyph-name table for the Mac Roman encoding.
 *
 * @package Smalot\PdfParser\Encoding
 */
class MacRomanEncoding
{
    /**
     * Returns the glyph names of the encoding, one per character code.
     *
     * @return array List of 256 glyph names; the array index is the character code.
     */
    public function getTranslations()
    {
        // Every row holds eight glyph names, in code order.
        $rows = array(
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle',
            'parenleft parenright asterisk plus comma minus period slash',
            'zero one two three four five six seven',
            'eight nine colon semicolon less equal greater question',
            'at A B C D E F G',
            'H I J K L M N O',
            'P Q R S T U V W',
            'X Y Z bracketleft backslash bracketright asciicircum underscore',
            'grave a b c d e f g',
            'h i j k l m n o',
            'p q r s t u v w',
            'x y z braceleft bar braceright asciitilde .notdef',
            'Adieresis Aring Ccedilla Eacute Ntilde Odieresis Udieresis aacute',
            'agrave acircumflex adieresis atilde aring ccedilla eacute egrave',
            'ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute',
            'ograve ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis',
            'dagger degree cent sterling section bullet paragraph germandbls',
            'registered copyright trademark acute dieresis notequal AE Oslash',
            'infinity plusminus lessequal greaterequal yen mu partialdiff summation',
            'Pi pi integral ordfeminine ordmasculine Omega ae oslash',
            'questiondown exclamdown logicalnot radical florin approxequal delta guillemotleft',
            'guillemotright ellipsis space Agrave Atilde Otilde OE oe',
            'endash emdash quotedblleft quotedblright quoteleft quoteright divide lozenge',
            'ydieresis Ydieresis fraction currency guilsinglleft guilsinglright fi fl',
            'daggerdbl periodcentered quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute',
            'Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex',
            'heart Ograve Uacute Ucircumflex Ugrave dotlessi circumflex tilde',
            'macron breve dotaccent ring cedilla hungarumlaut ogonek caron',
        );

        return explode(' ', implode(' ', $rows));
    }
}

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class StandardEncoding
 *
 * Glyph-name table for the Adobe standard encoding.
 *
 * @package Smalot\PdfParser\Encoding
 */
class StandardEncoding
{
    /**
     * Returns the glyph names of the encoding, one per character code.
     *
     * @return array List of 256 glyph names; the array index is the character code.
     */
    public function getTranslations()
    {
        // Each row is a space separated run of glyph names, in code order.
        $rows = array(
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            'space exclam quotedbl numbersign dollar percent ampersand quoteright',
            'parenleft parenright asterisk plus comma hyphen period slash zero',
            'one two three four five six seven eight nine colon semicolon less',
            'equal greater question at A B C D E F G H I J K L M N O P Q R S T U',
            'V W X Y Z bracketleft backslash bracketright asciicircum underscore',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef exclamdown cent',
            'sterling fraction yen florin section currency quotesingle',
            'quotedblleft guillemotleft guilsinglleft guilsinglright fi fl',
            '.notdef endash dagger daggerdbl periodcentered .notdef paragraph',
            'bullet quotesinglbase quotedblbase quotedblright guillemotright',
            'ellipsis perthousand .notdef questiondown .notdef grave acute',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring',
            'cedilla .notdef hungarumlaut ogonek caron emdash .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef AE .notdef',
            'ordfeminine .notdef .notdef .notdef .notdef Lslash Oslash OE',
            'ordmasculine .notdef .notdef .notdef .notdef .notdef ae .notdef',
            '.notdef .notdef dotlessi .notdef .notdef lslash oslash oe germandbls',
            '.notdef .notdef .notdef .notdef',
        );

        return explode(' ', implode(' ', $rows));
    }
}

View file

@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinANSIEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class WinAnsiEncoding
 *
 * Glyph-name table for the Windows ANSI encoding.
 *
 * @package Smalot\PdfParser\Encoding
 */
class WinAnsiEncoding
{
    /**
     * Returns the glyph names of the encoding, one per character code.
     *
     * @return array List of 256 glyph names; the array index is the character code.
     */
    public function getTranslations()
    {
        // Each row is a space separated run of glyph names, in code order.
        $rows = array(
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef',
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle',
            'parenleft parenright asterisk plus comma hyphen period slash zero one',
            'two three four five six seven eight nine colon semicolon less equal',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X',
            'Y Z bracketleft backslash bracketright asciicircum underscore',
            'grave a b c d e f g h i j k l m n o p q r s t u v w x y z',
            'braceleft bar braceright asciitilde bullet Euro bullet quotesinglbase',
            'florin quotedblbase ellipsis dagger daggerdbl circumflex perthousand',
            'Scaron guilsinglleft OE bullet Zcaron bullet bullet quoteleft quoteright',
            'quotedblleft quotedblright bullet endash emdash tilde trademark scaron',
            'guilsinglright oe bullet zcaron Ydieresis space exclamdown cent',
            'sterling currency yen brokenbar section dieresis copyright',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree',
            'plusminus twosuperior threesuperior acute mu paragraph',
            'periodcentered cedilla onesuperior ordmasculine guillemotright',
            'onequarter onehalf threequarters questiondown Agrave Aacute',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        );

        return explode(' ', implode(' ', $rows));
    }
}

517
lib/PdfParser/Font.php Normal file
View file

@ -0,0 +1,517 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
/**
* Class Font
*
* @package Smalot\PdfParser
*/
class Font extends Object
{
/**
* Placeholder used for a character that has no entry in the translation table.
*/
const MISSING = '?';
/**
* @var array
*/
protected $table = null;
/**
* @var array
*/
protected $tableSizes = null;
/**
* Initialises the font by building its character translation table.
*
* Delegates to loadTranslateTable(), which fills $table and $tableSizes.
*/
public function init()
{
// Load translate table.
$this->loadTranslateTable();
}
/**
 * Returns the font name taken from the "BaseFont" entry.
 *
 * @return string The string form of "BaseFont", or '[Unknown]' when the entry is absent.
 */
public function getName()
{
    if (!$this->has('BaseFont')) {
        return '[Unknown]';
    }

    return (string)$this->get('BaseFont');
}
/**
 * Returns the font type, i.e. the string form of the header's "Subtype" entry.
 *
 * @return string
 */
public function getType()
{
    $subtype = $this->header->get('Subtype');

    return (string)$subtype;
}
/**
 * Describes the font (name, type, encoding), followed by the details
 * reported by the parent class.
 *
 * @param bool $deep Forwarded to the parent implementation.
 *
 * @return array
 */
public function getDetails($deep = true)
{
    $details = array(
        'Name'     => $this->getName(),
        'Type'     => $this->getType(),
        'Encoding' => ($this->has('Encoding') ? (string)$this->get('Encoding') : 'Ansi'),
    );

    return $details + parent::getDetails($deep);
}
/**
 * Translates a raw character (one or more bytes) through the translation table.
 *
 * The bytes are read as one big-endian number which is used as the table key.
 *
 * @param string $char        Raw bytes of the character code.
 * @param bool   $use_default When no entry exists: true returns self::MISSING,
 *                            false returns $char unchanged.
 *
 * @return string
 */
public function translateChar($char, $use_default = true)
{
    $code = hexdec(bin2hex($char));

    if (array_key_exists($code, $this->table)) {
        return $this->table[$code];
    }

    if ($use_default) {
        return self::MISSING;
    }

    return $char;
}
/**
 * Converts a Unicode code point into its UTF-8 encoded character.
 *
 * The conversion goes through a numeric HTML entity decoded as UTF-8.
 *
 * @param int $code Unicode code point.
 *
 * @return string
 */
public static function uchr($code)
{
    $entity = '&#' . ((int)$code) . ';';

    return html_entity_decode($entity, ENT_NOQUOTES, 'UTF-8');
}
/**
* Builds (once) the character code => UTF-8 text table from the font's
* "ToUnicode" content and returns it.
*
* Three kinds of sections are read from the content:
*  - begincodespacerange/endcodespacerange: used to derive the code sizes in bytes,
*  - beginbfchar/endbfchar: one-to-one mappings,
*  - beginbfrange/endbfrange: range mappings, with either a start value or an
*    explicit list of destination strings.
*
* When the font has no "ToUnicode" entry the table stays empty and the sizes
* stay at one byte.
*
* @return array
*/
public function loadTranslateTable()
{
// Already built (or injected through setTable()): reuse it.
if (!is_null($this->table)) {
return $this->table;
}
$this->table = array();
// Default: one byte per source code and per destination code.
$this->tableSizes = array(
'from' => 1,
'to' => 1,
);
if ($this->has('ToUnicode')) {
$content = $this->get('ToUnicode')->getContent();
$matches = array();
// Support for multiple spacerange sections
if (preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
// NOTE: $matches is reused here; the surrounding foreach is left right after via break.
preg_match_all($regexp, $section, $matches);
// Two hex digits per byte; sizes come from the first pair of the section.
$this->tableSizes = array(
'from' => max(1, strlen(current($matches['from'])) / 2),
'to' => max(1, strlen(current($matches['to'])) / 2),
);
// Only the first codespacerange section is taken into account.
break;
}
}
// Support for multiple bfchar sections
if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
// NOTE: $matches is overwritten with the entries of the current section.
preg_match_all($regexp, $section, $matches);
// The source code size is re-derived from the first entry of each section.
$this->tableSizes['from'] = max(1, strlen(current($matches['from'])) / 2);
foreach ($matches['from'] as $key => $from) {
// Split the destination into groups of four hex digits; each group
// (and any shorter remainder) is converted on its own.
$parts = preg_split(
'/([0-9A-F]{4})/i',
$matches['to'][$key],
0,
PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
);
$text = '';
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
}
$this->table[hexdec($from)] = $text;
}
}
}
// Support for multiple bfrange sections
if (preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
// Support for : <srcCode1> <srcCode2> <dstString>
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *<(?P<offset>[0-9A-F]+)>[ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$char_to = hexdec($matches['to'][$key]);
// The destination is read as a single number and incremented along the range.
$offset = hexdec($matches['offset'][$key]);
for ($char = $char_from; $char <= $char_to; $char++) {
$this->table[$char] = self::uchr($char - $char_from + $offset);
}
}
// Support for : <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
// Some PDF file has 2-byte Unicode values on new lines > added \r\n
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *\[(?P<strings>[\r\n<>0-9A-F ]+)\][ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$strings = array();
preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $matches['strings'][$key], $strings);
// The n-th destination string maps to source code $char_from + n.
foreach ($strings['string'] as $position => $string) {
$parts = preg_split(
'/([0-9A-F]{4})/i',
$string,
0,
PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
);
$text = '';
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
}
$this->table[$char_from + $position] = $text;
}
}
}
}
}
return $this->table;
}
/**
* Replaces the translation table.
*
* Once a non-null table is set, loadTranslateTable() returns it as is
* instead of rebuilding it.
*
* @param array $table Character code => text mapping.
*/
public function setTable($table)
{
$this->table = $table;
}
/**
 * Decodes every PDF hexadecimal string ("<48656C6C6F>") found in $hexa into raw bytes.
 *
 * Only runs of hexadecimal digits, optionally mixed with whitespace, between
 * '<' and '>' are decoded; any other "<...>" sequence (for instance a markup
 * tag) is left untouched. Whitespace inside a hexadecimal string is ignored.
 * Content containing an XML declaration is returned unchanged, because its
 * tags must not be mistaken for hexadecimal strings.
 *
 * @param string $hexa       Text that may contain hexadecimal strings.
 * @param bool   $add_braces When true, each decoded string is wrapped in parentheses
 *                           and its backslashes are doubled.
 *
 * @return string
 */
public static function decodeHexadecimal($hexa, $add_braces = false)
{
    // Special shortcut for XML content.
    if (strpos($hexa, '<?xml') !== false) {
        return $hexa;
    }

    $text  = '';
    $parts = preg_split('/(<[a-f0-9\s]+>)/si', $hexa, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

    foreach ($parts as $part) {
        if (!preg_match('/^<[a-f0-9\s]+>$/si', $part)) {
            // Not a hexadecimal string: keep verbatim.
            $text .= $part;
            continue;
        }

        // Drop the delimiters and any embedded whitespace before converting to bytes.
        $part = preg_replace('/\s+/', '', trim($part, '<>'));
        $part = pack('H*', $part);

        if ($add_braces) {
            // Double the backslashes so the result reads as a literal string.
            $text .= '(' . preg_replace('/\\\/s', '\\\\\\', $part) . ')';
        } else {
            $text .= $part;
        }
    }

    return $text;
}
/**
 * Replaces octal escape sequences (a backslash followed by one to three
 * octal digits) with the byte they designate.
 *
 * An escaped backslash (two consecutive backslashes) is skipped as a unit and
 * left as is, so digits following it are not mistaken for a character code.
 *
 * @param string $text
 *
 * @return string
 */
public static function decodeOctal($text)
{
    return preg_replace_callback(
        '/\\\\(\\\\|[0-7]{1,3})/',
        function ($match) {
            if ($match[1] === '\\') {
                // Escaped backslash: keep both characters for the caller to unescape.
                return $match[0];
            }

            // Keep the low eight bits only; larger values overflow a byte.
            return chr(octdec($match[1]) & 0xFF);
        },
        $text
    );
}
/**
 * Replaces "#xx" sequences, where xx are two hexadecimal digits, with the
 * byte they designate (e.g. "#20" becomes a space, "#2F" a slash).
 *
 * A '#' that is not followed by two hexadecimal digits is left untouched.
 *
 * @param string $text
 *
 * @return string
 */
public static function decodeEntities($text)
{
    return preg_replace_callback(
        '/#([0-9a-f]{2})/i',
        function ($match) {
            return chr(hexdec($match[1]));
        },
        $text
    );
}
/**
 * Converts a UTF-16BE string introduced by the byte order mark FE FF into UTF-8.
 *
 * Text that does not start with the byte order mark is returned unchanged.
 * A high surrogate followed by a low surrogate is combined into a single
 * code point, so characters outside the Basic Multilingual Plane are kept.
 *
 * @param string $text
 *
 * @return string
 */
public static function decodeUnicode($text)
{
    if (strncmp($text, "\xFE\xFF", 2) !== 0) {
        return $text;
    }

    // Strip U+FEFF byte order marker.
    $decode = substr($text, 2);
    $length = strlen($decode);
    $text   = '';

    for ($i = 0; $i < $length; $i += 2) {
        $code = hexdec(bin2hex(substr($decode, $i, 2)));

        // High surrogate with a complete 16-bit unit behind it: try to pair them.
        if ($code >= 0xD800 && $code <= 0xDBFF && $i + 3 < $length) {
            $low = hexdec(bin2hex(substr($decode, $i + 2, 2)));
            if ($low >= 0xDC00 && $low <= 0xDFFF) {
                $code = 0x10000 + (($code - 0xD800) << 10) + ($low - 0xDC00);
                // The low surrogate has been consumed as well.
                $i += 2;
            }
        }

        $text .= self::uchr($code);
    }

    return $text;
}
/**
* Returns the threshold used by decodeText(): a numeric command whose value
* is lower than this limit starts a new word.
*
* @return int
*/
protected function getFontSpaceLimit()
{
return -50;
}
/**
 * Decodes a list of text commands into a single string.
 *
 * Numeric commands only influence word splitting: a value below the font
 * space limit opens a new word. Hexadecimal and literal commands are decoded,
 * unescaped and appended to the current word. Each word is then run through
 * decodeContent() and the words are joined with single spaces.
 *
 * @param array $commands Commands indexed by Object::TYPE and Object::COMMAND.
 *
 * @return string
 */
public function decodeText($commands)
{
    $spaceLimit = $this->getFontSpaceLimit();
    $words      = array();
    $current    = 0;
    $unicode    = false;

    foreach ($commands as $command) {
        $kind = $command[Object::TYPE];

        if ($kind == 'n') {
            // Spacing adjustment: a large enough negative value separates two words.
            if (floatval(trim($command[Object::COMMAND])) < $spaceLimit) {
                $current = count($words);
            }
            continue;
        }

        if ($kind == '<') {
            // Decode hexadecimal.
            $text = self::decodeHexadecimal('<' . $command[Object::COMMAND] . '>');
            if (mb_check_encoding($text, "UTF-8")) {
                $unicode = true;
            }
        } else {
            // Decode octal (if necessary).
            $text = self::decodeOctal($command[Object::COMMAND]);
        }

        // replace escaped chars
        $text = str_replace(
            array('\\\\', '\(', '\)', '\n', '\r', '\t', '\ '),
            array('\\', '(', ')', "\n", "\r", "\t", ' '),
            $text
        );

        // add content to result string
        $words[$current] = (isset($words[$current]) ? $words[$current] : '') . $text;
    }

    foreach ($words as $index => $word) {
        // decodeContent() may flip the flag, so every word gets its own copy.
        $wordUnicode   = $unicode;
        $words[$index] = $this->decodeContent($word, $wordUnicode);
    }

    return implode(' ', $words);
}
/**
* Converts the raw bytes of a word into UTF-8 text.
*
* Strategy, in order:
*  - the font has a "ToUnicode" entry: the text is cut into codes of
*    $tableSizes['from'] bytes and each code goes through translateChar();
*  - otherwise, the font has an "Encoding" entry that is an Encoding object:
*    each character code is remapped through Encoding::translateChar();
*  - finally, text not yet flagged as unicode is converted with iconv from
*    'Mac' or 'Windows-1252' to UTF-8.
*
* @param string $text    Raw text to decode.
* @param bool   $unicode In/out flag: whether $text is already unicode; set to
*                        true when the ToUnicode table has been applied.
*
* @return string
*/
protected function decodeContent($text, &$unicode)
{
if ($this->has('ToUnicode')) {
// Number of bytes per source code, as computed by loadTranslateTable().
$bytes = $this->tableSizes['from'];
if ($bytes) {
$result = '';
$length = strlen($text);
for ($i = 0; $i < $length; $i += $bytes) {
$char = substr($text, $i, $bytes);
// NOTE(review): translateChar() as defined in this class returns the input
// character, not false, when no entry exists and $use_default is false, so the
// two fallback branches below look unreachable unless a subclass overrides it
// - TODO confirm.
if (($decoded = $this->translateChar($char, false)) !== false) {
$char = $decoded;
} elseif ($this->has('DescendantFonts')) {
// Look the character up in the descendant fonts instead.
if ($this->get('DescendantFonts') instanceof Object) {
$fonts = $this->get('DescendantFonts')->getHeader()->getElements();
} else {
$fonts = $this->get('DescendantFonts')->getContent();
}
$decoded = false;
foreach ($fonts as $font) {
if ($font instanceof Font) {
if (($decoded = $font->translateChar($char, false)) !== false) {
$decoded = @iconv('Windows-1252', 'UTF-8//TRANSLIT//IGNORE', $decoded);
break;
}
}
}
if ($decoded !== false) {
$char = $decoded;
} else {
$char = @iconv('Windows-1252', 'UTF-8//TRANSLIT//IGNORE', $char);
}
} else {
$char = self::MISSING;
}
$result .= $char;
}
$text = $result;
// By definition, this code generates unicode chars.
$unicode = true;
}
} elseif ($this->has('Encoding')) {
/** @var Encoding $encoding */
$encoding = $this->get('Encoding');
if ($encoding instanceof Encoding) {
if ($unicode) {
// Split into UTF-8 characters (the 'u' modifier is always added in this branch).
$chars = preg_split(
'//s' . ($unicode ? 'u' : ''),
$text,
-1,
PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
);
$result = '';
foreach ($chars as $char) {
// dec_av / dec_ap: code before / after translation.
$dec_av = hexdec(bin2hex($char));
$dec_ap = $encoding->translateChar($dec_av);
$result .= self::uchr($dec_ap);
}
$text = $result;
} else {
// Single-byte text: remap byte by byte.
$result = '';
$length = strlen($text);
for ($i = 0; $i < $length; $i++) {
$dec_av = hexdec(bin2hex($text[$i]));
$dec_ap = $encoding->translateChar($dec_av);
$result .= chr($dec_ap);
}
$text = $result;
// Mac Roman based encodings are converted here and returned right away.
if ($encoding->get('BaseEncoding')->equals('MacRomanEncoding')) {
$text = @iconv('Mac', 'UTF-8//TRANSLIT//IGNORE', $text);
return $text;
}
}
}
}
// Convert to unicode if not already done.
if (!$unicode) {
if ($this->get('Encoding') instanceof Element &&
$this->get('Encoding')->equals('MacRomanEncoding')
) {
$text = @iconv('Mac', 'UTF-8//TRANSLIT//IGNORE', $text);
} else {
$text = @iconv('Windows-1252', 'UTF-8//TRANSLIT//IGNORE', $text);
}
}
return $text;
}
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontCIDFontType0
*
* Marker subclass of Font: it declares no members of its own and inherits
* all behavior from Font.
*
* @package Smalot\PdfParser\Font
*/
class FontCIDFontType0 extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontCIDFontType2
*
* Marker subclass of Font: it declares no members of its own and inherits
* all behavior from Font.
*
* @package Smalot\PdfParser\Font
*/
class FontCIDFontType2 extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontTrueType
*
* Marker subclass of Font: it declares no members of its own and inherits
* all behavior from Font.
*
* @package Smalot\PdfParser\Font
*/
class FontTrueType extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontType0
*
* Marker subclass of Font: it declares no members of its own and inherits
* all behavior from Font.
*
* @package Smalot\PdfParser\Font
*/
class FontType0 extends Font
{
}

View file

@ -0,0 +1,42 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
* Class FontType1
*
* Marker subclass of Font: it declares no members of its own and inherits
* all behavior from Font.
*
* @package Smalot\PdfParser\Font
*/
class FontType1 extends Font
{
}

205
lib/PdfParser/Header.php Normal file
View file

@ -0,0 +1,205 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
* Class Header
*
* @package Smalot\PdfParser
*/
class Header
{
/**
* @var Document
*/
protected $document = null;
/**
* @var Element[]
*/
protected $elements = null;
/**
* Builds a header from a name => element map.
*
* @param Element[] $elements List of elements, indexed by name.
* @param Document $document Document used later to resolve cross-references;
*                            when null, cross-references are left unresolved.
*/
public function __construct($elements = array(), Document $document = null)
{
$this->elements = $elements;
$this->document = $document;
}
/**
 * Returns every element of the header, after trying to resolve each
 * cross-reference to its target object.
 *
 * @return mixed The element list, indexed by name.
 */
public function getElements()
{
    // Resolution rewrites $this->elements in place, so iterate over a
    // snapshot of the names only.
    foreach (array_keys($this->elements) as $name) {
        $this->resolveXRef($name);
    }

    return $this->elements;
}
/**
 * Debug helper: maps each element name to the class name of its value.
 *
 * Cross-references are not resolved first.
 *
 * @return array Class names indexed by element name.
 */
public function getElementTypes()
{
    // array_map() keeps the keys when it is given a single array.
    return array_map('get_class', $this->elements);
}
/**
 * Flattens the header into an array of plain values.
 *
 * Nested headers, objects and arrays are only expanded when $deep is true
 * (objects are expanded one level, without recursion); plain elements are
 * cast to string. Anything else is left out of the result.
 *
 * @param bool $deep Whether nested structures are expanded.
 *
 * @return array Values indexed by element name.
 */
public function getDetails($deep = true)
{
    $details = array();

    foreach ($this->getElements() as $name => $item) {
        if ($deep && $item instanceof Header) {
            $details[$name] = $item->getDetails($deep);
        } elseif ($deep && $item instanceof Object) {
            $details[$name] = $item->getDetails(false);
        } elseif ($item instanceof ElementArray) {
            // Arrays are skipped entirely in shallow mode.
            if ($deep) {
                $details[$name] = $item->getDetails();
            }
        } elseif ($item instanceof Element) {
            $details[$name] = (string) $item;
        }
    }

    return $details;
}
/**
 * Tells whether the header holds an entry with the given name
 * (an entry whose value is null still counts).
 *
 * @param string $name The name of the element
 *
 * @return bool
 */
public function has($name)
{
    return array_key_exists($name, $this->elements);
}
/**
 * Fetches an element by name, resolving it when it is a cross-reference.
 *
 * @param string $name
 *
 * @return Element|Object The element, or an ElementMissing placeholder when
 *                        no entry exists under that name.
 */
public function get($name)
{
    if (!array_key_exists($name, $this->elements)) {
        return new ElementMissing(null, null);
    }

    return $this->resolveXRef($name);
}
/**
 * Replaces a cross-reference entry by the object it points to.
 *
 * Entries that are not cross-references, or any entry when no document is
 * attached, are returned untouched.
 *
 * @param string $name
 *
 * @return Element|Object|null Null when the referenced object is not found
 *                             in the document.
 * @throws \Exception
 */
protected function resolveXRef($name)
{
    $entry = $this->elements[$name];

    if (!($entry instanceof ElementXRef) || is_null($this->document)) {
        return $entry;
    }

    /** @var ElementXRef $entry */
    $target = $this->document->getObjectById($entry->getId());
    if (is_null($target)) {
        return null;
    }

    // Cache the resolved object so later calls skip the lookup.
    $this->elements[$name] = $target;

    return $target;
}
/**
 * Parses a header out of raw content.
 *
 * Content starting with '<<' is parsed as a struct; anything else is parsed
 * as an array whose raw content becomes the header's element list.
 *
 * @param string   $content  The content to parse
 * @param Document $document The document
 * @param int      $position The new position of the cursor after parsing
 *
 * @return Header Never null: an empty header is built when parsing fails.
 */
public static function parse($content, Document $document, &$position = 0)
{
    if (substr(trim($content), 0, 2) == '<<') {
        /** @var Header $parsed */
        $parsed = ElementStruct::parse($content, $document, $position);

        // Build an empty header when the struct could not be parsed.
        return $parsed ? $parsed : new self(array(), $document);
    }

    $list = ElementArray::parse($content, $document, $position);
    if ($list) {
        // Deliberately created without a document, as the original does.
        return new self($list->getRawContent(), null);
    }

    return new self(array(), $document);
}
}

781
lib/PdfParser/Object.php Normal file
View file

@ -0,0 +1,781 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\XObject\Form;
use Smalot\PdfParser\XObject\Image;
/**
* Class Object
*
* @package Smalot\PdfParser
*/
class Object
{
// Keys of the command arrays built by getCommandsText():
// TYPE holds the token type marker, OPERATOR the operator name and
// COMMAND the operand (a string, or a nested command list for arrays).
const TYPE = 't';
const OPERATOR = 'o';
const COMMAND = 'c';
/**
* The recursion stack.
*
* Holds the unique ids of the objects whose getText() is currently running,
* so that getText() can skip XObjects that reference each other in a cycle.
*
* @var array
*/
static $recursionStack = array();
/**
* Document this object belongs to.
*
* @var Document
*/
protected $document = null;
/**
* Header (dictionary) of the object; never null after construction.
*
* @var Header
*/
protected $header = null;
/**
* Raw content of the object, if any.
*
* @var string
*/
protected $content = null;
/**
 * @param Document $document Owning document.
 * @param Header   $header   Object header; an empty Header is created when
 *                           none is given.
 * @param string   $content  Raw content of the object.
 */
public function __construct(Document $document, Header $header = null, $content = null)
{
    if (is_null($header)) {
        $header = new Header();
    }

    $this->document = $document;
    $this->header   = $header;
    $this->content  = $content;
}
/**
* Initialization hook; does nothing in this base class.
*/
public function init()
{
}
/**
* Returns the header of the object.
*
* @return null|Header The constructor always assigns a Header, so null is
*                     not expected unless a subclass resets the property.
*/
public function getHeader()
{
return $this->header;
}
/**
* Shortcut for getHeader()->get($name).
*
* @param string $name Name of the header entry.
*
* @return Element|Object
*/
public function get($name)
{
return $this->header->get($name);
}
/**
* Shortcut for getHeader()->has($name).
*
* @param string $name Name of the header entry.
*
* @return bool
*/
public function has($name)
{
return $this->header->has($name);
}
/**
* Shortcut for getHeader()->getDetails($deep).
*
* @param bool $deep Passed through to the header.
*
* @return array
*/
public function getDetails($deep = true)
{
return $this->header->getDetails($deep);
}
/**
* Returns the raw content given at construction time.
*
* @return null|string
*/
public function getContent()
{
return $this->content;
}
/**
 * Masks everything in a content stream that could confuse the operator
 * regexes: escaped brackets, inline images, string and array operands,
 * '<...>' structures and BDC/EMC marked-content markup.
 *
 * Every masked span is replaced by the same number of filler characters, so
 * byte offsets in the returned string line up with offsets in $content.
 *
 * @param string $content Raw content stream.
 * @param string $char    Filler character; only its first character is used
 *                        and 'X' is used when it is empty.
 *
 * @return string The masked content, same length as $content.
 */
public function cleanContent($content, $char = 'X')
{
    // An empty filler would shrink the masked spans and shift every offset.
    $char = (is_string($char) && $char !== '') ? $char[0] : 'X';
    $content = str_replace(array('\\\\', '\\)', '\\('), $char . $char, $content);

    // Remove image block with binary content
    preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, PREG_OFFSET_CAPTURE);
    foreach ($matches[0] as $part) {
        $content = substr_replace($content, str_repeat($char, strlen($part[0])), $part[1], strlen($part[0]));
    }

    // Clean content in square brackets [.....]
    preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, PREG_OFFSET_CAPTURE);
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, strlen($part[0])), $part[1], strlen($part[0]));
    }

    // Clean content in round brackets (.....)
    preg_match_all('/\((.*?)\)/s', $content, $matches, PREG_OFFSET_CAPTURE);
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, strlen($part[0])), $part[1], strlen($part[0]));
    }

    // Clean structure: everything between '<' and its matching '>' is masked.
    if ($parts = preg_split('/(<|>)/s', $content, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE)) {
        $content = '';
        $level = 0;
        foreach ($parts as $part) {
            if ($part == '<') {
                $level++;
            }

            $content .= ($level == 0 ? $part : str_repeat($char, strlen($part)));

            if ($part == '>') {
                $level--;
            }
        }
    }

    // Clean BDC and EMC markup. The filler is quoted with the pattern's own
    // delimiter so that a '/' filler cannot terminate the regex early.
    preg_match_all(
        '/(\/[A-Za-z0-9\_]*\s*' . preg_quote($char, '/') . '*BDC)/s',
        $content,
        $matches,
        PREG_OFFSET_CAPTURE
    );
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, strlen($part[0])), $part[1], strlen($part[0]));
    }

    preg_match_all('/\s(EMC)\s/s', $content, $matches, PREG_OFFSET_CAPTURE);
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, strlen($part[0])), $part[1], strlen($part[0]));
    }

    return $content;
}
/**
 * Cuts a content stream into the pieces that matter for text extraction:
 * the body of each BT ... ET text block, followed by each '/Name Do' command.
 *
 * Matching is done on a masked copy of the content (see cleanContent()),
 * then the same byte range is read back from the unmasked content.
 *
 * @param string $content Raw content stream.
 *
 * @return array List of section strings, text blocks first.
 */
public function getSectionsText($content)
{
    $padded = ' ' . $content . ' ';
    $masked = $this->cleanContent($padded, '_');
    $result = array();

    // Text blocks.
    if (preg_match_all('/\s+BT[\s|\(|\[]+(.*?)\s*ET/s', $masked, $found, PREG_OFFSET_CAPTURE)) {
        foreach ($found[1] as $capture) {
            list($maskedText, $start) = $capture;
            if ($maskedText === '') {
                continue;
            }

            $raw = substr($padded, $start, strlen($maskedText));

            // Strip BDC ... EMC wrappers, keeping what they enclose.
            $result[] = preg_replace(
                '/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s',
                '${3}',
                $raw . ' '
            );
        }
    }

    // 'Do' commands.
    if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $found, PREG_OFFSET_CAPTURE)) {
        foreach ($found[1] as $capture) {
            list($maskedText, $start) = $capture;
            $result[] = substr($padded, $start, strlen($maskedText));
        }
    }

    return $result;
}
/**
 * Extracts the text of this object's content stream as a single string.
 *
 * Text positioning operators are turned into spaces or line breaks, text
 * showing operators are decoded with the current font, and 'Do' operators
 * recurse into the referenced XObject (guarded against circular references).
 *
 * @param Page|null $page Page used to resolve font and XObject names. When
 *                        null, font changes and 'Do' operators are ignored.
 *
 * @return string Extracted text, always followed by one trailing space.
 * @throws \Exception Declared by the original docblock; nothing is thrown
 *                    directly here, so presumably raised by decoding helpers.
 */
public function getText(Page $page = null)
{
    $text = '';
    $sections = $this->getSectionsText($this->content);

    // Start with the first font found in the document, or a blank one.
    $current_font = null;
    foreach ($this->document->getObjects() as $obj) {
        if ($obj instanceof Font) {
            $current_font = $obj;
            break;
        }
    }
    if ($current_font === null) {
        $current_font = new Font($this->document);
    }

    $current_position_td = array('x' => false, 'y' => false);
    $current_position_tm = array('x' => false, 'y' => false);

    array_push(self::$recursionStack, $this->getUniqueId());

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                // move text current point
                case 'Td':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if ((floatval($x) <= 0) ||
                        ($current_position_td['y'] !== false && floatval($y) < floatval($current_position_td['y']))
                    ) {
                        // vertical offset
                        $text .= "\n";
                    } elseif ($current_position_td['x'] !== false
                        && floatval($x) > floatval($current_position_td['x'])
                    ) {
                        // horizontal offset
                        $text .= ' ';
                    }
                    $current_position_td = array('x' => $x, 'y' => $y);
                    break;

                // move text current point and set leading
                case 'TD':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if (floatval($y) < 0) {
                        $text .= "\n";
                    } elseif (floatval($x) <= 0) {
                        $text .= ' ';
                    }
                    break;

                // select font
                case 'Tf':
                    list($id,) = preg_split('/\s/s', $command[self::COMMAND]);
                    $id = trim($id, '/');
                    if (!is_null($page)) {
                        $current_font = $page->getFont($id);
                    }
                    break;

                case "'":
                case 'Tj':
                    // Wrap the single string so it looks like a TJ array.
                    $command[self::COMMAND] = array($command);
                    // intentional fall-through
                case 'TJ':
                    // The page may not know the selected font.
                    if (is_null($current_font)) {
                        // Fallback: emit the first operand undecoded.
                        // TODO : Improve
                        $text .= $command[self::COMMAND][0][self::COMMAND];
                        // "continue" inside a switch acts like "break" and
                        // triggers a warning since PHP 7.3, so break explicitly.
                        break;
                    }

                    $sub_text = $current_font->decodeText($command[self::COMMAND]);
                    $text .= $sub_text;
                    break;

                // set leading
                case 'TL':
                    $text .= ' ';
                    break;

                // set text matrix
                case 'Tm':
                    $args = preg_split('/\s/s', $command[self::COMMAND]);
                    $y = array_pop($args);
                    $x = array_pop($args);
                    if ($current_position_tm['y'] !== false) {
                        $delta = abs(floatval($y) - floatval($current_position_tm['y']));
                        if ($delta > 10) {
                            $text .= "\n";
                        }
                    }
                    $current_position_tm = array('x' => $x, 'y' => $y);
                    break;

                // set horizontal scaling
                case 'Tz':
                // move to start of next line
                case 'T*':
                    $text .= "\n";
                    break;

                // paint an XObject
                case 'Do':
                    if (!is_null($page)) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        $xobject = $page->getXObject($id);

                        if (is_object($xobject) && !in_array($xobject->getUniqueId(), self::$recursionStack)) {
                            // Not a circular reference.
                            $text .= $xobject->getText($page);
                        }
                    }
                    break;

                // Operators recognised but without effect on the text.
                case 'Tc':
                case 'Ts':
                case 'Tw':
                case 'Da':
                case 'rg':
                case 'RG':
                case 're':
                case 'co':
                case 'cs':
                case 'gs':
                case 'en':
                case 'sc':
                case 'SC':
                case 'g':
                case 'G':
                case 'V':
                case 'vo':
                case 'Vo':
                default:
                    break;
            }
        }
    }

    array_pop(self::$recursionStack);

    return $text . ' ';
}
/**
 * Extracts the text of this object's content stream as a list of fragments,
 * one per text showing operator or nested XObject.
 *
 * @param Page|null $page Page used to resolve font and XObject names. When
 *                        null, font changes and 'Do' operators are ignored.
 *
 * @return array List of decoded text fragments.
 * @throws \Exception Declared by the original docblock; nothing is thrown
 *                    directly here, so presumably raised by decoding helpers.
 */
public function getTextArray(Page $page = null)
{
    $text = array();
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document);

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);

        foreach ($commands as $command) {
            switch ($command[self::OPERATOR]) {
                // select font
                case 'Tf':
                    list($id,) = preg_split('/\s/s', $command[self::COMMAND]);
                    $id = trim($id, '/');
                    // $page is optional: without it the font cannot be
                    // looked up, so keep the current one (as getText() does).
                    if (!is_null($page)) {
                        $current_font = $page->getFont($id);
                    }
                    break;

                case "'":
                case 'Tj':
                    // Wrap the single string so it looks like a TJ array.
                    $command[self::COMMAND] = array($command);
                    // intentional fall-through
                case 'TJ':
                    // The page may not know the selected font.
                    if (is_null($current_font)) {
                        // Fallback: emit the first operand undecoded.
                        // TODO : Improve
                        $text[] = $command[self::COMMAND][0][self::COMMAND];
                        // "continue" inside a switch acts like "break" and
                        // triggers a warning since PHP 7.3, so break explicitly.
                        break;
                    }

                    $sub_text = $current_font->decodeText($command[self::COMMAND]);
                    $text[] = $sub_text;
                    break;

                // paint an XObject
                case 'Do':
                    if (!is_null($page)) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        if ($xobject = $page->getXObject($id)) {
                            $text[] = $xobject->getText($page);
                        }
                    }
                    break;

                // Operators recognised but producing no fragment.
                case 'Tc':
                case 'Td':
                case 'TD':
                case 'TL':
                case 'Tm':
                case 'Ts':
                case 'Tw':
                case 'Tz':
                case 'T*':
                case 'Da':
                case 'rg':
                case 'RG':
                case 're':
                case 'co':
                case 'cs':
                case 'gs':
                case 'en':
                case 'sc':
                case 'SC':
                case 'g':
                case 'G':
                case 'V':
                case 'vo':
                case 'Vo':
                default:
                    break;
            }
        }
    }

    return $text;
}
/**
 * Tokenizes a text section into a list of commands.
 *
 * Each command is an array with three keys: self::TYPE (the leading
 * character of the operand: '/', '[', '<', '(', 'n' for a bare number, or
 * '' when there is no operand), self::OPERATOR (operator name, possibly
 * empty) and self::COMMAND (operand string, or a nested command list for
 * arrays). Parsing stops at the first token that cannot be interpreted,
 * which is also how a nested call returns on the closing ']'.
 *
 * @param string $text_part Section to tokenize.
 * @param int    $offset    Start position; updated to the position reached.
 *
 * @return array List of commands.
 */
public function getCommandsText($text_part, &$offset = 0)
{
    $commands = $matches = array();
    $length = strlen($text_part);

    while ($offset < $length) {
        $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
        if ($offset >= $length) {
            // Only trailing whitespace was left: nothing more to read.
            break;
        }

        $char = $text_part[$offset];

        $operator = '';
        $type = '';
        $command = false;

        switch ($char) {
            case '/':
                $type = $char;
                if (preg_match(
                    '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // name + number + operator, e.g. a font selection
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += strlen($matches[0]);
                } elseif (preg_match(
                    '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // name + operator
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += strlen($matches[0]);
                }
                break;

            case '[':
            case ']':
                // array object
                $type = $char;
                ++$offset;
                if ($char == '[') {
                    // get elements; the nested call returns after the closing ']'
                    $command = $this->getCommandsText($text_part, $offset);

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += strlen($matches[0]);
                    }
                }
                // For ']' $command stays false, which ends this (nested) call.
                break;

            case '<':
            case '>':
                // hexadecimal string
                $type = $char;
                ++$offset;
                if ($char == '<') {
                    $strpos = strpos($text_part, '>', $offset);
                    if ($strpos === false) {
                        // Unterminated string: take the rest of the input.
                        // Using the false result in arithmetic would reset
                        // $offset to 1 and could loop forever.
                        $command = (string) substr($text_part, $offset);
                        $offset = $length;
                    } else {
                        $command = substr($text_part, $offset, ($strpos - $offset));
                        $offset = $strpos + 1;
                    }
                }

                if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                    $operator = trim($matches[0]);
                    $offset += strlen($matches[0]);
                }
                break;

            case '(':
            case ')':
                // literal string
                ++$offset;
                $type = $char;
                $strpos = $offset;
                if ($char == '(') {
                    // Find the matching bracket, honouring nesting and escapes.
                    $open_bracket = 1;
                    while ($open_bracket > 0) {
                        if (!isset($text_part[$strpos])) {
                            break;
                        }
                        $ch = $text_part[$strpos];
                        switch ($ch) {
                            case '\\':
                                // REVERSE SOLIDUS (5Ch) (Backslash): skip next character
                                ++$strpos;
                                break;

                            case '(':
                                // LEFT PARENTHESIS (28h)
                                ++$open_bracket;
                                break;

                            case ')':
                                // RIGHT PARENTHESIS (29h)
                                --$open_bracket;
                                break;
                        }
                        ++$strpos;
                    }
                    $command = substr($text_part, $offset, ($strpos - $offset - 1));
                    $offset = $strpos;

                    if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = $matches[1];
                        $offset += strlen($matches[0]);
                    }
                }
                break;

            default:
                if (substr($text_part, $offset, 2) == 'ET') {
                    // End of text block: $command stays false and parsing stops.
                    break;
                } elseif (preg_match(
                    '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // numbers followed by an operator
                    $operator = trim($matches['id']);
                    $command = trim($matches['data']);
                    $offset += strlen($matches[0]);
                } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                    // bare numbers (e.g. spacing inside a TJ array)
                    $type = 'n';
                    $command = trim($matches[0]);
                    $offset += strlen($matches[0]);
                } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                    // operator without operand
                    $type = '';
                    $operator = $matches[1];
                    $command = '';
                    $offset += strlen($matches[0]);
                }
        }

        if ($command !== false) {
            $commands[] = array(
                self::TYPE     => $type,
                self::OPERATOR => $operator,
                self::COMMAND  => $command,
            );
        } else {
            break;
        }
    }

    return $commands;
}
/**
 * Instantiates the class that matches the header's 'Type' (and, for
 * XObjects and fonts, 'Subtype') entry. Unknown types yield a plain Object.
 *
 * For fonts, a class named \Smalot\PdfParser\Font\Font<Subtype> is used when
 * it exists, the generic Font class otherwise.
 *
 * @param $document Document
 * @param $header   Header
 * @param $content  string
 *
 * @return Object
 */
public static function factory(Document $document, Header $header, $content)
{
    $type = $header->get('Type')->getContent();

    // Loose comparisons on purpose: they mirror the switch statement this
    // chain replaces.
    if ($type == 'XObject') {
        $subtype = $header->get('Subtype')->getContent();
        if ($subtype == 'Image') {
            return new Image($document, $header, $content);
        }
        if ($subtype == 'Form') {
            return new Form($document, $header, $content);
        }

        return new self($document, $header, $content);
    }

    if ($type == 'Pages') {
        return new Pages($document, $header, $content);
    }

    if ($type == 'Page') {
        return new Page($document, $header, $content);
    }

    if ($type == 'Encoding') {
        return new Encoding($document, $header, $content);
    }

    if ($type == 'Font') {
        $subtype   = $header->get('Subtype')->getContent();
        $classname = '\Smalot\PdfParser\Font\Font' . $subtype;

        return class_exists($classname)
            ? new $classname($document, $header, $content)
            : new Font($document, $header, $content);
    }

    return new self($document, $header, $content);
}
/**
* Returns unique id identifying the object.
*
* The id is the spl_object_hash() of this instance; getText() stores it in
* the recursion stack to detect circular XObject references.
*
* @return string
*/
protected function getUniqueId()
{
return spl_object_hash($this);
}
}

273
lib/PdfParser/Page.php Normal file
View file

@ -0,0 +1,273 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementXRef;
use Smalot\PdfParser\Element\ElementNull;
/**
* Class Page
*
* @package Smalot\PdfParser
*/
class Page extends Object
{
/**
* @var Font[]
*/
protected $fonts = null;
/**
* @var Object[]
*/
protected $xobjects = null;
/**
 * Lists the fonts declared in the page resources.
 *
 * Each font is indexed by its resource name and, when that name contains
 * any of the characters 0-9 . - _, also by the name reduced to those
 * characters. The table is cached once built; an empty result (no font
 * resources) is not cached.
 *
 * @return Font[]
 */
public function getFonts()
{
    if (!is_null($this->fonts)) {
        return $this->fonts;
    }

    $resources = $this->get('Resources');
    if (!method_exists($resources, 'has') || !$resources->has('Font')) {
        return array();
    }

    $fontResource = $resources->get('Font');
    $entries = ($fontResource instanceof Header)
        ? $fontResource->getElements()
        : $fontResource->getHeader()->getElements();

    $byId = array();
    foreach ($entries as $name => $candidate) {
        if (!($candidate instanceof Font)) {
            continue;
        }

        $byId[$name] = $candidate;

        // Store too on cleaned id value (only numeric)
        $reduced = preg_replace('/[^0-9\.\-_]/', '', $name);
        if ($reduced != '') {
            $byId[$reduced] = $candidate;
        }
    }

    $this->fonts = $byId;

    return $byId;
}
/**
 * Looks a font up by resource name, then by the name reduced to the
 * characters 0-9 . - _ when the exact name is unknown.
 *
 * @param string $id
 *
 * @return Font|null Null when no font matches.
 */
public function getFont($id)
{
    $available = $this->getFonts();

    if (isset($available[$id])) {
        return $available[$id];
    }

    $reduced = preg_replace('/[^0-9\.\-_]/', '', $id);

    return isset($available[$reduced]) ? $available[$reduced] : null;
}
/**
 * Lists the XObjects declared in the page resources.
 *
 * Each entry is indexed by its resource name and, when that name contains
 * any of the characters 0-9 . - _, also by the name reduced to those
 * characters. The table is cached once built; an empty result (no XObject
 * resources) is not cached.
 *
 * @return Object[]
 */
public function getXObjects()
{
    if (!is_null($this->xobjects)) {
        return $this->xobjects;
    }

    $resources = $this->get('Resources');
    if (!method_exists($resources, 'has') || !$resources->has('XObject')) {
        return array();
    }

    $xobjectResource = $resources->get('XObject');
    $entries = ($xobjectResource instanceof Header)
        ? $xobjectResource->getElements()
        : $xobjectResource->getHeader()->getElements();

    $byId = array();
    foreach ($entries as $name => $entry) {
        $byId[$name] = $entry;

        // Store too on cleaned id value (only numeric)
        $reduced = preg_replace('/[^0-9\.\-_]/', '', $name);
        if ($reduced != '') {
            $byId[$reduced] = $entry;
        }
    }

    $this->xobjects = $byId;

    return $byId;
}
/**
 * Looks an XObject up by its exact resource name.
 *
 * Unlike getFont(), no second lookup on a reduced name is attempted.
 *
 * @param string $id
 *
 * @return Object|null Null when the name is unknown.
 */
public function getXObject($id)
{
    $available = $this->getXObjects();

    return isset($available[$id]) ? $available[$id] : null;
}
/**
 * Extracts the text of the page from its 'Contents' entry.
 *
 * When the contents are split over several streams (an object whose header
 * is a numerically indexed list, or an array element), the streams are
 * concatenated into one virtual object before extraction.
 *
 * @param Page $page Unused; this page itself is passed to the extraction.
 *
 * @return string Empty string when the page has no usable contents.
 */
public function getText(Page $page = null)
{
    $contents = $this->get('Contents');

    if (!$contents
        || $contents instanceof ElementMissing
        || $contents instanceof ElementNull
    ) {
        return '';
    }

    if ($contents instanceof Object) {
        $elements = $contents->getHeader()->getElements();

        if (is_numeric(key($elements))) {
            $merged = '';
            foreach ($elements as $element) {
                $merged .= ($element instanceof ElementXRef)
                    ? $element->getObject()->getContent()
                    : $element->getContent();
            }

            $header   = new Header(array(), $this->document);
            $contents = new Object($this->document, $header, $merged);
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $merged = '';
        foreach ($contents->getContent() as $stream) {
            $merged .= $stream->getContent() . "\n";
        }

        $header   = new Header(array(), $this->document);
        $contents = new Object($this->document, $header, $merged);
    }

    return $contents->getText($this);
}
/**
 * Extracts the text of the page from its 'Contents' entry as a list of
 * fragments.
 *
 * When the contents are split over several streams (an object whose header
 * is a numerically indexed list, or an array element), the streams are
 * concatenated into one virtual object before extraction.
 *
 * @param Page $page Unused; this page itself is passed to the extraction.
 *
 * @return array Empty array when the page has no usable contents.
 */
public function getTextArray(Page $page = null)
{
    $contents = $this->get('Contents');

    if (!$contents
        || $contents instanceof ElementMissing
        || $contents instanceof ElementNull
    ) {
        return array();
    }

    if ($contents instanceof Object) {
        $elements = $contents->getHeader()->getElements();

        if (is_numeric(key($elements))) {
            $merged = '';
            foreach ($elements as $element) {
                $merged .= ($element instanceof ElementXRef)
                    ? $element->getObject()->getContent()
                    : $element->getContent();
            }

            $header   = new Header(array(), $this->document);
            $contents = new Object($this->document, $header, $merged);
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $merged = '';
        foreach ($contents->getContent() as $stream) {
            $merged .= $stream->getContent() . "\n";
        }

        $header   = new Header(array(), $this->document);
        $contents = new Object($this->document, $header, $merged);
    }

    return $contents->getTextArray($this);
}
}

70
lib/PdfParser/Pages.php Normal file
View file

@ -0,0 +1,70 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
/**
* Class Pages
*
* @package Smalot\PdfParser
*/
class Pages extends Object
{
    /**
     * Returns the entries listed under this node's "Kids" element.
     *
     * @param bool $deep When false, the "Kids" content is returned as-is. When true,
     *                   every kid that is itself a Pages instance is replaced by the
     *                   result of its own deep lookup.
     *
     * @return array Empty when the node has no "Kids" entry.
     */
    public function getPages($deep = false)
    {
        if (!$this->has('Kids')) {
            return array();
        }

        $kids = $this->get('Kids')->getContent();
        if (!$deep) {
            return $kids;
        }

        $flattened = array();
        foreach ($kids as $node) {
            if (!($node instanceof Pages)) {
                $flattened[] = $node;
                continue;
            }

            // Intermediate node: splice in whatever it resolves to.
            $flattened = array_merge($flattened, $node->getPages(true));
        }

        return $flattened;
    }
}

314
lib/PdfParser/Parser.php Normal file
View file

@ -0,0 +1,314 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementXRef;
/**
* Class Parser
*
* @package Smalot\PdfParser
*/
class Parser
{
/**
* Objects built while parsing, keyed by object id.
*
* @var Object[]
*/
protected $objects = array();
/**
* Creates a parser; the constructor performs no initialisation.
*/
public function __construct()
{
}
/**
 * Parse PDF file
 *
 * @param string $filename Path of the PDF file to read.
 *
 * @return Document
 * @throws \Exception When the file cannot be read, or when parsing the content fails.
 */
public function parseFile($filename)
{
    $content = file_get_contents($filename);

    // file_get_contents() reports failure by returning false; stop here instead
    // of handing a non-string to the parser.
    if ($content === false) {
        throw new \Exception('Unable to read file "' . $filename . '".');
    }

    // Error suppression on the parsing call is kept as it was.
    return @$this->parseContent($content);
}
/**
 * Parse PDF content
 *
 * @param string $content Raw PDF data.
 *
 * @return Document
 * @throws \Exception When the trailer declares encryption, when no object could be
 *                    extracted, or when the underlying TCPDF parser throws.
 */
public function parseContent($content)
{
    // Create structure using TCPDF Parser.
    // Anything printed while TCPDF runs is buffered and discarded.
    ob_start();
    try {
        @$parser = new \TCPDF_PARSER(ltrim($content));
        list($xref, $data) = $parser->getParsedData();
        unset($parser);
    } catch (\Exception $e) {
        // Close the buffer opened above before propagating, otherwise it
        // would stay open in the caller.
        ob_end_clean();
        throw $e;
    }
    ob_end_clean();

    if (isset($xref['trailer']['encrypt'])) {
        throw new \Exception('Secured pdf file are currently not supported.');
    }

    if (empty($data)) {
        throw new \Exception('Object list not found. Possible secured file.');
    }

    // Create destination object.
    $document      = new Document();
    $this->objects = array();

    foreach ($data as $id => $structure) {
        $this->parseObject($id, $structure, $document);
        // Drop each raw structure once it has been converted.
        unset($data[$id]);
    }

    $document->setTrailer($this->parseTrailer($xref['trailer'], $document));
    $document->setObjects($this->objects);

    return $document;
}
/**
 * Converts a trailer structure into a Header.
 *
 * Every entry is stored under its name with the first letter upper-cased.
 * Numeric values become ElementNumeric, arrays are converted recursively,
 * strings containing an underscore become ElementXRef, and anything else is
 * handled as a '(' typed header element.
 *
 * @param array         $structure
 * @param Document|null $document
 *
 * @return Header
 */
protected function parseTrailer($structure, $document)
{
    $entries = array();

    foreach ($structure as $key => $raw) {
        $key = ucfirst($key);

        if (is_numeric($raw)) {
            $entry = new ElementNumeric($raw, $document);
        } elseif (is_array($raw)) {
            // Nested structures are parsed without a document reference.
            $nested = $this->parseTrailer($raw, null);
            $entry  = new ElementArray($nested, null);
        } elseif (strpos($raw, '_') !== false) {
            $entry = new ElementXRef($raw, $document);
        } else {
            $entry = $this->parseHeaderElement('(', $raw, $document);
        }

        $entries[$key] = $entry;
    }

    return new Header($entries, $document);
}
/**
 * Builds the object identified by $id from its parsed structure and registers
 * it in $this->objects.
 *
 * When the object is an object stream (its header Type equals "ObjStm"), the
 * objects embedded in the stream are registered instead and the container
 * itself is not stored.
 *
 * @param string   $id
 * @param array    $structure
 * @param Document $document
 */
protected function parseObject($id, $structure, $document)
{
    $header  = new Header(array(), $document);
    $content = '';

    foreach ($structure as $position => $part) {
        switch ($part[0]) {
            case '[':
                $elements = array();
                foreach ($part[1] as $sub_element) {
                    $sub_type   = $sub_element[0];
                    $sub_value  = $sub_element[1];
                    $elements[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
                }
                $header = new Header($elements, $document);
                break;

            case '<<':
                $header = $this->parseHeader($part[1], $document);
                break;

            case 'stream':
                $content = isset($part[3][0]) ? $part[3][0] : $part[1];

                if ($header->get('Type')->equals('ObjStm')) {
                    $match = array();

                    // Split xrefs and contents.
                    preg_match('/^((\d+\s+\d+\s*)*)(.*)$/s', $content, $match);
                    $content = $match[3];

                    // Extract xrefs.
                    $xrefs = preg_split(
                        '/(\d+\s+\d+\s*)/s',
                        $match[1],
                        -1,
                        PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
                    );

                    $table = array();
                    foreach ($xrefs as $xref) {
                        // The patterns above accept any whitespace run between the
                        // two numbers, so split on whitespace rather than on a
                        // single space character.
                        list($sub_id, $sub_offset) = preg_split('/\s+/', trim($xref));
                        $table[$sub_offset] = $sub_id;
                    }

                    // Order the embedded objects by their offset in the stream.
                    ksort($table);
                    $ids     = array_values($table);
                    $offsets = array_keys($table);

                    foreach ($offsets as $index => $offset) {
                        $sub_key     = $ids[$index] . '_0';
                        $next_offset = isset($offsets[$index + 1]) ? $offsets[$index + 1] : strlen($content);
                        $sub_content = substr($content, $offset, $next_offset - $offset);
                        $sub_header  = Header::parse($sub_content, $document);

                        $this->objects[$sub_key] = Object::factory($document, $sub_header, '');
                    }

                    // It is not necessary to store this content: returning here
                    // skips the registration at the end of the method.
                    return;
                }
                break;

            default:
                if ($part != 'null') {
                    $element = $this->parseHeaderElement($part[0], $part[1], $document);
                    if ($element) {
                        $header = new Header(array($element), $document);
                    }
                }
                break;
        }
    }

    if (!isset($this->objects[$id])) {
        $this->objects[$id] = Object::factory($document, $header, $content);
    }
}
/**
 * Turns a flat token list into a Header.
 *
 * The list is read two entries at a time: the first entry supplies the
 * element name, the second one its type and value.
 *
 * @param array    $structure
 * @param Document $document
 *
 * @return Header
 * @throws \Exception
 */
protected function parseHeader($structure, $document)
{
    $elements = array();
    $total    = count($structure);
    $index    = 0;

    while ($index < $total) {
        $key = $structure[$index][1];

        $elements[$key] = $this->parseHeaderElement(
            $structure[$index + 1][0],
            $structure[$index + 1][1],
            $document
        );

        $index += 2;
    }

    return new Header($elements, $document);
}
/**
 * Converts one typed token into the matching element.
 *
 * @param string   $type     Token type, e.g. '<<', 'numeric', '(', '/', 'objref', '['.
 * @param mixed    $value    Token payload; a list of sub-tokens for '<<' and '['.
 * @param Document $document
 *
 * @return Element|Header|null Null for the 'endstream', 'obj' and '' types.
 * @throws \Exception When the type is not one of the handled cases.
 */
protected function parseHeaderElement($type, $value, $document)
{
    switch ($type) {
        case '<<':
            return $this->parseHeader($value, $document);

        case 'numeric':
            return new ElementNumeric($value, $document);

        case 'boolean':
            return new ElementBoolean($value, $document);

        case 'null':
            return new ElementNull($value, $document);

        case '(':
            // A literal string is tried as a date first, then as a plain string.
            $date = ElementDate::parse('(' . $value . ')', $document);
            if ($date) {
                return $date;
            }

            return ElementString::parse('(' . $value . ')', $document);

        case '<':
            // Hexadecimal strings are decoded, then handled like literal strings.
            return $this->parseHeaderElement('(', ElementHexa::decode($value, $document), $document);

        case '/':
            return ElementName::parse('/' . $value, $document);

        case 'ojbref': // old mistake in tcpdf parser
        case 'objref':
            return new ElementXRef($value, $document);

        case '[':
            $items = array();
            foreach ($value as $entry) {
                $items[] = $this->parseHeaderElement($entry[0], $entry[1], $document);
            }

            return new ElementArray($items, $document);

        case 'endstream':
        case 'obj':
        case '':
            // These token types yield no element.
            return null;

        default:
            throw new \Exception('Invalid type: "' . $type . '".');
    }
}
}

View file

@ -0,0 +1,184 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
/**
* Class Document
*
* @package Smalot\PdfParser\Tests\Units
*/
class Document extends atoum\test
{
/**
 * Checks that objects passed to setObjects() can be fetched by id, and that
 * a second call leaves the previously set id unresolved.
 */
public function testSetObjects()
{
    $document = new \Smalot\PdfParser\Document();
    $object   = new \Smalot\PdfParser\Object($document);

    // Obj #1 is missing
    $this->assert->variable($document->getObjectById(1))->isNull();
    $document->setObjects(array(1 => $object));

    // Obj #1 exists
    $this->assert->object($document->getObjectById(1))->isInstanceOf('\Smalot\PdfParser\Object');

    $content = '<</Type/Page>>';
    $header  = \Smalot\PdfParser\Header::parse($content, $document);
    $object  = new \Smalot\PdfParser\Object($document, $header);
    $document->setObjects(array(2 => $object));

    // Obj #1 is missing
    $this->assert->variable($document->getObjectById(1))->isNull();

    // Obj #2 exists
    $this->assert->object($document->getObjectById(2))->isInstanceOf('\Smalot\PdfParser\Object');
}
/**
 * Checks that getObjects() hands back every object given to setObjects(),
 * under the same keys and with their concrete classes.
 */
public function testGetObjects()
{
    $document = new \Smalot\PdfParser\Document();
    $plain    = new \Smalot\PdfParser\Object($document);

    $content = '<</Type/Page>>unparsed content';
    $header  = \Smalot\PdfParser\Header::parse($content, $document);
    $page    = new \Smalot\PdfParser\Page($document, $header);

    $document->setObjects(array(1 => $plain, 2 => $page));
    $objects = $document->getObjects();

    $this->assert->integer(count($objects))->isEqualTo(2);
    $this->assert->object($objects[1])->isInstanceOf('\Smalot\PdfParser\Object');

    foreach (array('\Smalot\PdfParser\Object', '\Smalot\PdfParser\Page') as $expected_class) {
        $this->assert->object($objects[2])->isInstanceOf($expected_class);
    }
}
/**
 * Checks that the dictionary is empty for a fresh document and lists the
 * typed object under its type once objects are set.
 */
public function testDictionary()
{
    $document = new \Smalot\PdfParser\Document();

    $dictionary = $document->getDictionary();
    $this->assert->integer(count($dictionary))->isEqualTo(0);

    $untyped = new \Smalot\PdfParser\Object($document);
    $content = '<</Type/Page>>';
    $header  = \Smalot\PdfParser\Header::parse($content, $document);
    $page    = new \Smalot\PdfParser\Page($document, $header);
    $document->setObjects(array(1 => $untyped, 2 => $page));

    $dictionary = $document->getDictionary();
    $this->assert->integer(count($dictionary))->isEqualTo(1);
    $this->assert->integer(count($dictionary['Page']))->isEqualTo(1);
    $this->assert->integer($dictionary['Page'][2])->isEqualTo(2);
}
/**
 * Checks that getObjectsByType('Page') returns only the Page object, under
 * its original key.
 */
public function testGetObjectsByType()
{
    $document = new \Smalot\PdfParser\Document();
    $untyped  = new \Smalot\PdfParser\Object($document);

    $content = '<</Type/Page>>';
    $header  = \Smalot\PdfParser\Header::parse($content, $document);
    $page    = new \Smalot\PdfParser\Page($document, $header);

    $document->setObjects(array(1 => $untyped, 2 => $page));
    $matches = $document->getObjectsByType('Page');

    $this->assert->integer(count($matches))->isEqualTo(1);
    foreach (array('\Smalot\PdfParser\Object', '\Smalot\PdfParser\Page') as $expected_class) {
        $this->assert->object($matches[2])->isInstanceOf($expected_class);
    }
}
/**
 * Exercises getPages() in four set-ups: an empty document, plain Page
 * objects, Pages nodes with Kids, and a Catalog pointing at a Pages tree.
 */
public function testGetPages()
{
    $page_class = '\Smalot\PdfParser\Page';

    // Missing catalog
    $document = new \Smalot\PdfParser\Document();
    try {
        $pages = $document->getPages();
        $this->assert->boolean($pages)->isEqualTo(false);
    } catch (\Exception $e) {
        $this->assert->object($e)->isInstanceOf('\Exception');
    }

    // Listing pages from type Page
    $content = '<</Type/Page>>';
    $objects = array();
    foreach (array(1, 2) as $id) {
        $header       = \Smalot\PdfParser\Header::parse($content, $document);
        $objects[$id] = new \Smalot\PdfParser\Page($document, $header);
    }
    $document->setObjects($objects);

    $pages = $document->getPages();
    $this->assert->integer(count($pages))->isEqualTo(2);
    foreach (array(0, 1) as $index) {
        $this->assert->object($pages[$index])->isInstanceOf($page_class);
    }

    // Listing pages from type Pages (kids)
    $content = '<</Type/Page>>';
    $objects = array();
    foreach (array('1_0', '2_0', '3_0') as $id) {
        $header       = \Smalot\PdfParser\Header::parse($content, $document);
        $objects[$id] = new \Smalot\PdfParser\Page($document, $header);
    }
    $nodes = array(
        '4_0' => '<</Type/Pages/Kids[1 0 R 2 0 R]>>',
        '5_0' => '<</Type/Pages/Kids[3 0 R]>>',
    );
    foreach ($nodes as $id => $content) {
        $header       = \Smalot\PdfParser\Header::parse($content, $document);
        $objects[$id] = new \Smalot\PdfParser\Pages($document, $header);
    }
    $document->setObjects($objects);

    $pages = $document->getPages();
    $this->assert->integer(count($pages))->isEqualTo(3);
    foreach (array(0, 1, 2) as $index) {
        $this->assert->object($pages[$index])->isInstanceOf($page_class);
    }

    // Listing pages from type Catalog
    $content = '<</Type/Page>>';
    $objects = array();
    foreach (array('1_0', '2_0', '3_0') as $id) {
        $header       = \Smalot\PdfParser\Header::parse($content, $document);
        $objects[$id] = new \Smalot\PdfParser\Page($document, $header);
    }
    // The catalog entry (6_0) is built as a Pages instance as well.
    $nodes = array(
        '4_0' => '<</Type/Pages/Kids[1 0 R 2 0 R]>>',
        '5_0' => '<</Type/Pages/Kids[4 0 R 3 0 R]>>',
        '6_0' => '<</Type/Catalog/Pages 5 0 R >>',
    );
    foreach ($nodes as $id => $content) {
        $header       = \Smalot\PdfParser\Header::parse($content, $document);
        $objects[$id] = new \Smalot\PdfParser\Pages($document, $header);
    }
    $document->setObjects($objects);

    $pages = $document->getPages();
    $this->assert->integer(count($pages))->isEqualTo(3);
    foreach (array(0, 1, 2) as $index) {
        $this->assert->object($pages[$index])->isInstanceOf($page_class);
    }
}
}

View file

@ -0,0 +1,154 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
/**
* Class Element
*
* @package Smalot\PdfParser\Tests\Units
*/
class Element extends atoum\test
{
/**
* Exercises Element::parse() on a mixed header string, in values-only mode,
* and on input with unparsable trailing characters.
*
* The test expects parse() to update $offset, which is reset to 0 before each call.
*/
public function testParse()
{
$document = new \Smalot\PdfParser\Document(array());
// Only_values = false.
// One entry per element kind; the literal spans several lines on purpose.
$content = '/NameType /FlateDecode
/Contents[4 0 R 42]/Fonts<</F1 41/F2 43>>/NullType
null/StringType(hello)/DateType(D:20130901235555+02\'00\')/XRefType 2 0 R
/NumericType 8/HexaType<0020>/BooleanType false';
$offset = 0;
$elements = \Smalot\PdfParser\Element::parse($content, $document, $offset, false);
// Name element.
$this->assert->array($elements)->hasKey('NameType');
$this->assert->object($elements['NameType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementName');
$this->assert->string($elements['NameType']->getContent())->isEqualTo('FlateDecode');
// Array element.
$this->assert->boolean(array_key_exists('Contents', $elements))->isEqualTo(true);
$this->assert->object($elements['Contents'])->isInstanceOf('\Smalot\PdfParser\Element\ElementArray');
$this->assert->boolean($elements['Contents']->contains(42))->isEqualTo(true);
// Nested dictionary, expected as a Header.
$this->assert->boolean(array_key_exists('Fonts', $elements))->isEqualTo(true);
$this->assert->object($elements['Fonts'])->isInstanceOf('\Smalot\PdfParser\Header');
// Null element.
$this->assert->boolean(array_key_exists('NullType', $elements))->isEqualTo(true);
$this->assert->object($elements['NullType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementNull');
$this->assert->castToString($elements['NullType'])->isEqualTo('null');
// Literal string element.
$this->assert->boolean(array_key_exists('StringType', $elements))->isEqualTo(true);
$this->assert->object($elements['StringType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementString');
$this->assert->string($elements['StringType']->getContent())->isEqualTo('hello');
// Date element; the string-cast check below is disabled.
$this->assert->boolean(array_key_exists('DateType', $elements))->isEqualTo(true);
$this->assert->object($elements['DateType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementDate');
// $this->assert->castToString($elements['DateType'])->isEqualTo('2013-09-01T23:55:55+02:00');
// Cross reference: "2 0 R" is expected to yield the id "2_0".
$this->assert->boolean(array_key_exists('XRefType', $elements))->isEqualTo(true);
$this->assert->object($elements['XRefType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementXRef');
$this->assert->string($elements['XRefType']->getId())->isEqualTo('2_0');
// Numeric element.
$this->assert->boolean(array_key_exists('NumericType', $elements))->isEqualTo(true);
$this->assert->object($elements['NumericType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementNumeric');
$this->assert->castToString($elements['NumericType'])->isEqualTo('8');
// Hexadecimal string: expected as an ElementString holding a single space.
$this->assert->boolean(array_key_exists('HexaType', $elements))->isEqualTo(true);
$this->assert->object($elements['HexaType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementString');
$this->assert->string($elements['HexaType']->getContent())->isEqualTo(' ');
// Boolean element.
$this->assert->boolean(array_key_exists('BooleanType', $elements))->isEqualTo(true);
$this->assert->object($elements['BooleanType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementBoolean');
$this->assert->boolean($elements['BooleanType']->getContent())->isEqualTo(false);
// Only_values = true.
// Both tokens are expected as separate entries.
$content = '/NameType /FlateDecode';
$offset = 0;
$elements = \Smalot\PdfParser\Element::parse($content, $document, $offset, true);
$this->assert->array($elements)->hasSize(2);
$this->assert->integer($offset)->isEqualTo(22);
// Test error.
// Trailing garbage: the leading pair is still expected, with the offset at 22.
$content = '/NameType /FlateDecode $$$';
$offset = 0;
$elements = \Smalot\PdfParser\Element::parse($content, $document, $offset, false);
$this->assert->array($elements)->hasSize(1);
$this->assert->integer($offset)->isEqualTo(22);
$this->assert->string(key($elements))->isEqualTo('NameType');
$this->assert->object(current($elements))->isInstanceOf('\Smalot\PdfParser\Element\ElementName');
// A name without a valid value: nothing is expected and the offset stays at 0.
$content = '/NameType $$$';
$offset = 0;
$elements = \Smalot\PdfParser\Element::parse($content, $document, $offset, false);
$this->assert->integer($offset)->isEqualTo(0);
$this->assert->array($elements)->isEmpty();
/*$this->assert->boolean(array_key_exists('NameType', $elements))->isEqualTo(true);
$this->assert->boolean($elements['NameType'])->isInstanceOf('\Smalot\PdfParser\Element\ElementName)->isEqualTo(true);
$this->assert->string($elements['NameType']->getContent())->isEqualTo('FlateDecode');*/
}
/**
 * Checks that getContent() returns the value given to the constructor, for a
 * scalar and for an array.
 */
public function testGetContent()
{
    $scalar  = new \Smalot\PdfParser\Element(42);
    $content = $scalar->getContent();
    $this->assert->integer($content)->isEqualTo(42);

    $list    = new \Smalot\PdfParser\Element(array(4, 2));
    $content = $list->getContent();
    $this->assert->array($content)->hasSize(2);
}
/**
 * Checks equals() against a matching and a non-matching value.
 */
public function testEquals()
{
    $element = new \Smalot\PdfParser\Element(2);

    foreach (array(2 => true, 8 => false) as $candidate => $expected) {
        $this->assert->boolean($element->equals($candidate))->isEqualTo($expected);
    }
}
/**
 * Checks contains() on an element wrapping two other elements.
 */
public function testContains()
{
    $members = array(
        new \Smalot\PdfParser\Element(4),
        new \Smalot\PdfParser\Element(2),
    );
    $element = new \Smalot\PdfParser\Element($members);

    foreach (array(2 => true, 8 => false) as $candidate => $expected) {
        $this->assert->boolean($element->contains($candidate))->isEqualTo($expected);
    }
}
/**
 * Checks the string cast of a numeric element.
 */
public function test__toString()
{
    $two = new \Smalot\PdfParser\Element(2);

    $this->assert->castToString($two)->isEqualTo('2');
}
}

View file

@ -0,0 +1,189 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Page;
/**
* Class ElementArray
*
* @package Smalot\PdfParser\Tests\Units\Element
*/
class ElementArray extends atoum\test
{
/**
 * Checks ElementArray::parse() on inputs it must reject (returns false and
 * leaves the offset at 0) and on inputs it must accept (offset moves forward).
 */
public function testParse()
{
    $document = new \Smalot\PdfParser\Document(array());

    // Skipped.
    $rejected = array(
        'ABC',
        ' / [ 4 2 ] ',
        ' 0 [ 4 2 ] ',
        " 0 \n [ 4 2 ] ",
    );
    foreach ($rejected as $input) {
        $offset  = 0;
        $element = \Smalot\PdfParser\Element\ElementArray::parse($input, $document, $offset);
        $this->assert->boolean($element)->isEqualTo(false);
        $this->assert->integer($offset)->isEqualTo(0);
    }

    // Valid.
    $accepted = array(
        array(' [ 4 2 ] ', 8),
        array(' [ 4 2 ]', 8),
        array('[ 4 2 ]', 7),
        array(" \n [ 4 2 ] ", 10),
    );
    foreach ($accepted as $case) {
        list($input, $expected_offset) = $case;

        $offset  = 0;
        $element = \Smalot\PdfParser\Element\ElementArray::parse($input, $document, $offset);
        $this->assert->boolean($element->contains(4))->isEqualTo(true);
        $this->assert->boolean($element->contains(2))->isEqualTo(true);
        $this->assert->boolean($element->contains(8))->isEqualTo(false);
        $this->assert->integer($offset)->isEqualTo($expected_offset);
    }
}
/**
 * Checks that an array built from two numeric elements exposes two entries.
 */
public function testGetContent()
{
    $members = array(
        new \Smalot\PdfParser\Element\ElementNumeric('4'),
        new \Smalot\PdfParser\Element\ElementNumeric('2'),
    );
    $element = new \Smalot\PdfParser\Element\ElementArray($members);

    $content = $element->getContent();
    $this->assert->array($content)->hasSize(2);
}
/**
 * Checks contains() for a value that is in the array and one that is not.
 */
public function testContains()
{
    $members = array(
        new \Smalot\PdfParser\Element\ElementNumeric('4'),
        new \Smalot\PdfParser\Element\ElementNumeric('2'),
    );
    $element = new \Smalot\PdfParser\Element\ElementArray($members);

    foreach (array(2 => true, 8 => false) as $candidate => $expected) {
        $this->assert->boolean($element->contains($candidate))->isEqualTo($expected);
    }
}
/**
 * Parses a sample document and checks that the "Kids" array of object 3_0
 * holds a single entry that comes back as a Page.
 */
public function testResolveXRef()
{
    // Document with text.
    $parser   = new \Smalot\PdfParser\Parser();
    $document = $parser->parseFile(
        __DIR__ . '/../../../../../../samples/Document1_pdfcreator_nocompressed.pdf'
    );

    $kids = $document->getObjectById('3_0')->get('Kids');
    $this->assert->object($kids)->isInstanceOf('\Smalot\PdfParser\Element\ElementArray');
    $this->assert->array($kids->getContent())->hasSize(1);

    $pages = $kids->getContent();
    $first = reset($pages);
    $this->assert->object($first)->isInstanceOf('\Smalot\PdfParser\Page');
}
/**
 * Checks that Header::getDetails() turns nested arrays and dictionaries
 * into plain nested PHP arrays.
 */
public function testGetDetails()
{
    // NOTE: a sample-file based check (Kids of object 3_0 from
    // Document1_pdfcreator_nocompressed.pdf) used to sit here but is disabled.
    $document = new Document();
    $content  = '<</Type/Page/Types[8]/Sizes[1 2 3 4 5 <</Subtype/XObject>> [8 [9 <</FontSize 10>>]]]>>';

    $expected = array(
        'Type'  => 'Page',
        'Types' => array(8),
        'Sizes' => array(
            1,
            2,
            3,
            4,
            5,
            array('Subtype' => 'XObject'),
            array(
                8,
                array(
                    9,
                    array('FontSize' => 10),
                ),
            ),
        ),
    );

    $header  = Header::parse($content, $document);
    $details = $header->getDetails();

    $this->assert->array($details)->hasSize(3);
    $this->assert->array($details)->isEqualTo($expected);
}
/**
 * Checks the string cast of an array, both built by hand and parsed.
 */
public function test__toString()
{
    $members = array(
        new \Smalot\PdfParser\Element\ElementNumeric('4'),
        new \Smalot\PdfParser\Element\ElementNumeric('2'),
    );
    $built = new \Smalot\PdfParser\Element\ElementArray($members);
    $this->assert->castToString($built)->isEqualTo('4,2');

    $document = new \Smalot\PdfParser\Document(array());
    $parsed   = \Smalot\PdfParser\Element\ElementArray::parse(' [ 4 2 ]', $document);
    $this->assert->castToString($parsed)->isEqualTo('4,2');
}
}

View file

@ -0,0 +1,135 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
* Class ElementBoolean
*
* @package Smalot\PdfParser\Tests\Units\Element
*/
class ElementBoolean extends atoum\test
{
/**
 * Checks ElementBoolean::parse() on inputs it must reject (returns false and
 * leaves the offset at 0) and on inputs it must accept, in several casings.
 */
public function testParse()
{
    // Skipped.
    $rejected = array(
        'ABC',
        ' [ false ]',
        ' << true >>',
        ' / false ',
        ' 0 true ',
        " 0 \n true ",
    );
    foreach ($rejected as $input) {
        $offset  = 0;
        $element = \Smalot\PdfParser\Element\ElementBoolean::parse($input, null, $offset);
        $this->assert->boolean($element)->isEqualTo(false);
        $this->assert->integer($offset)->isEqualTo(0);
    }

    // Valid.
    $accepted = array(
        array(' true ', true, 5),
        array(' TRUE ', true, 5),
        array(' True', true, 5),
        array('true', true, 4),
        array('False', false, 5),
        array(" \n true ", true, 7),
    );
    foreach ($accepted as $case) {
        list($input, $expected_value, $expected_offset) = $case;

        $offset  = 0;
        $element = \Smalot\PdfParser\Element\ElementBoolean::parse($input, null, $offset);
        $this->assert->boolean($element->getContent())->isEqualTo($expected_value);
        $this->assert->integer($offset)->isEqualTo($expected_offset);
    }
}
/**
 * Checks that the textual values 'true' and 'false' come back as booleans.
 */
public function testGetContent()
{
    foreach (array('true' => true, 'false' => false) as $literal => $expected) {
        $element = new \Smalot\PdfParser\Element\ElementBoolean($literal);
        $this->assert->boolean($element->getContent())->isEqualTo($expected);
    }
}
/**
 * Checks that equals() only accepts the matching boolean: integers and null
 * are expected to be refused.
 */
public function testEquals()
{
    $expectations = array(
        'true' => array(
            array(true, true),
            array(1, false),
            array(false, false),
            array(null, false),
        ),
        'false' => array(
            array(false, true),
            array(0, false),
            array(true, false),
            array(null, false),
        ),
    );

    foreach ($expectations as $literal => $checks) {
        $element = new \Smalot\PdfParser\Element\ElementBoolean($literal);

        foreach ($checks as $check) {
            list($candidate, $expected) = $check;
            $this->assert->boolean($element->equals($candidate))->isEqualTo($expected);
        }
    }
}
/**
 * Checks contains() on a 'true' element: only the boolean true is expected
 * to match; false and the integer 1 are expected to be rejected.
 */
public function testContains()
{
    $subject = new \Smalot\PdfParser\Element\ElementBoolean('true');

    // (probe, expected result)
    $probes = array(
        array(true, true),
        array(false, false),
        array(1, false),
    );

    foreach ($probes as $probe) {
        $this->assert->boolean($subject->contains($probe[0]))->isEqualTo($probe[1]);
    }
}
/**
 * Checks that casting the element to string gives back the token it was
 * constructed with.
 */
public function test__toString()
{
    foreach (array('true', 'false') as $token) {
        $subject = new \Smalot\PdfParser\Element\ElementBoolean($token);
        $this->assert->castToString($subject)->isEqualTo($token);
    }
}
}

View file

@ -0,0 +1,164 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
 * Unit tests for \Smalot\PdfParser\Element\ElementDate.
 *
 * @package Smalot\PdfParser\Tests\Units\Element
 */
class ElementDate extends atoum\test
{
    /**
     * Exercises ElementDate::parse() with rejected, accepted and malformed input.
     *
     * Rejected input is expected to return false and leave $offset at 0;
     * accepted input is expected to wrap a \DateTime and advance $offset.
     */
    public function testParse()
    {
        // Skipped: the content does not start with a date literal.
        $rejected = array(
            'ABC',
            ' [ (ABC) 5 6 ]',
            ' << (invalid) >>',
            ' / (FlateDecode) ',
            ' 0 (FlateDecode) ',
            " 0 \n (FlateDecode) ",
        );

        foreach ($rejected as $input) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementDate::parse($input, null, $offset);
            $this->assert->boolean($parsed)->isEqualTo(false);
            $this->assert->integer($offset)->isEqualTo(0);
        }

        // Valid: every row must yield a \DateTime and the given final offset.
        // 'iso' (optional) is the expected string cast with format 'c';
        // 'sameAs' (optional) is a date string the element must equal.
        // The "Z00'00'" row carries neither: only type and offset are checked.
        $iso      = '2013-09-01T23:55:55+02:00';
        $accepted = array(
            array('input' => ' (D:20130901235555+02\'00\') ', 'iso' => $iso, 'offset' => 26),
            array('input' => ' (D:20130901235555+02\'00\') ', 'iso' => $iso, 'offset' => 26),
            array('input' => ' (D:20130901235555+02\'00\')', 'iso' => $iso, 'offset' => 26),
            array('input' => '(D:20130901235555+02\'00\')', 'iso' => $iso, 'offset' => 25),
            array('input' => " \n (D:20130901235555+02'00') ", 'iso' => $iso, 'offset' => 28),
            array('input' => " \n (D:20130901235555) ", 'sameAs' => '2013-09-01T23:55:55', 'offset' => 21),
            array('input' => "(D:20131206091846Z00'00')", 'offset' => 25),
            array(
                'input'  => " \n (D:1-23-2014, 19:02:15-03'00') ",
                'iso'    => '2014-01-23T19:02:15-03:00',
                'offset' => 33,
            ),
        );

        foreach ($accepted as $row) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementDate::parse($row['input'], null, $offset);
            $parsed->setFormat('c');
            $this->assert->object($parsed->getContent())->isInstanceOf('\DateTime');

            if (isset($row['iso'])) {
                $this->assert->castToString($parsed)->isEqualTo($row['iso']);
            }

            if (isset($row['sameAs'])) {
                $this->assert->boolean($parsed->equals(new \DateTime($row['sameAs'])))->isEqualTo(true);
            }

            $this->assert->integer($offset)->isEqualTo($row['offset']);
        }

        // Format invalid: a date literal whose payload cannot be read as a date.
        $offset = 0;
        $parsed = \Smalot\PdfParser\Element\ElementDate::parse(" \n (D:2013+02'00') ", null, $offset);
        $this->assert->boolean($parsed)->isEqualTo(false);
        $this->assert->integer($offset)->isEqualTo(0);
    }

    /**
     * Checks that getContent() returns the wrapped date and that the
     * constructor refuses a plain string with the message 'DateTime required.'.
     */
    public function testGetContent()
    {
        $wrapped        = new \Smalot\PdfParser\Element\ElementDate(new \DateTime('2013-09-01 23:55:55+02:00'));
        $sameInstantUtc = new \DateTime('2013-09-01 21:55:55+00:00');
        $this->assert->dateTime($wrapped->getContent())->isEqualTo($sameInstantUtc);

        try {
            new \Smalot\PdfParser\Element\ElementDate('2013-09-01 23:55:55+02:00');
            // Reaching this line means the string was accepted: force a failure.
            $this->assert->boolean(false)->isEqualTo(true);
        } catch (\Exception $caught) {
            $this->assert->exception($caught)->hasMessage('DateTime required.');
        }
    }

    /**
     * Checks equals() against date strings, \DateTime instances and a
     * non-date string, with the element formatted as 'c'.
     */
    public function testEquals()
    {
        $subject = new \Smalot\PdfParser\Element\ElementDate(new \DateTime('2013-09-01 23:55:55+02:00'));
        $subject->setFormat('c');

        // (probe, expected result)
        $probes = array(
            array('2013-09-01T23:55:55+02:00', true),
            array('2013-09-01T23:55:55+01:00', false),
            array(new \DateTime('2013-09-01T21:55:55+00:00'), true),
            array(new \DateTime('2013-09-01T23:55:55+01:00'), false),
            array('ABC', false),
        );

        foreach ($probes as $probe) {
            $this->assert->boolean($subject->equals($probe[0]))->isEqualTo($probe[1]);
        }
    }

    /**
     * Checks contains() with a string for the same instant (expected true)
     * and a string for a different day (expected false).
     */
    public function testContains()
    {
        $subject = new \Smalot\PdfParser\Element\ElementDate(new \DateTime('2013-09-01 23:55:55+02:00'));

        $sameInstant = $subject->contains('2013-09-01T21:55:55+00:00');
        $this->assert->boolean($sameInstant)->isEqualTo(true);

        $otherDay = $subject->contains('2013-06-15');
        $this->assert->boolean($otherDay)->isEqualTo(false);
    }

    /**
     * Checks the string cast of the element once the format is set to 'c'.
     */
    public function test__toString()
    {
        $subject = new \Smalot\PdfParser\Element\ElementDate(new \DateTime('2013-09-01 23:55:55+02:00'));
        $subject->setFormat('c');

        $this->assert->castToString($subject)->isEqualTo('2013-09-01T23:55:55+02:00');
    }
}

View file

@ -0,0 +1,106 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
 * Unit tests for \Smalot\PdfParser\Element\ElementHexa.
 *
 * @package Smalot\PdfParser\Tests\Units\Element
 */
class ElementHexa extends atoum\test
{
    /**
     * Exercises ElementHexa::parse().
     *
     * Rejected input is expected to return false and leave $offset at 0.
     * Accepted input is expected to expose the decoded text and advance
     * $offset; the last two cases additionally pin the class of the element
     * that parse() hands back.
     */
    public function testParse()
    {
        // Skipped: the content does not start with a hexadecimal literal.
        $rejected = array(
            'ABC',
            ' [ <0020> 5 6 ]',
            ' << <0020> >>',
            ' / <0020> ',
            ' 0 <0020> ',
            " 0 \n <0020> ",
        );

        foreach ($rejected as $input) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementHexa::parse($input, null, $offset);
            $this->assert->boolean($parsed)->isEqualTo(false);
            $this->assert->integer($offset)->isEqualTo(0);
        }

        // Valid: (input, expected decoded content, expected final offset).
        $accepted = array(
            array(' <0020> ', ' ', 7),
            array(' <0020> ', ' ', 7),
            array(' <0020>', ' ', 7),
            array('<0020>', ' ', 6),
            array(" \n <0020> ", ' ', 9),
            array(" \n <5465616d204d616e6167656d656e742053797374656d73> ", 'Team Management Systems', 51),
        );

        foreach ($accepted as $case) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementHexa::parse($case[0], null, $offset);
            $this->assert->string($parsed->getContent())->isEqualTo($case[1]);
            $this->assert->integer($offset)->isEqualTo($case[2]);
        }

        // Decoded text that is a plain string must come back as an ElementString.
        $offset   = 0;
        $asString = \Smalot\PdfParser\Element\ElementHexa::parse(" \n <5265706f72744275696c646572> ", null, $offset);
        $this->assert->object($asString)->isInstanceOf('\Smalot\PdfParser\Element\ElementString');
        $this->assert->string($asString->getContent())->isEqualTo('ReportBuilder');
        $this->assert->integer($offset)->isEqualTo(31);

        // Decoded text that reads as a date must come back as an ElementDate.
        $offset = 0;
        $asDate = \Smalot\PdfParser\Element\ElementHexa::parse(
            " \n <443a3230313331323137313334303435303027303027> ",
            null,
            $offset
        );
        $this->assert->object($asDate)->isInstanceOf('\Smalot\PdfParser\Element\ElementDate');
        $this->assert->castToString($asDate)->isEqualTo('2013-12-17T13:40:45+00:00');
        $this->assert->integer($offset)->isEqualTo(49);
    }
}

View file

@ -0,0 +1,71 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
 * Unit tests for \Smalot\PdfParser\Element\ElementMissing.
 *
 * @package Smalot\PdfParser\Tests\Units\Element
 */
class ElementMissing extends atoum\test
{
    /**
     * equals() is expected to be false for every probe, including null.
     */
    public function testEquals()
    {
        $subject = new \Smalot\PdfParser\Element\ElementMissing(null);

        foreach (array(null, true, 'A', false) as $probe) {
            $this->assert->boolean($subject->equals($probe))->isEqualTo(false);
        }
    }

    /**
     * getContent() is expected to return the boolean false.
     */
    public function testGetContent()
    {
        $subject = new \Smalot\PdfParser\Element\ElementMissing(null);
        $content = $subject->getContent();

        $this->assert->boolean($content)->isEqualTo(false);
    }

    /**
     * contains() is expected to be false for every probe, including null.
     */
    public function testContains()
    {
        $subject = new \Smalot\PdfParser\Element\ElementMissing(null);

        foreach (array(null, true, 'A', false) as $probe) {
            $this->assert->boolean($subject->contains($probe))->isEqualTo(false);
        }
    }

    /**
     * The string cast of a missing element is expected to be empty.
     */
    public function test__toString()
    {
        $subject = new \Smalot\PdfParser\Element\ElementMissing(null);

        $this->assert->castToString($subject)->isEqualTo('');
    }
}

View file

@ -0,0 +1,157 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
 * Unit tests for \Smalot\PdfParser\Element\ElementName.
 *
 * @package Smalot\PdfParser\Tests\Units\Element
 */
class ElementName extends atoum\test
{
    /**
     * Exercises ElementName::parse().
     *
     * Rejected input is expected to return false and leave $offset at 0;
     * accepted input is expected to expose the name without its leading
     * slash and to advance $offset to the given position.
     */
    public function testParse()
    {
        // Skipped: the content does not start with a name.
        $rejected = array(
            'ABC',
            ' [ /ABC 5 6 ]',
            ' << invalid >>',
            ' / FlateDecode ',
            ' 0 /FlateDecode ',
            " 0 \n /FlateDecode ",
        );

        foreach ($rejected as $input) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementName::parse($input, null, $offset);
            $this->assert->boolean($parsed)->isEqualTo(false);
            $this->assert->integer($offset)->isEqualTo(0);
        }

        // Valid: (input, expected name, expected final offset).
        $accepted = array(
            array(' /FlateDecode ', 'FlateDecode', 13),
            array(' /FlateDecode', 'FlateDecode', 13),
            array('/FlateDecode', 'FlateDecode', 12),
            array(" \n /FlateDecode ", 'FlateDecode', 15),
            array('/FlateDecode2', 'FlateDecode2', 13),
            array('/Flate-Decode2', 'Flate-Decode2', 14),
            array('/OJHCYD+Cambria', 'OJHCYD+Cambria', 15),
            array('/OJHCYD+Cambria,Bold', 'OJHCYD+Cambria,Bold', 20),
            // An underscore ends the name: only 'Flate' is expected to be consumed,
            array('/Flate_Decode2', 'Flate', 6),
            // whereas a dot is expected to be kept as part of the name.
            array('/Flate.Decode2', 'Flate.Decode2', 14),
        );

        foreach ($accepted as $case) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementName::parse($case[0], null, $offset);
            $this->assert->string($parsed->getContent())->isEqualTo($case[1]);
            $this->assert->integer($offset)->isEqualTo($case[2]);
        }
    }

    /**
     * getContent() is expected to return the name given to the constructor.
     */
    public function testGetContent()
    {
        $subject = new \Smalot\PdfParser\Element\ElementName('FlateDecode');
        $content = $subject->getContent();

        $this->assert->string($content)->isEqualTo('FlateDecode');
    }

    /**
     * equals() is expected to match the exact name and to reject a near miss
     * (different letter case or different trailing digit).
     */
    public function testEquals()
    {
        // (constructor name, probe expected to match, probe expected to fail)
        $matrix = array(
            array('FlateDecode', 'FlateDecode', 'Flatedecode'),
            array('FlateDecode2', 'FlateDecode2', 'FlateDecode3'),
            array('Flate-Decode2', 'Flate-Decode2', 'Flate-Decode3'),
        );

        foreach ($matrix as $row) {
            $subject = new \Smalot\PdfParser\Element\ElementName($row[0]);
            $this->assert->boolean($subject->equals($row[1]))->isEqualTo(true);
            $this->assert->boolean($subject->equals($row[2]))->isEqualTo(false);
        }
    }

    /**
     * contains() is checked with the same probes as equals(): the exact name
     * is expected to match, the near miss is expected to fail.
     */
    public function testContains()
    {
        // (constructor name, probe expected to match, probe expected to fail)
        $matrix = array(
            array('FlateDecode', 'FlateDecode', 'Flatedecode'),
            array('FlateDecode2', 'FlateDecode2', 'FlateDecode3'),
            array('Flate-Decode2', 'Flate-Decode2', 'Flate-Decode3'),
        );

        foreach ($matrix as $row) {
            $subject = new \Smalot\PdfParser\Element\ElementName($row[0]);
            $this->assert->boolean($subject->contains($row[1]))->isEqualTo(true);
            $this->assert->boolean($subject->contains($row[2]))->isEqualTo(false);
        }
    }

    /**
     * The string cast is expected to be the bare name.
     */
    public function test__toString()
    {
        $subject = new \Smalot\PdfParser\Element\ElementName('FlateDecode');

        $this->assert->castToString($subject)->isEqualTo('FlateDecode');
    }
}

View file

@ -0,0 +1,121 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
 * Unit tests for \Smalot\PdfParser\Element\ElementNull.
 *
 * @package Smalot\PdfParser\Tests\Units\Element
 */
class ElementNull extends atoum\test
{
    /**
     * Exercises ElementNull::parse().
     *
     * Rejected input is expected to return false and leave $offset at 0;
     * accepted input is expected to carry a null content and advance $offset.
     */
    public function testParse()
    {
        // Skipped: the content does not start with the null keyword.
        $rejected = array(
            'ABC',
            ' [ null ]',
            ' << null >>',
            ' / null ',
            ' 0 null ',
            " 0 \n null ",
        );

        foreach ($rejected as $input) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementNull::parse($input, null, $offset);
            $this->assert->boolean($parsed)->isEqualTo(false);
            $this->assert->integer($offset)->isEqualTo(0);
        }

        // Valid: (input, expected final offset).
        $accepted = array(
            array(' null ', 5),
            array(' null ', 5),
            array(' null', 5),
            array('null', 4),
            array(" \n null ", 7),
        );

        foreach ($accepted as $case) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementNull::parse($case[0], null, $offset);
            $this->assert->boolean(null === $parsed->getContent())->isEqualTo(true);
            $this->assert->integer($offset)->isEqualTo($case[1]);
        }
    }

    /**
     * getContent() is expected to return null.
     */
    public function testGetContent()
    {
        $subject = new \Smalot\PdfParser\Element\ElementNull('null');
        $content = $subject->getContent();

        $this->assert->boolean(null === $content)->isEqualTo(true);
    }

    /**
     * equals() is expected to accept null only; false, 0 and 1 must not match.
     */
    public function testEquals()
    {
        $subject = new \Smalot\PdfParser\Element\ElementNull('null');

        // (probe, expected result)
        $probes = array(
            array(null, true),
            array(false, false),
            array(0, false),
            array(1, false),
        );

        foreach ($probes as $probe) {
            $this->assert->boolean($subject->equals($probe[0]))->isEqualTo($probe[1]);
        }
    }

    /**
     * contains() is expected to accept null only; false and 0 must not match.
     */
    public function testContains()
    {
        $subject = new \Smalot\PdfParser\Element\ElementNull('null');

        // (probe, expected result)
        $probes = array(
            array(null, true),
            array(false, false),
            array(0, false),
        );

        foreach ($probes as $probe) {
            $this->assert->boolean($subject->contains($probe[0]))->isEqualTo($probe[1]);
        }
    }

    /**
     * The string cast is expected to be the literal 'null'.
     */
    public function test__toString()
    {
        $subject = new \Smalot\PdfParser\Element\ElementNull('null');

        $this->assert->castToString($subject)->isEqualTo('null');
    }
}

View file

@ -0,0 +1,184 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
* Class ElementNumeric
*
* @package Smalot\PdfParser\Tests\Units\Element
*/
class ElementNumeric extends atoum\test
{
public function testParse()
{
// Skipped.
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse('ABC', null, $offset);
$this->assert->boolean($element)->isEqualTo(false);
$this->assert->integer($offset)->isEqualTo(0);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(' [ 2 ]', null, $offset);
$this->assert->boolean($element)->isEqualTo(false);
$this->assert->integer($offset)->isEqualTo(0);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(' /2', null, $offset);
$this->assert->boolean($element)->isEqualTo(false);
$this->assert->integer($offset)->isEqualTo(0);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(" /2 \n 2", null, $offset);
$this->assert->boolean($element)->isEqualTo(false);
$this->assert->integer($offset)->isEqualTo(0);
// Valid.
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(' -2', null, $offset);
$this->assert->float($element->getContent())->isEqualTo(-2.0);
$this->assert->integer($offset)->isEqualTo(3);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse('2BC', null, $offset);
$this->assert->float($element->getContent())->isEqualTo(2.0);
$this->assert->integer($offset)->isEqualTo(1);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(' 2BC', null, $offset);
$this->assert->float($element->getContent())->isEqualTo(2.0);
$this->assert->integer($offset)->isEqualTo(2);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(' -2BC', null, $offset);
$this->assert->float($element->getContent())->isEqualTo(-2.0);
$this->assert->integer($offset)->isEqualTo(3);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(' -2', null, $offset);
$this->assert->float($element->getContent())->isEqualTo(-2.0);
$this->assert->integer($offset)->isEqualTo(3);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(' 2 0 obj', null, $offset);
$this->assert->float($element->getContent())->isEqualTo(2.0);
$this->assert->integer($offset)->isEqualTo(2);
$offset = 0;
$element = \Smalot\PdfParser\Element\ElementNumeric::parse(" \n -2 ", null, $offset);
$this->assert->float($element->getContent())->isEqualTo(-2.0);
$this->assert->integer($offset)->isEqualTo(5);
}
public function testGetContent()
{
$element = new \Smalot\PdfParser\Element\ElementNumeric('B');
$this->assert->float($element->getContent())->isEqualTo(0.0);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2.5');
$this->assert->float($element->getContent())->isEqualTo(-2.5);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2');
$this->assert->float($element->getContent())->isEqualTo(-2.0);
$element = new \Smalot\PdfParser\Element\ElementNumeric(' -2');
$this->assert->float($element->getContent())->isEqualTo(-2.0);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2.5');
$this->assert->float($element->getContent())->isEqualTo(2.5);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2');
$this->assert->float($element->getContent())->isEqualTo(2.0);
}
public function testEquals()
{
$element = new \Smalot\PdfParser\Element\ElementNumeric('1');
$this->assert->boolean($element->equals('B'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('1.5');
$this->assert->boolean($element->equals('B'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2');
$this->assert->boolean($element->equals('2'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2');
$this->assert->boolean($element->equals('3'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2');
$this->assert->boolean($element->equals('-2'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2');
$this->assert->boolean($element->equals('-3'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2.5');
$this->assert->boolean($element->equals('2.5'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2.5');
$this->assert->boolean($element->equals('3.5'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2.5');
$this->assert->boolean($element->equals('-2.5'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2.5');
$this->assert->boolean($element->equals('-3.5'))->isEqualTo(false);
}
public function testContains()
{
$element = new \Smalot\PdfParser\Element\ElementNumeric('1');
$this->assert->boolean($element->contains('B'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('1.5');
$this->assert->boolean($element->contains('B'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2');
$this->assert->boolean($element->contains('2'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2');
$this->assert->boolean($element->contains('3'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2');
$this->assert->boolean($element->contains('-2'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2');
$this->assert->boolean($element->contains('-3'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2.5');
$this->assert->boolean($element->contains('2.5'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('2.5');
$this->assert->boolean($element->contains('3.5'))->isEqualTo(false);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2.5');
$this->assert->boolean($element->contains('-2.5'))->isEqualTo(true);
$element = new \Smalot\PdfParser\Element\ElementNumeric('-2.5');
$this->assert->boolean($element->contains('-3.5'))->isEqualTo(false);
}
/**
 * Checks the string cast of ElementNumeric for several constructor inputs.
 */
public function test__toString()
{
    // Pairs of (constructor argument, expected string cast). A list of pairs is
    // used instead of a map because numeric string keys such as '2' would be
    // converted to integers by PHP.
    $pairs = array(
        array('B', '0'),
        array('1B', '1'),
        array('2', '2'),
        array('-2', '-2'),
        array('2.5', '2.5'),
        array('-2.5', '-2.5'),
    );

    foreach ($pairs as $pair) {
        list($input, $rendered) = $pair;

        $numeric = new \Smalot\PdfParser\Element\ElementNumeric($input);
        $this->assert->castToString($numeric)->isEqualTo($rendered);
    }
}
}

View file

@ -0,0 +1,156 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
* Class ElementString
*
* @package Smalot\PdfParser\Tests\Units\Element
*/
class ElementString extends atoum\test
{
    /**
     * Exercises ElementString::parse() with inputs it must reject and inputs it must accept.
     */
    public function testParse()
    {
        // Inputs for which parse() is expected to return false and leave the offset at 0.
        $rejected = array(
            'ABC',
            ' [ (ABC) 5 6 ]',
            ' << (invalid) >>',
            ' / (FlateDecode) ',
            ' 0 (FlateDecode) ',
            " 0 \n (FlateDecode) ",
        );

        foreach ($rejected as $raw) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementString::parse($raw, null, $offset);
            $this->assert->boolean($parsed)->isEqualTo(false);
            $this->assert->integer($offset)->isEqualTo(0);
        }

        // Each row: raw input, expected content, expected offset after parsing.
        $accepted = array(
            array(' (Copyright) ', 'Copyright', 12),
            array(' (Copyright) ', 'Copyright', 12),
            array(' (Copyright)', 'Copyright', 12),
            array('(Copyright)', 'Copyright', 11),
            array('(Copy-right2)', 'Copy-right2', 13),
            array(" \n (Copyright) ", 'Copyright', 14),
            array('()', '', 2),
            // Harder inputs: escaped parenthesis, BOM-prefixed text with an octal
            // escape, hexadecimal notation and escaped spaces.
            array("(ABC\\))", 'ABC)', 7),
            array("(\xFE\xFF\\000M)", 'M', 9),
            array("(<20>)", ' ', 6),
            array("(Gutter\\ console\\ assembly)", 'Gutter console assembly', 27),
        );

        foreach ($accepted as $row) {
            list($raw, $content, $end) = $row;

            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementString::parse($raw, null, $offset);
            $this->assert->string($parsed->getContent())->isEqualTo($content);
            $this->assert->integer($offset)->isEqualTo($end);
        }
    }

    /**
     * getContent() hands back the value given to the constructor.
     */
    public function testGetContent()
    {
        $string = new \Smalot\PdfParser\Element\ElementString('Copyright');

        $this->assert->string($string->getContent())->isEqualTo('Copyright');
    }

    /**
     * equals() is true for the identical value and false for a different one.
     */
    public function testEquals()
    {
        // Each row: element value, a value that must match, a value that must not.
        $rows = array(
            array('CopyRight', 'CopyRight', 'Flatedecode'),
            array('CopyRight2', 'CopyRight2', 'CopyRight3'),
            array('Flate-Decode2', 'Flate-Decode2', 'Flate-Decode3'),
        );

        foreach ($rows as $row) {
            list($value, $same, $other) = $row;

            $string = new \Smalot\PdfParser\Element\ElementString($value);
            $this->assert->boolean($string->equals($same))->isEqualTo(true);
            $this->assert->boolean($string->equals($other))->isEqualTo(false);
        }
    }

    /**
     * contains() is true for the identical value and false for a value differing in case or digit.
     */
    public function testContains()
    {
        // Each row: element value, a needle that must be found, a needle that must not.
        $rows = array(
            array('CopyRight', 'CopyRight', 'Copyright'),
            array('CopyRight2', 'CopyRight2', 'CopyRight3'),
        );

        foreach ($rows as $row) {
            list($value, $hit, $miss) = $row;

            $string = new \Smalot\PdfParser\Element\ElementString($value);
            $this->assert->boolean($string->contains($hit))->isEqualTo(true);
            $this->assert->boolean($string->contains($miss))->isEqualTo(false);
        }
    }

    /**
     * Casting the element to string yields its value.
     */
    public function test__toString()
    {
        $string = new \Smalot\PdfParser\Element\ElementString('CopyRight');

        $this->assert->castToString($string)->isEqualTo('CopyRight');
    }
}

View file

@ -0,0 +1,98 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
* Class ElementStruct
*
* @package Smalot\PdfParser\Tests\Units\Element
*/
class ElementStruct extends atoum\test
{
    /**
     * Exercises ElementStruct::parse() with inputs it must reject and inputs it must accept.
     */
    public function testParse()
    {
        $document = new \Smalot\PdfParser\Document(array());

        // Inputs for which parse() is expected to return false and leave the offset at 0.
        $rejected = array(
            'ABC',
            ' [ << /Filter /FlateDecode >> ]',
            ' / << /Filter /FlateDecode >> ',
            ' 0 << /Filter /FlateDecode >> ',
            " 0 \n << /Filter /FlateDecode >> ",
        );

        foreach ($rejected as $raw) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementStruct::parse($raw, $document, $offset);
            $this->assert->boolean($parsed)->isEqualTo(false);
            $this->assert->integer($offset)->isEqualTo(0);
        }

        // Raw input => expected offset once the structure has been consumed.
        $accepted = array(
            ' << /Filter /FlateDecode >> '     => 27,
            ' << /Filter /FlateDecode >>'      => 27,
            '<< /Filter /FlateDecode >>'       => 26,
            " \n << /Filter /FlateDecode >> " => 29,
        );

        foreach ($accepted as $raw => $end) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementStruct::parse($raw, $document, $offset);
            $this->assert->object($parsed)->isInstanceOf('\Smalot\PdfParser\Header');
            $this->assert->integer($offset)->isEqualTo($end);
        }
    }
}

View file

@ -0,0 +1,126 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units\Element;
use mageekguy\atoum;
/**
* Class ElementXRef
*
* @package Smalot\PdfParser\Tests\Units\Element
*/
class ElementXRef extends atoum\test
{
    /**
     * Exercises ElementXRef::parse() with inputs it must reject and inputs it must accept.
     */
    public function testParse()
    {
        // Inputs for which parse() is expected to return false and leave the offset at 0.
        $rejected = array(
            'ABC',
            ' [ 5 0 R ]',
            ' << 5 0 R >>',
            ' / 5 0 R ',
            ' 0 5 0 R ',
            " 0 \n 5 0 R ",
        );

        foreach ($rejected as $raw) {
            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementXRef::parse($raw, null, $offset);
            $this->assert->boolean($parsed)->isEqualTo(false);
            $this->assert->integer($offset)->isEqualTo(0);
        }

        // Each row: raw input and expected offset; the content is '5_0' every time.
        $accepted = array(
            array(' 5 0 R ', 6),
            array(' 5 0 R ', 6),
            array(' 5 0 R', 6),
            array('5 0 R', 5),
            array(" \n 5 0 R ", 8),
        );

        foreach ($accepted as $row) {
            list($raw, $end) = $row;

            $offset = 0;
            $parsed = \Smalot\PdfParser\Element\ElementXRef::parse($raw, null, $offset);
            $this->assert->string($parsed->getContent())->isEqualTo('5_0');
            $this->assert->integer($offset)->isEqualTo($end);
        }
    }

    /**
     * getContent() hands back the value given to the constructor.
     */
    public function testGetContent()
    {
        $xref = new \Smalot\PdfParser\Element\ElementXRef('5_0');

        $this->assert->string($xref->getContent())->isEqualTo('5_0');
    }

    /**
     * getId() reports the same identifier the element was built with.
     */
    public function testGetId()
    {
        $xref = new \Smalot\PdfParser\Element\ElementXRef('5_0');

        $this->assert->string($xref->getId())->isEqualTo('5_0');
    }

    /**
     * equals() accepts the number 5 and the element itself, and rejects 8.
     */
    public function testEquals()
    {
        $xref = new \Smalot\PdfParser\Element\ElementXRef('5_0');

        $matchesFive = $xref->equals(5);
        $this->assert->boolean($matchesFive)->isEqualTo(true);

        $matchesEight = $xref->equals(8);
        $this->assert->boolean($matchesEight)->isEqualTo(false);

        $matchesSelf = $xref->equals($xref);
        $this->assert->boolean($matchesSelf)->isEqualTo(true);
    }

    /**
     * contains() accepts the number 5 and the element itself, and rejects 8.
     */
    public function testContains()
    {
        $xref = new \Smalot\PdfParser\Element\ElementXRef('5_0');

        $holdsFive = $xref->contains(5);
        $this->assert->boolean($holdsFive)->isEqualTo(true);

        $holdsEight = $xref->contains(8);
        $this->assert->boolean($holdsEight)->isEqualTo(false);

        $holdsSelf = $xref->contains($xref);
        $this->assert->boolean($holdsSelf)->isEqualTo(true);
    }

    /**
     * The string cast is the identifier prefixed with '#Obj#'.
     */
    public function test__toString()
    {
        $xref = new \Smalot\PdfParser\Element\ElementXRef('5_0');

        $this->assert->castToString($xref)->isEqualTo('#Obj#5_0');
    }
}

View file

@ -0,0 +1,322 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
use Smalot\PdfParser\Header;
/**
* Class Font
*
* @package Smalot\PdfParser\Tests\Units
*/
class Font extends atoum\test
{
    /**
     * Parses a PDF file and returns the first entry of its font list.
     *
     * @param string $path Path of the PDF file to parse.
     *
     * @return mixed Result of reset() on the array returned by Document::getFonts().
     */
    private function firstFontOf($path)
    {
        $parser = new \Smalot\PdfParser\Parser();
        $fonts  = $parser->parseFile($path)->getFonts();

        return reset($fonts);
    }

    /**
     * The first font of the sample document reports the expected name.
     */
    public function testGetName()
    {
        $font = $this->firstFontOf(__DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf');

        $this->assert->string($font->getName())->isEqualTo('OJHCYD+Cambria,Bold');
    }

    /**
     * The first font of the sample document reports the expected type.
     */
    public function testGetType()
    {
        $font = $this->firstFontOf(__DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf');

        $this->assert->string($font->getType())->isEqualTo('TrueType');
    }

    /**
     * getDetails() of the first sample font matches the full reference structure.
     */
    public function testGetDetails()
    {
        $font = $this->firstFontOf(__DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf');

        $descriptor = array(
            'Type'         => 'FontDescriptor',
            'FontName'     => 'OJHCYD+Cambria,Bold',
            'Flags'        => 4,
            'Ascent'       => 699,
            'CapHeight'    => 699,
            'Descent'      => -7,
            'ItalicAngle'  => 0,
            'StemV'        => 128,
            'MissingWidth' => 658,
        );
        $toUnicode = array(
            'Filter' => 'FlateDecode',
            'Length' => 219,
        );
        // Eleven widths, implicitly keyed 0..10.
        $widths = array(705, 569, 469, 597, 890, 531, 604, 365, 220, 314, 308);

        $expected = array(
            'Name'           => 'OJHCYD+Cambria,Bold',
            'Type'           => 'TrueType',
            'Encoding'       => 'Ansi',
            'BaseFont'       => 'OJHCYD+Cambria,Bold',
            'FontDescriptor' => $descriptor,
            'ToUnicode'      => $toUnicode,
            'FirstChar'      => 1,
            'LastChar'       => 11,
            'Widths'         => $widths,
            'Subtype'        => 'TrueType',
        );

        $this->assert->array($font->getDetails())->isEqualTo($expected);
    }

    /**
     * translateChar() maps known codes to letters and an unknown code to Font::MISSING.
     */
    public function testTranslateChar()
    {
        /** @var \Smalot\PdfParser\Font $font */
        $font = $this->firstFontOf(__DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf');

        // Pairs of (character code, expected translation).
        $pairs = array(
            array("\x01", 'D'),
            array("\x02", 'o'),
            array("\x03", 'c'),
            array("\x04", 'u'),
            array("\x99", \Smalot\PdfParser\Font::MISSING),
        );

        foreach ($pairs as $pair) {
            list($code, $translated) = $pair;
            $this->assert->string($font->translateChar($code))->isEqualTo($translated);
        }
    }

    /**
     * Builds a font whose ToUnicode entry points at an in-memory CMap object and
     * checks the table returned by loadTranslateTable().
     */
    public function testLoadTranslateTable()
    {
        $document = new \Smalot\PdfParser\Document();
        $header   = Header::parse('<</Type/Font /Subtype /Type0 /ToUnicode 2 0 R>>', $document);
        $font     = new \Smalot\PdfParser\Font($document, $header);

        // CMap source; the literal spans several lines and must stay unindented.
        $cmap = '/CIDInit /ProcSet findresource begin
14 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
3 beginbfchar
<0003> <0020>
<000F> <002C>
<0011> <002E>
endbfchar
2 beginbfrange
<0013> <0016> <0030>
<0018> <001C> <0035>
endbfrange
7 beginbfchar
<0023> <0040>
<0026> <0043>
<0028> <0045>
<0030> <004D>
<0033> <0050>
<0035> <0052>
<0039> <0056>
endbfchar
4 beginbfrange
<0044> <004C> <0061>
<004F> <0052> <006C>
<0054> <0059> <0071>
<005B> <005C> <0078>
endbfrange
4 beginbfchar
<0070> <00E9>
<00AB> <2026>
<00B0> <0153>
<00B6> <2019>
endbfchar
1 beginbfrange
<0084> <0086> [<0061> <0071> <0081>]
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end';

        $unicode = new \Smalot\PdfParser\Object($document, null, $cmap);
        $document->setObjects(array('1_0' => $font, '2_0' => $unicode));
        $font->init();

        // Ask for the table again after init().
        $table = $font->loadTranslateTable();
        $this->assert->array($table)->hasSize(47);

        // Table index => expected character; the first five come from bfchar
        // entries, the last two from bfrange entries.
        $expected = array(
            3  => ' ',
            15 => ',',
            17 => '.',
            35 => '@',
            57 => 'V',
            85 => 'r',
            92 => 'y',
        );

        foreach ($expected as $index => $char) {
            $this->assert->string($table[$index])->isEqualTo($char);
        }
    }

    /**
     * decodeHexadecimal() with the default, false and true second argument.
     */
    public function testDecodeHexadecimal()
    {
        // Each row: input, expected output for the default/false flag,
        // expected output when the flag is true.
        $rows = array(
            array('<322041>', "2 A", "(2 A)"),
            array('<003200200041>', "\x002\x00 \x00A", "(\x002\x00 \x00A)"),
            array('<00320020> 8 <0041>', "\x002\x00 8 \x00A", "(\x002\x00 ) 8 (\x00A)"),
            array('<3220> 8 <41>', "2 8 A", "(2 ) 8 (A)"),
            array('<00320020005C>-10<0041>', "\x002\x00 \x00\\-10\x00A", "(\x002\x00 \x00\\\\)-10(\x00A)"),
        );

        foreach ($rows as $row) {
            list($hexa, $plain, $wrapped) = $row;

            $this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa))->isEqualTo($plain);
            $this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, false))->isEqualTo($plain);
            $this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, true))->isEqualTo($wrapped);
        }
    }

    /**
     * decodeOctal() turns backslash-octal escapes into characters and keeps plain text.
     */
    public function testDecodeOctal()
    {
        $decoded = \Smalot\PdfParser\Font::decodeOctal("\\101\\102\\040\\103");
        $this->assert->string($decoded)->isEqualTo('AB C');

        $decoded = \Smalot\PdfParser\Font::decodeOctal("\\101\\102\\040\\103D");
        $this->assert->string($decoded)->isEqualTo('AB CD');
    }

    /**
     * decodeEntities() on '#xx' sequences, including a doubled and a dangling '#'.
     */
    public function testDecodeEntities()
    {
        $decoded = \Smalot\PdfParser\Font::decodeEntities("File#20Type");
        $this->assert->string($decoded)->isEqualTo('File Type');

        $decoded = \Smalot\PdfParser\Font::decodeEntities("File##20Ty#pe");
        $this->assert->string($decoded)->isEqualTo('File# Ty#pe');
    }

    /**
     * decodeUnicode() on a string that starts with the bytes FE FF.
     */
    public function testDecodeUnicode()
    {
        $decoded = \Smalot\PdfParser\Font::decodeUnicode("\xFE\xFF\x00A\x00B");

        $this->assert->string($decoded)->isEqualTo('AB');
    }

    /**
     * decodeText() on a mixed command list, then on ANSI and UTF-8 hexadecimal input.
     */
    public function testDecodeText()
    {
        /** @var \Smalot\PdfParser\Font $font */
        // First font of the sample document (Cambria).
        $font = $this->firstFontOf(__DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf');

        $commands = array(
            array('t' => '', 'c' => "\x01\x02"),
            array('t' => 'n', 'c' => -10),
            array('t' => '', 'c' => "\x03"),
            array('t' => '', 'c' => "\x04"),
            array('t' => 'n', 'c' => -100),
            array('t' => '<', 'c' => "01020304"),
        );
        $this->assert->string($font->decodeText($commands))->isEqualTo('Docu Docu');

        // Second sample: the same text must come out of both encodings.
        /** @var \Smalot\PdfParser\Font $font */
        $font = $this->firstFontOf(__DIR__ . '/../../../../../samples/bugs/Issue95_ANSI.pdf');

        // Hexadecimal string holding ANSI-encoded bytes.
        $ansi = array(
            array('t' => '<', 'c' => "E6F6FC"),
        );
        $this->assert->string($font->decodeText($ansi))->isEqualTo('æöü');

        // Hexadecimal string holding the multi-byte encoding of the same text.
        $multiByte = array(
            array('t' => '<', 'c' => "C3A6C3B6C3BC"),
        );
        $this->assert->string($font->decodeText($multiByte))->isEqualTo('æöü');
    }
}

View file

@ -0,0 +1,145 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
/**
* Class Header
*
* @package Smalot\PdfParser\Tests\Units
*/
class Header extends atoum\test
{
    /**
     * Header::parse() on a dictionary, on content without a header, on a long
     * information dictionary and on an array.
     */
    public function testParse()
    {
        $document = new \Smalot\PdfParser\Document();

        // Dictionary followed by trailing text: two elements, position after '>>'.
        $content  = '<</Type/Page/SubType/Text>>foo';
        $position = 0;
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);
        $this->assert->object($header)->isInstanceOf('\Smalot\PdfParser\Header');
        $this->assert->integer($position)->isEqualTo(27);
        $this->assert->array($header->getElements())->hasSize(2);
        $this->assert->castToString($header->get('Type'))->isEqualTo('Page');

        // No header to parse: an empty Header comes back and the position stays put.
        $content  = 'foo';
        $position = 0;
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);
        $this->assert->object($header)->isInstanceOf('\Smalot\PdfParser\Header');
        $this->assert->integer($position)->isEqualTo(0);
        $this->assert->array($header->getElements())->hasSize(0);

        // Information dictionary containing dates and escaped parentheses.
        $position = 0;
        $content  = "<</CreationDate(D:20100309184803+01'00')/Author(Utilisateur)/Creator(PScript5.dll Version 5.2.2)/Producer(Acrobat Distiller 7.0.5 \(Windows\))/ModDate(D:20100310104810+01'00')/Title(Microsoft Word - CLEMI.docx)>>";
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);
        $this->assert->integer($position)->isEqualTo(212);

        // Array notation is accepted as well.
        $position = 0;
        $content  = '[5 0 R ] foo';
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);
        $this->assert->integer($position)->isEqualTo(8);
        $this->assert->array($header->getElements())->hasSize(1);
    }

    /**
     * getElements() and getElementTypes() on a two-entry dictionary.
     */
    public function testGetElements()
    {
        $document = new \Smalot\PdfParser\Document();
        $content  = '<</Type/Page/Subtype/Text>>foo';
        $position = 0;
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);

        $this->assert->array($elements = $header->getElements())->hasSize(2);
        $this->assert->object(current($elements))->isInstanceOf('\Smalot\PdfParser\Element\ElementName');

        $types = $header->getElementTypes();
        $this->assert->array($types);
        $this->assert->string($types['Type'])->isEqualTo('Smalot\PdfParser\Element\ElementName');
        $this->assert->string($types['Subtype'])->isEqualTo('Smalot\PdfParser\Element\ElementName');
    }

    /**
     * has() is true for keys of the dictionary and false for a name that only appears as a value.
     */
    public function testHas()
    {
        $document = new \Smalot\PdfParser\Document();
        $content  = '<</Type/Page/SubType/Text/Font 5 0 R>>foo';
        $position = 0;
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);

        $this->assert->boolean($header->has('Type'))->isEqualTo(true);
        $this->assert->boolean($header->has('SubType'))->isEqualTo(true);
        $this->assert->boolean($header->has('Font'))->isEqualTo(true);
        // 'Text' is the value of SubType, not a key.
        $this->assert->boolean($header->has('Text'))->isEqualTo(false);
    }

    /**
     * get() for plain names, a registered reference, an absent key and an
     * unregistered reference.
     */
    public function testGet()
    {
        $document = new \Smalot\PdfParser\Document();
        $content  = '<</Type/Page/SubType/Text/Font 5 0 R/Resources 8 0 R>>foo';
        $position = 0;
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);
        $object   = new \Smalot\PdfParser\Page($document, $header);
        // Only object 5_0 is registered; 8_0 (Resources) is deliberately left out.
        $document->setObjects(array('5_0' => $object));

        $this->assert->object($header->get('Type'))->isInstanceOf('\Smalot\PdfParser\Element\ElementName');
        $this->assert->object($header->get('SubType'))->isInstanceOf('\Smalot\PdfParser\Element\ElementName');
        $this->assert->object($header->get('Font'))->isInstanceOf('\Smalot\PdfParser\Page');
        $this->assert->object($header->get('Image'))->isInstanceOf('\Smalot\PdfParser\Element\ElementMissing');

        $resources = $header->get('Resources');
        $this->assert->variable($resources)->isNull();
    }

    /**
     * A registered reference resolves to an object; an unregistered one is expected to give null.
     */
    public function testResolveXRef()
    {
        $document = new \Smalot\PdfParser\Document();
        $content  = '<</Type/Page/SubType/Text/Font 5 0 R/Resources 8 0 R>>foo';
        $position = 0;
        $header   = \Smalot\PdfParser\Header::parse($content, $document, $position);
        $object   = new \Smalot\PdfParser\Page($document, $header);
        // Only object 5_0 is registered; 8_0 (Resources) is deliberately left out.
        $document->setObjects(array('5_0' => $object));

        $this->assert->object($header->get('Font'))->isInstanceOf('\Smalot\PdfParser\Object');

        // The previous version wrapped this check in a try/catch whose try block
        // always ended in a forced failure, so the null assertion in the catch
        // branch was the only path that could pass. Assert it directly, as
        // testGet() does, and keep $header intact instead of overwriting it.
        $resources = $header->get('Resources');
        $this->assert->variable($resources)->isNull();
    }
}

View file

@ -0,0 +1,310 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
/**
* Class Object
*
* @package Smalot\PdfParser\Tests\Units
*/
class Object extends atoum\test
{
// Array keys of one parsed command entry, as used by the reference arrays in
// this class: TYPE keys a marker such as '/', '[', '(' , '<' or 'n', OPERATOR
// keys the operator name (e.g. 'Tf', 'Tm', 'TJ') and COMMAND keys the operand
// text or a nested list of entries.
const TYPE = 't';
const OPERATOR = 'o';
const COMMAND = 'c';
/**
 * Placeholder: the body is empty, so nothing is asserted yet.
 *
 * TODO: add assertions or remove this method.
 */
public function testGetTextParts()
{
}
// public function testGetCommandsImage()
// {
// $content = "/CS/RGB
///W 22
///H 1
///BPC 8
///F/Fl
///DP<</Predictor 15
///Columns 22
///Colors 3>>
//ID \x00\x50c\x63
//EI Q
//q -124.774 124.127 5.64213 5.67154 930.307 4436.95 cm
//BI
//";
//
// $document = new \Smalot\PdfParser\Document();
// $object = new \Smalot\PdfParser\Object($document);
// $offset = 0;
// $parts = $object->getCommandsImage($content, $offset);
// $reference = array(
// array(
// self::TYPE => '/',
// self::OPERATOR => 'CS',
// self::COMMAND => 'RGB',
// ),
// array(
// self::TYPE => '/',
// self::OPERATOR => 'W',
// self::COMMAND => '22',
// ),
// array(
// self::TYPE => '/',
// self::OPERATOR => 'H',
// self::COMMAND => '1',
// ),
// array(
// self::TYPE => '/',
// self::OPERATOR => 'BPC',
// self::COMMAND => '8',
// ),
// array(
// self::TYPE => '/',
// self::OPERATOR => 'F',
// self::COMMAND => 'Fl',
// ),
// array(
// self::TYPE => 'struct',
// self::OPERATOR => 'DP',
// self::COMMAND => array(
// array(
// self::TYPE => '/',
// self::OPERATOR => 'Predictor',
// self::COMMAND => '15',
// ),
// array(
// self::TYPE => '/',
// self::OPERATOR => 'Columns',
// self::COMMAND => '22',
// ),
// array(
// self::TYPE => '/',
// self::OPERATOR => 'Colors',
// self::COMMAND => '3',
// ),
// ),
// ),
// array(
// self::TYPE => '',
// self::OPERATOR => 'ID',
// self::COMMAND => "\x00\x50c\x63",
// ),
// );
//
// $this->assert->array($parts)->isEqualTo($reference);
// $this->assert->integer($offset)->isEqualTo(83);
// }
public function testGetCommandsText()
{
$content = "/R14 30 Tf 0.999016 0 0 1 137.4
342.561 Tm
[(A)-168.854( BC D)-220.905(\\(E\\))20.905<20>]
TJ /R14 17.16 Tf <20> Tj
0.999014 0 0 1 336.84 319.161 Tm T* ( \x00m)Tj
/R14 20.04 Tf
ET Q
q -124.774 124.127 5.64213 5.67154 930.307 4436.95 cm
BI";
$document = new \Smalot\PdfParser\Document();
$object = new \Smalot\PdfParser\Object($document);
$offset = 0;
$parts = $object->getCommandsText($content, $offset);
$reference = array(
array(
self::TYPE => '/',
self::OPERATOR => 'Tf',
self::COMMAND => 'R14 30',
),
array(
self::TYPE => '',
self::OPERATOR => 'Tm',
self::COMMAND => "0.999016 0 0 1 137.4\n342.561",
),
array(
self::TYPE => '[',
self::OPERATOR => 'TJ',
self::COMMAND => array(
array(
self::TYPE => '(',
self::OPERATOR => '',
self::COMMAND => 'A',
),
array(
self::TYPE => 'n',
self::OPERATOR => '',
self::COMMAND => '-168.854',
),
array(
self::TYPE => '(',
self::OPERATOR => '',
self::COMMAND => ' BC D',
),
array(
self::TYPE => 'n',
self::OPERATOR => '',
self::COMMAND => '-220.905',
),
array(
self::TYPE => '(',
self::OPERATOR => '',
self::COMMAND => '\\(E\\)',
),
array(
self::TYPE => 'n',
self::OPERATOR => '',
self::COMMAND => '20.905',
),
array(
self::TYPE => '<',
self::OPERATOR => '',
self::COMMAND => '20',
),
),
),
array(
self::TYPE => '/',
self::OPERATOR => 'Tf',
self::COMMAND => 'R14 17.16',
),
array(
self::TYPE => '<',
self::OPERATOR => 'Tj',
self::COMMAND => '20',
),
array(
self::TYPE => '',
self::OPERATOR => 'Tm',
self::COMMAND => '0.999014 0 0 1 336.84 319.161',
),
array(
self::TYPE => '',
self::OPERATOR => 'T*',
self::COMMAND => '',
),
array(
self::TYPE => '(',
self::OPERATOR => 'Tj',
self::COMMAND => " \x00m",
),
array(
self::TYPE => '/',
self::OPERATOR => 'Tf',
self::COMMAND => 'R14 20.04',
),
);
$this->assert->array($parts)->isEqualTo($reference);
$this->assert->integer($offset)->isEqualTo(172);
}
public function testCleanContent()
{
$content = '/Shape <</MCID << /Font<8>>> BT >>BDC
Q
/CS0 cs 1 1 0 scn
1 i
/GS0 gs
BT
/TT0 1 Tf
0.0007 Tc 0.0018 Tw 0 Ts 100 Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
(Modificatio[ns] au \\(14\\) septembre 2009 ET 2010)Tj
EMC
(ABC) Tj
[ (a)-4.5(b)6(c)8.8 ( fsdfsdfsdf[]sd) ] TD
ET
/Shape <</MCID 2 >>BDC
q
0.03 841';
$expected = '_____________________________________
Q
/CS0 cs 1 1 0 scn
1 i
/GS0 gs
BT
/TT0 1 Tf
0.0007 Tc 0.0018 Tw 0 Ts 100 Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
(________________________________________________)Tj
___
(___) Tj
[_____________________________________] TD
ET
______________________
q
0.03 841';
$document = new \Smalot\PdfParser\Document();
$object = new \Smalot\PdfParser\Object($document);
$cleaned = $object->cleanContent($content, '_');
$this->assert->string($cleaned)->length->isEqualTo(strlen($content));
$this->assert->string($cleaned)->isEqualTo($expected);
}
public function testGetSectionText()
{
$content = '/Shape <</MCID 1 >>BDC
Q
/CS0 cs 1 1 0 scn
1 i
/GS0 gs
BT
/TT0 1 Tf
0.0007 Tc 0.0018 Tw 0 Ts 100 Tz 0 Tr 24 0 0 24 51.3 639.26025 Tm
(Mod BT atio[ns] au \\(14\\) septembre 2009 ET 2010)Tj
EMC
(ABC) Tj
[ (a)-4.5(b) 6(c)8.8 ( fsdfsdfsdf[ sd) ] TD
ET
/Shape <</MCID [BT] >>BDC BT /TT1 1.5 Tf (BT )Tj ET
q
0.03 841';
$document = new \Smalot\PdfParser\Document();
$object = new \Smalot\PdfParser\Object($document);
$sections = $object->getSectionsText($content);
// $this->assert->string($cleaned)->length->isEqualTo(strlen($content));
// $this->assert->string($cleaned)->isEqualTo($expected);
}
}

View file

@ -0,0 +1,112 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
/**
* Class Page
*
* @package Smalot\PdfParser\Tests\Units
*/
class Page extends atoum\test
{
    /**
     * Exercises getFonts() on the first page of two sample documents: one
     * with text (a non-empty list of Font instances is expected) and one
     * without text (an empty list is expected).
     *
     * Each page is queried twice in a row; according to the original author's
     * notes the first call loads the data and the second one uses a cache.
     */
    public function testGetFonts()
    {
        $pdfParser = new \Smalot\PdfParser\Parser();

        // ---- Sample document with text -----------------------------------
        $path      = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $parsed    = $pdfParser->parseFile($path);
        $pageList  = $parsed->getPages();
        $firstPage = $pageList[0];

        // First call.
        $fontList = $firstPage->getFonts();
        $this->assert->array($fontList)->isNotEmpty();
        foreach ($fontList as $entry) {
            $this->assert->object($entry)->isInstanceOf('\Smalot\PdfParser\Font');
        }

        // Second call on the same page.
        $fontList = $firstPage->getFonts();
        $this->assert->array($fontList)->isNotEmpty();

        // ---- Sample document without text (same parser instance) ----------
        $path      = __DIR__ . '/../../../../../samples/Document3_pdfcreator_nocompressed.pdf';
        $parsed    = $pdfParser->parseFile($path);
        $pageList  = $parsed->getPages();
        $firstPage = $pageList[0];

        // First call.
        $fontList = $firstPage->getFonts();
        $this->assert->array($fontList)->isEmpty();

        // Second call on the same page.
        $fontList = $firstPage->getFonts();
        $this->assert->array($fontList)->isEmpty();
    }

    /**
     * Checks that getFont() returns a Font instance for each of the ids
     * 'R7' and 'ABC7' on the first page of the sample document with text.
     */
    public function testGetFont()
    {
        $path      = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $pdfParser = new \Smalot\PdfParser\Parser();
        $parsed    = $pdfParser->parseFile($path);
        $pageList  = $parsed->getPages();
        $firstPage = $pageList[0];

        foreach (array('R7', 'ABC7') as $fontId) {
            $found = $firstPage->getFont($fontId);
            $this->assert->object($found)->isInstanceOf('\Smalot\PdfParser\Font');
        }
    }

    /**
     * Checks that the text of the first page of the sample document is longer
     * than 150 characters and contains a fixed set of expected fragments.
     */
    public function testGetText()
    {
        $path      = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
        $pdfParser = new \Smalot\PdfParser\Parser();
        $parsed    = $pdfParser->parseFile($path);
        $pageList  = $parsed->getPages();
        $firstPage = $pageList[0];
        $extracted = $firstPage->getText();

        $this->assert->string($extracted)->hasLengthGreaterThan(150);

        // Fragments are checked one by one, in this order.
        $fragments = array(
            'Document title',
            'Lorem ipsum',
            'Calibri',
            'Arial',
            'Times',
            'Courier New',
            'Verdana',
        );
        foreach ($fragments as $fragment) {
            $this->assert->string($extracted)->contains($fragment);
        }
    }
}

View file

@ -0,0 +1,67 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
/**
* Class Parser
*
* @package Smalot\PdfParser\Tests\Units
*/
class Parser extends atoum\test
{
    /**
     * Smoke test over every "*.pdf" file found in "<cwd>/samples/bugs".
     *
     * Each file is parsed and the text of its first page must be a string.
     * If the directory does not exist the test does nothing.
     *
     * Two kinds of exception are tolerated and skipped:
     *  - the "Secured pdf file are currently not supported." error;
     *  - any error whose message starts with "TCPDF_PARSER".
     * Every other exception is rethrown so the test fails.
     *
     * @throws \Exception when a sample fails for a reason not listed above
     */
    public function testParseFile()
    {
        $directory = getcwd() . '/samples/bugs';

        if (is_dir($directory)) {
            $files  = scandir($directory);
            $parser = new \Smalot\PdfParser\Parser();

            foreach ($files as $file) {
                if (preg_match('/^.*\.pdf$/i', $file)) {
                    try {
                        $document = $parser->parseFile($directory . '/' . $file);
                        $pages    = $document->getPages();
                        $page     = $pages[0];
                        $content  = $page->getText();
                        $this->assert->string($content);
                    } catch (\Exception $e) {
                        $message = $e->getMessage();

                        // strpos() returns false when the needle is absent and 0 when the
                        // message starts with it. The previous loose "!= 0" treated both as
                        // equal, so unrelated exceptions were silently swallowed. The strict
                        // comparison rethrows everything that is not a TCPDF_PARSER error.
                        $isSecuredPdf  = ($message === 'Secured pdf file are currently not supported.');
                        $isParserError = (strpos($message, 'TCPDF_PARSER') === 0);

                        if (!$isSecuredPdf && !$isParserError) {
                            throw $e;
                        }
                    }
                }
            }
        }
    }
}

View file

@ -0,0 +1,56 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Object;
use Smalot\PdfParser\Page;
/**
* Class Form
*
* @package Smalot\PdfParser\XObject
*/
class Form extends Page
{
    /**
     * Returns the text of this form.
     *
     * The form's content is wrapped in a new Object that has an empty Header,
     * and that object's getText() is called with this form as its argument.
     *
     * @param Page $page Accepted for signature compatibility; not read by this method.
     *
     * @return string
     */
    public function getText(Page $page = null)
    {
        $wrapped = new Object(
            $this->document,
            new Header(array(), $this->document),
            $this->content
        );

        return $wrapped->getText($this);
    }
}

View file

@ -0,0 +1,52 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Object;
use Smalot\PdfParser\Page;
/**
* Class Image
*
* @package Smalot\PdfParser\XObject
*/
class Image extends Object
{
    /**
     * Always returns an empty string; the argument is ignored.
     *
     * @param Page $page Not read by this method.
     *
     * @return string The empty string.
     */
    public function getText(Page $page = null)
    {
        return '';
    }
}