jb-data.de/lib/PdfParser/Tests/Units/Font.php
2025-08-11 22:23:30 +02:00

322 lines
11 KiB
PHP

<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
*/
namespace Smalot\PdfParser\Tests\Units;
use mageekguy\atoum;
use Smalot\PdfParser\Header;
/**
* Class Font
*
* @package Smalot\PdfParser\Tests\Units
*/
class Font extends atoum\test
{
public function testGetName()
{
$filename = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
$parser = new \Smalot\PdfParser\Parser();
$document = $parser->parseFile($filename);
$fonts = $document->getFonts();
$font = reset($fonts);
$this->assert->string($font->getName())->isEqualTo('OJHCYD+Cambria,Bold');
}
public function testGetType()
{
$filename = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
$parser = new \Smalot\PdfParser\Parser();
$document = $parser->parseFile($filename);
$fonts = $document->getFonts();
$font = reset($fonts);
$this->assert->string($font->getType())->isEqualTo('TrueType');
}
public function testGetDetails()
{
$filename = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
$parser = new \Smalot\PdfParser\Parser();
$document = $parser->parseFile($filename);
$fonts = $document->getFonts();
$font = reset($fonts);
$reference = array(
'Name' => 'OJHCYD+Cambria,Bold',
'Type' => 'TrueType',
'Encoding' => 'Ansi',
'BaseFont' => 'OJHCYD+Cambria,Bold',
'FontDescriptor' =>
array(
'Type' => 'FontDescriptor',
'FontName' => 'OJHCYD+Cambria,Bold',
'Flags' => 4,
'Ascent' => 699,
'CapHeight' => 699,
'Descent' => -7,
'ItalicAngle' => 0,
'StemV' => 128,
'MissingWidth' => 658,
),
'ToUnicode' =>
array(
'Filter' => 'FlateDecode',
'Length' => 219,
),
'FirstChar' => 1,
'LastChar' => 11,
'Widths' =>
array(
0 => 705,
1 => 569,
2 => 469,
3 => 597,
4 => 890,
5 => 531,
6 => 604,
7 => 365,
8 => 220,
9 => 314,
10 => 308,
),
'Subtype' => 'TrueType',
);
$this->assert->array($font->getDetails())->isEqualTo($reference);
}
public function testTranslateChar()
{
$filename = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
$parser = new \Smalot\PdfParser\Parser();
$document = $parser->parseFile($filename);
$fonts = $document->getFonts();
/** @var \Smalot\PdfParser\Font $font */
$font = reset($fonts);
$this->assert->string($font->translateChar("\x01"))->isEqualTo('D');
$this->assert->string($font->translateChar("\x02"))->isEqualTo('o');
$this->assert->string($font->translateChar("\x03"))->isEqualTo('c');
$this->assert->string($font->translateChar("\x04"))->isEqualTo('u');
$this->assert->string($font->translateChar("\x99"))->isEqualTo(\Smalot\PdfParser\Font::MISSING);
}
public function testLoadTranslateTable()
{
$document = new \Smalot\PdfParser\Document();
$content = '<</Type/Font /Subtype /Type0 /ToUnicode 2 0 R>>';
$header = Header::parse($content, $document);
$font = new \Smalot\PdfParser\Font($document, $header);
$content = '/CIDInit /ProcSet findresource begin
14 dict begin
begincmap
/CIDSystemInfo
<< /Registry (Adobe)
/Ordering (UCS)
/Supplement 0
>> def
/CMapName /Adobe-Identity-UCS def
/CMapType 2 def
1 begincodespacerange
<0000> <FFFF>
endcodespacerange
3 beginbfchar
<0003> <0020>
<000F> <002C>
<0011> <002E>
endbfchar
2 beginbfrange
<0013> <0016> <0030>
<0018> <001C> <0035>
endbfrange
7 beginbfchar
<0023> <0040>
<0026> <0043>
<0028> <0045>
<0030> <004D>
<0033> <0050>
<0035> <0052>
<0039> <0056>
endbfchar
4 beginbfrange
<0044> <004C> <0061>
<004F> <0052> <006C>
<0054> <0059> <0071>
<005B> <005C> <0078>
endbfrange
4 beginbfchar
<0070> <00E9>
<00AB> <2026>
<00B0> <0153>
<00B6> <2019>
endbfchar
1 beginbfrange
<0084> <0086> [<0061> <0071> <0081>]
endbfrange
endcmap
CMapName currentdict /CMap defineresource pop
end
end';
$unicode = new \Smalot\PdfParser\Object($document, null, $content);
$document->setObjects(array('1_0' => $font, '2_0' => $unicode));
$font->init();
// Test reload
$table = $font->loadTranslateTable();
$this->assert->array($table)->hasSize(47);
// Test chars
$this->assert->string($table[3])->isEqualTo(' ');
$this->assert->string($table[15])->isEqualTo(',');
$this->assert->string($table[17])->isEqualTo('.');
$this->assert->string($table[35])->isEqualTo('@');
$this->assert->string($table[57])->isEqualTo('V');
// Test ranges
$this->assert->string($table[85])->isEqualTo('r');
$this->assert->string($table[92])->isEqualTo('y');
}
public function testDecodeHexadecimal()
{
$hexa = '<322041>';
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa))->isEqualTo("2 A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, false))->isEqualTo("2 A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, true))->isEqualTo("(2 A)");
$hexa = '<003200200041>';
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa))->isEqualTo("\x002\x00 \x00A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, false))->isEqualTo("\x002\x00 \x00A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, true))->isEqualTo("(\x002\x00 \x00A)");
$hexa = '<00320020> 8 <0041>';
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa))->isEqualTo("\x002\x00 8 \x00A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, false))->isEqualTo("\x002\x00 8 \x00A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, true))->isEqualTo(
"(\x002\x00 ) 8 (\x00A)"
);
$hexa = '<3220> 8 <41>';
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa))->isEqualTo("2 8 A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, false))->isEqualTo("2 8 A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, true))->isEqualTo("(2 ) 8 (A)");
$hexa = '<00320020005C>-10<0041>';
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa))->isEqualTo("\x002\x00 \x00\\-10\x00A");
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, false))->isEqualTo(
"\x002\x00 \x00\\-10\x00A"
);
$this->assert->string(\Smalot\PdfParser\Font::decodeHexadecimal($hexa, true))->isEqualTo(
"(\x002\x00 \x00\\\\)-10(\x00A)"
);
}
public function testDecodeOctal()
{
$this->assert->string(\Smalot\PdfParser\Font::decodeOctal("\\101\\102\\040\\103"))->isEqualTo('AB C');
$this->assert->string(\Smalot\PdfParser\Font::decodeOctal("\\101\\102\\040\\103D"))->isEqualTo('AB CD');
}
public function testDecodeEntities()
{
$this->assert->string(\Smalot\PdfParser\Font::decodeEntities("File#20Type"))->isEqualTo('File Type');
$this->assert->string(\Smalot\PdfParser\Font::decodeEntities("File##20Ty#pe"))->isEqualTo('File# Ty#pe');
}
public function testDecodeUnicode()
{
$this->assert->string(\Smalot\PdfParser\Font::decodeUnicode("\xFE\xFF\x00A\x00B"))->isEqualTo('AB');
}
public function testDecodeText()
{
$filename = __DIR__ . '/../../../../../samples/Document1_pdfcreator_nocompressed.pdf';
$parser = new \Smalot\PdfParser\Parser();
$document = $parser->parseFile($filename);
$fonts = $document->getFonts();
/** @var \Smalot\PdfParser\Font $font */
// Cambria
$font = reset($fonts);
$commands = array(
array(
't' => '',
'c' => "\x01\x02",
),
array(
't' => 'n',
'c' => -10,
),
array(
't' => '',
'c' => "\x03",
),
array(
't' => '',
'c' => "\x04",
),
array(
't' => 'n',
'c' => -100,
),
array(
't' => '<',
'c' => "01020304",
),
);
$this->assert->string($font->decodeText($commands))->isEqualTo('Docu Docu');
//Check if ANSI/Unicode detection is working properly
$filename = __DIR__ . '/../../../../../samples/bugs/Issue95_ANSI.pdf';
$parser = new \Smalot\PdfParser\Parser();
$document = $parser->parseFile($filename);
$fonts = $document->getFonts();
/** @var \Smalot\PdfParser\Font $font */
$font = reset($fonts);
$commands = array(
array(
't' => '<',
'c' => "E6F6FC", //ANSI encoded string
),
);
$this->assert->string($font->decodeText($commands))->isEqualTo('æöü');
$commands = array(
array(
't' => '<',
'c' => "C3A6C3B6C3BC", //Unicode encoded string
),
);
$this->assert->string($font->decodeText($commands))->isEqualTo('æöü');
}
}