jb-data.de/lib/sc/tests/LinkTest.php
2025-08-11 22:23:30 +02:00

406 lines
13 KiB
PHP

<?php
namespace Spekulatius\PHPScraper\Tests;
class LinkTest extends \PHPUnit\Framework\TestCase
{
/**
* @test
*/
public function testNoLinks()
{
$web = new \Spekulatius\PHPScraper\PHPScraper;
// Navigate to the test page.
$web->go('https://test-pages.phpscraper.de/links/no-links.html');
// No links -> an empty array is expected.
$this->assertSame([], $web->links);
$this->assertSame([], $web->linksWithDetails);
}
/**
* @test
*/
public function testTarget()
{
$web = new \Spekulatius\PHPScraper\PHPScraper;
// Navigate to the test page.
$web->go('https://test-pages.phpscraper.de/links/target.html');
// Check the number of links
$this->assertSame(6, count($web->links));
// Check the simple links list
$this->assertSame([
'https://placekitten.com/408/287',
'https://placekitten.com/444/333',
'https://placekitten.com/444/321',
'https://placekitten.com/408/287',
'https://placekitten.com/444/333',
'https://placekitten.com/444/321',
], $web->links);
// Check the complex links list
$this->assertSame([
[
'url' => 'https://placekitten.com/408/287',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => '_blank',
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/444/333',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => '_blank',
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/444/321',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => '_blank',
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/408/287',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => 'kitten',
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/444/333',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => 'kitten',
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/444/321',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => 'kitten',
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
],
], $web->linksWithDetails);
}
/**
* @test
*/
public function testRel()
{
$web = new \Spekulatius\PHPScraper\PHPScraper;
// Navigate to the test page.
// This page contains several links with different rel attributes.
$web->go('https://test-pages.phpscraper.de/links/rel.html');
// Check the number of links
$this->assertSame(5, count($web->links));
// Check the simple links list
$this->assertSame([
'https://placekitten.com/432/287',
'https://placekitten.com/456/287',
'https://placekitten.com/345/287',
'https://placekitten.com/345/287',
'https://placekitten.com/345/222',
], $web->links);
// Check the complex links list
$this->assertSame([
[
'url' => 'https://placekitten.com/432/287',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => null,
'rel' => 'nofollow',
'image' => [],
'isNofollow' => true,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/456/287',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => null,
'rel' => 'ugc',
'image' => [],
'isNofollow' => false,
'isUGC' => true,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/345/287',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => null,
'rel' => 'nofollow ugc',
'image' => [],
'isNofollow' => true,
'isUGC' => true,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/345/287',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => null,
'rel' => 'noopener',
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => true,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/345/222',
'protocol' => 'https',
'text' => 'external kitten',
'title' => null,
'target' => null,
'rel' => 'noreferrer',
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => true,
],
], $web->linksWithDetails);
}
/**
* @test
*/
public function testBaseHref()
{
$web = new \Spekulatius\PHPScraper\PHPScraper;
// Navigate to the test page.
$web->go('https://test-pages.phpscraper.de/links/base-href.html');
// Check the number of links
$this->assertSame(3, count($web->links));
// Check the simple links list
$this->assertSame([
'https://placekitten.com/408/287',
'https://test-pages.phpscraper.de/assets/cat.jpg',
'https://test-pages-with-base-href.phpscraper.de/assets/cat.jpg',
], $web->links);
// Check the complex links list
$this->assertSame([
[
'url' => 'https://placekitten.com/408/287',
'protocol' => 'https',
'text' => 'external kitten',
'title' => 'external path with base href',
'target' => null,
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://test-pages.phpscraper.de/assets/cat.jpg',
'protocol' => 'https',
'text' => 'absolute path to cat',
'title' => 'absolute internal path with base href',
'target' => null,
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://test-pages-with-base-href.phpscraper.de/assets/cat.jpg',
'protocol' => 'https',
'text' => 'relative cat',
'title' => 'relative path with base href',
'target' => null,
'rel' => null,
'image' => [],
'isNofollow' => false,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
],
], $web->linksWithDetails);
}
/**
* @test
*/
public function testImageUrl()
{
$web = new \Spekulatius\PHPScraper\PHPScraper;
// Navigate to the test page.
$web->go('https://test-pages.phpscraper.de/links/image-url.html');
// Check the number of links
$this->assertSame(3, count($web->links));
// Check the complex links list
$this->assertSame([
[
'url' => 'https://placekitten.com/432/500',
'protocol' => 'https',
'text' => '',
'title' => null,
'target' => null,
'rel' => 'nofollow',
'image' => [
'https://placekitten.com/432/287',
],
'isNofollow' => true,
'isUGC' => false,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/456/500',
'protocol' => 'https',
'text' => '',
'title' => null,
'target' => null,
'rel' => 'ugc',
'image' => [
'https://placekitten.com/456/400',
'https://placekitten.com/456/300',
],
'isNofollow' => false,
'isUGC' => true,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
], [
'url' => 'https://placekitten.com/345/500',
'protocol' => 'https',
'text' => 'This is image',
'title' => null,
'target' => null,
'rel' => 'nofollow ugc',
'image' => [
'https://placekitten.com/345/287',
],
'isNofollow' => true,
'isUGC' => true,
'isSponsored' => false,
'isMe' => false,
'isNoopener' => false,
'isNoreferrer' => false,
],
], $web->linksWithDetails);
}
/**
* @test
*/
public function testInternalLinks()
{
$web = new \Spekulatius\PHPScraper\PHPScraper;
// Navigate to the test page.
$web->go('https://test-pages.phpscraper.de/links/base-href.html');
// Check the internal links list
$this->assertSame(
['https://test-pages.phpscraper.de/assets/cat.jpg'],
$web->internalLinks
);
}
/**
* @test
*/
public function testExternalLinks()
{
$web = new \Spekulatius\PHPScraper\PHPScraper;
// Navigate to the test page.
$web->go('https://test-pages.phpscraper.de/links/base-href.html');
// Check the external links list
$this->assertSame(
[
'https://placekitten.com/408/287',
'https://test-pages-with-base-href.phpscraper.de/assets/cat.jpg',
],
$web->externalLinks
);
}
}