init
This commit is contained in:
commit
72a26edcff
22092 changed files with 2101903 additions and 0 deletions
406
lib/sc/tests/LinkTest.php
Normal file
406
lib/sc/tests/LinkTest.php
Normal file
|
|
@ -0,0 +1,406 @@
|
|||
<?php
|
||||
|
||||
namespace Spekulatius\PHPScraper\Tests;
|
||||
|
||||
class LinkTest extends \PHPUnit\Framework\TestCase
|
||||
{
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testNoLinks()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Navigate to the test page.
|
||||
$web->go('https://test-pages.phpscraper.de/links/no-links.html');
|
||||
|
||||
// No links -> an empty array is expected.
|
||||
$this->assertSame([], $web->links);
|
||||
$this->assertSame([], $web->linksWithDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testTarget()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Navigate to the test page.
|
||||
$web->go('https://test-pages.phpscraper.de/links/target.html');
|
||||
|
||||
// Check the number of links
|
||||
$this->assertSame(6, count($web->links));
|
||||
|
||||
// Check the simple links list
|
||||
$this->assertSame([
|
||||
'https://placekitten.com/408/287',
|
||||
'https://placekitten.com/444/333',
|
||||
'https://placekitten.com/444/321',
|
||||
'https://placekitten.com/408/287',
|
||||
'https://placekitten.com/444/333',
|
||||
'https://placekitten.com/444/321',
|
||||
], $web->links);
|
||||
|
||||
// Check the complex links list
|
||||
$this->assertSame([
|
||||
[
|
||||
'url' => 'https://placekitten.com/408/287',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => '_blank',
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/444/333',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => '_blank',
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/444/321',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => '_blank',
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/408/287',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => 'kitten',
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/444/333',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => 'kitten',
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/444/321',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => 'kitten',
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
],
|
||||
], $web->linksWithDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testRel()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Navigate to the test page.
|
||||
// This page contains several links with different rel attributes.
|
||||
$web->go('https://test-pages.phpscraper.de/links/rel.html');
|
||||
|
||||
// Check the number of links
|
||||
$this->assertSame(5, count($web->links));
|
||||
|
||||
// Check the simple links list
|
||||
$this->assertSame([
|
||||
'https://placekitten.com/432/287',
|
||||
'https://placekitten.com/456/287',
|
||||
'https://placekitten.com/345/287',
|
||||
'https://placekitten.com/345/287',
|
||||
'https://placekitten.com/345/222',
|
||||
], $web->links);
|
||||
|
||||
// Check the complex links list
|
||||
$this->assertSame([
|
||||
[
|
||||
'url' => 'https://placekitten.com/432/287',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'nofollow',
|
||||
'image' => [],
|
||||
'isNofollow' => true,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/456/287',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'ugc',
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => true,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/345/287',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'nofollow ugc',
|
||||
'image' => [],
|
||||
'isNofollow' => true,
|
||||
'isUGC' => true,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/345/287',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'noopener',
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => true,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/345/222',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'noreferrer',
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => true,
|
||||
],
|
||||
], $web->linksWithDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testBaseHref()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Navigate to the test page.
|
||||
$web->go('https://test-pages.phpscraper.de/links/base-href.html');
|
||||
|
||||
// Check the number of links
|
||||
$this->assertSame(3, count($web->links));
|
||||
|
||||
// Check the simple links list
|
||||
$this->assertSame([
|
||||
'https://placekitten.com/408/287',
|
||||
'https://test-pages.phpscraper.de/assets/cat.jpg',
|
||||
'https://test-pages-with-base-href.phpscraper.de/assets/cat.jpg',
|
||||
], $web->links);
|
||||
|
||||
// Check the complex links list
|
||||
$this->assertSame([
|
||||
[
|
||||
'url' => 'https://placekitten.com/408/287',
|
||||
'protocol' => 'https',
|
||||
'text' => 'external kitten',
|
||||
'title' => 'external path with base href',
|
||||
'target' => null,
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://test-pages.phpscraper.de/assets/cat.jpg',
|
||||
'protocol' => 'https',
|
||||
'text' => 'absolute path to cat',
|
||||
'title' => 'absolute internal path with base href',
|
||||
'target' => null,
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://test-pages-with-base-href.phpscraper.de/assets/cat.jpg',
|
||||
'protocol' => 'https',
|
||||
'text' => 'relative cat',
|
||||
'title' => 'relative path with base href',
|
||||
'target' => null,
|
||||
'rel' => null,
|
||||
'image' => [],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
],
|
||||
], $web->linksWithDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testImageUrl()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Navigate to the test page.
|
||||
$web->go('https://test-pages.phpscraper.de/links/image-url.html');
|
||||
|
||||
// Check the number of links
|
||||
$this->assertSame(3, count($web->links));
|
||||
|
||||
// Check the complex links list
|
||||
$this->assertSame([
|
||||
[
|
||||
'url' => 'https://placekitten.com/432/500',
|
||||
'protocol' => 'https',
|
||||
'text' => '',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'nofollow',
|
||||
'image' => [
|
||||
'https://placekitten.com/432/287',
|
||||
],
|
||||
'isNofollow' => true,
|
||||
'isUGC' => false,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/456/500',
|
||||
'protocol' => 'https',
|
||||
'text' => '',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'ugc',
|
||||
'image' => [
|
||||
'https://placekitten.com/456/400',
|
||||
'https://placekitten.com/456/300',
|
||||
],
|
||||
'isNofollow' => false,
|
||||
'isUGC' => true,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
], [
|
||||
'url' => 'https://placekitten.com/345/500',
|
||||
'protocol' => 'https',
|
||||
'text' => 'This is image',
|
||||
'title' => null,
|
||||
'target' => null,
|
||||
'rel' => 'nofollow ugc',
|
||||
'image' => [
|
||||
'https://placekitten.com/345/287',
|
||||
],
|
||||
'isNofollow' => true,
|
||||
'isUGC' => true,
|
||||
'isSponsored' => false,
|
||||
'isMe' => false,
|
||||
'isNoopener' => false,
|
||||
'isNoreferrer' => false,
|
||||
],
|
||||
], $web->linksWithDetails);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testInternalLinks()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Navigate to the test page.
|
||||
$web->go('https://test-pages.phpscraper.de/links/base-href.html');
|
||||
|
||||
// Check the internal links list
|
||||
$this->assertSame(
|
||||
['https://test-pages.phpscraper.de/assets/cat.jpg'],
|
||||
$web->internalLinks
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testExternalLinks()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Navigate to the test page.
|
||||
$web->go('https://test-pages.phpscraper.de/links/base-href.html');
|
||||
|
||||
// Check the external links list
|
||||
$this->assertSame(
|
||||
[
|
||||
'https://placekitten.com/408/287',
|
||||
'https://test-pages-with-base-href.phpscraper.de/assets/cat.jpg',
|
||||
],
|
||||
$web->externalLinks
|
||||
);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue