init
This commit is contained in:
commit
72a26edcff
22092 changed files with 2101903 additions and 0 deletions
147
lib/sc/tests/FeedSitemapTest.php
Normal file
147
lib/sc/tests/FeedSitemapTest.php
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
<?php
|
||||
|
||||
namespace Spekulatius\PHPScraper\Tests;
|
||||
|
||||
use Spekulatius\PHPScraper\DataTransferObjects\FeedEntry;
|
||||
|
||||
/**
 * Integration tests for the sitemap features of PHPScraper.
 *
 * Every test first navigates to a page on the test host and then reads the
 * sitemap through the scraper (`sitemapUrl`, `sitemapRaw`, `sitemap`).
 * NOTE(review): these tests talk to the live test host, so they need network access.
 */
class FeedSitemapTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Page on the test host that every test navigates to before reading the sitemap.
     */
    private const TEST_PAGE_URL = 'https://test-pages.phpscraper.de/meta/feeds.html';

    /**
     * Number of URL entries the tests expect to find in the test host's sitemap.
     */
    private const EXPECTED_ENTRY_COUNT = 129;

    /**
     * Creates a scraper and navigates it to the shared test page.
     *
     * The sitemap URL is guessed by the scraper, so only the base URL of the
     * visited page matters to these tests.
     *
     * @return \Spekulatius\PHPScraper\PHPScraper Scraper positioned on the test page.
     */
    private function scraperOnTestPage(): \Spekulatius\PHPScraper\PHPScraper
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper;
        $web->go(self::TEST_PAGE_URL);

        return $web;
    }

    /**
     * The guessed sitemap URL should be `/sitemap.xml` on the base host.
     */
    public function testSitemapUrl(): void
    {
        $web = $this->scraperOnTestPage();

        $this->assertSame(
            'https://test-pages.phpscraper.de/sitemap.xml',
            $web->sitemapUrl
        );
    }

    /**
     * Tests if the default sitemap path is applied when no URL is passed.
     */
    public function testDefaultSitemapUrl(): void
    {
        $web = $this->scraperOnTestPage();

        // Calling without an argument must yield the same result as passing the default URL.
        $this->assertSame(
            $web->sitemapRaw(),
            $web->sitemapRaw($web->sitemapUrl),
        );
    }

    /**
     * The files `sitemap.xml` and `custom_sitemap.xml` are the same and used to
     * ensure the custom URL feature works.
     */
    public function testCustomSitemapUrl(): void
    {
        $web = $this->scraperOnTestPage();

        // We should always allow for custom paths.
        $this->assertSame(
            $web->sitemapRaw($web->sitemapUrl),
            $web->sitemapRaw($web->currentBaseHost . '/custom_sitemap.xml'),
        );
    }

    /**
     * We should support both absolute and relative URLs.
     */
    public function testDifferentSitemapUrlTypes(): void
    {
        $web = $this->scraperOnTestPage();

        // Test 1: Absolute URL
        $this->assertSame(
            $web->sitemapRaw($web->sitemapUrl),
            $web->sitemapRaw($web->currentBaseHost . '/custom_sitemap.xml'),
        );

        // Test 2: Relative URL
        $this->assertSame(
            $web->sitemapRaw($web->sitemapUrl),
            $web->sitemapRaw('/custom_sitemap.xml'),
        );
    }

    /**
     * Ensure we can parse the sitemap in itself (XML).
     */
    public function testSitemapRaw(): void
    {
        $web = $this->scraperOnTestPage();

        // Get the sitemap and store it.
        $sitemapRaw = $web->sitemapRaw;

        // assertCount reports the actual size on failure, unlike assertSame on count().
        $this->assertCount(self::EXPECTED_ENTRY_COUNT, $sitemapRaw['url']);

        // Check some entries to ensure the parsing works as expected.
        $this->assertSame(
            'https://phpscraper.de/apis/linkedin.html',
            $sitemapRaw['url'][4]['loc'],
        );
        $this->assertSame(
            'https://phpscraper.de/de/apis/zalando.html',
            $sitemapRaw['url'][20]['loc'],
        );
    }

    /**
     * Tests the DTO creation: each sitemap entry should be a `FeedEntry`.
     */
    public function testSitemap(): void
    {
        $web = $this->scraperOnTestPage();

        // Get the sitemap and store it.
        $sitemap = $web->sitemap;

        // Check the count.
        $this->assertCount(self::EXPECTED_ENTRY_COUNT, $sitemap);

        // Check some samples. assertInstanceOf reports the actual type on failure.
        $this->assertInstanceOf(FeedEntry::class, $sitemap[42]);
        $this->assertSame(
            'https://phpscraper.de/apis/linkedin.html',
            $sitemap[4]->link,
        );
        $this->assertSame(
            'https://phpscraper.de/de/apis/zalando.html',
            $sitemap[20]->link
        );
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue