jb-data.de/lib/sc/tests/DownloadTest.php
2025-08-11 22:23:30 +02:00

71 lines
2 KiB
PHP

<?php
namespace Spekulatius\PHPScraper\Tests;
/**
 * Integration tests for PHPScraper::fetchAsset().
 *
 * These tests issue real HTTP requests against phpscraper.de and
 * test-pages.phpscraper.de, so they require network access.
 */
class DownloadTest extends \PHPUnit\Framework\TestCase
{
    /**
     * Fetching an asset from a URL that answers with a 404 must raise a ClientException.
     *
     * NOTE(review): the expected message embeds the protocol version ("HTTP/2"), so this
     * presumably only passes when the request is actually served over HTTP/2 - confirm.
     *
     * @test
     */
    public function testMissingDownload(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper;

        // Expectations have to be registered before the call that throws.
        $this->expectException(\Symfony\Component\HttpClient\Exception\ClientException::class);
        $this->expectExceptionMessage('HTTP/2 404 returned for "https://phpscraper.de/broken-url"');

        $web->fetchAsset('https://phpscraper.de/broken-url');
    }

    /**
     * Downloads the PHPScraper sitemap and ensures the homepage is listed in it
     * (i.e. the download succeeded and the returned content is usable).
     *
     * @test
     */
    public function testDownload(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper;
        $xmlString = $web->fetchAsset('https://phpscraper.de/sitemap.xml');

        // simplexml_load_string() returns false for malformed XML; fail with a clear
        // message instead of erroring later on an unexpected value.
        $xml = simplexml_load_string($xmlString, 'SimpleXMLElement', LIBXML_NOCDATA);
        $this->assertNotFalse($xml, 'The downloaded sitemap could not be parsed as XML.');

        // Iterate the <url> elements directly. Converting the document through
        // json_encode()/json_decode() collapses a sitemap with a single <url> entry
        // into an associative array, which breaks the per-entry 'loc' lookup.
        $urls = [];
        foreach ($xml->url as $entry) {
            $urls[] = (string) $entry->loc;
        }

        $this->assertContains(
            'https://phpscraper.de/',
            $urls
        );
    }

    /**
     * We should support both absolute and relative URLs.
     *
     * Here we use the sitemap test page as a reference.
     *
     * @test
     */
    public function testDifferentUrlTypes(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper;

        // Navigate to the test page. As the URL is predefined, it's only about the base URL.
        $web->go('https://test-pages.phpscraper.de/meta/feeds.html');

        // Reference download, fetched once and compared against both URL styles below.
        $expected = $web->fetchAsset($web->sitemapUrl);

        // Test 1: Absolute URL
        $this->assertSame(
            $expected,
            $web->fetchAsset($web->currentBaseHost . '/custom_sitemap.xml'),
        );

        // Test 2: Relative URL
        $this->assertSame(
            $expected,
            $web->fetchAsset('/custom_sitemap.xml'),
        );
    }
}