init
This commit is contained in:
commit
72a26edcff
22092 changed files with 2101903 additions and 0 deletions
212
lib/sc/tests/UrlTest.php
Normal file
212
lib/sc/tests/UrlTest.php
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
<?php
|
||||
|
||||
namespace Spekulatius\PHPScraper\Tests;
|
||||
|
||||
/**
 * Ensure our URL lib, https://github.com/thephpleague/uri, is integrated correctly and works as expected.
 *
 * NOTE(review): every test except the null pass-through calls `go()` with a
 * live `https://` URL, so these tests presumably require network access.
 */
class UrlTest extends \PHPUnit\Framework\TestCase
{
    /**
     * If null is passed to `makeUrlAbsolute`, it should always return null.
     *
     * @test
     */
    public function testNullPassingThrough(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper();

        $this->assertNull($web->makeUrlAbsolute(null));
    }

    /**
     * After navigating to a page, `currentUrl`, `currentHost` and
     * `currentBaseHost` should each expose the matching part of that page's URL.
     *
     * The `@test` annotation is what registers this method with PHPUnit, as its
     * name does not start with `test`.
     *
     * @test
     */
    public function validateUriTest(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper();

        // We use any URL for this.
        $web->go('https://test-pages.phpscraper.de/content/lists.html');

        // Ensure the URL is set correctly.
        $this->assertSame(
            'https://test-pages.phpscraper.de/content/lists.html',
            $web->currentUrl
        );

        // Ensure the host is parsed correctly.
        $this->assertSame(
            'test-pages.phpscraper.de',
            $web->currentHost
        );

        // Ensure the host with protocol is parsed correctly.
        $this->assertSame(
            'https://test-pages.phpscraper.de',
            $web->currentBaseHost
        );
    }

    /**
     * A `<base href>` declared on the page should be reported as the
     * `currentBaseHost`, instead of the host the page was loaded from.
     *
     * @test
     */
    public function testCurrentBaseHostWithBase(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper();

        // Navigate to the test page.
        // Contains: <base href="https://test-pages-with-base-href.phpscraper.de/">
        $web->go('https://test-pages.phpscraper.de/meta/image/absolute-path-with-base-href.html');

        // Check the base href being passed through the current base host.
        $this->assertSame(
            'https://test-pages-with-base-href.phpscraper.de',
            $web->currentBaseHost
        );
    }

    /**
     * Basic processing of the URLs: relative paths are resolved against the
     * current page, while absolute URLs are returned unchanged.
     *
     * @test
     */
    public function testMakeUrlAbsolute(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper();

        // Navigate to test page: This sets the base URL.
        $web->go('https://phpscraper.de');

        // Test variations of paths to be processed
        // With leading slash
        $this->assertSame(
            'https://phpscraper.de/index.html',
            $web->makeUrlAbsolute('/index.html'),
        );

        // Without leading slash
        $this->assertSame(
            'https://phpscraper.de/index.html',
            $web->makeUrlAbsolute('index.html'),
        );

        // Paths are considered.
        $this->assertSame(
            'https://phpscraper.de/test/index.html',
            $web->makeUrlAbsolute('test/index.html'),
        );

        // Absolute URLs are untouched.
        $this->assertSame(
            'https://example.com/index.html',
            $web->makeUrlAbsolute('https://example.com/index.html'),
        );

        // Protocol is considered
        $this->assertSame(
            'http://example.com/index.html',
            $web->makeUrlAbsolute('http://example.com/index.html'),
        );
    }

    /**
     * Relative paths should be resolved against the page's `<base href>` rather
     * than the host the page was loaded from. Absolute URLs stay unchanged.
     *
     * @test
     */
    public function testMakeUrlAbsoluteConsiderBaseHref(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper();

        /**
         * Navigate to test page: This sets the base URL.
         *
         * It contains:
         *
         * ```html
         * <base href="https://test-pages-with-base-href.phpscraper.de/">
         * ```
         *
         * While it's located on `test-pages.phpscraper.de`.
         *
         * This page isn't actually used. It's purely to set the context.
         */
        $web->go('https://test-pages.phpscraper.de/meta/image/absolute-path-with-base-href.html');

        // Test variations of paths to be processed
        // With leading slash
        $this->assertSame(
            'https://test-pages-with-base-href.phpscraper.de/index.html',
            $web->makeUrlAbsolute('/index.html'),
        );

        // Without leading slash
        $this->assertSame(
            'https://test-pages-with-base-href.phpscraper.de/index.html',
            $web->makeUrlAbsolute('index.html'),
        );

        // Paths are considered.
        $this->assertSame(
            'https://test-pages-with-base-href.phpscraper.de/test/index.html',
            $web->makeUrlAbsolute('test/index.html'),
        );

        // Absolute URLs are untouched.
        $this->assertSame(
            'https://example.com/index.html',
            $web->makeUrlAbsolute('https://example.com/index.html'),
        );

        // Protocol is considered
        $this->assertSame(
            'http://example.com/index.html',
            $web->makeUrlAbsolute('http://example.com/index.html'),
        );
    }

    /**
     * Test if passed in hosts are considered. It trumps any base-href and current url.
     *
     * @test
     */
    public function testMakeUrlAbsoluteWithBaseHost(): void
    {
        $web = new \Spekulatius\PHPScraper\PHPScraper();

        // Navigate to test page: This sets the base URL.
        $web->go('https://phpscraper.de');

        // Test variations of paths to be processed
        // With leading slash
        $this->assertSame(
            'https://example.com/index.html',
            $web->makeUrlAbsolute('/index.html', 'https://example.com'),
        );

        // Without leading slash
        $this->assertSame(
            'https://example.com/index.html',
            $web->makeUrlAbsolute('index.html', 'https://example.com'),
        );

        // Paths are considered.
        $this->assertSame(
            'https://example.com/test/index.html',
            $web->makeUrlAbsolute('test/index.html', 'https://example.com'),
        );

        // Absolute URLs are untouched, even when a different base host is passed in.
        $this->assertSame(
            'https://example.com/index.html',
            $web->makeUrlAbsolute('https://example.com/index.html', 'https://example-2.com/test/with/path'),
        );

        // Protocol is considered
        $this->assertSame(
            'http://example.com/index.html',
            $web->makeUrlAbsolute('http://example.com/index.html', 'https://example-2.com/test/with/path'),
        );
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue