init
This commit is contained in:
commit
72a26edcff
22092 changed files with 2101903 additions and 0 deletions
408
lib/sc/tests/ParserCsvTest.php
Normal file
408
lib/sc/tests/ParserCsvTest.php
Normal file
|
|
@ -0,0 +1,408 @@
|
|||
<?php
|
||||
|
||||
namespace Spekulatius\PHPScraper\Tests;
|
||||
|
||||
class ParserCsvTest extends \PHPUnit\Framework\TestCase
|
||||
{
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testCsvParsingContext()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// This tests ensures an exception is thrown, if no context is given.
|
||||
// Context means either it's been navigated before (URL context) or get something to (fetch +) parse
|
||||
try {
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
$web->parseCsv();
|
||||
} catch (\Exception $e) {
|
||||
// Did we get the expected exception?
|
||||
$this->assertSame(
|
||||
'You can not call parseCsv() without parameter or initial navigation.',
|
||||
$e->getMessage()
|
||||
);
|
||||
}
|
||||
|
||||
// This tests ensures an exception is thrown, if no context is given.
|
||||
// Context means either it's been navigated before (URL context) or get something to (fetch +) parse
|
||||
try {
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
$web->parseCsvWithHeader();
|
||||
} catch (\Exception $e) {
|
||||
// Did we get the expected exception?
|
||||
$this->assertSame(
|
||||
'You can not call parseCsvWithHeader() without parameter or initial navigation.',
|
||||
$e->getMessage()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testCsvDecodeRaw()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Only decoding
|
||||
$this->assertSame(
|
||||
[
|
||||
['date', 'value'],
|
||||
['1945-02-06', '4.20'],
|
||||
['1952-03-11', '42'],
|
||||
],
|
||||
$web->csvDecodeRaw("date,value\n1945-02-06,4.20\n1952-03-11,42"),
|
||||
);
|
||||
|
||||
// Fetching and decoding
|
||||
$this->assertSame(
|
||||
[
|
||||
['date', 'value'],
|
||||
['1945-02-06', '4.20'],
|
||||
['1952-03-11', '42'],
|
||||
],
|
||||
$web->csvDecodeRaw($web->fetchAsset('https://test-pages.phpscraper.de/test.csv')),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testCsvDecode()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Only decoding
|
||||
$this->assertSame(
|
||||
[
|
||||
['date', 'value'],
|
||||
['1945-02-06', 4.20],
|
||||
['1952-03-11', 42],
|
||||
],
|
||||
$web->csvDecode("date,value\n1945-02-06,4.20\n1952-03-11,42"),
|
||||
);
|
||||
|
||||
// Fetching and decoding
|
||||
$this->assertSame(
|
||||
[
|
||||
['date', 'value'],
|
||||
['1945-02-06', 4.20],
|
||||
['1952-03-11', 42],
|
||||
],
|
||||
$web->csvDecode($web->fetchAsset('https://test-pages.phpscraper.de/test.csv')),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test with pipe as separator, enclosure and escape.
|
||||
*
|
||||
* @test
|
||||
*/
|
||||
public function testCsvDecodeAndCustomEncoding()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
$this->assertSame(
|
||||
[
|
||||
['date', 'value'],
|
||||
['1945-02-06', 4.20],
|
||||
['1952-03-11', 42],
|
||||
['\\'],
|
||||
],
|
||||
$web->csvDecode(
|
||||
"\"date\"|\"value\"\n\"1945-02-06\"|\"4.20\"\n\"1952-03-11\"|\"42\"\n\\",
|
||||
'|',
|
||||
'"',
|
||||
'\\'
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testCsvDecodeWithHeaderRaw()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// Only decoding
|
||||
$this->assertSame(
|
||||
[
|
||||
['date' => '1945-02-06', 'value' => '4.20'],
|
||||
['date' => '1952-03-11', 'value' => '42'],
|
||||
],
|
||||
$web->csvDecodeWithHeaderRaw("date,value\n1945-02-06,4.20\n1952-03-11,42"),
|
||||
);
|
||||
|
||||
// Fetching and decoding
|
||||
$this->assertSame(
|
||||
[
|
||||
['date' => '1945-02-06', 'value' => '4.20'],
|
||||
['date' => '1952-03-11', 'value' => '42'],
|
||||
],
|
||||
$web->csvDecodeWithHeaderRaw($web->fetchAsset('https://test-pages.phpscraper.de/test.csv')),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @test
|
||||
*/
|
||||
public function testCsvDecodeWithHeaderAndCasting()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
$this->assertSame(
|
||||
[
|
||||
['date' => '1945-02-06', 'value' => 4.20],
|
||||
['date' => '1952-03-11', 'value' => 42],
|
||||
],
|
||||
$web->csvDecodeWithHeader("date,value\n1945-02-06,4.20\n1952-03-11,42"),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test with header, pipe as separator, and enclosure.
|
||||
*
|
||||
* @test
|
||||
*/
|
||||
public function testCsvDecodeWithHeaderAndCustomEncoding()
|
||||
{
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
$this->assertSame(
|
||||
[
|
||||
['date' => '1945-02-06', 'value' => 4.20],
|
||||
['date' => '1952-03-11', 'value' => 42],
|
||||
],
|
||||
|
||||
$web->csvDecodeWithHeader(
|
||||
"\"date\"|\"value\"\n\"1945-02-06\"|\"4.20\"\n\"1952-03-11\"|\"42\"",
|
||||
'|',
|
||||
'"',
|
||||
'\\'
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the pluming: Test the various ways to call `parseCsv()`.
|
||||
*
|
||||
* @test
|
||||
*/
|
||||
public function testDifferentCsvCalls()
|
||||
{
|
||||
// Downloads the PHPScraper sitemap and ensures the homepage is included (valid download and output).
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// For the reference we are using a simple CSV and parse it. This matches the hosted CSV.
|
||||
$csvString = "date,value\n1945-02-06,4.20\n1952-03-11,42";
|
||||
$csvData = [['date', 'value'], ['1945-02-06', 4.20], ['1952-03-11', 42]];
|
||||
|
||||
// Case 1: Passing in an CSV string in.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// Parse the $csvString directly.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->parseCsv($csvString)
|
||||
);
|
||||
|
||||
// Case 2: `go` + `parseCsv()`
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// Chained call using a CSV file as URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/test.csv')
|
||||
->parseCsv()
|
||||
);
|
||||
|
||||
// Case 3: `parseCsv()` with absolute URL.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// Pass the absolutely URL to `parseCsv()`
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->parseCsv('https://test-pages.phpscraper.de/test.csv')
|
||||
);
|
||||
|
||||
// Case 4: `go` + `parseCsv()` with relative URL.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The 'go' sets the base URL for the following relative path.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->parseCsv('/test.csv')
|
||||
);
|
||||
|
||||
// Case 5: `go` with base URL + `go` with relative URL + `parseCsv()`.
|
||||
// 5.1. Ensure the final URL is correct.
|
||||
$this->assertSame(
|
||||
'https://test-pages.phpscraper.de/test.csv',
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->go('/test.csv')
|
||||
->currentUrl()
|
||||
);
|
||||
|
||||
// 5.2. Ensure the parsed CSV is correct.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->go('/test.csv')
|
||||
->parseCsv()
|
||||
);
|
||||
|
||||
// Case 6: With encoding params
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->go('/test-custom.csv')
|
||||
->parseCsv(null, '|', '"')
|
||||
);
|
||||
|
||||
// Case 7: With encoding params and (relative) URL
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->parseCsv('/test-custom.csv', '|', '"')
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check the pluming: Test the various ways to call `parseCsvWithHeader()`.
|
||||
*
|
||||
* @test
|
||||
*/
|
||||
public function testDifferentCsvWithHeaderCalls()
|
||||
{
|
||||
// Downloads the PHPScraper sitemap and ensures the homepage is included (valid download and output).
|
||||
$web = new \Spekulatius\PHPScraper\PHPScraper;
|
||||
|
||||
// For the reference we are using a simple CSV and parse it. This matches the hosted CSV.
|
||||
$csvString = "date,value\n1945-02-06,4.20\n1952-03-11,42";
|
||||
$csvData = [
|
||||
['date' => '1945-02-06', 'value' => 4.20],
|
||||
['date' => '1952-03-11', 'value' => 42],
|
||||
];
|
||||
|
||||
// Case 1: Passing in an CSV string in.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// Parse the $csvString directly.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->parseCsvWithHeader($csvString)
|
||||
);
|
||||
|
||||
// Case 2: `parseCsvWithHeader()`
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// Chained call using a CSV file as URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->parseCsvWithHeader('https://test-pages.phpscraper.de/test.csv')
|
||||
);
|
||||
|
||||
// Case 2: `go` + `parseCsvWithHeader()`
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// Chained call using a CSV file as URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/test.csv')
|
||||
->parseCsvWithHeader()
|
||||
);
|
||||
|
||||
// Case 3: `parseCsvWithHeader()` with absolute URL.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// Pass the absolutely URL to `parseCsvWithHeader()`
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->parseCsvWithHeader('https://test-pages.phpscraper.de/test.csv')
|
||||
);
|
||||
|
||||
// Case 4: `go` + `parseCsvWithHeader()` with relative URL.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The 'go' sets the base URL for the following relative path.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->parseCsvWithHeader('/test.csv')
|
||||
);
|
||||
|
||||
// Case 5: `go` with base URL + `go` with relative URL + `parseCsvWithHeader()`.
|
||||
// 5.1. Ensure the final URL is correct.
|
||||
$this->assertSame(
|
||||
'https://test-pages.phpscraper.de/test.csv',
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->go('/test.csv')
|
||||
->currentUrl()
|
||||
);
|
||||
|
||||
// 5.2. Ensure the parsed CSV is correct.
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->go('/test.csv')
|
||||
->parseCsvWithHeader()
|
||||
);
|
||||
|
||||
// Case 6: With encoding params
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->go('/test-custom.csv')
|
||||
->parseCsvWithHeader(null, '|', '"')
|
||||
);
|
||||
|
||||
// Case 7: With encoding params and (relative) URL
|
||||
$this->assertSame(
|
||||
// Pass the CSV Data as reference in.
|
||||
$csvData,
|
||||
|
||||
// The first 'go' sets the base URL for the following `go` with relative URL.
|
||||
(new \Spekulatius\PHPScraper\PHPScraper)
|
||||
->go('https://test-pages.phpscraper.de/meta/feeds.html')
|
||||
->parseCsvWithHeader('/test-custom.csv', '|', '"')
|
||||
);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue