This commit is contained in:
steven 2025-08-11 22:23:30 +02:00
commit 72a26edcff
22092 changed files with 2101903 additions and 0 deletions

View file

@ -0,0 +1,193 @@
<?php
namespace SimpleExcel\Parser;
use SimpleExcel\Exception\SimpleExcelException;
/**
 * Common base class for SimpleExcel parsers
 *
 * Keeps the parsed table as a zero-based array of rows (each row being a
 * zero-based array of cell values) and exposes it through one-based
 * row/column accessors. loadFile() and loadString() are declared by IParser
 * and are left to the concrete parser classes.
 *
 * @author Faisalman
 * @package SimpleExcel
 */
abstract class BaseParser implements IParser
{
    /**
     * Holds the parsed result; NULL until a parser assigns it
     *
     * @access protected
     * @var array
     */
    protected $table_arr;

    /**
     * Defines valid file extension (compared case-insensitively)
     *
     * @access protected
     * @var string
     */
    protected $file_extension = '';

    /**
     * @param string $file_url Path to file (optional); when given, it is loaded immediately
     */
    public function __construct($file_url = NULL) {
        if (isset($file_url)) {
            $this->loadFile($file_url);
        }
    }

    /**
     * Get value of the specified cell
     *
     * @param int  $row_num  Row number (one-based)
     * @param int  $col_num  Column number (one-based)
     * @param bool $val_only Unused here; kept for interface compatibility
     * @return mixed         The cell value, or an empty string when this row is shorter than the column asked for
     * @throws Exception     If the cell identified doesn't exist.
     */
    public function getCell($row_num, $col_num, $val_only = true) {
        // check whether the cell exists
        if (!$this->isCellExists($row_num, $col_num)) {
            throw new \Exception('Cell '.$row_num.','.$col_num.' doesn\'t exist', SimpleExcelException::CELL_NOT_FOUND);
        }
        $row = $this->table_arr[$row_num-1];
        // isColumnExists() only proves that SOME row has this column; a ragged
        // row (possible for HTML/JSON sources) may still be shorter, so fall
        // back to an empty string instead of reading an undefined offset.
        if (!is_array($row) || !array_key_exists($col_num-1, $row)) {
            return "";
        }
        return $row[$col_num-1];
    }

    /**
     * Get data of the specified column as an array
     *
     * @param int  $col_num  Column number (one-based)
     * @param bool $val_only Unused here; kept for interface compatibility
     * @return array         One entry per row; rows lacking the column contribute an empty string
     * @throws Exception     If the column requested doesn't exist.
     */
    public function getColumn($col_num, $val_only = TRUE) {
        if (!$this->isColumnExists($col_num)) {
            throw new \Exception('Column '.$col_num.' doesn\'t exist', SimpleExcelException::COLUMN_NOT_FOUND);
        }
        $col_arr = array();
        // get the specified column within every row
        foreach ($this->table_arr as $row) {
            if (is_array($row) && array_key_exists($col_num-1, $row)) {
                array_push($col_arr, $row[$col_num-1]);
            } else {
                // short row: keep the result aligned with the row numbers
                array_push($col_arr, "");
            }
        }
        return $col_arr;
    }

    /**
     * Get data of all cells as an array
     *
     * @param bool $val_only Unused here; kept for interface compatibility
     * @return array
     * @throws Exception     If the field is not set.
     */
    public function getField($val_only = TRUE) {
        if (!$this->isFieldExists()) {
            throw new \Exception('Field is not set', SimpleExcelException::FIELD_NOT_FOUND);
        }
        return $this->table_arr;
    }

    /**
     * Get data of the specified row as an array
     *
     * @param int  $row_num  Row number (one-based)
     * @param bool $val_only Unused here; kept for interface compatibility
     * @return array
     * @throws Exception     When a row is requested that doesn't exist.
     */
    public function getRow($row_num, $val_only = TRUE) {
        if (!$this->isRowExists($row_num)) {
            throw new \Exception('Row '.$row_num.' doesn\'t exist', SimpleExcelException::ROW_NOT_FOUND);
        }
        return $this->table_arr[$row_num-1];
    }

    /**
     * Check whether cell with specified row & column exists
     *
     * True when the row exists and at least one row of the table has the column.
     *
     * @param int $row_num Row number (one-based)
     * @param int $col_num Column number (one-based)
     * @return bool
     */
    public function isCellExists($row_num, $col_num){
        return $this->isRowExists($row_num) && $this->isColumnExists($col_num);
    }

    /**
     * Check whether a specified column exists in at least one row
     *
     * @param int $col_num Column number (one-based)
     * @return bool        FALSE as well when nothing has been loaded yet
     */
    public function isColumnExists($col_num){
        // nothing loaded yet: iterating NULL would only raise a warning
        if (!is_array($this->table_arr)) {
            return false;
        }
        foreach ($this->table_arr as $row) {
            if (is_array($row) && array_key_exists($col_num-1, $row)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Check whether a specified row exists
     *
     * @param int $row_num Row number (one-based)
     * @return bool        FALSE as well when nothing has been loaded yet
     */
    public function isRowExists($row_num){
        // array_key_exists() rejects a NULL haystack on PHP 8
        return is_array($this->table_arr) && array_key_exists($row_num-1, $this->table_arr);
    }

    /**
     * Check whether table exists
     *
     * @return bool
     */
    public function isFieldExists(){
        return isset($this->table_arr);
    }

    /**
     * Check whether file exists, valid, and readable
     *
     * @param string $file_path Path to file
     * @return bool             Always TRUE; every failure is reported by exception
     * @throws Exception        If file being loaded doesn't exist
     * @throws Exception        If file extension doesn't match
     * @throws Exception        If error reading the file
     */
    public function isFileReady($file_path) {
        // file exists?
        if (!file_exists($file_path)) {
            throw new \Exception('File '.$file_path.' doesn\'t exist', SimpleExcelException::FILE_NOT_FOUND);
        }
        // extension valid?
        $extension = strtoupper(pathinfo($file_path, PATHINFO_EXTENSION));
        if ($extension != strtoupper($this->file_extension)) {
            throw new \Exception('File extension '.$extension.' doesn\'t match with '.$this->file_extension, SimpleExcelException::FILE_EXTENSION_MISMATCH);
        }
        // file readable?
        $handle = fopen($file_path, 'r');
        if ($handle === FALSE) {
            throw new \Exception('Error reading the file in'.$file_path, SimpleExcelException::ERROR_READING_FILE);
        }
        // the handle was opened only as a readability probe; release it right away
        fclose($handle);
        return TRUE;
    }
}

View file

@ -0,0 +1,124 @@
<?php
namespace SimpleExcel\Parser;
use SimpleExcel\Exception\SimpleExcelException;
/**
 * SimpleExcel class for parsing Microsoft Excel CSV Spreadsheet
 *
 * @author Faisalman
 * @package SimpleExcel
 */
class CSVParser extends BaseParser implements IParser
{
    /**
     * Defines delimiter character; guessed from the first line when left unset
     *
     * @access protected
     * @var string
     */
    protected $delimiter;

    /**
     * Defines valid file extension
     *
     * @access protected
     * @var string
     */
    protected $file_extension = 'csv';

    /**
     * Load the CSV file to be parsed
     *
     * @param string $file_path Path to CSV file
     * @throws Exception        If the file is missing, has the wrong extension or cannot be read
     */
    public function loadFile($file_path){
        if (!$this->isFileReady($file_path)) {
            return;
        }
        $contents = file_get_contents($file_path);
        if ($contents === FALSE) {
            throw new \Exception('Error reading the file in '.$file_path, SimpleExcelException::ERROR_READING_FILE);
        }
        $this->loadString($contents);
    }

    /**
     * Load the string to be parsed
     *
     * Input is split on line breaks first, so a quoted field that itself
     * contains a line break is not supported. Empty lines are skipped, and
     * shorter rows are padded with empty strings up to the widest row.
     *
     * @param string $str String with CSV format
     */
    public function loadString($str){
        $this->table_arr = array();

        // 1. Split into lines by newline http://stackoverflow.com/questions/3997336/explode-php-string-by-new-line
        $lines = preg_split("/\r\n|\n|\r/", $str, -1, PREG_SPLIT_NO_EMPTY);
        // There are no lines to parse
        if (!$lines) {
            return;
        }

        // 2. Guess delimiter if none set
        if (!isset($this->delimiter)) {
            $this->delimiter = $this->guessDelimiter($lines[0]);
        }

        // 3. Parse the lines into rows,cols and remember the widest/narrowest row
        $max = 0;
        $min = PHP_INT_MAX;
        $rows = array();
        foreach ($lines as $line) {
            // enclosure and escape are the historical defaults, spelled out
            // because relying on the default escape is deprecated in PHP 8.4
            $args = str_getcsv($line, $this->delimiter, '"', '\\');
            $rows[] = $args;
            $cols = count($args);
            $max = max($max, $cols);
            $min = min($min, $cols);
        }

        // 4. Expand those rows which have less cols than max cols found
        if ($min != $max) {
            foreach ($rows as $i => $row) {
                $rows[$i] = array_pad($row, $max, ""); // fill with empty strings
            }
        }
        $this->table_arr = $rows;
    }

    /**
     * Pick ';' or ',' depending on which one splits the line into more fields
     *
     * @access private
     * @param string $line First line of the document
     * @return string      ';' only when it yields strictly more fields, ',' otherwise
     */
    private function guessDelimiter($line){
        // str_getcsv() takes the text first and the separator second
        $by_semicolon = count(str_getcsv($line, ';', '"', '\\'));
        $by_comma = count(str_getcsv($line, ',', '"', '\\'));
        return ($by_semicolon > $by_comma) ? ';' : ',';
    }

    /**
     * Set delimiter that should be used to parse CSV document
     *
     * @param string $delimiter Delimiter character
     */
    public function setDelimiter($delimiter){
        $this->delimiter = $delimiter;
    }
}

View file

@ -0,0 +1,95 @@
<?php
namespace SimpleExcel\Parser;
use SimpleExcel\Exception\SimpleExcelException;
/**
 * SimpleExcel class for parsing HTML table
 *
 * @author Faisalman
 * @package SimpleExcel
 */
class HTMLParser extends BaseParser implements IParser
{
    /**
     * Defines valid file extension
     *
     * @access protected
     * @var string
     */
    protected $file_extension = 'html';

    /**
     * Collect the text of every th/td element that is a direct child of a row
     *
     * @access private
     * @param DOMNode $tr A tr element
     * @return array      Cell texts in document order
     */
    private function parseRow($tr){
        $row = array();
        foreach ($tr->childNodes as $td) {
            if ($td->nodeType === XML_ELEMENT_NODE && ($td->nodeName === "th" || $td->nodeName === "td")) {
                array_push($row, $td->nodeValue);
            }
        }
        return $row;
    }

    /**
     * Process the loaded file/string
     *
     * Rows of every table element in the document are appended to one result,
     * in document order. Only tr elements directly under table, thead or
     * tbody are read.
     *
     * @param DOMDocument $html DOMDocument object of HTML
     */
    private function parseDOM($html){
        $field = array();
        foreach ($html->getElementsByTagName('table') as $table) {
            foreach ($table->childNodes as $twrap) {
                if ($twrap->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                if ($twrap->nodeName === "thead" || $twrap->nodeName === "tbody") {
                    foreach ($twrap->childNodes as $tr) {
                        if ($tr->nodeType === XML_ELEMENT_NODE && $tr->nodeName === "tr") {
                            array_push($field, $this->parseRow($tr));
                        }
                    }
                } else if ($twrap->nodeName === "tr") {
                    array_push($field, $this->parseRow($twrap));
                }
            }
        }
        $this->table_arr = $field;
    }

    /**
     * Load the HTML file to be parsed
     *
     * @param string $file_path Path to HTML file
     * @throws Exception        If the file is missing, has the wrong extension or cannot be read
     */
    public function loadFile($file_path) {
        if (!$this->isFileReady($file_path)) {
            return;
        }
        $contents = file_get_contents($file_path);
        if ($contents === FALSE) {
            throw new \Exception('Error reading the file in '.$file_path, SimpleExcelException::ERROR_READING_FILE);
        }
        $this->loadString($contents);
    }

    /**
     * Load the string to be parsed
     *
     * @param string $str String with HTML format, expected to be UTF-8
     */
    public function loadString($str){
        $str = (string) $str;
        // DOMDocument::loadHTML() refuses an empty string (ValueError on PHP 8);
        // blank input simply means an empty table
        if (trim($str) === '') {
            $this->table_arr = array();
            return;
        }
        $html = new \DOMDocument('1.0', 'UTF-8');
        // Turn every non-ASCII character into a numeric entity so the parser
        // does not have to guess the charset. This replaces
        // mb_convert_encoding(..., 'HTML-ENTITIES', ...), deprecated in PHP 8.2;
        // both forms decode to the same characters once parsed.
        $sp = mb_encode_numericentity($str, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
        $html->loadHTML($sp);
        $html->encoding = 'UTF-8';
        $this->parseDOM($html);
    }
}

View file

@ -0,0 +1,28 @@
<?php
namespace SimpleExcel\Parser;
/**
 * Contract shared by every SimpleExcel parser
 *
 * Groups the cell/column/row/field accessors, the matching existence checks,
 * the file readiness check and the two load entry points.
 *
 * @author Faisalman
 * @package SimpleExcel
 */
interface IParser
{
    // --- loading ---------------------------------------------------------

    /** @param string $file_path Path of the file to check */
    public function isFileReady($file_path);

    /** @param string $file_path Path of the file to parse */
    public function loadFile($file_path);

    /** @param string $str Document contents to parse */
    public function loadString($str);

    // --- existence checks ------------------------------------------------

    public function isFieldExists();

    /** @param int $row_num Row number */
    public function isRowExists($row_num);

    /** @param int $col_num Column number */
    public function isColumnExists($col_num);

    /**
     * @param int $row_num Row number
     * @param int $col_num Column number
     */
    public function isCellExists($row_num, $col_num);

    // --- accessors -------------------------------------------------------

    /** @param bool $val_only Value-only switch */
    public function getField($val_only);

    /**
     * @param int  $row_num  Row number
     * @param bool $val_only Value-only switch
     */
    public function getRow($row_num, $val_only);

    /**
     * @param int  $col_num  Column number
     * @param bool $val_only Value-only switch
     */
    public function getColumn($col_num, $val_only);

    /**
     * @param int  $row_num  Row number
     * @param int  $col_num  Column number
     * @param bool $val_only Value-only switch
     */
    public function getCell($row_num, $col_num, $val_only);
}
?>

View file

@ -0,0 +1,61 @@
<?php
namespace SimpleExcel\Parser;
use SimpleExcel\Exception\SimpleExcelException;
/**
 * SimpleExcel class for parsing JSON table
 *
 * @author Faisalman
 * @package SimpleExcel
 */
class JSONParser extends BaseParser implements IParser
{
    /**
     * Defines valid file extension
     *
     * @access protected
     * @var string
     */
    protected $file_extension = 'json';

    /**
     * Load the JSON file to be parsed
     *
     * @param string $file_path Path to JSON file
     * @throws Exception        If the file is missing, has the wrong extension or cannot be read
     * @throws Exception        If JSON format is invalid (or too deep)
     */
    public function loadFile($file_path) {
        if (!$this->isFileReady($file_path)) {
            return;
        }
        // file_get_contents() copes with empty files (fread() with a zero
        // length does not) and leaves no handle open if parsing throws
        $contents = file_get_contents($file_path);
        if ($contents === FALSE) {
            throw new \Exception('Error reading the file in '.$file_path, SimpleExcelException::ERROR_READING_FILE);
        }
        $this->loadString($contents);
    }

    /**
     * Load the string to be parsed
     *
     * The document is expected to be a list of rows, each row a list of cells.
     *
     * @param string $str String with JSON format
     * @throws Exception  If JSON format is invalid (or too deep)
     */
    public function loadString($str){
        $json = (string) $str;
        // Only transcode when the input is not already valid UTF-8; in that
        // case it is treated as ISO-8859-1, which is what utf8_encode()
        // (deprecated in PHP 8.2) assumed. Transcoding unconditionally would
        // garble text that is UTF-8 already.
        if (!mb_check_encoding($json, 'UTF-8')) {
            $json = mb_convert_encoding($json, 'UTF-8', 'ISO-8859-1');
        }
        $table = json_decode($json, false, 4);
        if ($table === NULL) {
            throw new \Exception('Invalid JSON format: '.$str, SimpleExcelException::MALFORMED_JSON);
        }
        $field = array();
        // a scalar document holds no rows; skip it rather than iterating a non-iterable
        if (is_array($table) || is_object($table)) {
            foreach ($table as $rows) {
                $row = array();
                // a scalar where a row is expected yields an empty row
                if (is_array($rows) || is_object($rows)) {
                    foreach ($rows as $cell) {
                        array_push($row, $cell);
                    }
                }
                array_push($field, $row);
            }
        }
        $this->table_arr = $field;
    }
}

View file

@ -0,0 +1,35 @@
<?php
namespace SimpleExcel\Parser;
/**
 * SimpleExcel parser for tab-separated (TSV) spreadsheets
 *
 * Reuses the CSV parsing logic with the delimiter pinned to a TAB.
 *
 * @author Faisalman
 * @package SimpleExcel
 */
class TSVParser extends CSVParser
{
    /**
     * Accepted file extension
     *
     * @access protected
     * @var string
     */
    protected $file_extension = 'tsv';

    /**
     * Field delimiter, fixed to the TAB character
     *
     * @access protected
     * @var string
     */
    protected $delimiter = "\t";

    /**
     * Intentionally a no-op: the delimiter of a TSV document cannot be changed
     *
     * @param string $delimiter Ignored
     */
    public function setDelimiter($delimiter){
    }
}

View file

@ -0,0 +1,34 @@
<?php
namespace SimpleExcel\Parser;
/**
 * SimpleExcel parser placeholder for Microsoft Excel XLSX spreadsheets
 *
 * Both load methods have empty bodies, so nothing is parsed and the
 * inherited table is never assigned by this class.
 *
 * @author Faisalman
 * @package SimpleExcel
 */
class XLSXParser extends BaseParser implements IParser
{
    /**
     * Accepted file extension
     *
     * @access protected
     * @var string
     */
    protected $file_extension = 'xlsx';

    /**
     * Load an XLSX file to be parsed (empty body, does nothing)
     *
     * @param string $file_path Path to XLSX file
     */
    public function loadFile($file_path)
    {
    }

    /**
     * Load the string to be parsed (empty body, does nothing)
     *
     * @param string $str String with XLSX format
     */
    public function loadString($str)
    {
    }
}

View file

@ -0,0 +1,324 @@
<?php
namespace SimpleExcel\Parser;
use SimpleExcel\Exception\SimpleExcelException;
/**
 * SimpleExcel class for parsing Microsoft Excel 2003 XML Spreadsheet
 *
 * The parsed result is stored as
 *   array(
 *     'doc_props'      => array of document properties,
 *     'table_contents' => list of array('row_num' => int, 'row_contents' => cells)
 *   )
 * where cells is a list of array('row_num', 'col_num', 'datatype', 'value'),
 * or an empty string for rows that only fill a gap before an ss:Index row.
 *
 * @author Faisalman
 * @package SimpleExcel
 */
class XMLParser extends BaseParser implements IParser
{
    /**
     * Defines valid file extension
     *
     * @access protected
     * @var string
     */
    protected $file_extension = 'xml';

    /**
     * Extract attributes from SimpleXMLElement object
     *
     * @access private
     * @param array|bool $attrs_obj Result of an xpath('@ss:*') query
     * @return array                Attribute values keyed by attribute name
     */
    private function getAttributes($attrs_obj) {
        $attrs_arr = array();
        if (!$attrs_obj) {
            return $attrs_arr;
        }
        foreach ($attrs_obj as $attrs) {
            $attrs = (array) $attrs;
            foreach ($attrs as $attr) {
                // only name => value maps are usable; skip anything else the cast produced
                if (!is_array($attr) || count($attr) === 0) {
                    continue;
                }
                $attr_keys = array_keys($attr);
                $attrs_arr[$attr_keys[0]] = $attr[$attr_keys[0]];
            }
        }
        return $attrs_arr;
    }

    /**
     * Get value of the specified cell
     *
     * @param int  $row_num  Row number (one-based)
     * @param int  $col_num  Column number (one-based)
     * @param bool $val_only Whether returns only it's value or complete data
     * @return mixed         Cell value or cell array; empty string when this row has no such cell
     * @throws Exception     If the cell identified doesn't exist.
     */
    public function getCell($row_num, $col_num, $val_only = true) {
        // check whether the cell exists
        if (!$this->isCellExists($row_num, $col_num)) {
            throw new \Exception('Cell '.$row_num.','.$col_num.' doesn\'t exist', SimpleExcelException::CELL_NOT_FOUND);
        }
        $contents = $this->table_arr['table_contents'][$row_num-1]['row_contents'];
        if (is_array($contents) && array_key_exists($col_num-1, $contents)) {
            $cell = $contents[$col_num-1];
            return $val_only ? $cell['value'] : $cell;
        }
        return "";
    }

    /**
     * Get data of the specified column as an array
     *
     * @param int  $col_num  Column number (one-based)
     * @param bool $val_only Returns (value only | complete data) for every cell, default to TRUE
     * @return array         One entry per row; rows without that cell contribute an empty string
     * @throws Exception     If the column requested doesn't exist.
     */
    public function getColumn($col_num, $val_only = TRUE) {
        if (!$this->isColumnExists($col_num)) {
            throw new \Exception('Column '.$col_num.' doesn\'t exist', SimpleExcelException::COLUMN_NOT_FOUND);
        }
        $col_arr = array();
        // get the specified column within every row
        foreach ($this->table_arr['table_contents'] as $row) {
            $contents = $row['row_contents'];
            // a row can be a gap placeholder or shorter than the requested
            // column; both cases yield "" instead of an undefined offset
            if (is_array($contents) && array_key_exists($col_num-1, $contents)) {
                $cell = $contents[$col_num-1];
                array_push($col_arr, $val_only ? $cell['value'] : $cell);
            } else {
                array_push($col_arr, "");
            }
        }
        return $col_arr;
    }

    /**
     * Get data of all cells as an array
     *
     * @param bool $val_only Returns (value only | complete data) for every cell, default to TRUE
     * @return array         List of rows of values, or the complete internal structure when $val_only is false
     * @throws Exception     If the field is not set.
     */
    public function getField($val_only = TRUE) {
        if (!$this->isFieldExists()) {
            throw new \Exception('Field is not set', SimpleExcelException::FIELD_NOT_FOUND);
        }
        if (!$val_only) {
            return $this->table_arr;
        }
        $field = array();
        foreach ($this->table_arr['table_contents'] as $row) {
            $cells = array();
            if ($row['row_contents']) {
                foreach ($row['row_contents'] as $cell) {
                    array_push($cells, $cell['value']);
                }
            }
            array_push($field, $cells);
        }
        return $field;
    }

    /**
     * Get data of the specified row as an array
     *
     * @param int  $row_num  Row number (one-based)
     * @param bool $val_only Returns (value only | complete data) for every cell, default to TRUE
     * @return array         Empty for rows that only fill a gap
     * @throws Exception     When a row is requested that doesn't exist.
     */
    public function getRow($row_num, $val_only = TRUE) {
        if (!$this->isRowExists($row_num)) {
            throw new \Exception('Row '.$row_num.' doesn\'t exist', SimpleExcelException::ROW_NOT_FOUND);
        }
        $row = $this->table_arr['table_contents'][$row_num-1]['row_contents'];
        $row_arr = array();
        // gap rows store '' instead of a cell list; there is nothing to iterate
        if (!is_array($row)) {
            return $row_arr;
        }
        foreach ($row as $cell) {
            array_push($row_arr, $val_only ? $cell['value'] : $cell);
        }
        return $row_arr;
    }

    /**
     * Check whether a specified column exists in at least one row
     *
     * @param int $col_num Column number (one-based)
     * @return bool        FALSE as well when nothing has been loaded yet
     */
    public function isColumnExists($col_num){
        if (!$this->isFieldExists()) {
            return false;
        }
        foreach ($this->table_arr['table_contents'] as $row) {
            if (is_array($row['row_contents']) && array_key_exists($col_num-1, $row['row_contents'])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Check whether a specified row exists
     *
     * @param int $row_num Row number (one-based)
     * @return bool        FALSE as well when nothing has been loaded yet
     */
    public function isRowExists($row_num){
        // array_key_exists() rejects a NULL haystack on PHP 8
        return $this->isFieldExists()
            && array_key_exists($row_num-1, $this->table_arr['table_contents']);
    }

    /**
     * Process the loaded file/string
     *
     * @param SimpleXMLElement|bool $xml SimpleXMLElement object of XML, or FALSE when loading failed
     * @throws Exception                 If the document could not be parsed or its namespace is invalid
     * @return bool                      Always TRUE; failures are reported by exception
     */
    private function parseDOM($xml){
        // simplexml_load_file()/simplexml_load_string() return FALSE on
        // malformed input; fail with an exception instead of a fatal error.
        // NOTE(review): no dedicated "malformed XML" code is visible on
        // SimpleExcelException from here, so the invalid-document code is reused.
        if ($xml === FALSE) {
            throw new \Exception('Document isn\'t a well-formed XML', SimpleExcelException::INVALID_DOCUMENT_NAMESPACE);
        }

        // get XML namespace
        $xmlns = $xml->getDocNamespaces();

        // check XML namespace
        if (!isset($xmlns['ss']) || $xmlns['ss'] != 'urn:schemas-microsoft-com:office:spreadsheet') {
            throw new \Exception('Document namespace isn\'t a valid Excel XML 2003 Spreadsheet', SimpleExcelException::INVALID_DOCUMENT_NAMESPACE);
        }

        // extract document properties and keep them in the result
        // (they used to be overwritten by an empty array right after extraction)
        $doc_props = (array)$xml->DocumentProperties;

        $rows = $xml->Worksheet->Table->Row;
        $row_num = 1;
        $this->table_arr = array(
            'doc_props' => $doc_props,
            'table_contents' => array()
        );

        // loop through all rows
        foreach ($rows as $row) {

            // check whether ss:Index attribute exist in this row
            $row_index = $row->xpath('@ss:Index');

            // if exist, push empty value until the specified index
            if (count($row_index) > 0) {
                $gap = (int) (string) $row_index[0] - count($this->table_arr['table_contents']);
                for ($i = 1; $i < $gap; $i++) {
                    array_push($this->table_arr['table_contents'], array(
                        'row_num' => $row_num,
                        'row_contents' => ''
                    ));
                    $row_num += 1;
                }
            }

            $cells = $row->Cell;
            $row_arr = array();
            $col_num = 1;

            // loop through all row's cells
            foreach ($cells as $cell) {

                // check whether ss:Index attribute exist
                $cell_index = $cell->xpath('@ss:Index');

                // if exist, push empty value until the specified index
                if (count($cell_index) > 0) {
                    $gap = (int) (string) $cell_index[0] - count($row_arr);
                    for ($i = 1; $i < $gap; $i++) {
                        array_push($row_arr, array(
                            'row_num' => $row_num,
                            'col_num' => $col_num,
                            'datatype' => '',
                            'value' => ''
                        ));
                        $col_num += 1;
                    }
                }

                // the ss:Type attribute of Data names the datatype; default to String
                $data_attrs = $cell->Data->xpath('@ss:*');
                $data_attrs_arr = $this->getAttributes($data_attrs);
                $cell_datatype = isset($data_attrs_arr['Type']) ? $data_attrs_arr['Type'] : 'String';

                // extract data from cell
                $cell_value = (string) $cell->Data;

                // escape input from HTML tags
                $cell_value = filter_var($cell_value, FILTER_SANITIZE_SPECIAL_CHARS);

                // push column array
                array_push($row_arr, array(
                    'row_num' => $row_num,
                    'col_num' => $col_num,
                    'datatype' => $cell_datatype,
                    'value' => $cell_value
                ));
                $col_num += 1;
            }

            // push row array
            array_push($this->table_arr['table_contents'], array(
                'row_num' => $row_num,
                'row_contents' => $row_arr
            ));
            $row_num += 1;
        }

        return true;
    }

    /**
     * Load the XML file to be parsed
     *
     * @param string $file_path Path to XML file
     * @return bool
     * @throws Exception        If the file is not ready, not well-formed or not an Excel 2003 XML document
     */
    public function loadFile($file_path) {
        if (!$this->isFileReady($file_path)) {
            return false;
        }
        return $this->parseDOM(simplexml_load_file($file_path));
    }

    /**
     * Load the string to be parsed
     *
     * @param string $str String with XML format
     * @return bool
     * @throws Exception  If the string is not well-formed or not an Excel 2003 XML document
     */
    public function loadString($str){
        return $this->parseDOM(simplexml_load_string($str));
    }
}