0

I have used dompdf to create a PDF file from a portion of an HTML file, i.e. the content between the <table> tags, which I cut and pasted manually.

Since I now have valid PDF output, I want to automate the process further.

I want to copy all contents between tables

<table> </table> 

to a file, and I would like to know what the possible options are in PHP. Any suggestion is highly appreciated.

Arvind GK
  • 27
  • 5

2 Answers

0

Don't use regex, instead use DomDocument.

The following class extracts the content inside any element. Load your HTML from a file, or just pass it the contents of ob_get_contents().

<?php 

/**
 * Extracts the inner HTML (or the text content) of elements found in an HTML string.
 *
 * Typical use: create an instance, hand it the markup with setSource(), then
 * call getInnerHTML() with a tag name and, optionally, an attribute filter.
 */
class DOMExtract extends DOMDocument
{
    /** @var string|null Raw HTML markup to parse; assigned through setSource(). */
    private $source;

    /**
     * Initialises the underlying DOMDocument and relaxes its error handling.
     */
    public function __construct()
    {
        // Initialise the parent document first: without this call the options
        // assigned below are set on a DOMDocument that was never constructed.
        parent::__construct();

        // Collect libxml parse errors internally instead of raising PHP warnings
        // for imperfect markup. This is a global libxml setting, not one scoped
        // to this object.
        libxml_use_internal_errors(true);
        $this->preserveWhiteSpace = false;
        $this->strictErrorChecking = false;
        $this->formatOutput = true;
    }

    /**
     * Stores the HTML markup that getInnerHTML() will parse.
     *
     * @param string $source HTML markup.
     *
     * @return $this
     */
    public function setSource($source)
    {
        $this->source = $source;
        return $this;
    }

    /**
     * Returns the concatenated inner content of every element named $tag,
     * optionally restricted to elements carrying a given attribute value.
     *
     * @param string      $tag       Tag name to look up, e.g. 'table'.
     * @param string|null $id        Optional filter written as 'attribute=value'
     *                               (e.g. 'id=customers'). The value is compared
     *                               literally, so do not wrap it in quotes. Only
     *                               the first '=' separates name from value.
     * @param bool        $nodeValue When truthy, return the trimmed text content
     *                               of each match instead of its inner HTML.
     *
     * @return string|null Concatenated result, or null when no element matched.
     *
     * @throws InvalidArgumentException When $id is not of the form 'attribute=value'.
     * @throws Exception                When no source has been set.
     */
    public function getInnerHTML($tag, $id = null, $nodeValue = false)
    {
        if (empty($this->source)) {
            throw new Exception('Error: Missing $this->source, use setSource() first');
        }

        // Parse the filter once, up front. Limiting the split to two parts keeps
        // values that themselves contain '=' (query strings, for instance) intact.
        $attrName = null;
        $attrValue = null;
        if ($id !== null) {
            $parts = explode('=', $id, 2);
            if (count($parts) !== 2 || $parts[0] === '') {
                throw new InvalidArgumentException(
                    'Error: $id must be in the form "attribute=value"'
                );
            }
            $attrName = $parts[0];
            $attrValue = $parts[1];
        }

        $this->loadHTML($this->source);

        $ret = null;
        foreach ($this->getElementsByTagName($tag) as $node) {
            // Strict comparison: with ==, numeric-looking strings such as
            // '1e3' and '1000' would be treated as the same attribute value.
            if ($attrName !== null && $node->getAttribute($attrName) !== $attrValue) {
                continue;
            }

            if ($nodeValue) {
                $ret .= trim($node->nodeValue);
            } else {
                $ret .= $this->innerHTML($node);
            }
        }

        return $ret;
    }

    /**
     * Serialises the children of a node to an HTML string.
     *
     * Each child is imported into a fresh DOMDocument and saved on its own, and
     * every serialised piece is trimmed before being appended to the result.
     *
     * @param DOMNode $dom Node whose children should be serialised.
     *
     * @return string Concatenated HTML of the child nodes ('' when there are none).
     */
    protected function innerHTML($dom)
    {
        $ret = "";
        foreach ($dom->childNodes as $child) {
            $fragment = new DOMDocument();
            // Deep import so the child's whole subtree is copied along with it.
            $fragment->appendChild($fragment->importNode($child, true));
            $ret .= trim($fragment->saveHTML());
        }
        return $ret;
    }

}

// Sample markup for the demo: a heading followed by one table (id="customers")
// wrapped in a div. The nowdoc keeps the text verbatim, including the leading
// blank line and the absence of a trailing newline.
$html = <<<'MARKUP'

<h3>HTML Table Example</h3>
<div>
<table id="customers">
  <tr>
    <th>Company</th>
    <th>Contact</th>
    <th>Country</th>
  </tr>
  <tr>
    <td>Alfreds Futterkiste</td>
    <td>Maria Anders</td>
    <td>Germany</td>
  </tr>
  <tr>
    <td>Centro comercial Moctezuma</td>
    <td>Francisco Chang</td>
    <td>Mexico</td>
  </tr>
  <tr>
    <td>Ernst Handel</td>
    <td>Roland Mendel</td>
    <td>Austria</td>
  </tr>
  <tr>
    <td>Island Trading</td>
    <td>Helen Bennett</td>
    <td>UK</td>
  </tr>
  <tr>
    <td>Laughing Bacchus Winecellars</td>
    <td>Yoshi Tannamuri</td>
    <td>Canada</td>
  </tr>
  <tr>
    <td>Magazzini Alimentari Riuniti</td>
    <td>Giovanni Rovelli</td>
    <td>Italy</td>
  </tr>
</table>
</div>
MARKUP;

$extractor = new DOMExtract();
$extractor->setSource($html);

// Emit a fresh <table> wrapper around the extracted rows. The opening tag is
// written before the extraction runs, then the rows, then the closing tag.
echo '
<table cellspacing="0" cellpadding="3" border="0" width="100%">';

// Only the inner content of the table whose id is "customers" is used here.
// Calling getInnerHTML('table') with no filter would instead return the inner
// content of every table in the markup.
echo $extractor->getInnerHTML('table', 'id=customers');

echo '</table>';

https://3v4l.org/OkbQW

<table cellspacing="0" cellpadding="3" border="0" width="100%"><tr><th>Company</th>
    <th>Contact</th>
    <th>Country</th>
  </tr><tr><td>Alfreds Futterkiste</td>
    <td>Maria Anders</td>
    <td>Germany</td>
  </tr><tr><td>Centro comercial Moctezuma</td>
    <td>Francisco Chang</td>
    <td>Mexico</td>
  </tr><tr><td>Ernst Handel</td>
    <td>Roland Mendel</td>
    <td>Austria</td>
  </tr><tr><td>Island Trading</td>
    <td>Helen Bennett</td>
    <td>UK</td>
  </tr><tr><td>Laughing Bacchus Winecellars</td>
    <td>Yoshi Tannamuri</td>
    <td>Canada</td>
  </tr><tr><td>Magazzini Alimentari Riuniti</td>
    <td>Giovanni Rovelli</td>
    <td>Italy</td>
  </tr></table>
Lawrence Cherone
  • 46,049
  • 7
  • 62
  • 106
-1

To extract the data between the tags, try the code below. Here $source is your complete HTML code, and $match will hold the data extracted from between the tags.

Code:

// Capture the inner content of the first <table> element found in $source.
// `\b[^>]*` allows the opening tag to carry attributes (e.g. <table id="customers">),
// which a literal '<table>' pattern would never match.
// Modifiers: s lets '.' span newlines, i makes the tag name case-insensitive.
// NOTE(review): the lazy match stops at the first </table>, so a nested table
// would cut the capture short; use a DOM parser for such markup.
if (preg_match('~<table\b[^>]*>(.*?)</table>~si', $source, $match) === 1) {
    echo "result=" . $match[1];
}

Reference: Preg match text in php between html tags

Raman Saluja
  • 106
  • 1
  • 12