Don't use regex for this; use DOMDocument instead.
The following class extracts the inner content of any element. Load your HTML from a file, or simply pass it the result of ob_get_contents().
<?php
/**
 * Extracts the inner HTML (or the plain text) of elements found in an HTML string.
 *
 * Usage: call setSource() with the markup, then getInnerHTML() with a tag name
 * and, optionally, an "attribute=value" filter.
 */
class DOMExtract extends DOMDocument
{
    /** @var string|null Raw HTML handed over through setSource(). */
    private $source;

    /**
     * Configures libxml and the document flags used by this extractor.
     *
     * libxml_use_internal_errors(true) is a process-wide libxml setting; it is
     * enabled here so that malformed markup is recorded by libxml instead of
     * being reported as PHP warnings while parsing.
     */
    public function __construct()
    {
        libxml_use_internal_errors(true);
        $this->preserveWhiteSpace = false;
        $this->strictErrorChecking = false;
        $this->formatOutput = true;
    }

    /**
     * Stores the HTML that later getInnerHTML() calls will parse.
     *
     * @param string $source HTML markup.
     *
     * @return $this Fluent interface.
     */
    public function setSource($source)
    {
        $this->source = $source;
        return $this;
    }

    /**
     * Returns the concatenated content of every <$tag> element, optionally
     * restricted to elements whose attribute equals a given value.
     *
     * @param string      $tag       Tag name to look up, e.g. 'table'.
     * @param string|null $id        Optional filter in the form 'attribute=value'
     *                               (e.g. 'id=customers'). Only the first '=' separates
     *                               the name from the value, so the value itself may
     *                               contain '='. The comparison is an exact string match.
     * @param bool        $nodeValue When truthy, the trimmed text content of each match
     *                               is returned instead of its inner HTML.
     *
     * @return string|null Content of all matches joined together, or null when no
     *                     element matched.
     *
     * @throws InvalidArgumentException When $id is given but is not 'attribute=value'.
     * @throws Exception                When no source has been set.
     */
    public function getInnerHTML($tag, $id = null, $nodeValue = false)
    {
        if (empty($this->source)) {
            throw new Exception('Error: Missing $this->source, use setSource() first');
        }

        // Validate and split the filter up front, before any parsing work.
        $attrName = null;
        $attrValue = null;
        if ($id !== null) {
            // Limit of 2: everything after the first '=' belongs to the value.
            $parts = explode('=', $id, 2);
            if (count($parts) !== 2 || $parts[0] === '') {
                throw new InvalidArgumentException(
                    'Error: $id must be in the form "attribute=value"'
                );
            }
            $attrName = $parts[0];
            $attrValue = $parts[1];
        }

        $this->loadHTML($this->source);

        $ret = null;
        foreach ($this->getElementsByTagName($tag) as $node) {
            // Strict comparison: attribute values are plain strings, so "10" must
            // not be treated as equal to "10.0" or "1e1".
            if ($attrName !== null && $node->getAttribute($attrName) !== $attrValue) {
                continue;
            }

            if ($nodeValue) {
                $ret .= trim($node->nodeValue);
            } else {
                $ret .= $this->innerHTML($node);
            }
        }

        return $ret;
    }

    /**
     * Serialises the children of a node to an HTML string.
     *
     * Each child is deep-copied into a throwaway DOMDocument and dumped with
     * saveHTML(); every piece is trimmed before being appended, so whitespace-only
     * text nodes between elements contribute nothing to the result.
     *
     * @param DOMNode $dom Node whose children are serialised.
     *
     * @return string Concatenated HTML of the children ('' when there are none).
     */
    protected function innerHTML($dom)
    {
        $ret = "";
        foreach ($dom->childNodes as $child) {
            $tmp = new DOMDocument();
            $tmp->appendChild($tmp->importNode($child, true));
            $ret .= trim($tmp->saveHTML());
        }
        return $ret;
    }
}
// Sample markup: a heading followed by a table (id="customers") wrapped in a <div>.
$html = '
<h3>HTML Table Example</h3>
<div>
<table id="customers">
<tr>
<th>Company</th>
<th>Contact</th>
<th>Country</th>
</tr>
<tr>
<td>Alfreds Futterkiste</td>
<td>Maria Anders</td>
<td>Germany</td>
</tr>
<tr>
<td>Centro comercial Moctezuma</td>
<td>Francisco Chang</td>
<td>Mexico</td>
</tr>
<tr>
<td>Ernst Handel</td>
<td>Roland Mendel</td>
<td>Austria</td>
</tr>
<tr>
<td>Island Trading</td>
<td>Helen Bennett</td>
<td>UK</td>
</tr>
<tr>
<td>Laughing Bacchus Winecellars</td>
<td>Yoshi Tannamuri</td>
<td>Canada</td>
</tr>
<tr>
<td>Magazzini Alimentari Riuniti</td>
<td>Giovanni Rovelli</td>
<td>Italy</td>
</tr>
</table>
</div>';

// setSource() returns the extractor itself, so it can be chained onto construction.
$dom = (new DOMExtract())->setSource($html);

// Re-wrap the extracted rows in a freshly styled <table>.
echo '
<table cellspacing="0" cellpadding="3" border="0" width="100%">';

// Only the inner content of the table whose id is "customers".
// To take the inner content of every table instead, use: $dom->getInnerHTML('table')
echo $dom->getInnerHTML('table', 'id=customers');

echo '</table>';
Live demo: https://3v4l.org/OkbQW — it prints:
<table cellspacing="0" cellpadding="3" border="0" width="100%"><tr><th>Company</th>
<th>Contact</th>
<th>Country</th>
</tr><tr><td>Alfreds Futterkiste</td>
<td>Maria Anders</td>
<td>Germany</td>
</tr><tr><td>Centro comercial Moctezuma</td>
<td>Francisco Chang</td>
<td>Mexico</td>
</tr><tr><td>Ernst Handel</td>
<td>Roland Mendel</td>
<td>Austria</td>
</tr><tr><td>Island Trading</td>
<td>Helen Bennett</td>
<td>UK</td>
</tr><tr><td>Laughing Bacchus Winecellars</td>
<td>Yoshi Tannamuri</td>
<td>Canada</td>
</tr><tr><td>Magazzini Alimentari Riuniti</td>
<td>Giovanni Rovelli</td>
<td>Italy</td>
</tr></table>