Could you do something like this without using regex?
/**
 * @description : Function extracts names from html header tags (h1 and h2).
 *   Header tags are matched case-insensitively, may carry attributes and may
 *   span several lines. For each header the nested markup is stripped, runs of
 *   whitespace are collapsed to a single space, the text is title-cased and the
 *   "Obituary For " label is removed through removeObituary().
 * @example : "<h2>Obituary for John Doe</h2><h1>James Michael Lee</h1>" -> ["John Doe", "James Michael Lee"]
 * @example : "<H1 class=\"name\"><span> jane roe </span></H1>" -> ["Jane Roe"]
 * @param $html string : html to scan for header tags
 * @return []string : list of full names in document order, empty list when no header is found
 */
function extractFullNames($html) {
    // Modifier "i" accepts tag names in any case; modifier "s" lets the header
    // content span line breaks. The optional group after the tag name accepts
    // attributes on the opening tag while still rejecting names such as h10.
    $regex = '/<h[12](?:\s[^>]*)?>(.*?)<\/h[12]\s*>/is';

    // preg_match_all() returns false on a PCRE error and 0 when nothing matched;
    // in both cases there is nothing to extract.
    if (!preg_match_all($regex, $html, $matches)) {
        return [];
    }

    $names = [];
    foreach ($matches[1] as $inner) {
        // Strip nested markup BEFORE trimming, otherwise whitespace that sits
        // inside a wrapper element is left at the edges of the name.
        $name = strip_tags($inner);
        // Multi-line headers would otherwise keep their line breaks and indentation.
        $name = trim(preg_replace('/\s+/', ' ', $name));
        // Title-case the name. strtolower()/ucwords() work on bytes, so
        // non-ASCII letters may be left in their original case.
        $name = ucwords(strtolower($name));
        // Must run after title-casing so the label reads "Obituary For ".
        $names[] = removeObituary($name);
    }

    return $names;
}
/**
 * @description : Function used to remove "Obituary For " if present.
 *   The label is matched case-insensitively, so the result does not depend on
 *   the caller having normalised the casing first. Every occurrence of the
 *   label (including its trailing space) is removed; the rest of the string is
 *   returned unchanged.
 * @example : "Obituary For John Doe" -> "John Doe"
 * @example : "Obituary for John Doe" -> "John Doe"
 * @param $name string
 * @return string : name without "Obituary For "
 */
function removeObituary($name) {
    // str_ireplace() is the case-insensitive counterpart of str_replace(), so
    // "Obituary for", "OBITUARY FOR" and similar spellings are stripped as well.
    return str_ireplace("Obituary For ", "", $name);
}
// Test cases
// Runs the extractor on a two-header sample and prints the expected and the
// actual name lists on separate lines so they can be compared by eye.
$html = '<h2>Obituary for John Doe</h2><h1>James Michael Lee</h1>';
$expected = ['John Doe', 'James Michael Lee'];
$names = extractFullNames($html);
echo "Expected: ", implode(', ', $expected), "\n",
    "Actual: ", implode(', ', $names);