I've got the following PS script to strip html from an html formatted email. It however does not strip what appears to be the stylesheets. Hoping someone more knowledgable in this area is willing to assist and or provide some input on fixing that:
$html = @'
'@
# remove line breaks, replace with spaces
#$html = $html -replace "(r|n|t)", " "
# remove invisible content
@('head', 'style', 'script', 'object', 'embed', 'applet', 'noframes', 'noscript', 'noembed') | % {
$html = $html -replace "<$_[^>]*?>.*?</$_>", ""
}
# Condense extra whitespace
$html = $html -replace "( )+", " "
# Add line breaks
@('div','p','blockquote','h[1-9]') | % { $html = $html -replace "</?$_[^>]*?>.*?</$_>", ("n" + '$0' )}
# Add line breaks for self-closing tags
@('div','p','blockquote','h[1-9]','br') | % { $html = $html -replace "<$_[^>]*?/>", ('$0' + "n")}
#strip tags
$html = $html -replace "<[^>]*?>", ""
# write-verbose "removed tags: nn$htmln"
# replace common entities
@(
@("&bull;", " * "),
@("&lsaquo;", "<"),
@("&rsaquo;", ">"),
@("&(rsquo|lsquo);", "'"),
@("&(quot|ldquo|rdquo);", '"'),
@("&trade;", "(tm)"),
@("&frasl;", "/"),
@("&(quot|#34|#034|#x22);", '"'),
@('&(amp|#38|#038|#x26);', "&"),
@("&(lt|#60|#060|#x3c);", "<"),
@("&(gt|#62|#062|#x3e);", ">"),
@('&(copy|#169);', "(c)"),
@("&(reg|#174);", "(r)"),
@("&nbsp;", " "),
@("&(.{2,6});", ""),
@(" ", " ")
) | % { $html = $html -replace $_[0], $_[1] }
$PlainText=$html