As @rolfl said, you can use JTidy for this. The JTidy documentation kind of sucks though (and I've never used it before) so I downloaded it and tried using it. This test runs and gives you 3 warnings:
package com.sandbox;
import org.junit.Test;
import org.w3c.tidy.Tidy;
import java.io.StringReader;
import java.io.StringWriter;
import static org.junit.Assert.assertEquals;
/**
 * Minimal experiment showing how JTidy reports problems for a given input.
 */
public class SandboxTest {

    /**
     * Runs a plain, non-HTML string through JTidy and asserts that the parser
     * reports neither errors nor warnings.
     */
    @Test
    public void myTest() {
        // The text handed to the parser; deliberately not a real HTML document.
        String markup = "invalid html";
        StringReader source = new StringReader(markup);

        // JTidy writes its output here; the content itself is not inspected.
        StringWriter sink = new StringWriter();

        Tidy parser = new Tidy();
        parser.parse(source, sink);

        // Errors are checked before warnings, so an error count mismatch is reported first.
        int errorCount = parser.getParseErrors();
        assertEquals(0, errorCount);

        int warningCount = parser.getParseWarnings();
        assertEquals(0, warningCount);
    }
}
The test fails at the last assertion because getParseWarnings() returns 3 instead of 0. Is that what you're looking for?
I also tried using your input, and I got one warning for it:
package com.sandbox;
import org.junit.Test;
import org.w3c.tidy.Tidy;
import java.io.StringReader;
import java.io.StringWriter;
import static org.junit.Assert.assertEquals;
/**
 * Minimal experiment showing how JTidy reports problems for a sample HTML document.
 */
public class SandboxTest {

    /**
     * Runs a small HTML document (no doctype, unclosed body/html tags) through
     * JTidy and asserts that the parser reports neither errors nor warnings.
     */
    @Test
    public void myTest() {
        // The document under test, assembled line by line.
        String markup = "<html>\n"
                + "<head>\n"
                + " <title>This is a sample doc</title>\n"
                + "</head>\n"
                + "<body>\n"
                + " <p> <b>this is a sample paragraph</b></p>";
        StringReader source = new StringReader(markup);

        // JTidy writes its output here; the content itself is not inspected.
        StringWriter sink = new StringWriter();

        Tidy parser = new Tidy();
        parser.parse(source, sink);

        // Errors are checked before warnings, so an error count mismatch is reported first.
        int errorCount = parser.getParseErrors();
        assertEquals(0, errorCount);

        int warningCount = parser.getParseWarnings();
        assertEquals(0, warningCount);
    }
}
Output:
line 1 column 1 - Warning: missing <!DOCTYPE> declaration
InputStream: Document content looks like HTML 2.0
1 warning, no errors were found!
java.lang.AssertionError:
Expected :0
Actual :1
<Click to see difference>
at org.junit.Assert.fail(Assert.java:93)
at org.junit.Assert.failNotEquals(Assert.java:647)
at org.junit.Assert.assertEquals(Assert.java:128)
at org.junit.Assert.assertEquals(Assert.java:472)
at org.junit.Assert.assertEquals(Assert.java:456)
at com.sandbox.SandboxTest.myTest(SandboxTest.java:25)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
at org.junit.runner.JUnitCore.run(JUnitCore.java:157)
at com.intellij.junit4.JUnit4IdeaTestRunner.startRunnerWithArgs(JUnit4IdeaTestRunner.java:77)
at com.intellij.rt.execution.junit.JUnitStarter.prepareStreamsAndStart(JUnitStarter.java:195)
at com.intellij.rt.execution.junit.JUnitStarter.main(JUnitStarter.java:63)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:120)
Process finished with exit code -1