I've just checked out Fluent Selenium, which uses the Firefox WebDriver. It's a testing framework, so don't be surprised by the presence of assertion methods, but it can also be used for crawling. It worked perfectly for me with very little configuration. It requires Maven to run; here is my working example:
package fluent;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.seleniumhq.selenium.fluent.FluentWebDriver;
import org.seleniumhq.selenium.fluent.Period;
import org.seleniumhq.selenium.fluent.TestableString;
import java.util.concurrent.TimeUnit;
import static org.openqa.selenium.By.className;
/**
 * Minimal Fluent Selenium example: opens a search-results page in Firefox,
 * reads the text of the element with class {@code heading}, prints it and
 * asserts that it contains the word "Paint".
 */
public class Test {

    /**
     * Runs the example against a live Firefox instance.
     *
     * <p>The browser is always shut down when the method exits, including when
     * the page lookup or the assertion fails, so no Firefox/driver process is
     * left behind.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WebDriver driver = new FirefoxDriver();
        try {
            FluentWebDriver fwd = new FluentWebDriver(driver);

            // Let element lookups poll for up to 5 seconds before giving up.
            driver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);

            // get() performs the navigation itself; no separate navigate() call is needed.
            driver.get("http://www.hudku.com/search/business-list/Paint%20%26%20Hardware%20in%20Kanakapura%20Road,%20Bangalore,%20Karnataka,%20India?p=6&h1=mgK%3DFsPlSAsPTaOVwo%2F0FIMA");

            // Look up the div with class "heading" using a 3-second "within" period.
            TestableString test = fwd.div(className("heading")).within(Period.secs(3)).getText();
            System.out.println("header: " + test.toString());

            // Fluent Selenium assertion on the heading text.
            test.shouldContain("Paint");
            System.out.println("all is fine!");
        } finally {
            // Close the browser window(s) and end the WebDriver session.
            driver.quit();
        }
    }
}
My pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven project descriptor for the Fluent Selenium example above. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates: placeholder names for this sample project. -->
<groupId>testPrj3</groupId>
<artifactId>testPrj3</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Fluent Selenium itself (provides FluentWebDriver, Period, TestableString). -->
<!-- NOTE(review): "test" scope puts this only on the test classpath, so the example class -->
<!-- presumably lives under src/test/java. Confirm, or remove the scope to run it from src/main/java. -->
<dependency>
<groupId>org.seleniumhq.selenium.fluent</groupId>
<artifactId>fluent-selenium</artifactId>
<version>1.14.2</version>
<scope>test</scope>
</dependency>
<!-- Hamcrest matchers, test scope. Presumably required by Fluent Selenium's assertion methods; TODO confirm. -->
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-all</artifactId>
<version>1.3</version>
<scope>test</scope>
</dependency>
<!-- If you're needing Coda Hale's Metrics integration (optional) -->
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>3.0.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile with Java 1.7 source and target levels. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
UPDATE
FluentLenium seems to be a little more popular.