I am new to regex patterns and I want to build a regex which extracts below line from given content:
Output:
https%253A%252F%252Fimages-na.ssl-imagesmazon.com%252Fimages%252FI%252F51tW%25252BoZNbJL.jpg
This didn't work with JDK1.6.
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Test {
public static void main(String[] args) {
final String regex = "(?=<\\s*[^>]*\\s*id=\"pinterest\"\\s*[^>]*\\s*media%3D(?<media>.*.jpg))";
final String string = " \n"
+ " test\n"
+ " \n"
+ " <a id=\"pinterest\" data-height=\"570\" class=\"a-link-normal swf-social-site\" target=\"_blank\" title=\"Pin it on Pinterest\" href=\"/gp/redirect.html/ref=cm_sw_cl_pi_dp_UUohAbSJXVH6B?_encoding=UTF8&location=https%3A%2F%2Fpinterest.com%2Fpin%2Fcreate%2Fbutton%3Furl%3Dhttps%253A%252F%252Fwww.amazon.in%252Fdp%252FB00SIWUU2A%252Fref%253Dcm_sw_r_pi_dp_x_UUohAbSJXVH6B%26title%3DNegi%25203x3x3%2520Speed%2520Cube%2520Negi%26description%3DNegi%25203x3x3%2520Speed%2520Cube%2520Negi%2520https%253A%252F%252Fwww.amazon.in%252Fdp%252FB00SIWUU2A%252Fref%253Dcm_sw_r_pi_dp_x_UUohAbSJXVH6B%26media%3Dhttps%253A%252F%252Fimages-na.ssl-images-amazon.com%252Fimages%252FI%252F41L8Ag%25252BfIZL.jpg&token=49402B6CEDC244163E355CFF5E75B587C25D0183\"><i class=\"a-icon a-icon-share-pinterest\" aria-label=\"Pinterest\"><span class=\"a-icon-alt\">Pinterest</span></i></a><span class=\"a-letter-space\"></span>\n"
+ " \n"
+ " ";
final Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
final Matcher matcher = pattern.matcher(string);
while (matcher.find()) {
System.out.println("Full match: " + matcher.group(0));
for (int i = 1; i <= matcher.groupCount(); i++) {
System.out.println("Group " + i + ": " + matcher.group(i));
}
}
}
}
Getting Error:
Exception in thread "main" java.util.regex.PatternSyntaxException: Look-behind group does not have an obvious maximum length near index 62 (?=<\s*[^>]\sid="pinterest"\s*[^>]\smedia%3D(?.*.jpg))