HTML Parser is a very handy Java library that can be used to extract values from an HTML page.
Let's take the URL - http://www.google.ca/finance?q=BMO
Let's say we want to extract the value of the span tag that reads "Mar 18 - Close".
Here is the code:
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.CssSelectorNodeFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Small demo that fetches a web page and prints the text of the first
 * element matched by the CSS selector {@code SPAN.nwp}, using the
 * HTML Parser library.
 */
public class test {

    /**
     * Opens a connection to the page, parses it with a
     * {@link CssSelectorNodeFilter}, prints the number of matching nodes
     * and then the text of the first child of the first match.
     *
     * <p>If nothing matches the selector, or the matched element has no
     * child node, a diagnostic is written to {@code System.err} and the
     * method returns without printing a value.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Named pageUrl rather than URL so it does not shadow the java.net.URL type.
        String pageUrl = "http://www.google.ca/finance?q=BMO";
        String selector = "SPAN.nwp";
        try {
            URL url = new URL(pageUrl);
            URLConnection connection = url.openConnection();

            // Build the HTML parser on top of the open connection.
            Parser parser = new Parser(connection);

            // Collect only the nodes that match the CSS selector.
            CssSelectorNodeFilter cssFilter = new CssSelectorNodeFilter(selector);
            NodeList nodes = parser.parse(cssFilter);
            System.out.println(nodes.size());

            // Guard against an empty result: the page layout may have changed,
            // and dereferencing a missing first node would crash the program.
            if (nodes.size() == 0) {
                System.err.println("No element matching '" + selector + "' found at " + pageUrl);
                return;
            }

            Node node = nodes.elementAt(0);
            // The matched element may be empty, in which case it has no child to read.
            Node firstChild = (node == null) ? null : node.getFirstChild();
            if (firstChild == null) {
                System.err.println("Element matching '" + selector + "' has no content at " + pageUrl);
                return;
            }

            // The first child of the span carries the value we are after.
            String value = firstChild.getText();
            System.out.println(value);
        } catch (MalformedURLException e) {
            System.err.println("Malformed URL '" + pageUrl + "': " + e.getMessage());
        } catch (IOException e) {
            System.err.println("Could not connect to '" + pageUrl + "': " + e);
        } catch (ParserException e) {
            System.err.println("Could not parse '" + pageUrl + "': " + e);
        }
    }
}