// AnalyzeHtml.java ... Analyze HTML elements of a URL by drilling down from an HTML element type/class combination
// RJM Programming 16/11/2013

import java.util.ArrayList;
import java.util.Iterator;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import java.util.Scanner;
import java.io.IOException;
import java.net.URL;
import java.util.List;

/**
 * Fetches an HTML page, cleans it with HtmlCleaner, and lists the text of every
 * element of a given tag name whose {@code class} attribute equals a given value.
 *
 * <p>Can be run from the command line; see {@link #main(String[])}.
 */
public class AnalyzeHtml
{
    /** Default tag name used when the user supplies none. */
    private static final String DEFAULT_ELEMENT_TYPE = "select";

    /** Default class attribute value used when the user supplies none. */
    private static final String DEFAULT_CLASS_NAME = "mytuts";

    /** Default page address used when the user supplies none. */
    private static final String DEFAULT_URL = "http://www.rjmprogramming.com.au";

    /** Root node returned by HtmlCleaner for the page given to the constructor. */
    TagNode ourRootNode;

    /**
     * Downloads and cleans the page at the given address.
     *
     * @param ourHtmlPage address of the HTML page to analyze
     * @throws IOException if HtmlCleaner fails to read the page
     */
    public AnalyzeHtml(URL ourHtmlPage) throws IOException {
        HtmlCleaner ourCleaner = new HtmlCleaner();
        ourRootNode = ourCleaner.clean(ourHtmlPage);
    }

    /**
     * Collects the elements with the given tag name whose {@code class}
     * attribute is exactly equal to the given value.
     *
     * <p>The comparison is against the whole attribute string, so an element
     * with {@code class="a b"} is not matched by {@code "a"}.
     *
     * @param passedClassname class attribute value to match exactly
     * @param ourByName       tag name to look up (e.g. {@code "select"})
     * @return the matching nodes in the order HtmlCleaner returned them; never {@code null}
     */
    List<TagNode> getTagsByClass(String passedClassname, String ourByName) {
        List<TagNode> ourTagList = new ArrayList<TagNode>();
        // The second argument asks HtmlCleaner to search the whole subtree, not just direct children.
        TagNode[] ourTagElements = ourRootNode.getElementsByName(ourByName, true);
        if (ourTagElements == null) {
            return ourTagList;
        }
        for (TagNode ourTagElement : ourTagElements) {
            String ourClassType = ourTagElement.getAttributeByName("class");
            if (ourClassType != null && ourClassType.equals(passedClassname)) {
                ourTagList.add(ourTagElement);
            }
        }
        return ourTagList;
    }

    /**
     * Prints a prompt and reads one line from the scanner, falling back to a
     * default when the answer is blank or when no more input is available.
     *
     * @param in           scanner to read the answer from
     * @param prompt       text shown to the user (printed without a trailing newline)
     * @param defaultValue value returned for a blank answer or exhausted input
     * @return the trimmed answer, or {@code defaultValue}
     */
    private static String promptWithDefault(Scanner in, String prompt, String defaultValue) {
        System.out.print(prompt);
        // Guard against closed/redirected stdin: nextLine() would throw NoSuchElementException.
        if (!in.hasNextLine()) {
            return defaultValue;
        }
        // nextLine() already strips the line terminator; trim() drops stray surrounding whitespace.
        String answer = in.nextLine().trim();
        return answer.isEmpty() ? defaultValue : answer;
    }

    /**
     * Command-line entry point.
     *
     * <p>Arguments are positional and all optional: {@code args[0]} is the tag
     * name, {@code args[1]} the class attribute value, {@code args[2]} the page
     * URL. Any argument that is missing or empty is asked for interactively on
     * standard input, and a blank answer selects the built-in default.
     *
     * @param args optional element type, class name and URL, in that order
     */
    public static void main(String[] args) {
        // Take whatever positional arguments were supplied; the rest stay empty and are prompted for.
        String ourElementType = args.length > 0 ? args[0] : "";
        String ourClassName = args.length > 1 ? args[1] : "";
        String ourUrl = args.length > 2 ? args[2] : "";

        Scanner in = new Scanner(System.in);
        // isEmpty() compares content; reference comparison (==) would miss an empty command-line argument.
        if (ourElementType.isEmpty()) {
            ourElementType = promptWithDefault(in,
                    "Please enter HTML element type to look for [" + DEFAULT_ELEMENT_TYPE + "]: ",
                    DEFAULT_ELEMENT_TYPE);
        }
        if (ourClassName.isEmpty()) {
            ourClassName = promptWithDefault(in,
                    "Please enter HTML element class to look for [" + DEFAULT_CLASS_NAME + "]: ",
                    DEFAULT_CLASS_NAME);
        }
        if (ourUrl.isEmpty()) {
            ourUrl = promptWithDefault(in,
                    "Please enter HTML url to search on [" + DEFAULT_URL + "]: ",
                    DEFAULT_URL);
        }

        try {
            AnalyzeHtml thisAnalyze = new AnalyzeHtml(new URL(ourUrl));

            List<TagNode> ourElements = thisAnalyze.getTagsByClass(ourClassName, ourElementType);
            System.out.println("Data of " + ourElementType + "s with class='" + ourClassName + "' at '" + ourUrl + "'");
            for (TagNode ourElement : ourElements) {
                System.out.println("Text child nodes of " + ourElementType + "s: " + ourElement.getText().toString());
            }
        } catch (Exception err) {
            // Top-level boundary of a command-line tool: report any failure (bad URL, I/O error) and exit normally.
            err.printStackTrace();
        }
    }
}

