package ir.webutils;
import java.net.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import java.util.*;

/** Extractor for AnchoredLinks.  Extends the HTML parser
 * callback routines so that the anchor text of each link is
 * also extracted and stored with the link.
 *
 * @author Ray Mooney and Yuk Wah Wong */

public class AnchoredLinkExtractor extends LinkExtractor {

    /** Buffer collecting the anchor text seen since the most recent
     * "a" start tag; <code>null</code> once the matching end tag has
     * been processed. */
    protected StringBuffer anchorText = null;

    /** The link created for the "a" element currently being processed;
     * <code>null</code> when the current anchor produced no link (no
     * href, or a malformed URL) or when no anchor is open. */
    protected AnchoredLink currentLink = null;

    /** Create an anchored link extractor for the given page.
     *
     * @param page The page whose links are to be extracted. */
    public AnchoredLinkExtractor(HTMLPage page) {
        super(page);
    }

    /**
     * Executed when a block of text is encountered.
     * If inside an anchor that produced a link, the text is appended
     * to anchorText.
     *
     * @param text A <code>char</code> array representation of the
     * text.
     *
     * @param position The position of the text in the document.  */
    public void handleText(char[] text, int position) {
        super.handleText(text, position);
        if (currentLink != null && anchorText != null) {
            anchorText.append(text);
        }
    }

    /**
     * Executed when an opening HTML tag is found in the document.
     * Note that this method only handles tags that also have a
     * closing tag.  An "a" tag starts a new anchorText buffer; any
     * other tag found while inside a linked anchor is written into
     * the anchorText.
     *
     * @param tag The tag that caused this function to be executed.
     * @param attributes The attributes of <code>tag</code>.
     * @param position The start of the tag in the document.  If the
     * tag is implied (filled in by the parser but not actually
     * present in the document) then <code>position</code> will
     * correspond to that of the next encountered tag.  */
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
        if (tag == HTML.Tag.A) {
            // Forget any earlier link before delegating: if this anchor yields
            // no link (no href, or a malformed URL) addLink leaves currentLink
            // untouched, and the anchor's text must not be credited to an
            // unrelated link that happened to be created before it.
            currentLink = null;
            // Presumably the superclass calls addLink for "a" tags, which is
            // what sets currentLink for this anchor.
            super.handleStartTag(tag, attributes, position);
            anchorText = new StringBuffer();
            return;
        }
        // A non-anchor tag must not replace the link of the enclosing anchor,
        // even if the superclass creates a link for it via addLink.
        AnchoredLink enclosingLink = currentLink;
        super.handleStartTag(tag, attributes, position);
        currentLink = enclosingLink;
        if (currentLink != null && anchorText != null) {
            appendTag(anchorText, tag, attributes);
        }
    }

    /** Write this tag with its attributes out to the buffer in the
     * form <code>&lt;tag name="value" ...&gt;</code>.  Attribute values
     * are written as-is, without escaping.
     *
     * @param buffer The buffer to append to.
     * @param tag The tag to write.
     * @param attributes The attributes of <code>tag</code>; if
     * <code>null</code> the tag is written without attributes. */
    public static void appendTag(StringBuffer buffer, HTML.Tag tag, MutableAttributeSet attributes) {
        buffer.append('<').append(tag);
        if (attributes != null) {
            for (Enumeration<?> names = attributes.getAttributeNames(); names.hasMoreElements(); ) {
                Object name = names.nextElement();
                buffer.append(' ').append(name).append("=\"")
                      .append(attributes.getAttribute(name)).append('"');
            }
        }
        buffer.append('>');
    }

    /**
     * Executed when a closing HTML tag is found in the document.
     * Note that the parser may add "implied" closing tags.  For
     * example, the default parser adds closing &lt;p&gt; tags.
     * At the end of an "a" tag the accumulated anchorText is stored
     * in the current link, and the anchor state is cleared.
     * Any other end tag found while inside a linked anchor is
     * written into the anchorText.
     *
     * @param tag The tag found.
     *
     * @param position The position of the tag in the document.  */
    public void handleEndTag(HTML.Tag tag, int position) {
        super.handleEndTag(tag, position);
        if (tag == HTML.Tag.A) {
            if (currentLink != null && anchorText != null) {
                // Set the anchorText for this link.
                currentLink.setAnchorText(anchorText.toString());
            }
            // Always leave the anchor state, even when this anchor had no
            // link, so that no buffer or link lingers past the end tag.
            currentLink = null;
            anchorText = null;
        } else if (currentLink != null && anchorText != null) {
            anchorText.append("</").append(tag).append(">");
        }
    }

    /**
     * Executed when an HTML tag that has no closing tag is found in
     * the document.
     * If inside a linked anchor, the tag is written into the anchorText.
     *
     * @param tag The tag that caused this function to be executed.
     * @param attributes The attributes of <code>tag</code>.
     * @param position The start of the tag in the document.  If the
     * tag is implied (filled in by the parser but not actually
     * present in the document) then <code>position</code> will
     * correspond to that of the next encountered tag.  */
    public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
        // A simple tag has no content, so a link the superclass may create
        // for it must never become the target of anchor text.
        AnchoredLink enclosingLink = currentLink;
        super.handleSimpleTag(tag, attributes, position);
        currentLink = enclosingLink;
        if (currentLink != null && anchorText != null) {
            appendTag(anchorText, tag, attributes);
        }
    }

    /** Retrieves a link from an attribute set and completes it against
     * the base URL.  This version creates an AnchoredLink, adds it to
     * the list of links and makes it the current link.  If the
     * attribute is not defined, or the URL is malformed, no link is
     * added and the current link is left unchanged.
     *
     * @param attributes The attribute set.
     * @param attr The attribute that should be treated as a URL.  For
     * example, <code>attr</code> should be
     * <code>HTML.Attribute.HREF</code> if <code>attributes</code> is
     * from an anchor tag. */
    protected void addLink(MutableAttributeSet attributes, HTML.Attribute attr) {
        if (!attributes.isDefined(attr)) {
            return;
        }
        String link = (String) attributes.getAttribute(attr);
        try {
            URL completeURL = new URL(this.url, link);
            AnchoredLink newLink = new AnchoredLink(completeURL);
            currentLink = newLink;
            this.links.add(newLink);
        }
        catch (MalformedURLException e) {
            // Best effort: report the bad URL and carry on with the rest of the page.
            System.err.println("LinkExtractor: " + e);
        }
    }

    /** Extracts the anchored links of the page at the URL given as the
     * first command line argument and prints them to standard output.
     *
     * @param args Command line arguments; <code>args[0]</code> is the URL.
     * @throws Exception if retrieving or parsing the page fails. */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: java ir.webutils.AnchoredLinkExtractor <URL>");
            System.exit(1);
        }
        HTMLPage page = new HTMLPageRetriever().getHTMLPage(new Link(args[0]));
        System.out.println(new AnchoredLinkExtractor(page).extractLinks());
    }

}// AnchoredLinkExtractor
