package ir.webutils;
import java.net.*;
import javax.swing.text.*;
import javax.swing.text.html.*;

/**
 * An AnchoredLinkExtractor that extracts ScoredAnchoredLinks, which
 * can be scored and used in heuristic web search.
 *
 * @author Ray Mooney
 */

public class ScoredAnchoredLinkExtractor extends AnchoredLinkExtractor {

    /**
     * Document position of the most recent opening "a" tag reported to
     * {@link #handleStartTag}.  Remains -1 until such a tag is seen.
     */
    int anchorPosition = -1;

    /**
     * Builds an extractor for the supplied page.
     *
     * @param page The page, handed straight to the superclass constructor.
     */
    public ScoredAnchoredLinkExtractor(HTMLPage page) {
        super(page);
    }

    /**
     * Executed when an opening HTML tag is found in the document.
     * When the tag is an "a" tag its position is remembered in
     * <code>anchorPosition</code> so that {@link #addLink} can hand it
     * to the link it creates.  The call is then forwarded, unchanged,
     * to the superclass.
     *
     * @param tag The tag that caused this function to be executed.
     * @param attributes The attributes of <code>tag</code>.
     * @param position The start of the tag in the document.  If the
     * tag is implied (filled in by the parser but not actually
     * present in the document) then <code>position</code> will
     * correspond to that of the next encountered tag.
     */
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
        boolean opensAnchor = (HTML.Tag.A == tag);
        if (opensAnchor) {
            anchorPosition = position;
        }
        super.handleStartTag(tag, attributes, position);
    }

    /**
     * Executed when a closing HTML tag is found in the document.
     * Note that the parser may add "implied" closing tags.
     * When a closing "a" tag arrives while a current link is set, the
     * tag's position is stored on that link as its end position.  The
     * call is then forwarded, unchanged, to the superclass.
     *
     * @param tag The tag found.
     * @param position The position of the tag in the document.
     */
    public void handleEndTag(HTML.Tag tag, int position) {
        boolean closesCurrentLink = (currentLink != null) && (HTML.Tag.A == tag);
        if (closesCurrentLink) {
            // currentLink is only ever assigned by addLink below, which
            // always stores a ScoredAnchoredLink.
            ((ScoredAnchoredLink) currentLink).setEndPosition(position);
        }
        super.handleEndTag(tag, position);
    }

    /**
     * Reads a link out of an attribute set, resolves it against the
     * base URL and records it as a ScoredAnchoredLink.  The new link
     * becomes the current link, is appended to <code>links</code>, and
     * the attribute value is replaced by the resolved URL.  Nothing
     * happens when the attribute is not defined; a malformed URL is
     * reported on standard error and otherwise ignored.
     *
     * @param attributes The attribute set.
     * @param attr The attribute that should be treated as a URL.  For
     * example, <code>attr</code> should be
     * <code>HTML.Attribute.HREF</code> if <code>attributes</code> is
     * from an anchor tag.
     */
    protected void addLink(MutableAttributeSet attributes, HTML.Attribute attr) {
        if (!attributes.isDefined(attr)) {
            return;
        }

        String rawTarget = (String) attributes.getAttribute(attr);
        try {
            URL resolved = new URL(this.url, rawTarget);

            // The new link carries a back-link to the link that produced
            // the current page, plus the last recorded anchor position.
            AnchoredLink scoredLink =
                new ScoredAnchoredLink(resolved, page.getLink(), anchorPosition);
            currentLink = scoredLink;
            this.links.add(scoredLink);

            // Swap in the resolved URL so an absolute copy of the page
            // can be produced from the attributes.
            attributes.removeAttribute(attr);
            attributes.addAttribute(attr, resolved.toString());
        } catch (MalformedURLException malformed) {
            System.err.println("LinkExtractor: " + malformed);
        }
    }

}




