net.firstpartners.rp.back.extractor.util
Class Spider.SpiderParserCallback

java.lang.Object
  extended by javax.swing.text.html.HTMLEditorKit.ParserCallback
      extended by net.firstpartners.rp.back.extractor.util.Spider.SpiderParserCallback
Enclosing class:
Spider

public class Spider.SpiderParserCallback
extends javax.swing.text.html.HTMLEditorKit.ParserCallback

Inner class

Version:
1.1
Author:
Firstpartners.net

Field Summary
private  boolean foundSummary
          flag to mark that the summary was found
private  boolean isInBody
          flag to mark that the body tag is currently being processed
private  boolean isInScript
          flag to mark that the script tag is currently being processed
private  boolean isInTitle
          flag to mark that the title tag is currently being processed
private  java.lang.String lastText
          contents of last text element
static java.lang.String METADATA_AUTHOR
          Author meta data
static java.lang.String METADATA_DESCRIPTION
          Description meta data
static java.lang.String METADATA_SUMMARY
          Summary meta data
static java.lang.String METADATA_WEBMASTER
          Webmaster meta data
private  java.lang.StringBuffer summaryText
          summary text
 
Fields inherited from class javax.swing.text.html.HTMLEditorKit.ParserCallback
IMPLIED
 
Constructor Summary
Spider.SpiderParserCallback()
          Creates a new instance of SpiderParserCallback
 
Method Summary
 void handleEndTag(javax.swing.text.html.HTML.Tag t, int pos)
          take care of end tags
 void handleSimpleTag(javax.swing.text.html.HTML.Tag t, javax.swing.text.MutableAttributeSet a, int pos)
          handle HTML tags that don't have a start and end tag
 void handleStartTag(javax.swing.text.html.HTML.Tag t, javax.swing.text.MutableAttributeSet a, int pos)
          take care of start tags
 void handleText(char[] data, int pos)
          take care of text between tags: check it against the keyword list for matches and, if a match is found, set the node match status to true
 
Methods inherited from class javax.swing.text.html.HTMLEditorKit.ParserCallback
flush, handleComment, handleEndOfLineString, handleError
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

METADATA_DESCRIPTION

public static final java.lang.String METADATA_DESCRIPTION
Description meta data

See Also:
Constant Field Values

METADATA_SUMMARY

public static final java.lang.String METADATA_SUMMARY
Summary meta data

See Also:
Constant Field Values

METADATA_AUTHOR

public static final java.lang.String METADATA_AUTHOR
Author meta data

See Also:
Constant Field Values

METADATA_WEBMASTER

public static final java.lang.String METADATA_WEBMASTER
Webmaster meta data

See Also:
Constant Field Values

lastText

private java.lang.String lastText
contents of last text element


summaryText

private java.lang.StringBuffer summaryText
summary text


isInTitle

private boolean isInTitle
flag to mark that the title tag is currently being processed


isInBody

private boolean isInBody
flag to mark that the body tag is currently being processed


isInScript

private boolean isInScript
flag to mark that the script tag is currently being processed


foundSummary

private boolean foundSummary
flag to mark that the summary was found

Constructor Detail

Spider.SpiderParserCallback

public Spider.SpiderParserCallback()
Creates a new instance of SpiderParserCallback

Method Detail

handleSimpleTag

public void handleSimpleTag(javax.swing.text.html.HTML.Tag t,
                            javax.swing.text.MutableAttributeSet a,
                            int pos)
handle HTML tags that don't have a start and end tag

Overrides:
handleSimpleTag in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
t - HTML tag
a - HTML attributes
pos - Position within file

handleStartTag

public void handleStartTag(javax.swing.text.html.HTML.Tag t,
                           javax.swing.text.MutableAttributeSet a,
                           int pos)
take care of start tags

Overrides:
handleStartTag in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
t - HTML tag
a - HTML attributes
pos - Position within file

handleEndTag

public void handleEndTag(javax.swing.text.html.HTML.Tag t,
                         int pos)
take care of end tags

Overrides:
handleEndTag in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
t - HTML tag
pos - Position within file

handleText

public void handleText(char[] data,
                       int pos)
take care of text between tags: check it against the keyword list for matches and, if a match is found, set the node match status to true

Overrides:
handleText in class javax.swing.text.html.HTMLEditorKit.ParserCallback
Parameters:
data - Text between tags
pos - position of text within web page