|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
net.firstpartners.rp.back.extractor.util.Spider
public class Spider
Class responsible for spidering the uri location and returning the information
| Nested Class Summary | |
|---|---|
class |
Spider.SpiderParserCallback
Inner class |
| Field Summary | |
|---|---|
private java.lang.String |
author
Author of the document |
private java.net.URL |
base
Base of the links inside document |
private java.lang.String |
description
Description (Summary) of the document |
private java.util.LinkedList |
links
Links list |
protected org.apache.log4j.Logger |
logger
Logger for this class and subclasses |
private int |
maxLengthDesc
Maximum length for the description |
private java.lang.String |
title
Title of the document |
private java.lang.String |
uri
Uri location of the document to spider |
private java.util.LinkedList |
values
Values list |
| Constructor Summary | |
|---|---|
Spider(java.lang.String uri)
Creates a new Spider object for the specified location, with no maximum length specified for the summary |
|
Spider(java.lang.String uri,
int lengthSummary)
Creates a new Spider object for the specified location and maximum length of the summary required |
|
| Method Summary | |
|---|---|
void |
addLink(java.net.URL u)
Add the URL-object to the links list |
void |
addValue(java.lang.String value)
Add the value-object to the values list |
java.lang.String |
fixHref(java.lang.String href)
Repairs a sloppy href: flips backward slashes and adds missing slashes |
java.lang.String |
getAuthor()
Get the author |
java.net.URL |
getBase()
Get the document base |
java.lang.String |
getDescription()
Get the page description |
java.util.LinkedList |
getLinks()
Get the list of the links from the document |
int |
getMaxLengthDesc()
Get the maximum length for the description |
java.lang.String |
getTitle()
Get the title of the document |
java.lang.String |
getUri()
Get the uri of the document |
java.util.LinkedList |
getValues()
Get the list of the document values |
void |
setAuthor(java.lang.String author)
Set the page author |
void |
setBase(java.lang.String abase)
Set the document base |
void |
setDescription(java.lang.String description)
Set the page description |
void |
setLinks(java.util.LinkedList links)
Set the list of links |
void |
setTitle(java.lang.String title)
Set the title of the document |
void |
start()
Start to spider the data |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected final org.apache.log4j.Logger logger
private java.lang.String uri
private int maxLengthDesc
private java.net.URL base
private java.lang.String title
private java.lang.String author
private java.lang.String description
private java.util.LinkedList links
private java.util.LinkedList values
| Constructor Detail |
|---|
public Spider(java.lang.String uri,
int lengthSummary)
uri - Uri location to spider
lengthSummary - Maximum length of the summary required
public Spider(java.lang.String uri)
uri - Uri location to spider
| Method Detail |
|---|
public void start()
throws RpException
RpException - Exception in parsing the data
public java.lang.String fixHref(java.lang.String href)
href - web site reference
public void addLink(java.net.URL u)
u - Url object
public void addValue(java.lang.String value)
value - Value to add to the values list
public java.lang.String getAuthor()
public void setAuthor(java.lang.String author)
author - Page author
public java.net.URL getBase()
public void setBase(java.lang.String abase)
abase - Document base
public java.lang.String getDescription()
public void setDescription(java.lang.String description)
description - Page description
public java.util.LinkedList getLinks()
public void setLinks(java.util.LinkedList links)
links - List of the links of the document
public int getMaxLengthDesc()
public java.lang.String getTitle()
public void setTitle(java.lang.String title)
title - Title of the document
public java.lang.String getUri()
public java.util.LinkedList getValues()
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||