net.firstpartners.rp.back.extractor.util
Class UtilExtract

java.lang.Object
  extended by net.firstpartners.rp.back.extractor.util.UtilExtract

public class UtilExtract
extends java.lang.Object

Extractor utility

Version:
1.1
Author:
Firstpartners.net

Field Summary
static java.lang.String FILE_BASE
          File protocol
static java.lang.String FTP_BASE
          Ftp protocol
static java.lang.String HTTP_BASE
          Http protocol
static java.lang.String HTTPS_BASE
          Https protocol
static java.lang.String NO_FILE_EXTENSION
          Unknown file extension
static java.lang.String NO_FILE_TITLE
          Unknown document title
static java.lang.String pathSep
          System separator
static int TYPE_DOC
          Document type
static int TYPE_HTML
          HTML type of the document
static int TYPE_PDF
          PDF Type
static int TYPE_TEXT
          Text type of the document
static int TYPE_UNKNOWN
          Unknown type of the document
static int TYPE_XML
          XML type of the document
static java.lang.String WWW_BASE
          Www protocol
 
Constructor Summary
UtilExtract()
           
 
Method Summary
static java.lang.String encode(java.lang.String strText)
          Encode the value (remove the special xml-characters)
static java.lang.String getFilenameTitle(java.lang.String fileName)
          Get the title for the specified filename
static java.lang.String getLocationExtension(java.lang.String location)
          Return the location extension
static java.io.InputStream getStream(java.lang.String uri)
          Return a stream to the specified location.
static java.lang.String getUri(java.lang.String location)
          Get the uri for the specified start location
static java.util.LinkedList getValueList(java.lang.String buffer, int minLengthWord, java.util.regex.Pattern notIgnorePattern, java.util.regex.Pattern replacePattern)
          Return the list of words which match the criteria
static boolean isExtesionSupported(java.lang.String extension, java.lang.String[] list)
          Return true if the extension is in the list
static boolean isValidUriLocation(java.lang.String uri)
          Validation of the uri.
static java.lang.String replaceAll(java.lang.String s, java.lang.String s1, java.lang.String s2)
          Replace all
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

NO_FILE_EXTENSION

public static final java.lang.String NO_FILE_EXTENSION
Unknown file extension

See Also:
Constant Field Values

NO_FILE_TITLE

public static final java.lang.String NO_FILE_TITLE
Unknown document title

See Also:
Constant Field Values

TYPE_UNKNOWN

public static final int TYPE_UNKNOWN
Unknown type of the document

See Also:
Constant Field Values

TYPE_HTML

public static final int TYPE_HTML
HTML type of the document

See Also:
Constant Field Values

TYPE_TEXT

public static final int TYPE_TEXT
Text type of the document

See Also:
Constant Field Values

TYPE_XML

public static final int TYPE_XML
XML type of the document

See Also:
Constant Field Values

TYPE_DOC

public static final int TYPE_DOC
Document type

See Also:
Constant Field Values

TYPE_PDF

public static final int TYPE_PDF
PDF Type

See Also:
Constant Field Values

HTTP_BASE

public static final java.lang.String HTTP_BASE
Http protocol

See Also:
Constant Field Values

HTTPS_BASE

public static final java.lang.String HTTPS_BASE
Https protocol

See Also:
Constant Field Values

FTP_BASE

public static final java.lang.String FTP_BASE
Ftp protocol

See Also:
Constant Field Values

WWW_BASE

public static final java.lang.String WWW_BASE
Www protocol

See Also:
Constant Field Values

FILE_BASE

public static final java.lang.String FILE_BASE
File protocol

See Also:
Constant Field Values

pathSep

public static final java.lang.String pathSep
System separator

Constructor Detail

UtilExtract

public UtilExtract()
Method Detail

getLocationExtension

public static java.lang.String getLocationExtension(java.lang.String location)
Return the location extension

Parameters:
location -
Returns:

isExtesionSupported

public static boolean isExtesionSupported(java.lang.String extension,
                                          java.lang.String[] list)
Return true if the extension is in the list

Parameters:
extension - Extension to validate
list - List of supported extensions
Returns:
True if the extension is in the list of supported extensions

isValidUriLocation

public static boolean isValidUriLocation(java.lang.String uri)
Validation of the uri. If the location is on the web, a stream to it should open successfully (even when a proxy is set). If the location is local, it should be possible to open a stream to the specified location.

Parameters:
uri - Uri location
Returns:
True if the specified uri location is valid.

getUri

public static java.lang.String getUri(java.lang.String location)
Get the uri for the specified start location

Parameters:
location - Location
Returns:
Uri location

getValueList

public static java.util.LinkedList getValueList(java.lang.String buffer,
                                                int minLengthWord,
                                                java.util.regex.Pattern notIgnorePattern,
                                                java.util.regex.Pattern replacePattern)
Return the list of words which match the criteria

Parameters:
buffer - String to be processed
minLengthWord - Minimum word length
notIgnorePattern - Pattern with the characters not to ignore
replacePattern - Pattern to replace characters
Returns:
List of the words which match the criteria

getFilenameTitle

public static java.lang.String getFilenameTitle(java.lang.String fileName)
Get the title for the specified filename

Parameters:
fileName -
Returns:

replaceAll

public static java.lang.String replaceAll(java.lang.String s,
                                          java.lang.String s1,
                                          java.lang.String s2)
Replace all

Parameters:
s -
s1 -
s2 -
Returns:

getStream

public static java.io.InputStream getStream(java.lang.String uri)
                                     throws RpException
Return a stream to the specified location. If the location starts with http, handle it as a web location (with proxy credentials); otherwise treat it as a local file.

Parameters:
uri -
Returns:
Stream to the specified location
Throws:
RpException - thrown if the stream cannot be obtained

encode

public static java.lang.String encode(java.lang.String strText)
Encode the value (remove the special xml-characters)

Parameters:
strText - Text to process
Returns:
Processed text after conversion