Package org.htmlcleaner
Class TagInfo
- java.lang.Object
-
- org.htmlcleaner.TagInfo
-
public class TagInfo extends java.lang.Object
This class contains information about a single HTML tag.
It also contains rules for tag balancing. For each tag, a list of dependent tags may be defined. There are several kinds of dependencies used to reorder tags:
- fatal tags - required outer tag - the tag will be ignored during parsing (will be skipped) if this fatal tag is missing. For example, most web browsers ignore elements TD, TR, TBODY if they are not in the context of a TABLE tag.
- required enclosing tags - if there is no such tag, it is implicitly created. For example, if TD is outside of TR, an open TR is created before it.
- forbidden tags - the tag is not allowed to occur inside them - for example, FORM cannot be inside another FORM, and it will be ignored during cleanup.
- allowed children tags - for example, TR allows TD and TH. If some dependent allowed tags are defined, then the cleaner ignores other tags, treating them as not allowed, unless they are in some other relationship with this tag.
- higher level tags - for example for TR higher tags are THEAD, TBODY, TFOOT.
- tags that must be closed and copied - for example, in
<a href="#"><div>....
tag A must be closed before DIV but copied again inside DIV.
- tags that must be closed before closing this tag and copied again after - for example, in
<i><b>at</i> first</b> text
tag B must be closed before closing I, but it must be copied again afterwards, finally resulting in the sequence:
<i><b>at</b></i><b> first</b> text
Tag TR (table row), for instance, may define the following dependencies:
- fatal tag is
table
- required enclosing tag is
tbody
- allowed children tags are
td,th
- higher level tags are
thead,tfoot
- tags that must be closed before are
tr,td,th,caption,colgroup
This means the following:
- tr must be in the context of table, otherwise it will be ignored,
- tr may be directly inside tbody, tfoot and thead, otherwise tbody will be implicitly created in front of it.
- tr can contain td and th; all other tags and content will be pushed out of the current limiting context - in the case of HTML tables, in front of the enclosing table tag.
- if the previous open tag is one of tr, caption or colgroup, it will be implicitly closed.
-
-
Field Summary
Fields Modifier and Type Field Description private int
belongsTo
protected static int
BODY
private java.util.Set
childTags
protected static int
CONTENT_ALL
protected static int
CONTENT_NONE
protected static int
CONTENT_TEXT
private int
contentType
private java.util.Set
continueAfterTags
private java.util.Set
copyTags
private boolean
deprecated
private java.lang.String
fatalTag
protected static int
HEAD
protected static int
HEAD_AND_BODY
private java.util.Set
higherTags
private boolean
ignorePermitted
private java.util.Set
mustCloseTags
private java.lang.String
name
private java.util.Set
permittedTags
private java.lang.String
requiredParent
private boolean
unique
-
Constructor Summary
Constructors Constructor Description TagInfo(java.lang.String name, int contentType, int belongsTo, boolean depricated, boolean unique, boolean ignorePermitted)
-
Method Summary
All Methods Instance Methods Concrete Methods Modifier and Type Method Description (package private) boolean
allowsAnything()
(package private) boolean
allowsBody()
(package private) boolean
allowsItem(BaseToken token)
void
defineAllowedChildrenTags(java.lang.String commaSeparatedListOfTags)
void
defineCloseBeforeCopyInsideTags(java.lang.String commaSeparatedListOfTags)
void
defineCloseBeforeTags(java.lang.String commaSeparatedListOfTags)
void
defineCloseInsideCopyAfterTags(java.lang.String commaSeparatedListOfTags)
void
defineFatalTags(java.lang.String commaSeparatedListOfTags)
void
defineForbiddenTags(java.lang.String commaSeparatedListOfTags)
void
defineHigherLevelTags(java.lang.String commaSeparatedListOfTags)
void
defineRequiredEnclosingTags(java.lang.String commaSeparatedListOfTags)
int
getBelongsTo()
java.util.Set
getChildTags()
int
getContentType()
java.util.Set
getContinueAfterTags()
java.util.Set
getCopyTags()
java.lang.String
getFatalTag()
java.util.Set
getHigherTags()
java.util.Set
getMustCloseTags()
java.lang.String
getName()
java.util.Set
getPermittedTags()
java.lang.String
getRequiredParent()
(package private) boolean
hasCopyTags()
(package private) boolean
hasPermittedTags()
(package private) boolean
isContinueAfter(java.lang.String tagName)
(package private) boolean
isCopy(java.lang.String tagName)
boolean
isDeprecated()
boolean
isEmptyTag()
(package private) boolean
isHeadAndBodyTag()
(package private) boolean
isHeadTag()
(package private) boolean
isHigher(java.lang.String tagName)
boolean
isIgnorePermitted()
(package private) boolean
isMustCloseTag(TagInfo tagInfo)
boolean
isUnique()
void
setBelongsTo(int belongsTo)
void
setChildTags(java.util.Set childTags)
void
setContinueAfterTags(java.util.Set continueAfterTags)
void
setCopyTags(java.util.Set copyTags)
void
setDeprecated(boolean deprecated)
void
setFatalTag(java.lang.String fatalTag)
void
setHigherTags(java.util.Set higherTags)
void
setIgnorePermitted(boolean ignorePermitted)
void
setMustCloseTags(java.util.Set mustCloseTags)
void
setName(java.lang.String name)
void
setPermittedTags(java.util.Set permittedTags)
void
setRequiredParent(java.lang.String requiredParent)
void
setUnique(boolean unique)
-
-
-
Field Detail
-
HEAD_AND_BODY
protected static final int HEAD_AND_BODY
- See Also:
- Constant Field Values
-
HEAD
protected static final int HEAD
- See Also:
- Constant Field Values
-
BODY
protected static final int BODY
- See Also:
- Constant Field Values
-
CONTENT_ALL
protected static final int CONTENT_ALL
- See Also:
- Constant Field Values
-
CONTENT_NONE
protected static final int CONTENT_NONE
- See Also:
- Constant Field Values
-
CONTENT_TEXT
protected static final int CONTENT_TEXT
- See Also:
- Constant Field Values
-
name
private java.lang.String name
-
contentType
private int contentType
-
mustCloseTags
private java.util.Set mustCloseTags
-
higherTags
private java.util.Set higherTags
-
childTags
private java.util.Set childTags
-
permittedTags
private java.util.Set permittedTags
-
copyTags
private java.util.Set copyTags
-
continueAfterTags
private java.util.Set continueAfterTags
-
belongsTo
private int belongsTo
-
requiredParent
private java.lang.String requiredParent
-
fatalTag
private java.lang.String fatalTag
-
deprecated
private boolean deprecated
-
unique
private boolean unique
-
ignorePermitted
private boolean ignorePermitted
-
-
Method Detail
-
defineFatalTags
public void defineFatalTags(java.lang.String commaSeparatedListOfTags)
-
defineRequiredEnclosingTags
public void defineRequiredEnclosingTags(java.lang.String commaSeparatedListOfTags)
-
defineForbiddenTags
public void defineForbiddenTags(java.lang.String commaSeparatedListOfTags)
-
defineAllowedChildrenTags
public void defineAllowedChildrenTags(java.lang.String commaSeparatedListOfTags)
-
defineHigherLevelTags
public void defineHigherLevelTags(java.lang.String commaSeparatedListOfTags)
-
defineCloseBeforeCopyInsideTags
public void defineCloseBeforeCopyInsideTags(java.lang.String commaSeparatedListOfTags)
-
defineCloseInsideCopyAfterTags
public void defineCloseInsideCopyAfterTags(java.lang.String commaSeparatedListOfTags)
-
defineCloseBeforeTags
public void defineCloseBeforeTags(java.lang.String commaSeparatedListOfTags)
-
getName
public java.lang.String getName()
-
setName
public void setName(java.lang.String name)
-
getContentType
public int getContentType()
-
getMustCloseTags
public java.util.Set getMustCloseTags()
-
setMustCloseTags
public void setMustCloseTags(java.util.Set mustCloseTags)
-
getHigherTags
public java.util.Set getHigherTags()
-
setHigherTags
public void setHigherTags(java.util.Set higherTags)
-
getChildTags
public java.util.Set getChildTags()
-
setChildTags
public void setChildTags(java.util.Set childTags)
-
getPermittedTags
public java.util.Set getPermittedTags()
-
setPermittedTags
public void setPermittedTags(java.util.Set permittedTags)
-
getCopyTags
public java.util.Set getCopyTags()
-
setCopyTags
public void setCopyTags(java.util.Set copyTags)
-
getContinueAfterTags
public java.util.Set getContinueAfterTags()
-
setContinueAfterTags
public void setContinueAfterTags(java.util.Set continueAfterTags)
-
getRequiredParent
public java.lang.String getRequiredParent()
-
setRequiredParent
public void setRequiredParent(java.lang.String requiredParent)
-
getBelongsTo
public int getBelongsTo()
-
setBelongsTo
public void setBelongsTo(int belongsTo)
-
getFatalTag
public java.lang.String getFatalTag()
-
setFatalTag
public void setFatalTag(java.lang.String fatalTag)
-
isDeprecated
public boolean isDeprecated()
-
setDeprecated
public void setDeprecated(boolean deprecated)
-
isUnique
public boolean isUnique()
-
setUnique
public void setUnique(boolean unique)
-
isIgnorePermitted
public boolean isIgnorePermitted()
-
isEmptyTag
public boolean isEmptyTag()
-
setIgnorePermitted
public void setIgnorePermitted(boolean ignorePermitted)
-
allowsBody
boolean allowsBody()
-
isHigher
boolean isHigher(java.lang.String tagName)
-
isCopy
boolean isCopy(java.lang.String tagName)
-
hasCopyTags
boolean hasCopyTags()
-
isContinueAfter
boolean isContinueAfter(java.lang.String tagName)
-
hasPermittedTags
boolean hasPermittedTags()
-
isHeadTag
boolean isHeadTag()
-
isHeadAndBodyTag
boolean isHeadAndBodyTag()
-
isMustCloseTag
boolean isMustCloseTag(TagInfo tagInfo)
-
allowsItem
boolean allowsItem(BaseToken token)
-
allowsAnything
boolean allowsAnything()
-
-