// Tag.java
/*
* Copyright © 2014-2025 The CTAN Team and individual authors
*
* This file is distributed under the 3-clause BSD license.
* See file LICENSE for details.
*/
package org.ctan.markup;
import java.io.IOException;
import java.io.Writer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Pattern;
import lombok.Getter;
/**
* This enumeration represents HTML tags.
*/
public enum Tag {

    /** The {@code _SKIP} pseudo tag for everything to be skipped. */
    _SKIP(null, null, Type.SKIP_TAG),

    /** The {@code _DOCTYPE} pseudo tag. */
    _DOCTYPE(null, null, Type.SKIP_TAG),

    /** The {@code a} tag; relative {@code href} values are rebased. */
    A("<a", "</a>", Type.PASS, "name", "href", "title") {

        /**
         * {@inheritDoc}
         *
         * <p>Before the tag is written, a relative {@code href} is prefixed
         * with {@code base}. An {@code href} is left untouched when it starts
         * with a slash or with a lower-case scheme followed by a colon and
         * two slashes. The passed map is updated in place.
         */
        @Override
        public boolean startSave(Writer writer, String base,
                Map<String, String> attributes) throws IOException {
            if (attributes != null && base != null) {
                var href = attributes.get("href");
                // find() tests the prefix only; String.matches() would
                // require the whole href to equal the prefix.
                if (href != null && !ABSOLUTE_HREF.matcher(href).find()) {
                    attributes.put("href", base + href);
                }
            }
            return super.startSave(writer, base, attributes);
        }
    },

    /** The {@code abbr} tag. */
    ABBR("<abbr", "</abbr>", Type.PASS),

    /** The {@code acronym} tag. */
    ACRONYM("<acronym", "</acronym>", Type.PASS),

    /** The {@code address} tag. */
    ADDRESS("<address", "</address>", Type.PASS),

    /** The {@code applet} tag. */
    APPLET("<applet", "</applet>", Type.SKIP_TAG),

    /** The {@code area} tag. */
    AREA("<area", "</area>", Type.PASS),

    /** The {@code b} tag. */
    B("<b", "</b>", Type.PASS),

    /** The combination of a {@code b} tag enclosing an {@code i} tag. */
    B_I("<b><i", "</i></b>", Type.PASS),

    /** The {@code base} tag. */
    BASE("<base", "</base>", Type.SKIP_TAG),

    /** The {@code basefont} tag. */
    BASEFONT("<basefont", "</basefont>", Type.SKIP_TAG),

    /** The {@code bdo} tag. */
    BDO("<bdo", "</bdo>", Type.PASS),

    /** The {@code big} tag. */
    BIG("<big", "</big>", Type.PASS),

    /** The {@code blockquote} tag. */
    BLOCKQUOTE("<blockquote", "</blockquote>\n", true, Type.PASS, false),

    /** The {@code body} tag. */
    BODY("<body", "</body>", Type.SKIP_TAG),

    /** The {@code br} tag. */
    BR("<br", "", Type.SELF_CLOSING),

    /** The {@code button} tag. */
    BUTTON("<button", "</button>", Type.SKIP_TAG),

    /** The {@code caption} tag. */
    CAPTION("<caption", "</caption>", Type.PASS),

    /** The {@code center} tag. */
    CENTER("<center", "</center>", Type.PASS),

    /** The {@code cite} tag. */
    CITE("<cite", "</cite>", Type.PASS),

    /** The {@code code} tag. */
    CODE("<code", "</code>", Type.PASS),

    /** The {@code col} tag. */
    COL("<col", "</col>", Type.PASS),

    /** The {@code colgroup} tag. */
    COLGROUP("<colgroup", "</colgroup>", Type.PASS),

    /** The {@code dd} tag. */
    DD("<dd", "</dd>", Type.PASS),

    /** The {@code del} tag. */
    DEL("<del", "</del>", Type.PASS),

    /** The {@code dfn} tag. */
    DFN("<dfn", "</dfn>", Type.PASS),

    /** The {@code dir} tag. */
    DIR("<dir", "</dir>", Type.PASS),

    /** The {@code div} tag. */
    DIV("<div", "</div>", Type.PASS, "align"),

    /** The {@code dl} tag. */
    DL("<dl", "</dl>", Type.PASS),

    /** The {@code dt} tag. */
    DT("<dt", "</dt>", Type.PASS),

    /** The {@code em} tag. */
    EM("<em", "</em>", Type.PASS),

    /** The {@code fieldset} tag. */
    FIELDSET("<fieldset", "</fieldset>", Type.SKIP_TAG),

    /** The {@code font} tag. */
    FONT("<font", "</font>", Type.PASS, "color", "face", "size"),

    /** The {@code form} tag. */
    FORM("<form", "</form>", Type.SKIP_TAG),

    /** The {@code frame} tag. */
    FRAME("<frame", "</frame>", Type.SKIP_TAG),

    /** The {@code frameset} tag. */
    FRAMESET("<frameset", "</frameset>", Type.SKIP_TAG),

    /** The {@code h1} tag. */
    H1("<h1", "</h1>\n", false, Type.PASS, true),

    /** The {@code h2} tag. */
    H2("<h2", "</h2>\n", false, Type.PASS, true),

    /** The {@code h3} tag. */
    H3("<h3", "</h3>\n", false, Type.PASS, true),

    /** The {@code h4} tag. */
    H4("<h4", "</h4>\n", false, Type.PASS, true),

    /** The {@code h5} tag. */
    H5("<h5", "</h5>\n", false, Type.PASS, true),

    /** The {@code h6} tag. */
    H6("<h6", "</h6>\n", false, Type.PASS, true),

    /** The {@code head} tag. */
    HEAD("<head", "</head>\n", Type.SKIP_TAG),

    /** The {@code hr} tag. */
    HR("<hr", "\n", Type.SELF_CLOSING),

    /** The {@code html} tag. */
    HTML("<html", "</html>\n", Type.SKIP_TAG),

    /** The {@code i} tag. */
    I("<i", "</i>", Type.PASS),

    /** The {@code iframe} tag. */
    IFRAME("<iframe", "</iframe>", Type.SKIP_TAG),

    /** The {@code img} tag. */
    IMG("<img", "", Type.SELF_CLOSING,
        "src", "width", "height", "alt", "title"),

    /** The {@code input} tag. */
    INPUT("<input", "</input>", Type.SKIP_TAG),

    /** The {@code ins} tag. */
    INS("<ins", "</ins>", Type.PASS),

    /** The {@code isindex} tag. */
    ISINDEX("<isindex", "</isindex>", Type.PASS),

    /** The {@code kbd} tag. */
    KBD("<kbd", "</kbd>", Type.PASS),

    /** The {@code label} tag. */
    LABEL("<label", "</label>", Type.SKIP_TAG),

    /** The {@code legend} tag. */
    LEGEND("<legend", "</legend>", Type.PASS),

    /** The {@code li} tag. */
    LI("<li", "</li>\n", Type.PASS),

    /** The {@code link} tag. */
    LINK("<link", "</link>", Type.SKIP_TAG),

    /** The {@code map} tag. */
    MAP("<map", "</map>", Type.PASS),

    /** The {@code menu} tag. */
    MENU("<menu", "</menu>", Type.SKIP_TAG),

    /** The {@code meta} tag. */
    META("<meta", "</meta>", Type.SKIP_TAG),

    /** The {@code noframes} tag. */
    NOFRAMES("<noframes", "</noframes>", Type.PASS),

    /** The {@code noscript} tag. */
    NOSCRIPT("<noscript", "</noscript>", Type.PASS),

    /** The {@code object} tag. */
    OBJECT("<object", "</object>", Type.SKIP_TAG),

    /** The {@code ol} tag. */
    OL("<ol", "</ol>\n", true, Type.PASS, false, "start"),

    /** The {@code optgroup} tag. */
    OPTGROUP("<optgroup", "</optgroup>", Type.PASS),

    /** The {@code option} tag. */
    OPTION("<option", "</option>", Type.SKIP_TAG),

    /** The {@code p} tag. */
    P("<p", "</p>\n", Type.PASS, "align"),

    /** The {@code param} tag. */
    PARAM("<param", "</param>", Type.SKIP_TAG),

    /** The {@code pre} tag. */
    PRE("<pre", "</pre>", Type.PASS),

    /** The {@code q} tag. */
    Q("<q", "</q>", Type.PASS),

    /** The {@code s} tag. */
    S("<s", "</s>", Type.PASS),

    /** The {@code samp} tag. */
    SAMP("<samp", "</samp>", Type.PASS),

    /** The {@code script} tag. */
    SCRIPT("<script", "</script>", Type.SKIP_TAG),

    /** The {@code select} tag. */
    SELECT("<select", "</select>", Type.SKIP_TAG),

    /** The {@code small} tag. */
    SMALL("<small", "</small>", Type.PASS),

    /** The {@code span} tag. */
    SPAN("<span", "</span>", Type.PASS),

    /** The {@code strike} tag. */
    STRIKE("<strike", "</strike>", Type.PASS),

    /** The {@code strong} tag. */
    STRONG("<strong", "</strong>", Type.PASS),

    /** The {@code style} tag. */
    STYLE("<style", "</style>", Type.SKIP_TAG),

    /** The {@code sub} tag. */
    SUB("<sub", "</sub>", Type.PASS),

    /** The {@code sup} tag. */
    SUP("<sup", "</sup>", Type.PASS),

    /** The {@code table} tag. */
    TABLE("<table", "</table>\n", true, Type.PASS, false),

    /** The {@code tbody} tag. */
    TBODY("<tbody", "</tbody>\n", true, Type.PASS, false),

    /** The {@code td} tag. */
    TD("<td", "</td>\n", Type.PASS, "align"),

    /** The {@code textarea} tag. */
    TEXTAREA("<textarea", "</textarea>", Type.SKIP_TAG),

    /** The {@code tfoot} tag. */
    TFOOT("<tfoot", "</tfoot>", Type.PASS),

    /** The {@code th} tag. */
    TH("<th", "</th>\n", Type.PASS, "align"),

    /** The {@code thead} tag. */
    THEAD("<thead", "</thead>\n", true, Type.PASS, false),

    /** The {@code title} tag. */
    TITLE("<title", "</title>", Type.SKIP_TAG),

    /** The {@code tr} tag. */
    TR("<tr", "</tr>\n", true, Type.PASS, false),

    /** The {@code tt} tag. */
    TT("<tt", "</tt>\n", Type.PASS),

    /** The {@code u} tag. */
    U("<u", "</u>", Type.PASS),

    /** The {@code ul} tag. */
    UL("<ul", "</ul>\n", true, Type.PASS, false),

    /** The {@code var} tag. */
    VAR("<var", "</var>\n", Type.PASS);

    /**
     * This enumeration defines the modes to sanitise tags.
     */
    public enum Type {
        /** Tags which are passed through. */
        PASS,
        /**
         * Tags which do not support a body and where the trailing / is
         * optional.
         */
        SELF_CLOSING,
        /** Tags which are thrown out. */
        SKIP_TAG
    }

    /**
     * The constant <code>SECTION</code> contains the list of section tags
     * from {@link #H1} to {@link #H6} in ascending order.
     */
    public static final List<Tag> SECTION = List.of(
        Tag.H1,
        Tag.H2,
        Tag.H3,
        Tag.H4,
        Tag.H5,
        Tag.H6);

    /**
     * The constant <code>ABSOLUTE_HREF</code> matches the prefix of a link
     * target which must not be rebased: a leading slash or a lower-case
     * scheme followed by a colon and two slashes.
     */
    private static final Pattern ABSOLUTE_HREF =
        Pattern.compile("^(/|[a-z]+://)");

    /**
     * The field <code>allowedAttributes</code> contains the names of the
     * attributes which {@link #start(Writer, Map)} lets through. It stays
     * empty for tags of type {@link Type#SKIP_TAG}.
     */
    private final Set<String> allowedAttributes = new HashSet<>();

    /**
     * The field <code>start</code> contains the start text for writing the
     * tag, without the closing angle bracket; {@code null} for pseudo tags.
     */
    private final String start;

    /**
     * The field <code>term</code> contains the end text for writing the tag;
     * {@code null} for pseudo tags.
     */
    private final String term;

    /**
     * The field <code>type</code> contains the sanitising mode of the tag.
     */
    private final Type type;

    /**
     * The field <code>section</code> contains the indicator for H* tags.
     */
    private final boolean section;

    /**
     * The field <code>nlAtStart</code> contains the indicator for a newline
     * written directly after the start tag.
     */
    private final boolean nlAtStart;

    /**
     * This is the constructor for <code>Tag</code>.
     *
     * @param start the start text for writing the tag
     * @param term the end text for writing the tag
     * @param nlAtStart indicator for a newline after the start tag
     * @param type the sanitising mode
     * @param section indicator for the H* tags
     * @param attributes the allowed attributes in addition to the default
     *     attributes <i>id</i>, <i>style</i>, and <i>class</i>; ignored for
     *     tags of type {@link Type#SKIP_TAG}
     */
    Tag(String start, String term, boolean nlAtStart, Type type,
            boolean section, String... attributes) {
        this.start = start;
        this.term = term;
        this.nlAtStart = nlAtStart;
        this.type = type;
        this.section = section;
        if (attributes != null && type != Type.SKIP_TAG) {
            for (String name : attributes) {
                allowedAttributes.add(name);
            }
            allowedAttributes.add("style");
            allowedAttributes.add("class");
            allowedAttributes.add("id");
        }
    }

    /**
     * This is the constructor for <code>Tag</code> without a newline after
     * the start tag and without the section indicator.
     *
     * @param start the start text for writing the tag
     * @param term the end text for writing the tag
     * @param type the sanitising mode
     * @param attributes the additionally allowed attributes
     */
    Tag(String start, String term, Type type, String... attributes) {
        this(start, term, false, type, false, attributes);
    }

    /**
     * Replace each double quote by its character entity so that the value
     * cannot terminate a double-quoted attribute.
     *
     * @param value the raw attribute value; not {@code null}
     * @return the value with all double quotes replaced
     */
    private static String escapeQuotes(String value) {
        return value.replace("\"", "&quot;");
    }

    /**
     * This method writes the end text of the tag to the writer. Nothing is
     * written for tags without an end text.
     *
     * @param writer the writer
     *
     * @throws IOException in case of an I/O error
     */
    public void end(Writer writer) throws IOException {
        if (term != null) {
            writer.write(term);
        }
    }

    /**
     * The method <code>getType</code> is the getter for the sanitising mode.
     *
     * @return the type
     */
    public Type getType() {
        return type;
    }

    /**
     * The method <code>isSection</code> is the getter for the section
     * indicator.
     *
     * @return {@code true} if the tag is a H* tag.
     */
    public boolean isSection() {
        return section;
    }

    /**
     * This method writes the start tag without attributes to the writer.
     *
     * @param writer the writer
     *
     * @throws IOException in case of an I/O error
     */
    public void start(Writer writer) throws IOException {
        start(writer, null);
    }

    /**
     * This method writes the start tag to the writer. Only attributes with
     * an allowed name and a non-null value are written; double quotes in
     * values are replaced by their character entity. Tags of type
     * {@link Type#SKIP_TAG} write nothing.
     *
     * @param writer the writer
     * @param attr the attribute values; may be {@code null}
     * @return <code>true</code> iff the tag has been closed, i.e. its end
     *     text is empty or a single newline
     *
     * @throws IOException in case of an I/O error
     */
    public boolean start(Writer writer, Map<String, String> attr)
            throws IOException {
        if (type == Type.SKIP_TAG) {
            return false;
        }
        writer.write(start);
        if (attr != null) {
            for (Entry<String, String> attribute : attr.entrySet()) {
                var name = attribute.getKey();
                var value = attribute.getValue();
                if (value == null || !allowedAttributes.contains(name)) {
                    continue;
                }
                writer.write(' ');
                writer.write(name);
                writer.write("=\"");
                writer.write(escapeQuotes(value));
                writer.write('"');
            }
        }
        // A tag whose end text is just a newline is closed in place.
        var closedInPlace = "\n".equals(term);
        writer.write(closedInPlace ? " />" : ">");
        if (nlAtStart) {
            writer.write('\n');
        }
        return closedInPlace || "".equals(term);
    }

    /**
     * This method writes the start tag with at most one attribute to the
     * writer. The attribute is filtered like in
     * {@link #start(Writer, Map)}; a {@code null} value omits it.
     *
     * @param writer the writer
     * @param key the key of a single property
     * @param value the value for the key
     *
     * @throws IOException in case of an I/O error
     */
    public void start(Writer writer, String key, String value)
            throws IOException {
        // Map.of() rejects null, whereas start(Writer, Map) skips it.
        var single = new HashMap<String, String>();
        single.put(key, value);
        start(writer, single);
    }

    /**
     * This method writes the filtered start tag to the writer. The base
     * implementation does not use <code>base</code>.
     *
     * @param writer the writer
     * @param base the base URL
     * @param attributes the attributes
     * @return {@code true} iff the tag has been written and has not been
     *     closed; {@code false} for skipped tags and for tags closed in
     *     place
     *
     * @throws IOException in case of an I/O error
     */
    public boolean startSave(Writer writer, String base,
            Map<String, String> attributes) throws IOException {
        if (getType() == Type.SKIP_TAG) {
            return false;
        }
        return !start(writer, attributes);
    }

    /**
     * This method writes the complete tag, start and end text, to the
     * writer. Attribute names are not filtered. Attributes with a
     * {@code null} name or value are omitted and double quotes in values are
     * replaced by their character entity. Pseudo tags without start or end
     * text write nothing.
     *
     * @param writer the writer
     * @param attributes the attributes; may be {@code null}
     *
     * @throws IOException in case of an I/O error
     */
    public void write(Writer writer, Map<String, String> attributes)
            throws IOException {
        if (start == null || term == null) {
            return;
        }
        writer.write(start);
        if (attributes != null) {
            for (Entry<String, String> attribute : attributes.entrySet()) {
                var name = attribute.getKey();
                var value = attribute.getValue();
                if (name == null || value == null) {
                    continue;
                }
                writer.write(' ');
                writer.write(name);
                writer.write("=\"");
                writer.write(escapeQuotes(value));
                writer.write('"');
            }
        }
        writer.write("\n".equals(term) ? " />" : ">");
        writer.write(term);
    }

    /**
     * This method writes the complete tag to the writer unless the tag is of
     * type {@link Type#SKIP_TAG}.
     *
     * @param writer the writer
     * @param attributes the attributes
     *
     * @throws IOException in case of an I/O error
     */
    public void writeSave(Writer writer, Map<String, String> attributes)
            throws IOException {
        if (type != Type.SKIP_TAG) {
            write(writer, attributes);
        }
    }
}