Tag.java

/*
 * Copyright © 2014-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */

package org.ctan.markup;

import java.io.IOException;
import java.io.Writer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import lombok.Getter;

/**
 * This enumeration represents HTML tags.
 */
public enum Tag {

    /**
     * The <code>_SKIP</code> pseudo tag for everything to be skipped.
     * It has neither a start text nor an end text and is of type
     * {@link Type#SKIP_TAG}.
     */
    _SKIP(null, null, Type.SKIP_TAG),
    /**
     * The <code>_DOCTYPE</code> pseudo tag. It has neither a start text nor
     * an end text and is of type {@link Type#SKIP_TAG}.
     */
    _DOCTYPE(null, null, Type.SKIP_TAG),
    /**
     * The constant <code>A</code> represents the a tag. The attributes
     * <i>name</i>, <i>href</i>, and <i>title</i> are allowed in addition to
     * the default attributes.
     */
    A("<a", "</a>", Type.PASS, "name", "href", "title") {

        /**
         * Prefixes a relative <i>href</i> with the base URL and then writes
         * the start tag like the general implementation does.
         * <p>
         * A <i>href</i> is left untouched if it starts with a slash or with
         * a scheme followed by <code>://</code>. Otherwise the value stored
         * in the attribute map is replaced by <code>base + href</code>; thus
         * the map passed in has to be modifiable in this case.
         * </p>
         *
         * @param writer the writer
         * @param base the base URL or {@code null} for none
         * @param attributes the attributes or {@code null} for none
         * @return the result of the general implementation
         *
         * @throws IOException in case of an I/O error
         *
         * @see Tag#startSave(Writer, String, Map)
         */
        @Override
        public boolean startSave(Writer writer, String base,
            Map<String, String> attributes)
            throws IOException {

            var href = attributes == null ? null : attributes.get("href");
            // String.matches() succeeds only if the WHOLE value matches.
            // The trailing ".*" is required to accept the rest of the URL;
            // without it every absolute URL was treated as relative.
            // NOTE(review): values like "mailto:..." or "#fragment" are
            // still prefixed with the base -- confirm that this is intended.
            if (href != null && base != null
                && !href.matches("(?s)^(/|[a-zA-Z][a-zA-Z0-9+.-]*://).*")) {
                attributes.put("href", base + href);
            }
            return super.startSave(writer, base, attributes);
        }

    },
    /**
     * The constant <code>ABBR</code> represents the abbr tag.
     */
    ABBR("<abbr", "</abbr>", Type.PASS),
    /**
     * The constant <code>ACRONYM</code> represents the acronym tag.
     */
    ACRONYM("<acronym", "</acronym>", Type.PASS),
    /**
     * The constant <code>ADDRESS</code> represents the address tag.
     */
    ADDRESS("<address", "</address>", Type.PASS),
    /**
     * The constant <code>APPLET</code> represents the applet tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    APPLET("<applet", "</applet>", Type.SKIP_TAG),
    /**
     * The constant <code>AREA</code> represents the area tag.
     * NOTE(review): HTML defines area as a void element; confirm that an
     * end text with a closing tag is intended here.
     */
    AREA("<area", "</area>", Type.PASS),
    /**
     * The constant <code>B</code> represents the b tag.
     */
    B("<b", "</b>", Type.PASS),
    /**
     * The constant <code>B_I</code> represents the b-i tag combination.
     * Since attributes are appended to the start text they end up on the
     * inner i tag.
     */
    B_I("<b><i", "</i></b>", Type.PASS),
    /**
     * The constant <code>BASE</code> represents the base tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    BASE("<base", "</base>", Type.SKIP_TAG),
    /**
     * The constant <code>BASEFONT</code> represents the basefont tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    BASEFONT("<basefont", "</basefont>", Type.SKIP_TAG),
    /**
     * The constant <code>BDO</code> represents the bdo tag.
     */
    BDO("<bdo", "</bdo>", Type.PASS),
    /**
     * The constant <code>BIG</code> represents the big tag.
     */
    BIG("<big", "</big>", Type.PASS),
    /**
     * The constant <code>BLOCKQUOTE</code> represents the blockquote tag.
     * A newline is written after the start tag and the end text carries a
     * trailing newline.
     */
    BLOCKQUOTE("<blockquote", "</blockquote>\n", true, Type.PASS, false),
    /**
     * The constant <code>BODY</code> represents the body tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    BODY("<body", "</body>", Type.SKIP_TAG),
    /**
     * The constant <code>BR</code> represents the br tag. The end text is
     * empty; thus the tag is reported as closed when the start is written.
     */
    BR("<br", "", Type.SELF_CLOSING),
    /**
     * The constant <code>BUTTON</code> represents the button tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    BUTTON("<button", "</button>", Type.SKIP_TAG),
    /**
     * The constant <code>CAPTION</code> represents the caption tag.
     */
    CAPTION("<caption", "</caption>", Type.PASS),
    /**
     * The constant <code>CENTER</code> represents the center tag.
     */
    CENTER("<center", "</center>", Type.PASS),
    /**
     * The constant <code>CITE</code> represents the cite tag.
     */
    CITE("<cite", "</cite>", Type.PASS),
    /**
     * The constant <code>CODE</code> represents the code tag.
     */
    CODE("<code", "</code>", Type.PASS),
    /**
     * The constant <code>COL</code> represents the col tag.
     * NOTE(review): HTML defines col as a void element; confirm that an
     * end text with a closing tag is intended here.
     */
    COL("<col", "</col>", Type.PASS),
    /**
     * The constant <code>COLGROUP</code> represents the colgroup tag.
     */
    COLGROUP("<colgroup", "</colgroup>", Type.PASS),
    /**
     * The constant <code>DD</code> represents the dd tag.
     */
    DD("<dd", "</dd>", Type.PASS),
    /**
     * The constant <code>DEL</code> represents the del tag.
     */
    DEL("<del", "</del>", Type.PASS),
    /**
     * The constant <code>DFN</code> represents the dfn tag.
     */
    DFN("<dfn", "</dfn>", Type.PASS),
    /**
     * The constant <code>DIR</code> represents the dir tag.
     */
    DIR("<dir", "</dir>", Type.PASS),
    /**
     * The constant <code>DIV</code> represents the div tag. The attribute
     * <i>align</i> is allowed in addition to the default attributes.
     */
    DIV("<div", "</div>", Type.PASS, "align"),
    /**
     * The constant <code>DL</code> represents the dl tag.
     */
    DL("<dl", "</dl>", Type.PASS),
    /**
     * The constant <code>DT</code> represents the dt tag.
     */
    DT("<dt", "</dt>", Type.PASS),
    /**
     * The constant <code>EM</code> represents the em tag.
     */
    EM("<em", "</em>", Type.PASS),
    /**
     * The constant <code>FIELDSET</code> represents the fieldset tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    FIELDSET("<fieldset", "</fieldset>", Type.SKIP_TAG),
    /**
     * The constant <code>FONT</code> represents the font tag. The attributes
     * <i>color</i>, <i>face</i>, and <i>size</i> are allowed in addition to
     * the default attributes.
     */
    FONT("<font", "</font>", Type.PASS, "color", "face", "size"),
    /**
     * The constant <code>FORM</code> represents the form tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    FORM("<form", "</form>", Type.SKIP_TAG),
    /**
     * The constant <code>FRAME</code> represents the frame tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    FRAME("<frame", "</frame>", Type.SKIP_TAG),
    /**
     * The constant <code>FRAMESET</code> represents the frameset tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    FRAMESET("<frameset", "</frameset>", Type.SKIP_TAG),
    /**
     * The constant <code>H1</code> represents the h1 tag. The section
     * indicator is set and the end text carries a trailing newline.
     */
    H1("<h1", "</h1>\n", false, Type.PASS, true),
    /**
     * The constant <code>H2</code> represents the h2 tag. The section
     * indicator is set and the end text carries a trailing newline.
     */
    H2("<h2", "</h2>\n", false, Type.PASS, true),
    /**
     * The constant <code>H3</code> represents the h3 tag. The section
     * indicator is set and the end text carries a trailing newline.
     */
    H3("<h3", "</h3>\n", false, Type.PASS, true),
    /**
     * The constant <code>H4</code> represents the h4 tag. The section
     * indicator is set and the end text carries a trailing newline.
     */
    H4("<h4", "</h4>\n", false, Type.PASS, true),
    /**
     * The constant <code>H5</code> represents the h5 tag. The section
     * indicator is set and the end text carries a trailing newline.
     */
    H5("<h5", "</h5>\n", false, Type.PASS, true),
    /**
     * The constant <code>H6</code> represents the h6 tag. The section
     * indicator is set and the end text carries a trailing newline.
     */
    H6("<h6", "</h6>\n", false, Type.PASS, true),
    /**
     * The constant <code>HEAD</code> represents the head tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    HEAD("<head", "</head>\n", Type.SKIP_TAG),
    /**
     * The constant <code>HR</code> represents the hr tag. The end text
     * consists of a single newline; this makes the start tag being
     * terminated with <code> /&gt;</code> and reported as closed.
     */
    HR("<hr", "\n", Type.SELF_CLOSING),
    /**
     * The constant <code>HTML</code> represents the html tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    HTML("<html", "</html>\n", Type.SKIP_TAG),
    /**
     * The constant <code>I</code> represents the i tag.
     */
    I("<i", "</i>", Type.PASS),
    /**
     * The constant <code>IFRAME</code> represents the iframe tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    IFRAME("<iframe", "</iframe>", Type.SKIP_TAG),
    /**
     * The constant <code>IMG</code> represents the img tag. The end text is
     * empty; thus the tag is reported as closed when the start is written.
     * The attributes <i>src</i>, <i>width</i>, <i>height</i>, <i>alt</i>,
     * and <i>title</i> are allowed in addition to the default attributes.
     */
    IMG("<img", "", Type.SELF_CLOSING,
        "src", "width", "height", "alt", "title"),
    /**
     * The constant <code>INPUT</code> represents the input tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    INPUT("<input", "</input>", Type.SKIP_TAG),
    /**
     * The constant <code>INS</code> represents the ins tag.
     */
    INS("<ins", "</ins>", Type.PASS),
    /**
     * The constant <code>ISINDEX</code> represents the isindex tag.
     */
    ISINDEX("<isindex", "</isindex>", Type.PASS),
    /**
     * The constant <code>KBD</code> represents the kbd tag.
     */
    KBD("<kbd", "</kbd>", Type.PASS),
    /**
     * The constant <code>LABEL</code> represents the label tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    LABEL("<label", "</label>", Type.SKIP_TAG),
    /**
     * The constant <code>LEGEND</code> represents the legend tag.
     */
    LEGEND("<legend", "</legend>", Type.PASS),
    /**
     * The constant <code>LI</code> represents the li tag. The end text
     * carries a trailing newline.
     */
    LI("<li", "</li>\n", Type.PASS),
    /**
     * The constant <code>LINK</code> represents the link tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    LINK("<link", "</link>", Type.SKIP_TAG),
    /**
     * The constant <code>MAP</code> represents the map tag.
     */
    MAP("<map", "</map>", Type.PASS),
    /**
     * The constant <code>MENU</code> represents the menu tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    MENU("<menu", "</menu>", Type.SKIP_TAG),
    /**
     * The constant <code>META</code> represents the meta tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    META("<meta", "</meta>", Type.SKIP_TAG),
    /**
     * The constant <code>NOFRAMES</code> represents the noframes tag.
     */
    NOFRAMES("<noframes", "</noframes>", Type.PASS),
    /**
     * The constant <code>NOSCRIPT</code> represents the noscript tag.
     */
    NOSCRIPT("<noscript", "</noscript>", Type.PASS),
    /**
     * The constant <code>OBJECT</code> represents the object tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    OBJECT("<object", "</object>", Type.SKIP_TAG),
    /**
     * The constant <code>OL</code> represents the ol tag. A newline is
     * written after the start tag and the end text carries a trailing
     * newline. The attribute <i>start</i> is allowed in addition to the
     * default attributes.
     */
    OL("<ol", "</ol>\n", true, Type.PASS, false, "start"),
    /**
     * The constant <code>OPTGROUP</code> represents the optgroup tag.
     */
    OPTGROUP("<optgroup", "</optgroup>", Type.PASS),
    /**
     * The constant <code>OPTION</code> represents the option tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    OPTION("<option", "</option>", Type.SKIP_TAG),
    /**
     * The constant <code>P</code> represents the p tag. The end text
     * carries a trailing newline. The attribute <i>align</i> is allowed in
     * addition to the default attributes.
     */
    P("<p", "</p>\n", Type.PASS, "align"),
    /**
     * The constant <code>PARAM</code> represents the param tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    PARAM("<param", "</param>", Type.SKIP_TAG),
    /**
     * The constant <code>PRE</code> represents the pre tag.
     */
    PRE("<pre", "</pre>", Type.PASS),
    /**
     * The constant <code>Q</code> represents the q tag.
     */
    Q("<q", "</q>", Type.PASS),
    /**
     * The constant <code>S</code> represents the s tag.
     */
    S("<s", "</s>", Type.PASS),
    /**
     * The constant <code>SAMP</code> represents the samp tag.
     */
    SAMP("<samp", "</samp>", Type.PASS),
    /**
     * The constant <code>SCRIPT</code> represents the script tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    SCRIPT("<script", "</script>", Type.SKIP_TAG),
    /**
     * The constant <code>SELECT</code> represents the select tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    SELECT("<select", "</select>", Type.SKIP_TAG),
    /**
     * The constant <code>SMALL</code> represents the small tag.
     */
    SMALL("<small", "</small>", Type.PASS),
    /**
     * The constant <code>SPAN</code> represents the span tag.
     */
    SPAN("<span", "</span>", Type.PASS),
    /**
     * The constant <code>STRIKE</code> represents the strike tag.
     */
    STRIKE("<strike", "</strike>", Type.PASS),
    /**
     * The constant <code>STRONG</code> represents the strong tag.
     */
    STRONG("<strong", "</strong>", Type.PASS),
    /**
     * The constant <code>STYLE</code> represents the style tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    STYLE("<style", "</style>", Type.SKIP_TAG),
    /**
     * The constant <code>SUB</code> represents the sub tag.
     */
    SUB("<sub", "</sub>", Type.PASS),
    /**
     * The constant <code>SUP</code> represents the sup tag.
     */
    SUP("<sup", "</sup>", Type.PASS),
    /**
     * The constant <code>TABLE</code> represents the table tag. A newline
     * is written after the start tag and the end text carries a trailing
     * newline.
     */
    TABLE("<table", "</table>\n", true, Type.PASS, false),
    /**
     * The constant <code>TBODY</code> represents the tbody tag. A newline
     * is written after the start tag and the end text carries a trailing
     * newline.
     */
    TBODY("<tbody", "</tbody>\n", true, Type.PASS, false),
    /**
     * The constant <code>TD</code> represents the td tag. The end text
     * carries a trailing newline. The attribute <i>align</i> is allowed in
     * addition to the default attributes.
     */
    TD("<td", "</td>\n", Type.PASS, "align"),
    /**
     * The constant <code>TEXTAREA</code> represents the textarea tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    TEXTAREA("<textarea", "</textarea>", Type.SKIP_TAG),
    /**
     * The constant <code>TFOOT</code> represents the tfoot tag.
     */
    TFOOT("<tfoot", "</tfoot>", Type.PASS),
    /**
     * The constant <code>TH</code> represents the th tag. The end text
     * carries a trailing newline. The attribute <i>align</i> is allowed in
     * addition to the default attributes.
     */
    TH("<th", "</th>\n", Type.PASS, "align"),
    /**
     * The constant <code>THEAD</code> represents the thead tag. A newline
     * is written after the start tag and the end text carries a trailing
     * newline.
     */
    THEAD("<thead", "</thead>\n", true, Type.PASS, false),
    /**
     * The constant <code>TITLE</code> represents the title tag.
     * It is of type {@link Type#SKIP_TAG}.
     */
    TITLE("<title", "</title>", Type.SKIP_TAG),
    /**
     * The constant <code>TR</code> represents the tr tag. A newline is
     * written after the start tag and the end text carries a trailing
     * newline.
     */
    TR("<tr", "</tr>\n", true, Type.PASS, false),
    /**
     * The constant <code>TT</code> represents the tt tag.
     * NOTE(review): the end text carries a trailing newline although the
     * other inline tags (e.g. <code>CODE</code>) have none -- confirm that
     * this is intended.
     */
    TT("<tt", "</tt>\n", Type.PASS),
    /**
     * The constant <code>U</code> represents the u tag.
     */
    U("<u", "</u>", Type.PASS),
    /**
     * The constant <code>UL</code> represents the ul tag. A newline is
     * written after the start tag and the end text carries a trailing
     * newline.
     */
    UL("<ul", "</ul>\n", true, Type.PASS, false),
    /**
     * The constant <code>VAR</code> represents the var tag.
     * NOTE(review): the end text carries a trailing newline although the
     * other inline tags (e.g. <code>CODE</code>) have none -- confirm that
     * this is intended.
     */
    VAR("<var", "</var>\n", Type.PASS);

    /**
     * This enumeration defines the modes to sanitise tags. Each
     * {@link Tag} carries exactly one of these modes.
     */
    public enum Type {
        /**
         * The constant <code>PASS</code> contains the type for tags to be
         * passed through.
         */
        PASS,
        /**
         * The constant <code>SELF_CLOSING</code> contains the type for tags
         * which do not support a body and where the trailing / is optional.
         * Within the enclosing enumeration the closing behaviour is derived
         * from the end text of the tag and not from this type.
         */
        SELF_CLOSING,
        /**
         * The constant <code>SKIP_TAG</code> contains the type for tags which
         * are thrown out. Tags of this type get no allowed attributes and
         * the save variants of the writing methods produce no output for
         * them.
         */
        SKIP_TAG
    }

    /**
     * The constant <code>SECTION</code> contains the heading tags
     * <code>H1</code> to <code>H6</code> as an unmodifiable list, ordered
     * from <code>H1</code> to <code>H6</code>.
     */
    public static final List<Tag> SECTION =
        List.of(H1, H2, H3, H4, H5, H6);

    /**
     * The field <code>allowedAttributes</code> contains the allowed attributes
     * for save tags. The map is used like a set: an attribute name is
     * allowed iff it is mapped to a non-null value. It is filled by the
     * constructor only.
     */
    private final Map<String, Object> allowedAttributes = new HashMap<>();

    /**
     * The field <code>start</code> contains the start text for writing the
     * tag, i.e. the opening angle bracket and the tag name without the
     * closing angle bracket. It is {@code null} for the pseudo tags.
     */
    private final String start;

    /**
     * The field <code>term</code> contains the end text for writing the tag.
     * It is {@code null} for the pseudo tags. An empty string or a single
     * newline marks a tag which is closed already when its start is written.
     */
    private final String term;

    /**
     * The field <code>type</code> contains the indicator for save tags.
     */
    @Getter
    private final Type type;

    /**
     * The field <code>section</code> contains the indicator for the H* tags.
     */
    private final boolean section;

    /**
     * The field <code>nlAtStart</code> contains the indicator for a newline at
     * the start, i.e. directly after the start tag has been written.
     */
    private final boolean nlAtStart;

    /**
     * This is the constructor for <code>Tag</code> taking all properties.
     * <p>
     * Unless the type is {@link Type#SKIP_TAG} or the attributes are
     * {@code null}, the given attribute names and the default attributes
     * <i>id</i>, <i>style</i>, and <i>class</i> are registered as allowed.
     * </p>
     *
     * @param start the start text for writing the tag
     * @param term the end text for writing the tag
     * @param nlAtStart the indicator that a newline is written after the
     *     start tag
     * @param type the indicator for type tags
     * @param section indicator for the H* tags
     * @param attributes the allowed attributes for save tags in addition to the
     *     default attributes <i>id</i>, <i>style</i>, and <i>class</i>
     */
    Tag(String start, String term, boolean nlAtStart, Type type,
        boolean section,
        String... attributes) {

        this.start = start;
        this.term = term;
        this.type = type;
        this.section = section;
        this.nlAtStart = nlAtStart;

        // skipped tags never get any allowed attribute
        if (attributes == null || type == Type.SKIP_TAG) {
            return;
        }
        for (var name : attributes) {
            allowedAttributes.put(name, name);
        }
        for (var name : List.of("style", "class", "id")) {
            allowedAttributes.put(name, name);
        }
    }

    /**
     * This is the convenience constructor for <code>Tag</code>. It
     * delegates to the full constructor with the newline-at-start indicator
     * and the section indicator both switched off.
     *
     * @param start the start text for writing the tag
     * @param term the end text for writing the tag
     * @param type the type
     * @param attributes the allowed attributes in addition to the default
     *     attributes
     */
    Tag(String start, String term, Type type, String... attributes) {

        this(start, term, false, type, false, attributes);
    }

    /**
     * This method writes the end text of the tag to the writer. Nothing is
     * written for a tag without an end text.
     *
     * @param writer the writer
     *
     * @throws IOException in case of an I/O error
     */
    public void end(Writer writer) throws IOException {

        if (term == null) {
            return;
        }
        writer.write(term);
    }

    /**
     * The method <code>isSection</code> is the getter for the section
     * indicator. The indicator is passed to the constructor; among the
     * constants of this enumeration it is set for <code>H1</code> to
     * <code>H6</code> only.
     *
     * @return {@code true} if the tag is a H* tag.
     */
    public boolean isSection() {

        return section;
    }

    /**
     * This method writes the start tag without any attributes to the writer.
     * It delegates to {@link #start(Writer, Map)} with {@code null}
     * attributes and discards the result.
     *
     * @param writer the writer
     *
     * @throws IOException in case of an I/O error
     */
    public void start(Writer writer) throws IOException {

        start(writer, null);
    }

    /**
     * This method writes the start tag together with its allowed attributes
     * to the writer.
     * <p>
     * Nothing is written for a tag of type {@link Type#SKIP_TAG}. Attributes
     * with a {@code null} value and attributes which are not allowed for
     * this tag are left out. Double quotes in a value are written as
     * <code>&amp;quot;</code>. A tag whose end text is a single newline is
     * terminated with <code> /&gt;</code>.
     * </p>
     *
     * @param writer the writer
     * @param attr the attribute values or {@code null} for none
     * @return <code>true</code> iff the tag has been closed, i.e. its end
     *     text is empty or a single newline; <code>false</code> for a
     *     skipped tag
     *
     * @throws IOException in case of an I/O error
     */
    public boolean start(Writer writer, Map<String, String> attr)
        throws IOException {

        if (type == Type.SKIP_TAG) {
            return false;
        }
        final boolean slashClosed = "\n".equals(term);

        writer.write(start);
        if (attr != null) {
            for (Entry<String, String> entry : attr.entrySet()) {
                var raw = entry.getValue();
                if (raw == null
                    || allowedAttributes.get(entry.getKey()) == null) {
                    continue;
                }
                writer.write(' ');
                writer.write(entry.getKey());
                writer.write("=\"");
                writer.write(raw.replace("\"", "&quot;"));
                writer.write('"');
            }
        }
        writer.write(slashClosed ? " />" : ">");
        if (nlAtStart) {
            writer.write('\n');
        }
        return slashClosed || "".equals(term);
    }

    /**
     * This method writes the start tag with at most one attribute to the
     * writer. If the key or the value is {@code null} then the start tag is
     * written without attributes. This mirrors the map based variant which
     * leaves out attributes without a value.
     *
     * @param writer the writer
     * @param key the key of a single property
     * @param value the value for the key
     *
     * @throws IOException in case of an I/O error
     */
    public void start(Writer writer, String key, String value)
        throws IOException {

        // Map.of() rejects null keys and values with a NullPointerException
        if (key == null || value == null) {
            start(writer);
            return;
        }
        start(writer, Map.of(key, value));
    }

    /**
     * This method writes the start tag to the writer unless the tag is of
     * type {@link Type#SKIP_TAG}. The base URL is not used by this general
     * implementation.
     *
     * @param writer the writer
     * @param base the base URL
     * @param attributes the attributes
     * @return {@code true} iff the start tag has been written and the tag
     *     has not been closed already; {@code false} for a skipped tag and
     *     for a tag which is closed when its start is written
     *
     * @throws IOException in case of an I/O error
     */
    public boolean startSave(Writer writer, String base,
        Map<String, String> attributes)
        throws IOException {

        return getType() != Type.SKIP_TAG && !start(writer, attributes);
    }

    /**
     * This method writes the complete tag, i.e. the start tag with the
     * attributes immediately followed by the end text, to the writer.
     * <p>
     * The attributes are not filtered against the allowed attributes.
     * Attributes with a {@code null} key or value are left out and double
     * quotes in a value are written as <code>&amp;quot;</code> to keep the
     * value inside its quotes. Nothing is written for a tag without a start
     * text. A tag whose end text is a single newline is terminated with
     * <code> /&gt;</code>.
     * </p>
     *
     * @param writer the writer
     * @param attributes the attributes or {@code null} for none
     *
     * @throws IOException in case of an I/O error
     */
    public void write(Writer writer, Map<String, String> attributes)
        throws IOException {

        if (start == null) {
            // pseudo tags have nothing to write
            return;
        }
        writer.write(start);
        if (attributes != null) {
            for (Entry<String, String> a : attributes.entrySet()) {
                var key = a.getKey();
                var value = a.getValue();
                if (key == null || value == null) {
                    continue;
                }
                writer.write(' ');
                writer.write(key);
                writer.write("=\"");
                // same escaping as in start(Writer, Map)
                writer.write(value.replace("\"", "&quot;"));
                writer.write('"');
            }
        }
        writer.write("\n".equals(term) ? " />" : ">");
        if (term != null) {
            writer.write(term);
        }
    }

    /**
     * This method writes the complete tag to the writer unless the tag is of
     * type {@link Type#SKIP_TAG}. Otherwise it delegates to
     * {@link #write(Writer, Map)}.
     * NOTE(review): in contrast to {@link #start(Writer, Map)} the attributes
     * are not restricted to the allowed attributes here -- confirm that this
     * is intended.
     *
     * @param writer the writer
     * @param attributes the attributes
     *
     * @throws IOException in case of an I/O error
     */
    public void writeSave(Writer writer, Map<String, String> attributes)
        throws IOException {

        if (type == Type.SKIP_TAG) {
            return;
        }
        write(writer, attributes);
    }

}