MiniTexParser.java

/*
 * Copyright © 2016-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package minitex;

import java.io.EOFException;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * This class provides a reader for a <span class="logo">T<span style=
 * "text-transform:uppercase;font-size:90%;vertical-align:-0.4ex;
 * margin-left:-0.2em;margin-right:-0.1em;line-height: 0;" >e</span>X</span>
 * style configuration file.
 *
 * <p>
 * The parser reads from the input reader and extracts macro invocations and
 * environments on the top level. The following restrictions apply:
 * </p>
 * <ol>
 * <li>Macros must have a single argument. An exception is the macro
 * <code>\endinput</code> which terminates the reading immediately.</li>
 * <li>Environments may not have any argument. Arguments of environments are
 * digested as part of their body.</li>
 * <li>Comments are honored, i.e. discarded until end of line.</li>
 * </ol>
 *
 * <p>
 * The arguments of macros and the bodies of environments are stored in a map.
 * The key is the name of the macro or environment. Macros and environments
 * are treated identically. Thus the following instructions are the same:
 * </p>
 * <pre>
 * \abc{123}
 * </pre> and <pre>
 * \begin{abc}123\end{abc}
 * </pre>
 *
 * <p>
 * Macros and environments may contain <span class="logo">T<span style=
 * "text-transform:uppercase;font-size:90%;vertical-align:-0.4ex;
 * margin-left:-0.2em;margin-right:-0.1em;line-height: 0;" >e</span>X</span>
 * code. This is not interpreted. It is taken literally as the value.
 * Nevertheless the blocks and environments need to be balanced.
 * </p>
 *
 * <p>
 * The same macro and environment name can be used several times. The values for
 * the same key are stored in a list. The order of the elements in this list is
 * the order in the input.
 * </p>
 *
 * @author <a href="gene@ctan.org">Gerd Neugebauer</a>
 */
public class MiniTexParser {

    /**
     * The field <code>reader</code> contains the reader. The reader passed to
     * the constructor is wrapped in a {@link PushbackReader} such that a
     * character read ahead can be pushed back.
     */
    private final PushbackReader reader;

    /**
     * The field <code>defs</code> contains the mapping gathered. The key is
     * the name of a macro or environment; the value is the list of arguments
     * or bodies in the order in which they have been added.
     */
    private final Map<String, List<String>> defs =
        new HashMap<String, List<String>>();

    /**
     * This is the constructor for <code>MiniTeXParser</code>.
     *
     * @param reader the reader to take the input from
     */
    public MiniTexParser(Reader reader) {

        this.reader = new PushbackReader(reader);
    }

    /**
     * This method adds a value to the definitions map. The value is appended
     * to the list stored for the key. The list is created when the key is
     * seen for the first time.
     *
     * @param key the key
     * @param value the value
     */
    public void add(String key, String value) {

        List<String> vals = defs.get(key);
        if (vals == null) {
            vals = new ArrayList<String>();
            defs.put(key, vals);
        }
        vals.add(value);
    }

    /**
     * This method reads the input and stores the top-level macros and
     * environments in a map. The map returned is the internal map of this
     * parser and not a copy.
     *
     * @return the macros and environments found
     *
     * @throws IOException in case of an error
     */
    @SuppressFBWarnings(value = "EI_EXPOSE_REP")
    public Map<String, List<String>> parse() throws IOException {

        // The success indicator is ignored: reaching the end of the input is
        // the regular way to finish on the top level.
        slurp(0, null, null);
        return defs;
    }

    /**
     * This method parses a block with balanced braces. If the next character
     * is not an opening brace then this single character is the result.
     * Otherwise the result is the text up to the matching closing brace. The
     * enclosing braces are not part of the result.
     *
     * @return the contents found
     *
     * @throws EOFException in case that the input is exhausted before the
     *         block starts
     * @throws IOException in case of an error; this includes the case that the
     *         input ends before the closing brace has been found
     */
    private String parseBlock() throws IOException {

        int c = reader.read();
        if (c < 0) {
            throw new EOFException();
        }
        if (c != '{') {
            return Character.toString((char) c);
        }
        StringBuilder buffer = new StringBuilder();
        // Level 1: the opening brace has just been consumed.
        if (!slurp(1, buffer, null)) {
            throw new UnbalancedBracesException();
        }
        return buffer.toString();
    }

    /**
     * This method parses the input for the content of an environment. The
     * beginning of the environment has already been digested.
     *
     * @param env the name of the environment
     *
     * @return the contents of the environment
     *
     * @throws EOFException in case that the input ends before the end of the
     *         environment has been found
     * @throws IOException in case of an error
     */
    private String parseEnv(String env) throws IOException {

        Stack<String> stack = new Stack<String>();
        StringBuilder buffer = new StringBuilder();
        stack.push(env);
        if (!slurp(0, buffer, stack)) {
            throw new EOFException();
        }
        return buffer.toString();
    }

    /**
     * This method consumes a comment up to and including the first line
     * terminator character. The leading percent sign has already been read.
     * If a buffer is given then the comment, including the percent sign and
     * the terminating character, is copied to it. Otherwise the comment is
     * discarded.
     *
     * @param buffer the output buffer or {@code null} for none
     *
     * @throws IOException in case of an error
     */
    private void readComment(StringBuilder buffer) throws IOException {

        if (buffer != null) {
            buffer.append('%');
        }
        int c = reader.read();
        while (c >= 0 && c != '\r' && c != '\n') {
            if (buffer != null) {
                buffer.append((char) c);
            }
            c = reader.read();
        }
        // At the end of the input c is -1. It must not be stored since the
        // cast would turn it into the character \uffff.
        if (c >= 0 && buffer != null) {
            buffer.append((char) c);
        }
    }

    /**
     * This method reads the name of a control sequence. The backslash has
     * already been read. If the first character is a letter then all following
     * letters belong to the name and the white-space after the name is
     * skipped. Otherwise the name consists of the single character read.
     *
     * @return the name without the backslash
     *
     * @throws EOFException in case that the input ends after the backslash
     * @throws IOException in case of an error
     */
    private String readControlSequence() throws IOException {

        int c = reader.read();
        if (c < 0) {
            throw new EOFException();
        }
        StringBuilder name = new StringBuilder();
        name.append((char) c);
        if (Character.isLetter(c)) {
            for (c = reader.read(); c >= 0
                && Character.isLetter(c); c = reader.read()) {
                name.append((char) c);
            }
            while (c >= 0 && Character.isWhitespace(c)) {
                c = reader.read();
            }
            // The first character after the name and the white-space belongs
            // to whatever follows; thus it is given back.
            if (c >= 0) {
                reader.unread(c);
            }
        }
        return name.toString();
    }

    /**
     * This method reads and analyses the input. It is used in three ways:
     * <ul>
     * <li>On the top level (level 0, no buffer, no stack) macros and
     * environments found outside of any brace group are stored with
     * {@link #add(String, String)}.</li>
     * <li>For a block (level 1, buffer, no stack) the text up to the matching
     * closing brace is collected in the buffer.</li>
     * <li>For an environment (level 0, buffer, stack) the text up to the
     * <code>\end</code> which empties the stack is collected in the
     * buffer.</li>
     * </ul>
     *
     * @param level the number of brace groups which are open when the method
     *        is invoked; only if this value is positive the method terminates
     *        at the matching closing brace
     * @param buffer the output buffer or {@code null} for none
     * @param stack the stack or {@code null} for none
     *
     * @return {@code true} if reading has been terminated by the matching
     *         closing brace, by the <code>\end</code> which empties the stack,
     *         or by <code>\endinput</code>; {@code false} if the end of the
     *         input has been reached
     *
     * @throws IOException in case of an error
     */
    private boolean slurp(int level, StringBuilder buffer, Stack<String> stack)
        throws IOException {

        // Only an invocation which starts inside an open brace group may be
        // terminated by a closing brace. Otherwise a balanced group in the
        // body of an environment or on the top level would end the reading
        // prematurely.
        final boolean braced = level > 0;

        for (int c = reader.read(); c >= 0; c = reader.read()) {
            switch (c) {
                case '%':
                    readComment(buffer);
                    break;
                case '{':
                    level++;
                    if (buffer != null) {
                        buffer.append((char) c);
                    }
                    break;
                case '}':
                    level--;
                    if (level < 0) {
                        // a closing brace without an opening one
                        throw new UnbalancedBracesException();
                    }
                    if (braced && level == 0) {
                        return true;
                    }
                    if (buffer != null) {
                        buffer.append((char) c);
                    }
                    break;
                case '\\':
                    String m = readControlSequence();
                    if (m.equals("begin")) {
                        m = parseBlock();
                        if (stack != null) {
                            stack.push(m);
                        }
                        if (buffer != null) {
                            buffer.append("\\begin{");
                            buffer.append(m);
                            buffer.append('}');
                        } else if (stack == null && level == 0) {
                            add(m, parseEnv(m));
                        }
                    } else if (m.equals("end")) {
                        m = parseBlock();
                        if (stack != null) {
                            String s = stack.pop();
                            // NOTE(review): the names are compared for the
                            // outermost environment only; the names of inner
                            // environments are popped without a check.
                            if (stack.empty()) {
                                if (!s.equals(m)) {
                                    throw new IOException("expecting \\end{"
                                        + s + "} instead of \\end{"
                                        + m
                                        + "}");
                                }
                                return true;
                            }
                        }
                        if (buffer != null) {
                            buffer.append("\\end{");
                            buffer.append(m);
                            buffer.append('}');
                        } else if (stack == null) {
                            // NOTE(review): this is also reached for an
                            // \end inside of a top-level brace group where
                            // the \begin has been skipped -- confirm that
                            // this is intended.
                            throw new EnvironmentStackUnderflowException();
                        }
                    } else if (m.equals("endinput")) {
                        if (level > 0) {
                            throw new UnbalancedBracesException();
                        }
                        // NOTE(review): inside of an environment this ends
                        // the body as if it had been closed and the caller
                        // continues to read -- confirm that this is intended.
                        return true;
                    } else {
                        if (buffer != null) {
                            buffer.append('\\');
                            buffer.append(m);
                        } else if (stack == null && level == 0) {
                            add(m, parseBlock());
                        }
                    }
                    break;
                default:
                    if (buffer != null) {
                        buffer.append((char) c);
                    }
            }
        }
        return false;
    }
}