MiniTexParser.java
/*
* Copyright © 2016-2025 The CTAN Team and individual authors
*
* This file is distributed under the 3-clause BSD license.
* See file LICENSE for details.
*/
package minitex;
import java.io.EOFException;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* This class provides a reader for a <span class="logo">T<span style=
* "text-transform:uppercase;font-size:90%;vertical-align:-0.4ex;
* margin-left:-0.2em;margin-right:-0.1em;line-height: 0;" >e</span>X</span>
* style configuration file.
*
* <p>
* The parser reads from the input reader and extracts macro invocations and
* environments on the top level. The following restrictions apply:
* </p>
* <ol>
* <li>Macros must have a single argument. An exception is the macro
* <code>\endinput</code> which terminates the reading immediately.</li>
* <li>Environments may not have any argument. Arguments of environments are
* digested as part of their body.</li>
* <li>Comments are honored, i.e. discarded until end of line.</li>
* </ol>
*
* <p>
* The arguments of macros and the body of environments are stored in a map. The
* key is the name of the macro or environment. Macros and environments are
* treated identically. Thus the following instructions are the same:
* </p>
* <pre>
* \abc{123}
* </pre> and <pre>
* \begin{abc}123\end{abc}
* </pre>
*
* <p>
* Macros and environments may contain <span class="logo">T<span style=
* "text-transform:uppercase;font-size:90%;vertical-align:-0.4ex;
* margin-left:-0.2em;margin-right:-0.1em;line-height: 0;" >e</span>X</span>
* code. This is not interpreted. It is taken literally as the value.
* Nevertheless the blocks and environments need to be balanced.
* </p>
*
* <p>
* The same macro and environment name can be used several times. The values for
* the same key are stored in a list. The order of the elements in this list is
* the order in the input.
* </p>
*
* @author <a href="gene@ctan.org">Gerd Neugebauer</a>
*/
public class MiniTexParser {

    /**
     * The field <code>reader</code> contains the reader. It is wrapped in a
     * {@link PushbackReader} so that the one character read past the end of a
     * macro name can be handed back to the input.
     */
    private final PushbackReader reader;

    /**
     * The field <code>defs</code> contains the mapping gathered. The key is
     * the macro or environment name; the value is the list of arguments or
     * bodies in the order of their appearance.
     */
    private final Map<String, List<String>> defs =
            new HashMap<String, List<String>>();

    /**
     * This is the constructor for <code>MiniTexParser</code>.
     *
     * @param reader the reader
     */
    public MiniTexParser(Reader reader) {
        this.reader = new PushbackReader(reader);
    }

    /**
     * This method adds a value to the definitions map. Values for the same key
     * are collected in a list in the order in which they are added.
     *
     * @param key the key
     * @param value the value
     */
    public void add(String key, String value) {
        List<String> vals = defs.get(key);
        if (vals == null) {
            vals = new ArrayList<String>();
            defs.put(key, vals);
        }
        vals.add(value);
    }

    /**
     * This method reads the input and stores the top-level macros and
     * environments in a map.
     *
     * @return the macros and environments found; this is the internal map of
     *         the parser and not a copy
     *
     * @throws IOException in case of an error
     */
    @SuppressFBWarnings(value = "EI_EXPOSE_REP")
    public Map<String, List<String>> parse() throws IOException {
        // The result is irrelevant here: both end of input and \endinput
        // terminate the top-level scan.
        slurp(0, null, null);
        return defs;
    }

    /**
     * This method parses an argument. If the next character is not an opening
     * brace then this single character is the argument. Otherwise the
     * argument is everything up to the matching closing brace; the enclosing
     * braces are not part of the result.
     *
     * @return the contents found
     *
     * @throws EOFException in case that the input ends before the argument
     *         starts
     * @throws UnbalancedBracesException in case that the input ends before
     *         the closing brace has been found
     * @throws IOException in case of an error
     */
    private String parseBlock() throws IOException {
        int c = reader.read();
        if (c < 0) {
            throw new EOFException();
        }
        if (c != '{') {
            return Character.toString((char) c);
        }
        StringBuilder buffer = new StringBuilder();
        // Level 1: the opening brace has just been consumed.
        if (!slurp(1, buffer, null)) {
            throw new UnbalancedBracesException();
        }
        return buffer.toString();
    }

    /**
     * This method parses the input for the content of an environment. The
     * beginning of the environment has already been digested.
     *
     * @param env the name of the environment
     *
     * @return the contents of the environment
     *
     * @throws EOFException in case that the input ends before the environment
     *         has been closed
     * @throws IOException in case of an error
     */
    private String parseEnv(String env) throws IOException {
        Stack<String> stack = new Stack<String>();
        StringBuilder buffer = new StringBuilder();
        stack.push(env);
        if (!slurp(0, buffer, stack)) {
            throw new EOFException();
        }
        return buffer.toString();
    }

    /**
     * This method reads and analyses the input. It is used in three modes:
     * <ul>
     * <li>top level (<code>level</code> 0, no buffer, no stack): macros and
     * environments at brace level 0 are stored via
     * {@link #add(String, String)}; everything else is discarded.</li>
     * <li>block (<code>level</code> 1, buffer, no stack): the text up to the
     * closing brace matching the already consumed opening brace is
     * collected.</li>
     * <li>environment (<code>level</code> 0, buffer and stack): the text up to
     * the <code>\end</code> which empties the stack is collected.</li>
     * </ul>
     *
     * @param level the number of braces already open when the method is
     *        invoked
     * @param buffer the output buffer or {@code null} for none
     * @param stack the stack of open environments or {@code null} for none
     *
     * @return <code>true</code> if the scan has been terminated by the closing
     *         brace of the block, by the closing <code>\end</code> of the
     *         environment or by <code>\endinput</code>; <code>false</code> if
     *         the end of the input has been reached
     *
     * @throws IOException in case of an error
     */
    private boolean slurp(int level, StringBuilder buffer, Stack<String> stack)
            throws IOException {

        // Only a scan started inside an open brace may be ended by a closing
        // brace. Groups opened during the scan are ordinary content.
        final boolean inBlock = level > 0;

        for (int c = reader.read(); c >= 0; c = reader.read()) {
            switch (c) {
                case '%':
                    // A comment extends to the end of the line. It is kept
                    // literally (including the line end) when collecting and
                    // discarded otherwise.
                    if (buffer != null) {
                        buffer.append('%');
                    }
                    for (c = reader.read(); c >= 0 && c != '\r'
                            && c != '\n'; c = reader.read()) {
                        if (buffer != null) {
                            buffer.append((char) c);
                        }
                    }
                    // Do not append the end-of-input marker as a character.
                    if (c >= 0 && buffer != null) {
                        buffer.append((char) c);
                    }
                    break;
                case '{':
                    level++;
                    if (buffer != null) {
                        buffer.append('{');
                    }
                    break;
                case '}':
                    level--;
                    if (level < 0) {
                        // closing brace without an opening one
                        throw new UnbalancedBracesException();
                    }
                    if (inBlock && level == 0) {
                        // the brace which closes the block itself
                        return true;
                    }
                    if (buffer != null) {
                        buffer.append('}');
                    }
                    break;
                case '\\': {
                    c = reader.read();
                    if (c < 0) {
                        throw new EOFException();
                    }
                    StringBuilder name = new StringBuilder();
                    // white-space following a macro name made of letters
                    StringBuilder gap = new StringBuilder();
                    name.append((char) c);
                    if (Character.isLetter(c)) {
                        for (c = reader.read(); c >= 0
                                && Character.isLetter(c); c = reader.read()) {
                            name.append((char) c);
                        }
                        for (; c >= 0 && Character.isWhitespace(c); c =
                                reader.read()) {
                            gap.append((char) c);
                        }
                        if (c >= 0) {
                            // first character after the name and white-space
                            reader.unread(c);
                        }
                    }
                    String m = name.toString();
                    if ("begin".equals(m)) {
                        String env = parseBlock();
                        if (stack != null) {
                            stack.push(env);
                        }
                        if (buffer != null) {
                            buffer.append("\\begin{");
                            buffer.append(env);
                            buffer.append('}');
                        } else if (stack == null && level == 0) {
                            add(env, parseEnv(env));
                        }
                    } else if ("end".equals(m)) {
                        String env = parseBlock();
                        if (stack != null) {
                            String open = stack.pop();
                            if (stack.empty()) {
                                // Only the outermost environment is checked
                                // against its name.
                                if (!open.equals(env)) {
                                    throw new IOException("expecting \\end{"
                                            + open + "} instead of \\end{"
                                            + env
                                            + "}");
                                }
                                return true;
                            }
                        }
                        if (buffer != null) {
                            buffer.append("\\end{");
                            buffer.append(env);
                            buffer.append('}');
                        } else if (stack == null) {
                            throw new EnvironmentStackUnderflowException();
                        }
                    } else if ("endinput".equals(m)) {
                        if (level > 0) {
                            throw new UnbalancedBracesException();
                        }
                        return true;
                    } else if (buffer != null) {
                        // Keep the white-space after the name: without it
                        // "\foo bar" would be stored as "\foobar".
                        buffer.append('\\');
                        buffer.append(m);
                        buffer.append(gap);
                    } else if (stack == null && level == 0) {
                        add(m, parseBlock());
                    }
                    break;
                }
                default:
                    if (buffer != null) {
                        buffer.append((char) c);
                    }
            }
        }
        return false;
    }
}