// AbstractSubmitValidator.java

/*
 * Copyright (C) 2017-2025 Gerd Neugebauer
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.services.upload.util;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.ctan.site.services.upload.util.archive.Archive;
import org.ctan.site.stores.LicenseStore;
import org.ctan.site.stores.TopicStore;

import lombok.extern.slf4j.Slf4j;
import minitex.InsParser;

/**
 * This is an abstract base class containing validation methods.
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Slf4j
public abstract class AbstractSubmitValidator {

    /**
     * The constant <code>SPECIALS_PATTERN</code> contains the pattern to detect
     * special characters.
     */
    private static final Pattern SPECIALS_PATTERN =
        Pattern.compile(".*([^a-zA-Z0-9_./]).*");

    /**
     * The field <code>licenseStore</code> contains the license store.
     */
    protected LicenseStore licenseStore;

    /**
     * The field <code>topicStore</code> contains the topic store.
     */
    private TopicStore topicStore;

    /**
     * This is the constructor for <code>AbstractSubmitValidator</code>.
     *
     * @param licenseStore the license store
     * @param topicStore the topic store
     */
    protected AbstractSubmitValidator(TopicStore topicStore,
        LicenseStore licenseStore) {

        this.topicStore = topicStore;
        this.licenseStore = licenseStore;
    }

    /**
     * This method extracts the content from an archive reader.
     *
     * @param stream the input stream
     *
     * @return the bytes found
     * @throws IOException in case of an I/O error
     */
    protected byte[] extractContent(InputStream stream) throws IOException {

        return stream.readAllBytes();
    }

    /**
     * This method is a getter which ensures a maximal size on a mandatory
     * field.
     *
     * @param messages the list of messages
     * @param key the key to validate
     * @param value the new value
     * @param len the maximum length
     * @return the value of the field or <code>null</code>
     */
    protected String hasField(Messages messages, String key,
        String value, int len) {

        return hasField(messages, key, value, len, true, null);
    }

    /**
     * This method is a getter which ensures a maximal size.
     *
     * @param messages the list of messages
     * @param key the key to validate
     * @param value the new value
     * @param len the maximum length
     * @param mandatory the indicator for mandatory fields
     * @return the value of the field or <code>null</code>
     */
    protected String hasField(Messages messages, String key,
        String value, int len, boolean mandatory) {

        return hasField(messages, key, value, len, mandatory, null);
    }

    /**
     * This method is a getter which ensures a maximal size.
     *
     * @param messages the list of messages
     * @param key the key to validate
     * @param value the new value
     * @param len the maximum length
     * @param mandatory the indicator for mandatory fields
     * @param f the function to apply to the value at the end
     * @return the value of the field or <code>null</code>
     */
    protected String hasField(Messages messages, String key,
        String value, int len, boolean mandatory,
        Function<String, String> f) {

        if (value == null) {
            if (mandatory) {
                messages.error("Missing field", key);
            }
            return null;
        } else if (value.isBlank()) {
            if (mandatory) {
                messages.error("Empty field", key);
            }
        } else if (value.length() > len) {
            messages.error("Field too long", key, value, Integer.toString(len));
            value = value.substring(0, len);
        }
        return f == null ? value : f.apply(value);
    }

    /**
     * This method is a getter which ensures a maximal size on a mandatory
     * field.
     *
     * @param messages the list of messages
     * @param key the key to validate
     * @param value the new value
     * @param len the maximum length
     * @param f the function to apply to the value at the end
     * @return the value of the field or <code>null</code>
     */
    protected String hasField(Messages messages, String key,
        String value, int len, Function<String, String> f) {

        return hasField(messages, key, value, len, true, f);
    }

    /**
     * This method retrieves a string and checks the length.
     *
     * @param messages the messages to augment
     * @param key the key in the parameter map
     * @param value the value
     * @param len the allowed maximal length
     * @param mandatory the indicator for mandatory fields
     * @return the value
     */
    protected String[] hasListField(Messages messages, String key,
        String value, int len, boolean mandatory) {

        var array = value == null ? null : new String[]{value};
        return hasListField(messages, key, array, len, mandatory);
    }

    /**
     * This method retrieves a list of strings and checks the length.
     *
     * @param messages the messages to augment
     * @param key the name of the field
     * @param value the values
     * @param len the allowed maximal length
     * @param mandatory the indicator for mandatory fields
     * @return the list found or <code>null</code> in case of an error
     */
    protected String[] hasListField(Messages messages,
        String key, String[] value, int len, boolean mandatory) {

        return hasListField(messages, key, value, len, mandatory, null);
    }

    /**
     * This method retrieves a list of strings and checks the length.
     *
     * @param messages the messages to augment
     * @param key the name of the field
     * @param value the values
     * @param len the allowed maximal length
     * @param mandatory the indicator for mandatory fields
     * @param f the function to apply to the value at the end
     * @return the list found or <code>null</code> in case of an error
     */
    protected String[] hasListField(Messages messages,
        String key, String[] value, int len, boolean mandatory,
        Function<String, String> f) {

        if (value == null) {
            if (mandatory) {
                messages.error("Missing field", key);
            }
            return null;
        }
        String joinedValue = String.join("; ", value);
        if (joinedValue.length() > len) {
            messages.error("Field too long", key, joinedValue,
                Integer.toString(len));
        }
        if (mandatory && value.length == 0) {
            messages.error("Empty field", key);
            return null;
        }
        int i = 0;
        for (var it : value) {
            if (it.length() > len) {
                messages.error("Field too long", key, it,
                    Integer.toString(len));
                value[i] = value[i].substring(0, len);
            } else if (it.isBlank()) {
                messages.error("Empty field", key);
            }
            if (f != null) {
                value[i] = f.apply(it);
            }
        }
        return value;
    }

    /**
     * The method <code>hasUrlListField</code> provides means to check that the
     * field contains a list of URLs separated by comma or semicolon in a
     * String.
     *
     * @param messages the messages to augment
     * @param key the name of the field
     * @param value the value
     * @param len the allowed maximal length
     * @return the field value
     */
    protected String[] hasUrlListField(Messages messages,
        String key, String value, int len) {

        return hasListField(messages, key,
            value == null ? null : value.split("[,;] *"), len, false,
            it -> validateUrl(messages, key, it));
    }

    /**
     * This method <code>validateArchive</code> takes an archive file for a
     * package and analyses its contents. If the archive is not of a known type
     * then am error is produced.
     *
     * @param messages the list of messages
     * @param pkg the name of the package
     * @param filename the file name
     * @param stream the data of the upload
     * @return the messages
     */
    protected Messages validateArchive(Messages messages, String pkg,
        String filename, InputStream stream) {

        Archive archive = Archive.of(filename, stream);
        if (archive == null) {
            messages.error("Missing archive file");
            return messages;
        }
        try {
            validateArchiveFiles(messages, pkg, archive);
        } catch (IOException e) {
            messages.error("Unknown archive type", filename);
        }
        return messages;
    }

    /**
     * This method <code>validateArchiveFiles</code> checks the archive file
     * given.
     *
     * @param messages the messages to be augmented
     * @param pkg the name of the package
     * @param archive the archive input stream
     * @throws IOException in case of an I/O error
     */
    protected void validateArchiveFiles(Messages messages, String pkg,
        Archive archive)
        throws IOException {

        boolean hasReadme = false;
        boolean hasPdf = false;
        Map<String, Boolean> files = new HashMap<>();
        Map<String, String> topLevelDirs = new HashMap<>();
        Map<String, Integer> dirs = new HashMap<>();
        RemainderValidator generated = new RemainderValidator();
        try {
            for (var entry = archive.getNextEntry(); entry != null; entry =
                archive.getNextEntry()) {
                var name = entry.getName();
                if (name == null) {
                    break;
                } else if (name.endsWith("/" + pkg + ".tds.zip")) {
                    validateTds(messages, pkg, name,
                        extractContent(archive.getStream()));
                    continue;
                } else if (name.endsWith(".ins")) {
                    int i = name.indexOf("/");
                    validateIns(messages, name, generated,
                        extractContent(archive.getStream()),
                        i < 0 ? "" : name.substring(0, i + 1));
                    archive.closeEntry();
                    continue;
                }
                archive.closeEntry();
                files.put(name, Boolean.TRUE);
                int i = name.indexOf("/");
                if (i >= 0) {
                    var directory = name.substring(0, i);
                    topLevelDirs.put(directory, directory);
                }
                i = name.lastIndexOf("/");
                var n = "";
                if (i >= 0) {
                    n = name.substring(0, i);
                    var value = dirs.get(n);
                    dirs.put(n, value == null ? 1 : value + 1);
                    if (!n.matches("[/a-zA-Z_0-9.-]*")) {
                        messages.errorOrWarning("Directory name invalid", n);
                    }
                }
                if (entry.isDirectory()) {
                    continue;
                }
                if (name.matches(".*/[^a-zA-Z][^/]*")) {
                    messages.info("Name does not start with a letter", name);
                }
                Matcher m = SPECIALS_PATTERN.matcher(name);
                if (m.matches()) {
                    messages.errorOrWarning("Name contains special character",
                        name,
                        m.group(1));
                }
                if (name.equals(pkg + "/README")
                    || name.equals(pkg + "/README.md")
                    || name.equals(pkg + "/README.markdown")
                    || name.equals(pkg + "/README.txt")) {
                    hasReadme = true;
                } else if (name.endsWith(".pdf")) {
                    hasPdf = true;
                }
            }
        } catch (IOException e) {
            log.error("Error for upload of {2}: {1}", e.toString(), pkg);
            return;
        } finally {
            archive.close();
        }
        generated.checkRemainderFiles(messages, files);
        switch (topLevelDirs.size()) {
            case 0:
                messages.errorOrWarning("Missing top-level directory", pkg);
                break;
            case 1:
                String d = topLevelDirs.keySet().iterator().next();
                if (d == null || d.isEmpty()) {
                    messages.errorOrWarning("Absolute top-level directory", d,
                        pkg);
                } else if (pkg != null && d.compareToIgnoreCase(pkg) != 0) {
                    messages.errorOrWarning("Unexpected top-level directory",
                        d, pkg);
                }
                break;
            default:
                messages.errorOrWarning("Several top-level directories");
        }
        for (var it : dirs.entrySet()) {
            if (it.getValue() == 0) {
                messages.errorOrWarning("Empty directory", it.getKey());
            }
        }
        if (!hasReadme) {
            messages.errorOrWarning("Missing README in top-level directory",
                pkg);
        }
        if (!hasPdf) {
            messages.errorOrWarning("Missing PDF documentation");
        }
    }

    /**
     * Apply checks to the CTAN path.
     *
     * @param messages the messages
     * @param path the path to check
     * @param base the base directory of tex-archive on the local file system
     *
     * @return the CTAN path
     */
    protected String validateCtanPath(Messages messages, String path,
        String base) {

        path = path.trim().replaceAll("^(http|https|ftp|file):/*", "")
            .replaceAll("/+$", "");
        if (path.indexOf('/') < 0) {
            if (!new File(base + '/' + path).exists()) {
                messages.warning("CTAN path not found", path);
            } else {
                messages.warning("Illegal CTAN path", path);
            }
        } else {
            var p = path.replaceAll("/[^/]*$", "");
            if (p.isBlank()) {
                messages.warning("Illegal CTAN path", path);
            } else if (!new File(base + "/" + p).exists()) {
                messages.warning("CTAN path not found", p);
            }
        }
        return path;
    }

    /**
     * This method <code>validateIns</code> performs an analysis on a LaTeX ins
     * file and adds the generated file names to a given list.
     *
     * @param messages the messages
     * @param cand the list of generated files
     * @param insContent the content of the ins file
     * @param dir the current directory
     * @throws IOException in case of an I/O error
     */
    private void validateIns(Messages messages, String name, List<String> cand,
        byte[] insContent, String dir)
        throws IOException {

        if (insContent.length <= 0) {
            messages.error("Empty ins file", name);
            return;
        }
        cand.addAll(new InsParser().parse(dir,
            new InputStreamReader(new ByteArrayInputStream(insContent),
                StandardCharsets.UTF_8)));
    }

    /**
     * This method checks a license against the known licenses in the database.
     *
     * @param messages the messages
     * @param license the license
     *
     * @return the licenses
     */
    protected String validateLicense(Messages messages, String license) {

        if (licenseStore.getByKey(license) == null) {
            messages.warning("License not found", license);
        }
        return license;
    }

    /**
     * The method <code>validateTds</code> takes a TDS file for a package and
     * analyses its contents.
     *
     * @param messages the list of messages
     * @param pkg the name of the package
     * @param filename the name of the file
     * @param content the uploaded archive file
     */
    private void validateTds(Messages messages, String pkg, String filename,
        byte[] content) {

        if (content.length == 0) {
            messages.error("Empty tds file", filename);
            return;
        }
        new TdsValidator().check(messages, filename, pkg, content);
    }

    /**
     * Validate that the given topics are known. Otherwise add a message that
     * the topic has not been found.
     *
     * @param messages the messages to augment with the findings
     * @param topic the topic to check
     * @return the topic
     */
    protected String validateTopic(Messages messages, String topic) {

        if (topicStore.getByKey(topic) == null) {
            messages.warning("Topic not found", topic);
        }
        return topic;
    }

    /**
     * Validate that the given URL can be retrieved. For this purpose a HEAD
     * request is sent to the URL. In case of an unreachable URL an error
     * message is added to the messages.
     *
     * @param messages the messages to augment or <code>null</code> to suppress
     *     the error message
     * @param type the type of the URL for the message
     * @param url the URL to be checked
     *
     * @return the url parameter
     */
    protected String validateUrl(Messages messages, String type,
        String url) {

        if (!url.matches("^(https?://|ftp://|mailto:).*")) {
            messages.error("Field does not contain a URL", type, url);
            return url;
        }
        if (url.startsWith("mailto:")) {
            return url;
        }
        URLConnection conn;
        try {
            conn = new URI(url).toURL().openConnection();
            if (conn instanceof HttpURLConnection co) {
                co.setRequestMethod("HEAD");
                co.connect();
                if (co.getResponseCode() != 404) {
                    return url;
                }
            } else {
                return url;
            }
        } catch (IOException | URISyntaxException e) {
            // fall-through
        }
        messages.errorOrWarning("URL is not reachable", type, url);
        return url;
    }
}