AbstractSubmitValidator.java
/*
* Copyright (C) 2017-2025 Gerd Neugebauer
*
* This file is distributed under the 3-clause BSD license.
* See file LICENSE for details.
*/
package org.ctan.site.services.upload.util;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.ctan.site.services.upload.util.archive.Archive;
import org.ctan.site.stores.LicenseStore;
import org.ctan.site.stores.TopicStore;
import lombok.extern.slf4j.Slf4j;
import minitex.InsParser;
/**
* This is an abstract base class containing validation methods.
*
* @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
*/
@Slf4j
public abstract class AbstractSubmitValidator {
/**
 * The constant <code>SPECIALS_PATTERN</code> contains the pattern to detect
 * special characters.
 * <p>
 * The pattern is meant to be used with a full match: it matches any input
 * which contains at least one character that is neither an ASCII letter,
 * an ASCII digit, an underscore, a dot, nor a slash. Since the leading
 * <code>.*</code> is greedy, group 1 holds the last such character of the
 * input.
 */
private static final Pattern SPECIALS_PATTERN =
Pattern.compile(".*([^a-zA-Z0-9_./]).*");
/**
 * The field <code>licenseStore</code> contains the license store. It is
 * consulted when a license key has to be validated.
 */
protected LicenseStore licenseStore;
/**
 * The field <code>topicStore</code> contains the topic store. It is
 * consulted when a topic key has to be validated. The field is assigned
 * once in the constructor and never changed afterwards.
 */
private final TopicStore topicStore;
/**
 * Creates a new validator base instance and remembers the stores which
 * are needed for the look-up of topics and licenses.
 *
 * @param topicStore the topic store
 * @param licenseStore the license store
 */
protected AbstractSubmitValidator(TopicStore topicStore,
        LicenseStore licenseStore) {
    this.licenseStore = licenseStore;
    this.topicStore = topicStore;
}
/**
 * Reads all remaining bytes from the given stream. The stream is not
 * closed by this method.
 *
 * @param stream the input stream to read from
 *
 * @return the bytes read from the stream
 * @throws IOException in case of an I/O error
 */
protected byte[] extractContent(InputStream stream) throws IOException {
    final byte[] content = stream.readAllBytes();
    return content;
}
/**
 * Checks a mandatory field against a maximal length. This is a shorthand
 * for the six-argument variant with the mandatory indicator set and
 * without a function to post-process the value.
 *
 * @param messages the list of messages
 * @param key the key to validate
 * @param value the new value
 * @param len the maximum length
 * @return the value of the field or <code>null</code>
 */
protected String hasField(Messages messages, String key,
        String value, int len) {
    final boolean mandatory = true;
    final Function<String, String> noTransformation = null;
    return hasField(messages, key, value, len, mandatory,
        noTransformation);
}
/**
 * Checks a field against a maximal length. This is a shorthand for the
 * six-argument variant without a function to post-process the value.
 *
 * @param messages the list of messages
 * @param key the key to validate
 * @param value the new value
 * @param len the maximum length
 * @param mandatory the indicator for mandatory fields
 * @return the value of the field or <code>null</code>
 */
protected String hasField(Messages messages, String key,
        String value, int len, boolean mandatory) {
    final Function<String, String> noTransformation = null;
    return hasField(messages, key, value, len, mandatory,
        noTransformation);
}
/**
 * Checks a field against a maximal length and optionally post-processes
 * it.
 * <ul>
 * <li>A <code>null</code> value leads to the error "Missing field" if the
 * field is mandatory; <code>null</code> is returned in any case.</li>
 * <li>A blank value leads to the error "Empty field" if the field is
 * mandatory. The blank value is nevertheless passed on.</li>
 * <li>A value longer than <code>len</code> leads to the error "Field too
 * long" and is cut down to <code>len</code> characters.</li>
 * </ul>
 *
 * @param messages the list of messages
 * @param key the key to validate
 * @param value the new value
 * @param len the maximum length
 * @param mandatory the indicator for mandatory fields
 * @param f the function to apply to the value at the end; it may be
 *        <code>null</code> to return the value unchanged
 * @return the possibly shortened and post-processed value of the field
 *         or <code>null</code>
 */
protected String hasField(Messages messages, String key,
        String value, int len, boolean mandatory,
        Function<String, String> f) {
    if (value == null) {
        if (mandatory) {
            messages.error("Missing field", key);
        }
        return null;
    }
    String result = value;
    if (result.isBlank()) {
        if (mandatory) {
            messages.error("Empty field", key);
        }
    } else if (result.length() > len) {
        messages.error("Field too long", key, value, Integer.toString(len));
        result = result.substring(0, len);
    }
    if (f == null) {
        return result;
    }
    return f.apply(result);
}
/**
 * Checks a mandatory field against a maximal length and post-processes
 * it. This is a shorthand for the six-argument variant with the mandatory
 * indicator set.
 *
 * @param messages the list of messages
 * @param key the key to validate
 * @param value the new value
 * @param len the maximum length
 * @param f the function to apply to the value at the end
 * @return the value of the field or <code>null</code>
 */
protected String hasField(Messages messages, String key,
        String value, int len, Function<String, String> f) {
    final boolean mandatory = true;
    return hasField(messages, key, value, len, mandatory, f);
}
/**
 * Checks a single string as a list field. A non-<code>null</code> value
 * is wrapped into an array with one element; a <code>null</code> value is
 * passed on as <code>null</code>. The checks are performed by the array
 * variant of this method.
 *
 * @param messages the messages to augment
 * @param key the name of the field
 * @param value the value
 * @param len the allowed maximal length
 * @param mandatory the indicator for mandatory fields
 * @return the list found or <code>null</code>
 */
protected String[] hasListField(Messages messages, String key,
        String value, int len, boolean mandatory) {
    String[] values = null;
    if (value != null) {
        values = new String[]{value};
    }
    return hasListField(messages, key, values, len, mandatory);
}
/**
 * Checks a list of strings against a maximal length. This is a shorthand
 * for the six-argument variant without a function to post-process the
 * values.
 *
 * @param messages the messages to augment
 * @param key the name of the field
 * @param value the values
 * @param len the allowed maximal length
 * @param mandatory the indicator for mandatory fields
 * @return the list found or <code>null</code> in case of an error
 */
protected String[] hasListField(Messages messages,
        String key, String[] value, int len, boolean mandatory) {
    final Function<String, String> noTransformation = null;
    return hasListField(messages, key, value, len, mandatory,
        noTransformation);
}
/**
 * This method checks a list of strings against a maximal length and
 * optionally post-processes each element.
 * <ul>
 * <li>A <code>null</code> list leads to the error "Missing field" if the
 * field is mandatory; <code>null</code> is returned in any case.</li>
 * <li>If the elements joined with "; " are longer than <code>len</code>
 * then the error "Field too long" is reported for the joined value.</li>
 * <li>An empty list leads to the error "Empty field" and the result
 * <code>null</code> if the field is mandatory.</li>
 * <li>Each element longer than <code>len</code> is reported as "Field too
 * long" and cut down to <code>len</code> characters. Each blank element
 * is reported as "Empty field".</li>
 * <li>Finally the function <code>f</code> &ndash; if given &ndash; is
 * applied to each (possibly shortened) element.</li>
 * </ul>
 * The array passed in is modified in place and returned.
 *
 * @param messages the messages to augment
 * @param key the name of the field
 * @param value the values
 * @param len the allowed maximal length
 * @param mandatory the indicator for mandatory fields
 * @param f the function to apply to each value at the end; it may be
 *        <code>null</code> to keep the values as they are
 * @return the list found or <code>null</code> in case of a missing list
 *         or an empty mandatory list
 */
protected String[] hasListField(Messages messages,
        String key, String[] value, int len, boolean mandatory,
        Function<String, String> f) {
    if (value == null) {
        if (mandatory) {
            messages.error("Missing field", key);
        }
        return null;
    }
    String joinedValue = String.join("; ", value);
    if (joinedValue.length() > len) {
        messages.error("Field too long", key, joinedValue,
            Integer.toString(len));
    }
    if (mandatory && value.length == 0) {
        messages.error("Empty field", key);
        return null;
    }
    // An index loop is required here: each result has to be written back
    // to the slot it has been read from.
    for (int i = 0; i < value.length; i++) {
        String item = value[i];
        if (item.length() > len) {
            messages.error("Field too long", key, item,
                Integer.toString(len));
            item = item.substring(0, len);
        } else if (item.isBlank()) {
            messages.error("Empty field", key);
        }
        // The function sees the shortened value, in the same way as the
        // single-valued hasField() does it.
        value[i] = f == null ? item : f.apply(item);
    }
    return value;
}
/**
 * The method <code>hasUrlListField</code> checks an optional field which
 * contains a list of URLs in a single String. The String is split at
 * commas and semicolons, each optionally followed by spaces. The
 * resulting list is checked as a non-mandatory list field and each
 * element is validated as URL.
 *
 * @param messages the messages to augment
 * @param key the name of the field
 * @param value the value
 * @param len the allowed maximal length
 * @return the list of URLs found or <code>null</code> if the value is
 *         <code>null</code>
 */
protected String[] hasUrlListField(Messages messages,
        String key, String value, int len) {
    String[] urls = null;
    if (value != null) {
        urls = value.split("[,;] *");
    }
    Function<String, String> urlCheck =
        url -> validateUrl(messages, key, url);
    return hasListField(messages, key, urls, len, false, urlCheck);
}
/**
 * The method <code>validateArchive</code> takes an archive file for a
 * package and analyses its contents. If no archive can be obtained for
 * the file name and stream then the error "Missing archive file" is
 * produced. If the analysis of the files ends with an I/O error then the
 * error "Unknown archive type" is produced.
 *
 * @param messages the list of messages
 * @param pkg the name of the package
 * @param filename the file name
 * @param stream the data of the upload
 * @return the messages passed in
 */
protected Messages validateArchive(Messages messages, String pkg,
        String filename, InputStream stream) {
    Archive archive = Archive.of(filename, stream);
    if (archive != null) {
        try {
            validateArchiveFiles(messages, pkg, archive);
        } catch (IOException e) {
            messages.error("Unknown archive type", filename);
        }
    } else {
        messages.error("Missing archive file");
    }
    return messages;
}
/**
 * The method <code>validateArchiveFiles</code> iterates over the entries
 * of the archive given and reports its findings to the messages.
 * <ul>
 * <li>An entry ending in <code>/&lt;pkg&gt;.tds.zip</code> is handed over
 * to the TDS validation.</li>
 * <li>An entry ending in <code>.ins</code> is analysed to collect the
 * names of the generated files.</li>
 * <li>For all other entries the directory name and the file name are
 * checked for unwanted characters.</li>
 * </ul>
 * After all entries have been seen the remainder files, the top-level
 * directory, the presence of a README file in the package directory, and
 * the presence of a PDF file are checked.
 * <p>
 * An I/O error while reading the entries is logged and terminates the
 * validation without the final checks. The archive is closed in any case.
 *
 * @param messages the messages to be augmented
 * @param pkg the name of the package
 * @param archive the archive to read the entries from
 * @throws IOException in case of an I/O error which is not handled
 *         internally, e.g. when closing the archive
 */
protected void validateArchiveFiles(Messages messages, String pkg,
        Archive archive)
        throws IOException {
    boolean hasReadme = false;
    boolean hasPdf = false;
    Map<String, Boolean> files = new HashMap<>();
    Map<String, String> topLevelDirs = new HashMap<>();
    Map<String, Integer> dirs = new HashMap<>();
    RemainderValidator generated = new RemainderValidator();
    // Loop-invariant values: the regular expressions are compiled once
    // instead of once per archive entry.
    final Pattern dirNamePattern = Pattern.compile("[/a-zA-Z_0-9.-]*");
    final Pattern nonLetterStartPattern =
        Pattern.compile(".*/[^a-zA-Z][^/]*");
    final String tdsSuffix = "/" + pkg + ".tds.zip";
    try {
        for (var entry = archive.getNextEntry(); entry != null;
                entry = archive.getNextEntry()) {
            var name = entry.getName();
            if (name == null) {
                break;
            } else if (name.endsWith(tdsSuffix)) {
                validateTds(messages, pkg, name,
                    extractContent(archive.getStream()));
                // close the entry like all other branches do
                archive.closeEntry();
                continue;
            } else if (name.endsWith(".ins")) {
                // NOTE(review): the directory passed on is the top-level
                // directory only, not the directory of the ins file --
                // confirm that this is what the ins parser expects.
                int slash = name.indexOf("/");
                validateIns(messages, name, generated,
                    extractContent(archive.getStream()),
                    slash < 0 ? "" : name.substring(0, slash + 1));
                archive.closeEntry();
                continue;
            }
            archive.closeEntry();
            files.put(name, Boolean.TRUE);
            int first = name.indexOf("/");
            if (first >= 0) {
                var directory = name.substring(0, first);
                topLevelDirs.put(directory, directory);
            }
            int last = name.lastIndexOf("/");
            if (last >= 0) {
                var parent = name.substring(0, last);
                // count the entries seen per directory
                dirs.merge(parent, 1, Integer::sum);
                if (!dirNamePattern.matcher(parent).matches()) {
                    messages.errorOrWarning("Directory name invalid",
                        parent);
                }
            }
            if (entry.isDirectory()) {
                continue;
            }
            if (nonLetterStartPattern.matcher(name).matches()) {
                messages.info("Name does not start with a letter", name);
            }
            Matcher m = SPECIALS_PATTERN.matcher(name);
            if (m.matches()) {
                messages.errorOrWarning("Name contains special character",
                    name,
                    m.group(1));
            }
            if (name.equals(pkg + "/README")
                    || name.equals(pkg + "/README.md")
                    || name.equals(pkg + "/README.markdown")
                    || name.equals(pkg + "/README.txt")) {
                hasReadme = true;
            } else if (name.endsWith(".pdf")) {
                hasPdf = true;
            }
        }
    } catch (IOException e) {
        // SLF4J uses {} as placeholder; the trailing exception is logged
        // with its stack trace.
        log.error("Error for upload of {}: {}", pkg, e.toString(), e);
        return;
    } finally {
        archive.close();
    }
    generated.checkRemainderFiles(messages, files);
    switch (topLevelDirs.size()) {
        case 0:
            messages.errorOrWarning("Missing top-level directory", pkg);
            break;
        case 1:
            String d = topLevelDirs.keySet().iterator().next();
            if (d == null || d.isEmpty()) {
                // an empty first path component means the names start
                // with a slash
                messages.errorOrWarning("Absolute top-level directory", d,
                    pkg);
            } else if (pkg != null && d.compareToIgnoreCase(pkg) != 0) {
                messages.errorOrWarning("Unexpected top-level directory",
                    d, pkg);
            }
            break;
        default:
            messages.errorOrWarning("Several top-level directories");
    }
    // NOTE(review): every key of dirs is stored with a count of at least
    // 1. Thus the condition below never holds as written -- confirm how
    // empty directories are meant to be detected.
    for (var it : dirs.entrySet()) {
        if (it.getValue() == 0) {
            messages.errorOrWarning("Empty directory", it.getKey());
        }
    }
    if (!hasReadme) {
        messages.errorOrWarning("Missing README in top-level directory",
            pkg);
    }
    if (!hasPdf) {
        messages.errorOrWarning("Missing PDF documentation");
    }
}
/**
 * Apply checks to the CTAN path. The path is trimmed, a leading scheme
 * (http, https, ftp, or file) together with the following slashes is
 * removed, and trailing slashes are dropped.
 * <p>
 * For a path containing a slash the part before the last slash is taken
 * as parent directory. A blank parent leads to the warning "Illegal CTAN
 * path"; a parent which does not exist below the base directory leads to
 * the warning "CTAN path not found".
 * <p>
 * A path without a slash always leads to a warning: "CTAN path not found"
 * if it does not exist below the base directory and "Illegal CTAN path"
 * otherwise.
 *
 * @param messages the messages
 * @param path the path to check
 * @param base the base directory of tex-archive on the local file system
 *
 * @return the normalised CTAN path
 */
protected String validateCtanPath(Messages messages, String path,
        String base) {
    final String cleaned = path.trim()
        .replaceAll("^(http|https|ftp|file):/*", "")
        .replaceAll("/+$", "");
    if (cleaned.indexOf('/') >= 0) {
        final String parent = cleaned.replaceAll("/[^/]*$", "");
        if (parent.isBlank()) {
            messages.warning("Illegal CTAN path", cleaned);
        } else if (!new File(base + "/" + parent).exists()) {
            messages.warning("CTAN path not found", parent);
        }
        return cleaned;
    }
    final boolean exists = new File(base + '/' + cleaned).exists();
    if (exists) {
        messages.warning("Illegal CTAN path", cleaned);
    } else {
        messages.warning("CTAN path not found", cleaned);
    }
    return cleaned;
}
/**
 * The method <code>validateIns</code> performs an analysis on a LaTeX ins
 * file and adds the file names returned by the ins parser to a given
 * list. An ins file without any content is reported as error "Empty ins
 * file" and not parsed.
 *
 * @param messages the messages
 * @param name the name of the ins file for the error message
 * @param cand the list of generated files to be extended
 * @param insContent the content of the ins file; it is read as UTF-8
 * @param dir the current directory
 * @throws IOException in case of an I/O error
 */
private void validateIns(Messages messages, String name, List<String> cand,
        byte[] insContent, String dir)
        throws IOException {
    if (insContent.length > 0) {
        var parser = new InsParser();
        var reader = new InputStreamReader(
            new ByteArrayInputStream(insContent), StandardCharsets.UTF_8);
        cand.addAll(parser.parse(dir, reader));
        return;
    }
    messages.error("Empty ins file", name);
}
/**
 * This method looks up a license key in the license store. If the store
 * does not know the key then the warning "License not found" is added to
 * the messages.
 *
 * @param messages the messages
 * @param license the key of the license
 *
 * @return the license key passed in
 */
protected String validateLicense(Messages messages, String license) {
    final boolean known = licenseStore.getByKey(license) != null;
    if (!known) {
        messages.warning("License not found", license);
    }
    return license;
}
/**
 * The method <code>validateTds</code> takes the content of a TDS file for
 * a package and hands it over to a TDS validator. A TDS file without any
 * content is reported as error "Empty tds file" and not analysed.
 *
 * @param messages the list of messages
 * @param pkg the name of the package
 * @param filename the name of the file
 * @param content the content of the TDS file
 */
private void validateTds(Messages messages, String pkg, String filename,
        byte[] content) {
    if (content.length != 0) {
        new TdsValidator().check(messages, filename, pkg, content);
    } else {
        messages.error("Empty tds file", filename);
    }
}
/**
 * This method looks up a topic key in the topic store. If the store does
 * not know the key then the warning "Topic not found" is added to the
 * messages.
 *
 * @param messages the messages to augment with the findings
 * @param topic the key of the topic to check
 * @return the topic key passed in
 */
protected String validateTopic(Messages messages, String topic) {
    final boolean known = topicStore.getByKey(topic) != null;
    if (!known) {
        messages.warning("Topic not found", topic);
    }
    return topic;
}
/**
 * Validate that the given URL can be retrieved.
 * <ul>
 * <li>A value which does not start with <code>http://</code>,
 * <code>https://</code>, <code>ftp://</code>, or <code>mailto:</code> is
 * reported as "Field does not contain a URL".</li>
 * <li>A <code>mailto:</code> URL is accepted without further checks.</li>
 * <li>For an HTTP(S) URL a HEAD request is sent. The URL is considered
 * unreachable if the response code is 404, if the URL can not be parsed,
 * or if an I/O error occurs; this includes a timeout. Any other response
 * code is accepted.</li>
 * <li>Any other kind of connection is accepted without contacting the
 * server.</li>
 * </ul>
 * In case of an unreachable URL the message "URL is not reachable" is
 * added to the messages.
 *
 * @param messages the messages to augment or <code>null</code> to suppress
 *        the error message
 * @param type the type of the URL for the message
 * @param url the URL to be checked
 *
 * @return the url parameter
 */
protected String validateUrl(Messages messages, String type,
        String url) {
    if (!url.matches("^(https?://|ftp://|mailto:).*")) {
        if (messages != null) {
            messages.error("Field does not contain a URL", type, url);
        }
        return url;
    }
    if (url.startsWith("mailto:")) {
        return url;
    }
    // Without timeouts an unresponsive server would block the validation
    // forever; the default of HttpURLConnection is to wait infinitely.
    final int timeoutMillis = 10_000;
    HttpURLConnection http = null;
    try {
        URLConnection conn = new URI(url).toURL().openConnection();
        if (conn instanceof HttpURLConnection co) {
            http = co;
            co.setConnectTimeout(timeoutMillis);
            co.setReadTimeout(timeoutMillis);
            co.setRequestMethod("HEAD");
            co.connect();
            if (co.getResponseCode() != 404) {
                return url;
            }
        } else {
            return url;
        }
    } catch (IOException | URISyntaxException ignored) {
        // deliberately ignored: the failure is reported as unreachable
        // URL below
    } finally {
        if (http != null) {
            http.disconnect();
        }
    }
    if (messages != null) {
        messages.errorOrWarning("URL is not reachable", type, url);
    }
    return url;
}
}