// UploadService.java

/*
 * Copyright © 2023-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.services.upload;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.ctan.site.CtanConfiguration.UploadConfig;
import org.ctan.site.domain.catalogue.Upload;
import org.ctan.site.stores.UploadStore;
import org.jsoup.Jsoup;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.NonNull;

/**
 * The class <code>UploadService</code> contains the service to access the
 * incoming directory.
 *
 * <p>
 * Besides listing the incoming directory it reads the addendum file,
 * provides cached upload statistics, and stores upload entities via the
 * underlying {@link UploadStore}.
 * </p>
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
public class UploadService {

    /**
     * The class <code>IncomingTo</code> contains the transport object for the
     * paged list of incoming files.
     */
    @Getter
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @SuppressFBWarnings(value = "EI_EXPOSE_REP")
    public static class IncomingTo {

        /**
         * The field <code>page</code> contains the current page. It is 0-based.
         */
        private long page;

        /**
         * The field <code>size</code> contains the page size.
         */
        private long size;

        /**
         * The field <code>length</code> contains the total number of files.
         */
        private long length;

        /**
         * The field <code>files</code> contains the names of the files on the
         * current page.
         */
        private List<String> files;
    }

    /**
     * The constant <code>STAT_MAX_AGE_SECONDS</code> contains the age in
     * seconds after which the cached statistics are fetched again. This
     * amounts to 30 minutes.
     */
    private static final long STAT_MAX_AGE_SECONDS = 60L * 30L;

    /**
     * The field <code>statDate</code> contains the date of the most recent stat
     * call.
     */
    private LocalDateTime statDate = LocalDateTime.MIN;

    /**
     * The field <code>statData</code> contains the cached data. It is
     * <code>null</code> until the statistics have been requested for the first
     * time.
     */
    private Map<String, Integer> statData = null;

    /**
     * The field <code>incoming</code> contains the incoming directory. The
     * value always ends with a slash.
     */
    private final String incoming;

    /**
     * The field <code>addendum</code> contains the location of the addendum
     * from the configuration.
     */
    private final String addendum;

    /**
     * The field <code>uploadStore</code> contains the underlying store.
     */
    private final @NonNull UploadStore uploadStore;

    /**
     * This is the constructor for the class <code>UploadService</code>.
     *
     * @param config the CTAN configuration
     * @param uploadStore the underlying store
     * @throws IllegalArgumentException in case that the incoming directory or
     *     the addendum is missing in the configuration
     */
    @SuppressFBWarnings(value = {"CT_CONSTRUCTOR_THROW", "EI_EXPOSE_REP2"})
    public UploadService(@NonNull UploadConfig config,
        @NonNull UploadStore uploadStore) {

        this.uploadStore = uploadStore;

        String dir = config.getIncoming();
        if (dir == null) {
            throw new IllegalArgumentException(
                "Missing configuration upload.directory");
        }
        // normalise the directory such that it always ends with a slash
        this.incoming = dir.endsWith("/") ? dir : dir + "/";

        String add = config.getAddendum();
        if (add == null) {
            throw new IllegalArgumentException(
                "Missing configuration upload.addendum");
        }
        this.addendum = add;
    }

    /**
     * The method <code>getAddendum</code> provides means to retrieve the
     * addendum file. The file is parsed as HTML on each invocation.
     *
     * <p>
     * The result contains the text of the title element under the key
     * <code>title</code>, the inner HTML of the body under the key
     * <code>content</code>, and the constant <code>en</code> under the key
     * <code>lang</code>.
     * </p>
     *
     * @return the addendum
     * @throws IOException in case of an I/O error
     */
    public Map<String, String> getAddendum() throws IOException {

        var doc = Jsoup.parse(new File(addendum));
        // The serialised HTML carries the ampersand of the TeX-related
        // entities in escaped form; restore the entity references such that
        // "&amp;TeX;" is delivered as "&TeX;".
        var body = doc.getElementsByTag("body")
            .html()
            .replaceAll(
                "&amp;(TeX|LaTeX|TeXLaTeX|LaTeXTeX|BibTeX|LaTeX2e|LaTeXe);",
                "&$1;");
        return Map.of("title", doc.getElementsByTag("title").text(),
            "content", body,
            "lang", "en");
    }

    /**
     * The method <code>getIncoming</code> provides means to retrieve the list
     * of unprocessed packages. The packages are taken from the incoming
     * directory in the configuration.
     *
     * <p>
     * The list is paged. Thus several segments can be requested. A page beyond
     * the end leads to an empty list of files; the total length is reported
     * nevertheless.
     * </p>
     *
     * <p>
     * Directories starting with a dot are silently ignored. Only directories
     * are taken into account.
     * </p>
     *
     * <p>
     * The names are sorted ignoring case. Names which differ in case only are
     * ordered case-sensitively to keep the paging stable.
     * </p>
     *
     * <p>
     * Note: The parameters <code>query</code>, <code>orderBy</code>, and
     * <code>asc</code> are currently not evaluated. The result is neither
     * filtered nor can the sort order be influenced.
     * </p>
     *
     * @param query the query pattern; currently ignored
     * @param page the current page; it is 0-based and must not be negative
     * @param size the page size; it must be positive
     * @param orderBy the attribute to order by; currently ignored
     * @param asc sort ascending; currently ignored
     *
     * @return the list of current uploads
     * @throws IllegalArgumentException in case that the page is negative or
     *     the size is not positive
     * @throws FileNotFoundException in case that the incoming directory does
     *     not exist
     */
    public IncomingTo getIncoming(String query, long page, long size,
        String orderBy, Boolean asc)
        throws FileNotFoundException {

        if (page < 0L || size <= 0L) {
            throw new IllegalArgumentException(
                "Invalid paging: page=" + page + ", size=" + size);
        }
        var dir = new File(incoming);
        if (!dir.isDirectory()) {
            throw new FileNotFoundException(
                "The incoming directory does not exist");
        }
        // test the name first since it does not need to touch the file system
        var list = dir.listFiles((parent, name) -> !name.startsWith(".")
            && new File(parent, name).isDirectory());
        if (list == null) {
            throw new FileNotFoundException(
                "The incoming directory can not be read");
        }
        var len = list.length;
        // The page is compared to the index of the last page instead of
        // multiplying page and size. The product might overflow for large
        // values and lead to a negative or wrapped offset.
        if (len == 0 || page > (len - 1) / size) {
            list = new File[]{};
        }
        // At this point the list is empty or page * size is less than len.
        // Thus the offset can not overflow.
        var offset = list.length == 0 ? 0L : page * size;
        List<String> files = Arrays.stream(list)
            .map(File::getName)
            .sorted(String.CASE_INSENSITIVE_ORDER
                .thenComparing((a, b) -> a.compareTo(b)))
            .skip(offset)
            .limit(size)
            .collect(Collectors.toList());
        return IncomingTo.builder()
            .files(files)
            .length(len)
            .page(page)
            .size(size)
            .build();
    }

    /**
     * The method <code>getStatistics</code> provides means to retrieve the
     * cached upload statistics. The upload statistics contains the number of
     * uploads per month.
     *
     * <p>
     * The statistics are cached and updated every 30 minutes. The cached map
     * itself is returned; no copy is made.
     * </p>
     *
     * @return the statistics
     */
    @SuppressFBWarnings(value = "EI_EXPOSE_REP")
    public synchronized Map<String, Integer> getStatistics() {

        var now = LocalDateTime.now();
        if (statData == null
            || Duration.between(statDate, now)
                .getSeconds() > STAT_MAX_AGE_SECONDS) {
            statData = uploadStore.getStatistics();
            statDate = now;
        }
        return statData;
    }

    /**
     * The method <code>save</code> provides means to store an entity. The
     * entity is passed to the underlying store.
     *
     * @param upload the entity
     * @return the entity as returned by the store
     */
    public Upload save(Upload upload) {

        return uploadStore.save(upload);
    }
}