ArchiveFilesUpdateService.java

/*
 * Copyright © 2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */

package org.ctan.site.services.texarchive;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.ctan.site.CtanConfiguration;
import org.ctan.site.CtanConfiguration.TexArchiveConfig;
import org.ctan.site.domain.archive.ArchiveFile;
import org.ctan.site.domain.archive.ArchiveFile.FileType;
import org.ctan.site.services.upload.util.archive.Archive;
import org.ctan.site.services.upload.util.archive.Archive.Entry;
import org.ctan.site.stores.ArchiveFileStore;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import minitex.InsParser;
import minitex.MiniTexParser;

/**
 * The class <code>ArchiveFilesUpdateService</code> contains the service for
 * scanning the TeX archive directory and updating the database and search index
 * accordingly.
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Slf4j
public class ArchiveFilesUpdateService {

    /**
     * The field <code>UTF8</code> contains the character set used to read
     * TeX sources.
     */
    private static final Charset UTF8 = StandardCharsets.UTF_8;

    /**
     * The field <code>base</code> contains the base directory. It is kept in
     * canonical form such that the symlink detection in the traversal compares
     * like with like.
     */
    private final File base;

    /**
     * The field <code>strip</code> contains the length of the initial segment
     * to be stripped, i.e. the length of the path of the base directory.
     */
    private final int strip;

    /**
     * The field <code>store</code> contains the store.
     */
    private final ArchiveFileStore store;

    /**
     * This is the constructor for <code>ArchiveFilesUpdateService</code>.
     *
     * @param cfg the configuration
     * @param store the store
     */
    @SuppressFBWarnings(value = "EI_EXPOSE_REP2")
    public ArchiveFilesUpdateService(CtanConfiguration cfg,
        ArchiveFileStore store) {

        this.store = store;
        TexArchiveConfig texArchive = cfg.getTexArchive();
        this.base = canonical(new File(texArchive.getDirectory()));
        // All files visited later are derived from base via listFiles(), thus
        // their getPath() starts with exactly this prefix.
        this.strip = base.getPath().length();
    }

    /**
     * The method <code>canonical</code> provides means to resolve a directory
     * to its canonical form. If this fails then the absolute form is used
     * instead.
     *
     * @param dir the directory
     * @return the canonical or at least absolute file
     */
    private static File canonical(File dir) {

        try {
            return dir.getCanonicalFile();
        } catch (IOException e) {
            log.debug("Cannot canonicalise {}: {}", dir, e.getMessage());
            return dir.getAbsoluteFile();
        }
    }

    /**
     * The method <code>addUpdate</code> provides means to save or update an
     * archive file from a file.
     *
     * @param archiveFile the archive file or <code>null</code> to create one
     * @param file the file
     * @param type the type; it is used for a newly created archive file only
     */
    private void addUpdate(ArchiveFile archiveFile, File file,
        FileType type) {

        addUpdate(archiveFile,
            file.getName(),
            file.getPath().substring(strip), type,
            file.lastModified());
    }

    /**
     * The method <code>addUpdate</code> provides means to save or update an
     * archive file from attributes. An existing archive file gets the new
     * modification time only; name, path, and type are used when a new archive
     * file has to be created.
     *
     * @param archiveFile the archive file or <code>null</code> to create one
     * @param name the name
     * @param path the path
     * @param type the type
     * @param mtime the modification time
     */
    private void addUpdate(ArchiveFile archiveFile, String name, String path,
        FileType type,
        long mtime) {

        if (archiveFile == null) {
            archiveFile = ArchiveFile.builder()
                .type(type)
                .name(name)
                .path(path)
                .mtime(mtime)
                .build();
        } else {
            archiveFile.setMtime(mtime);
        }
        store.save(archiveFile);
    }

    /**
     * The method <code>find</code> provides means to find an element in a list
     * with a given name.
     *
     * @param list the list to search in
     * @param name the name to find
     * @return the element found or <code>null</code>
     */
    private ArchiveFile find(List<ArchiveFile> list, String name) {

        for (var it : list) {
            // compare from the known name to tolerate entries without a name
            if (name.equals(it.getName())) {
                return it;
            }
        }
        return null;
    }

    /**
     * The method <code>isArchiveName</code> provides means to check whether a
     * file name carries one of the archive extensions.
     *
     * @param name the file name
     * @return <code>true</code> iff the name ends in .zip, .tgz, or .tar.gz
     */
    private static boolean isArchiveName(String name) {

        return name.endsWith(".zip")
            || name.endsWith(".tgz")
            || name.endsWith(".tar.gz");
    }

    /**
     * The method <code>isSymlink</code> provides means to check whether a file
     * should be skipped because its canonical path differs from its absolute
     * path. A file for which the canonical path can not be determined is
     * skipped as well.
     *
     * @param file the file to check
     * @return <code>true</code> iff the file should be ignored
     */
    private static boolean isSymlink(File file) {

        try {
            return !file.getCanonicalPath().equals(file.getAbsolutePath());
        } catch (IOException e) {
            log.debug("Skipping unresolvable file {}: {}",
                file.getAbsolutePath(), e.getMessage());
            return true;
        }
    }

    /**
     * The method <code>keep</code> provides means to strike an entry from a
     * list of candidates for deletion. The entry is matched by equality and by
     * its path.
     *
     * @param orphans the candidates for deletion
     * @param entry the entry which is still present
     * @param path the path of the entry
     */
    private static void keep(List<ArchiveFile> orphans, ArchiveFile entry,
        String path) {

        orphans.remove(entry);
        orphans.removeIf(it -> path.equals(it.getPath()));
    }

    /**
     * The method <code>removeDirectory</code> provides means to delete a
     * directory from the archive file database table. The entries found in
     * the store under the path of the directory are removed as well.
     *
     * @param dir the directory to be removed
     */
    private void removeDirectory(ArchiveFile dir) {

        store.remove(dir);
        for (var it : store.findAllByPath(dir.getPath())) {
            store.remove(it);
        }
    }

    /**
     * The method <code>typeOf</code> provides means to determine the type
     * under which a file is recorded.
     *
     * @param file the file
     * @return the type for the file
     */
    private static FileType typeOf(File file) {

        if (file.isDirectory()) {
            return FileType.DIRECTORY;
        }
        return isArchiveName(file.getName())
            ? FileType.ARCHIVE
            : FileType.FILE;
    }

    /**
     * The method <code>update</code> provides means to traverse the directory
     * tree and update the database accordingly. Directories and generated
     * files known to the store which have not been encountered during the
     * traversal are removed afterwards.
     *
     * @throws IOException in case of an I/O error
     */
    public void update() throws IOException {

        // Work on copies since the traversal strikes entries from the lists.
        List<ArchiveFile> directories =
            new ArrayList<>(store.findAllByType(FileType.DIRECTORY));
        List<ArchiveFile> generated =
            new ArrayList<>(store.findAllByType(FileType.GENERATED));

        update(base, directories, generated);

        for (var del : directories) {
            removeDirectory(del);
        }
        store.remove(generated);
    }

    /**
     * The method <code>update</code> provides means to update a certain
     * directory. Dot files and symlinks are ignored. Entries of the store
     * which have no counterpart in the directory are removed.
     *
     * @param dir the directory to update
     * @param directories the known directories not seen yet; any directory
     *     encountered is removed from this list
     * @param generated the known generated files not confirmed yet; the
     *     entries belonging to an encountered ins file are removed from this
     *     list
     * @throws FileNotFoundException in case that the directory can not be
     *     listed
     */
    private void update(@NonNull File dir, List<ArchiveFile> directories,
        List<ArchiveFile> generated)
        throws FileNotFoundException {

        String path = dir.toString().substring(strip);
        List<ArchiveFile> toDelete = new ArrayList<>(store.findAllByPath(path));

        File[] listFiles = dir.listFiles();
        if (listFiles == null) {
            throw new FileNotFoundException(dir.getPath());
        }
        for (File file : listFiles) {
            String name = file.getName();
            // ignore dot files and symlinks
            if (name.startsWith(".") || isSymlink(file)) {
                continue;
            }

            String filePath = file.getPath().substring(strip);
            FileType type = typeOf(file);
            long mtime = file.lastModified();

            ArchiveFile archiveFile = find(toDelete, name);
            boolean changed = true;
            if (archiveFile == null) {
                // The type has to be correct right here since addUpdate()
                // does not touch the type of an existing instance.
                archiveFile = ArchiveFile.builder()
                    .type(type)
                    .name(name)
                    .path(filePath)
                    .mtime(mtime)
                    .build();
            } else {
                toDelete.remove(archiveFile);
                changed = archiveFile.getMtime() != mtime;
                // NOTE(review): an existing entry keeps its stored type, even
                // if it was recorded with another type earlier -- confirm
                // whether a migration is needed.
            }

            try {
                if (type == FileType.DIRECTORY) {
                    // The directory exists, thus it must not be deleted --
                    // whether or not its modification time has changed.
                    keep(directories, archiveFile, filePath);
                    if (changed) {
                        addUpdate(archiveFile, file, FileType.DIRECTORY);
                    }
                    // Always descend: the modification time of a directory
                    // does not reflect changes further down the tree.
                    update(file, directories, generated);
                } else if (file.isFile()) {
                    if (name.endsWith(".ins")) {
                        // The generated files are stored under the path of
                        // the ins file followed by '#'. They stay alive as
                        // long as the ins file exists; stale ones are removed
                        // by updateIns().
                        String generatedPath = filePath + "#";
                        generated.removeIf(
                            it -> generatedPath.equals(it.getPath()));
                    }
                    if (changed) {
                        updateFile(file, archiveFile, type);
                    }
                }
            } catch (IOException e) {
                log.warn("Problem analysing file: {} {}",
                    file.getAbsolutePath(),
                    e.getMessage());
            }
        }

        store.remove(toDelete);
    }

    /**
     * The method <code>updateFile</code> provides means to analyse a new or
     * modified regular file and save it.
     *
     * @param file the current file
     * @param archiveFile the archive file
     * @param type the type determined for the file
     * @throws IOException in case of an I/O error
     */
    private void updateFile(File file, ArchiveFile archiveFile, FileType type)
        throws IOException {

        if (type == FileType.ARCHIVE) {
            addUpdate(archiveFile, file, FileType.ARCHIVE);
            updateArchive(file, archiveFile);
            return;
        }

        String name = file.getName();
        int i = name.lastIndexOf('.');
        switch (i >= 0 ? name.substring(i) : "") {
            case ".dtx" -> updateDtx(file, archiveFile);
            case ".ins" -> updateIns(file, archiveFile);
            case ".latex", ".LaTeX", ".ltx", ".tex", ".TeX" ->
                updateTex(file, archiveFile);
            case ".pdf" -> updatePdf(file, archiveFile);
            default -> {
                // nothing to analyse
            }
        }

        // Save after the analysis such that the extracted title is stored.
        addUpdate(archiveFile, file, FileType.FILE);
    }

    /**
     * The method <code>updateArchive</code> provides means to analyse an
     * archive file and update the search index accordingly. Currently the
     * entries are traversed only.
     *
     * @param file the current file
     * @param archiveFile the archive file
     * @throws IOException in case of an I/O error
     */
    private void updateArchive(File file, ArchiveFile archiveFile)
        throws IOException {

        try (var in = new FileInputStream(file)) {
            Archive arc = Archive.of(file.getName(), in);
            if (arc == null) {
                // presumably the format is not supported; nothing to traverse
                log.warn("Cannot read archive: {}", file.getAbsolutePath());
                return;
            }
            for (Entry entry = arc.getNextEntry(); entry != null; entry =
                arc.getNextEntry()) {
                // TODO index the entry
            }
        }
    }

    /**
     * The method <code>updateDtx</code> provides means to analyse a dtx file
     * and update the search index accordingly. It is not implemented yet.
     *
     * @param file the current file
     * @param archiveFile the archive file
     */
    private void updateDtx(File file, ArchiveFile archiveFile) {

        // TODO Auto-generated method stub
    }

    /**
     * The method <code>updateIns</code> provides means to analyse an ins file
     * and update the generated files in the store accordingly. Generated
     * files are stored under the path of the ins file followed by '#'. Stored
     * generated files which are not reported by the parser any more are
     * removed.
     *
     * @param file the current file
     * @param archiveFile the archive file
     * @throws FileNotFoundException in case that the file can not be opened
     * @throws IOException in case of an I/O error
     */
    private void updateIns(File file, ArchiveFile archiveFile)
        throws FileNotFoundException,
            IOException {

        String prefix = file.getPath().substring(strip) + "#";
        List<String> generated;
        try (var in = new FileInputStream(file);
                var reader = new InputStreamReader(in, UTF8)) {
            generated = new InsParser().parse(prefix, reader);
        }

        List<ArchiveFile> toDelete =
            new ArrayList<>(store.findAllByPath(prefix));
        long lastModified = file.lastModified();

        for (var gen : generated) {
            String name = gen.substring(gen.lastIndexOf('/') + 1);
            ArchiveFile af = find(toDelete, name);
            toDelete.remove(af);

            addUpdate(af,
                name,
                gen.substring(0, gen.lastIndexOf('#') + 1),
                FileType.GENERATED,
                lastModified);
        }

        store.remove(toDelete);
    }

    /**
     * The method <code>updatePdf</code> provides means to analyse a PDF file
     * and transfer the title from the document information to the archive
     * file. The archive file is not saved here; this is left to the caller.
     *
     * @param file the current file
     * @param archiveFile the archive file
     * @throws IOException in case of an I/O error
     */
    private void updatePdf(File file, ArchiveFile archiveFile)
        throws IOException {

        try (PDDocument doc = Loader.loadPDF(file)) {
            PDDocumentInformation info = doc.getDocumentInformation();
            archiveFile.setTitle(info.getTitle());
        }
    }

    /**
     * The method <code>updateTex</code> provides means to analyse a TeX file
     * and transfer the title found to the archive file. The archive file is
     * not saved here; this is left to the caller.
     *
     * @param file the current file
     * @param archiveFile the archive file
     * @throws IOException in case of an I/O error
     */
    private void updateTex(File file, ArchiveFile archiveFile)
        throws IOException {

        Map<String, List<String>> map;
        try (var reader = new FileReader(file, UTF8)) {
            map = new MiniTexParser(reader).parse();
        }

        List<String> title = map.get("title");
        if (title != null) {
            archiveFile.setTitle(String.join("", title));
        }
    }

}