IndexingService.java

/*
 * Copyright © 2012-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */

package org.ctan.site.services.search.base;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.ctan.site.CtanConfiguration.CtanConfig;
import org.ctan.site.CtanConfiguration.IndexConfig;
import org.ctan.site.stores.base.IndexingStore;

import lombok.extern.slf4j.Slf4j;

/**
 * The class <code>IndexingService</code> implements a service to maintain the
 * indices used by the full-text search. It can clear and initialise the index
 * directories (one per index type and locale), create index writers, and open
 * indexing sessions.
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Slf4j
public class IndexingService extends IndexingBase {

    /**
     * This is the constructor for <code>IndexingService</code>.
     *
     * @param base the base directory for the search index
     */
    public IndexingService(File base) {

        super(base);
    }

    /**
     * This is the constructor for the class <code>IndexingService</code>.
     *
     * @param config the index configuration
     * @throws FileNotFoundException in case that one of the index directories
     *     does not exist
     */
    public IndexingService(IndexConfig config)
        throws FileNotFoundException {

        super(config);
    }

    /**
     * Remove all entries directly contained in the index directories of the
     * given type. The directories of all locales in
     * {@link CtanConfig#LOCALES} are processed. The directories themselves
     * are kept.
     * <p>
     * A directory which does not exist or can not be listed is skipped and
     * does not influence the result. The deletion is attempted for every
     * entry, even if an earlier one has failed. Note that
     * {@link File#delete()} can not remove a non-empty sub-directory; such an
     * entry is reported as a failure.
     * </p>
     *
     * @param type the index type
     *
     * @return {@code true} iff every entry found could be deleted
     */
    public boolean clearIndexDir(IndexType type) {

        var ret = true;
        for (var locale : CtanConfig.LOCALES) {
            // listFiles() returns null if the path is not a directory or an
            // I/O error occurs; there is nothing to delete in this case
            var files = indexPath(type, locale).toFile().listFiles();
            if (files != null) {
                for (File f : files) {
                    // non-short-circuit on purpose: keep deleting after a
                    // failure and just remember that something went wrong
                    ret &= f.delete();
                }
            }
        }
        return ret;
    }

    /**
     * Create an index writer for the index of the given type and locale. The
     * writer uses {@link CtanAnalyzer#INSTANCE} and is otherwise configured
     * with the defaults of {@link IndexWriterConfig}. The caller is
     * responsible for closing the writer.
     *
     * @param type the index type
     * @param locale the language code
     *
     * @return the new index writer
     *
     * @throws CorruptIndexException in case of a problem with the index
     * @throws IOException in case of an I/O error
     */
    protected IndexWriter createWriter(IndexType type, String locale)
        throws CorruptIndexException,
            IOException {

        return new IndexWriter(open(type, locale),
            new IndexWriterConfig(CtanAnalyzer.INSTANCE));
    }

    // NOTE(review): the following disabled draft is not valid Java (it looks
    // like Groovy syntax) and is kept for reference only.
    // /**
    // * This method retrieves the modification time for a path in an index.
    // *
    // * @param type the index type
    // * @param path the URL path
    // *
    // * @return the date of last modification or 0
    // */
    // long getLastModified(IndexType type, String path) {
    //
    // long lastModified = 0l
    // Directory directory = FSDirectory.open(getIndexDir(type))
    // try {
    // IndexSearcher searcher = new IndexSearcher(directory)
    // Query query = new TermQuery(new Term(Fields.PATH, path))
    // TopDocs docs = searcher.search(query, 1)
    // def d = docs.scoreDocs
    // if (d.length() >= 1) {
    // Document doc = searcher.doc(d[0].doc)
    // lastModified = doc.getField(Fields.MTIME).numericValue().longValue()
    // }
    // } finally {
    // directory.close()
    // }
    // }

    /**
     * The method <code>indexingSession</code> provides means to open a new
     * indexing session. The session is created for the index base directory
     * of this service.
     *
     * @return the new indexing session
     * @throws FileNotFoundException if the file is not found
     */
    public IndexingSession indexingSession() throws FileNotFoundException {

        return new IndexingSession(getIndexBase());
    }

    /**
     * Initialise the index directories of one index type. For every locale in
     * {@link CtanConfig#LOCALES} the directory is created if necessary and an
     * index writer is opened and closed immediately. Existing indices are
     * left unchanged.
     *
     * @param type the index type
     * @throws IOException in case of an I/O error, especially if an index
     *     directory can not be created
     * @throws CorruptIndexException in case of an error
     */
    private void initializeIndex(IndexType type)
        throws CorruptIndexException,
            IOException {

        log.info("Initializing {}", type);

        for (var locale : CtanConfig.LOCALES) {
            File dir = indexPath(type, locale).toFile();
            // mkdirs() returns false if the directory already exists; this
            // is only an error if there is no directory afterwards
            if (!dir.mkdirs() && !dir.isDirectory()) {
                throw new IOException(
                    "Cannot create index directory: " + dir);
            }
            // Opening and closing a writer is meant to leave a valid (maybe
            // empty) index behind; this relies on the default open mode of
            // IndexWriterConfig, which appends to an existing index.
            createWriter(type, locale).close();
        }
    }

    /**
     * This method iterates through all index types and creates the associated
     * index. Afterwards each of the given stores is asked to update its index.
     *
     * @param stores the array of stores to use for indexing
     * @throws IOException in case of an I/O error
     * @throws CorruptIndexException in case of an error
     */
    public void initializeIndices(IndexingStore... stores)
        throws CorruptIndexException,
            IOException {

        log.info("Creating index in {}", getIndexBase());
        // try (var session = indexingSession()) {
        for (IndexType type : IndexType.values()) {
            initializeIndex(type);
        }

        for (var store : stores) {
            store.updateIndex();
        }
        // }
    }

}