// IndexingSession.java

/*
 * Copyright © 2024-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.services.search.base;

import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import jakarta.validation.constraints.NotNull;
import lombok.Builder;
import lombok.Builder.Default;
import lombok.Getter;
import lombok.NonNull;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;

/**
 * The class <code>IndexingSession</code> contains the session which combines
 * various update operations on the search indices.
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Slf4j
@SuppressFBWarnings(value = "EI_EXPOSE_REP")
public class IndexingSession extends IndexingBase implements Closeable {

    /**
     * The class <code>IndexArgs</code> contains the transport object for
     * passing in indexing arguments. The getters and the builder are
     * generated by Lombok. Fields which are not given to the builder keep
     * their Java default (<code>null</code> or <code>false</code>), except
     * for <code>locale</code> which has an explicit builder default.
     */
    @Getter
    @Builder
    public static class IndexArgs {

        /**
         * The field <code>locale</code> contains the two letter locale. It may
         * have the values "en" or "de". The default is "en". Together with the
         * type it is used by <code>updateIndex</code> to select the index
         * writer.
         */
        @Setter
        @Default
        private String locale = "en";

        /**
         * The field <code>title</code> contains the title of the index item.
         * It is indexed in the default field and stored in the title field. A
         * missing title is treated as the empty string by
         * <code>updateIndex</code>.
         */
        private String title;

        /**
         * The field <code>modified</code> contains the modification time stamp.
         * If it is <code>null</code> then <code>updateIndex</code> uses the
         * current time in milliseconds instead; presumably a given value is
         * expected in the same unit (to be confirmed against the callers).
         */
        private Long modified;

        /**
         * The field <code>content</code> contains the content text. Each
         * element is added to the default field of the document as a separate
         * value which is indexed but not stored.
         */
        private String[] content;

        /**
         * The field <code>tags</code> contains the tags to add. They are
         * stored in the tags field of the document.
         */
        private String tags;

        /**
         * The field <code>topics</code> contains the list of topics. They are
         * space separated. Note that <code>updateIndex</code> adds them to the
         * tags field of the document as well.
         */
        private String topics;

        /**
         * The field <code>pkg</code> contains the package. It is stored in the
         * package field with a trailing <code>$</code> appended.
         */
        private String pkg;

        /**
         * The field <code>type</code> contains the index type. It selects the
         * index to write to. Without a type <code>updateIndex</code> does not
         * write anything.
         */
        private IndexType type;

        /**
         * The field <code>display</code> contains the string to display. If it
         * is <code>null</code> then no display field is added to the document.
         */
        private String display;

        /**
         * The field <code>clipDisplay</code> contains the indicator whether the
         * display should be clipped. If set then the display is cut before the
         * first occurrence of a period followed by a space, and after at most
         * 128 characters, and <code>...</code> is appended.
         */
        private boolean clipDisplay;
    }

    /**
     * The field <code>writers</code> contains the cache of the index writers
     * opened by this session. The key is the index name as returned by
     * <code>directory(type, locale)</code>. The map itself is never replaced;
     * entries are added lazily by <code>getWriter</code>.
     */
    private final Map<String, IndexWriter> writers = new HashMap<>();

    /**
     * This is the constructor for <code>IndexingSession</code>. It only
     * passes the base directory on to the constructor of the super-class.
     * Index writers are not opened here; they are created lazily by
     * <code>getWriter</code>.
     *
     * @param indexBase the base directory; it must not be <code>null</code>
     * @throws FileNotFoundException in case of an improper directory
     */
    // The constructor may throw, which SpotBugs reports as
    // CT_CONSTRUCTOR_THROW. This warning is suppressed on purpose.
    @SuppressFBWarnings(value = "CT_CONSTRUCTOR_THROW")
    public IndexingSession(@NonNull File indexBase)
        throws FileNotFoundException {

        super(indexBase);
    }

    /**
     * {@inheritDoc}
     * <p>
     * All cached index writers are committed and closed. A failure of one
     * writer does not prevent the remaining writers from being closed: the
     * first exception is re-thrown after all writers have been processed and
     * any further exceptions are attached to it as suppressed exceptions.
     * </p>
     * <p>
     * The cache is emptied afterwards. Thus a repeated invocation has no
     * effect, and a later request for a writer opens a fresh one instead of
     * handing out a closed one.
     * </p>
     *
     * @throws IOException in case of an I/O error during commit or close
     * @see java.io.Closeable#close()
     */
    @Override
    public void close() throws IOException {

        Exception failure = null;
        for (IndexWriter cached : writers.values()) {
            // try-with-resources guarantees the writer is closed even when
            // the commit fails; a secondary close failure is suppressed
            // into the commit failure.
            try (IndexWriter writer = cached) {
                writer.commit();
            } catch (IOException | RuntimeException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        // Drop the references to the closed writers so that they can
        // neither be closed twice nor be reused.
        writers.clear();

        if (failure instanceof IOException) {
            throw (IOException) failure;
        }
        if (failure != null) {
            // Only IOException and RuntimeException are collected above.
            throw (RuntimeException) failure;
        }
    }

    /**
     * The method <code>getWriter</code> looks up the index writer for a type
     * and a locale. Writers are kept in a cache keyed by the index name as
     * computed by <code>directory(type, locale)</code>. On a cache miss a new
     * writer using the <code>CtanAnalyzer</code> is opened, remembered, and
     * returned.
     *
     * @param type the type
     * @param locale the locale
     * @return the cached or a new index writer
     * @throws IOException in case of an I/O error
     */
    private IndexWriter getWriter(IndexType type, String locale)
        throws IOException {

        final String key = directory(type, locale);
        final IndexWriter cached = writers.get(key);
        if (cached != null) {
            return cached;
        }

        // Cache miss: open the index first, then create the configuration.
        var target = open(key);
        var config = new IndexWriterConfig(CtanAnalyzer.INSTANCE);
        final IndexWriter created = new IndexWriter(target, config);
        writers.put(key, created);
        return created;
    }

    /**
     * The method <code>remove</code> deletes an item from the search index.
     * All documents whose path field equals the given key are deleted via the
     * writer for the given index type and locale.
     *
     * @param indexType the index type
     * @param indexPath the key for identifying the indexed item
     * @param locale the locale
     * @throws IOException in case of an I/O error
     */
    public void remove(@NotNull IndexType indexType,
        @NotNull String indexPath,
        @NotNull String locale)
        throws IOException {

        getWriter(indexType, locale)
            .deleteDocuments(new Term(Fields.PATH, indexPath));
    }

    /**
     * The method <code>updateIndex</code> provides means to update the search
     * index. A document is assembled from the arguments and written with the
     * path as key into the index selected by the type and the locale of the
     * arguments. A document previously written for the same path is replaced.
     * <p>
     * The argument object is not modified. A missing title is indexed as the
     * empty string and reported in the log. A missing modification time
     * defaults to the current time. Missing content and <code>null</code>
     * content elements are skipped. If no index type is given then nothing is
     * written.
     * </p>
     *
     * @param path the URL of the item
     * @param args the arguments
     *
     * @throws IOException in case of an I/O error
     */
    public void updateIndex(@NotNull String path, @NotNull IndexArgs args)
        throws IOException {

        // Defaults are applied to local copies to keep the caller's
        // argument object untouched.
        String title = args.title;
        if (title == null) {
            title = "";
            log.warn("{} has no title", path);
        }
        final Long stamp = args.modified;
        final long modified =
            stamp == null ? new Date().getTime() : stamp.longValue();

        // Without a type there is no index to write to. Bail out before
        // the document is assembled.
        final IndexType type = args.type;
        if (type == null) {
            log.warn("{} has no index type and is not indexed", path);
            return;
        }

        var doc = new Document();
        if (args.content != null) {
            for (String text : args.content) {
                if (text != null) {
                    doc.add(new TextField(Fields.DEFAULT, text, Store.NO));
                }
            }
        }
        if (args.tags != null) {
            doc.add(new TextField(Fields.TAGS, args.tags, Store.YES));
        }
        if (args.topics != null) {
            // NOTE(review): the topics go into the TAGS field as well;
            // confirm that this is intended and not a copy-and-paste slip.
            // The former index-time boost (4.0f) is disabled.
            doc.add(new TextField(Fields.TAGS, args.topics, Store.YES));
        }
        if (args.pkg != null) {
            // The package is stored with a trailing '$'; presumably an end
            // marker for matching (to be confirmed against the search
            // side). The former index-time boost (64.0f) is disabled.
            doc.add(new TextField(Fields.PKG, args.pkg + '$', Store.YES));
        }
        // Make the path searchable without everything from its first dot
        // onwards.
        doc.add(new TextField(Fields.DEFAULT,
            path.replaceAll("\\..*$", ""), Store.NO));
        doc.add(new TextField(Fields.DEFAULT, title, Store.NO));
        // The former index-time boost (2.0f) of the title is disabled.
        doc.add(new TextField(Fields.TITLE, title, Store.YES));
        doc.add(new StringField(Fields.PATH, path, Store.YES));

        String display = args.display;
        if (display != null) {
            if (args.clipDisplay) {
                display = clipDisplayText(display);
            }
            doc.add(new TextField(Fields.DISPLAY, display, Store.YES));
        }
        doc.add(new LongField(Fields.MTIME, modified, Store.YES));
        doc.add(new TextField(Fields.TYPE, type.getKey(), Store.YES));

        getWriter(type, args.locale)
            .updateDocument(new Term(Fields.PATH, path), doc);
    }

    /**
     * The method <code>clipDisplayText</code> shortens a display text. The
     * text is cut before the first period which is followed by a space, and
     * after at most 128 characters. The cut never separates the two halves
     * of a surrogate pair. The ellipsis <code>...</code> is always appended,
     * even if nothing has been cut off.
     *
     * @param display the text to clip; not <code>null</code>
     * @return the clipped text followed by <code>...</code>
     */
    private static String clipDisplayText(String display) {

        final int limit = 128;
        int end = display.indexOf(". ");
        if (end < 0) {
            end = display.length();
        }
        end = Math.min(end, limit);
        // Cutting between a high and a low surrogate would leave a broken
        // character at the end of the stored text.
        if (end > 0 && end < display.length()
            && Character.isHighSurrogate(display.charAt(end - 1))
            && Character.isLowSurrogate(display.charAt(end))) {
            end--;
        }
        return display.substring(0, end) + "...";
    }
}