// IndexingSession.java
/*
* Copyright © 2024-2025 The CTAN Team and individual authors
*
* This file is distributed under the 3-clause BSD license.
* See file LICENSE for details.
*/
package org.ctan.site.services.search.base;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import jakarta.validation.constraints.NotNull;
import lombok.Builder;
import lombok.Builder.Default;
import lombok.Getter;
import lombok.NonNull;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
/**
 * The class <code>IndexingSession</code> contains the session which combines
 * various update operations on the search indices.
 * <p>
 * One {@link IndexWriter} is created lazily per index and cached for the
 * lifetime of the session. All cached writers are committed and closed when
 * the session is closed.
 * </p>
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Slf4j
@SuppressFBWarnings(value = "EI_EXPOSE_REP")
public class IndexingSession extends IndexingBase implements Closeable {

    /**
     * The class <code>IndexArgs</code> contains the transport object for
     * passing in indexing arguments.
     */
    @Getter
    @Builder
    public static class IndexArgs {

        /**
         * The field <code>locale</code> contains the two letter locale. It may
         * have the values "en" or "de". The default is "en".
         */
        @Setter
        @Default
        private String locale = "en";

        /**
         * The field <code>title</code> contains the title of the index item.
         */
        private String title;

        /**
         * The field <code>modified</code> contains the modification time
         * stamp.
         */
        private Long modified;

        /**
         * The field <code>content</code> contains the content text.
         */
        private String[] content;

        /**
         * The field <code>tags</code> contains the tags to add.
         */
        private String tags;

        /**
         * The field <code>topics</code> contains the list of topics. They are
         * space separated.
         */
        private String topics;

        /**
         * The field <code>pkg</code> contains the package.
         */
        private String pkg;

        /**
         * The field <code>type</code> contains the index type.
         */
        private IndexType type;

        /**
         * The field <code>display</code> contains the string to display.
         */
        private String display;

        /**
         * The field <code>clipDisplay</code> contains the indicator whether
         * the display should be clipped.
         */
        private boolean clipDisplay;
    }

    /**
     * The constant <code>MAX_DISPLAY_LENGTH</code> contains the maximal number
     * of characters kept from the display text when it is clipped.
     */
    private static final int MAX_DISPLAY_LENGTH = 128;

    /**
     * The field <code>writers</code> contains the cache of open index
     * writers. The key is the index name computed by
     * <code>directory(type, locale)</code>.
     */
    private final Map<String, IndexWriter> writers = new HashMap<>();

    /**
     * This is the constructor for <code>IndexingSession</code>.
     *
     * @param indexBase the base directory
     * @throws FileNotFoundException in case of an improper directory
     */
    @SuppressFBWarnings(value = "CT_CONSTRUCTOR_THROW")
    public IndexingSession(@NonNull File indexBase)
            throws FileNotFoundException {

        super(indexBase);
    }

    /**
     * Commit and close all index writers which have been opened during this
     * session.
     * <p>
     * Every cached writer is closed even if committing or closing another one
     * fails. The first {@link IOException} encountered is thrown after all
     * writers have been processed; later ones are attached to it as
     * suppressed exceptions. The cache is emptied afterwards, thus calling
     * this method again has no effect.
     * </p>
     *
     * @throws IOException in case of an I/O error while committing or closing
     *     one of the writers
     * @see java.io.Closeable#close()
     */
    @Override
    public void close() throws IOException {

        IOException failure = null;
        try {
            for (IndexWriter writer : writers.values()) {
                // try-with-resources guarantees close() even if commit()
                // throws
                try (IndexWriter closing = writer) {
                    closing.commit();
                } catch (IOException e) {
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }
        } finally {
            // never hand out or re-commit a writer which has been closed
            writers.clear();
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * The method <code>getWriter</code> provides means to get an index writer.
     * The index writers are cached. Thus a cached value is returned if one
     * exists. Otherwise a new one is created and added to the cache.
     *
     * @param type the type
     * @param locale the locale
     * @return the cached or a new index writer
     * @throws IOException in case of an I/O error
     */
    private IndexWriter getWriter(IndexType type, String locale)
            throws IOException {

        String index = directory(type, locale);
        IndexWriter writer = writers.get(index);
        if (writer == null) {
            writer = new IndexWriter(
                open(index),
                new IndexWriterConfig(CtanAnalyzer.INSTANCE));
            writers.put(index, writer);
        }
        return writer;
    }

    /**
     * The method <code>remove</code> provides means to remove an item from the
     * search index. All documents whose path field equals the given key are
     * deleted from the index for the given type and locale.
     *
     * @param indexType the index type
     * @param indexPath the key for identifying the indexed item
     * @param locale the locale
     * @throws IOException in case of an I/O error
     */
    public void remove(@NotNull IndexType indexType,
        @NotNull String indexPath,
        @NotNull String locale)
            throws IOException {

        getWriter(indexType, locale)
            .deleteDocuments(new Term(Fields.PATH, indexPath));
    }

    /**
     * The method <code>updateIndex</code> provides means to update the search
     * index. A document is assembled from the arguments and stored in the
     * index for the type and locale given in the arguments. It replaces any
     * document stored under the same path.
     * <p>
     * A missing title is replaced by the empty string and a missing
     * modification time by the current time. Both defaults are written back
     * into <code>args</code>. If the arguments carry no index type then
     * nothing is indexed. A missing content array and <code>null</code>
     * content entries are ignored.
     * </p>
     *
     * @param path the URL of the item
     * @param args the arguments
     *
     * @throws IOException in case of an I/O error
     */
    public void updateIndex(@NotNull String path, @NotNull IndexArgs args)
            throws IOException {

        if (args.title == null) {
            args.title = "";
            log.warn("{} has no title", path);
        }
        if (args.modified == null) {
            args.modified = new Date().getTime();
        }
        IndexType type = args.type;
        if (type == null) {
            // without a type there is no index to write to; bail out before
            // assembling the document
            log.warn("{} has no index type; it is not indexed", path);
            return;
        }

        var doc = new Document();
        if (args.content != null) {
            for (String text : args.content) {
                if (text != null) {
                    doc.add(new TextField(Fields.DEFAULT, text, Store.NO));
                }
            }
        }
        if (args.tags != null) {
            doc.add(new TextField(Fields.TAGS, args.tags, Store.YES));
        }
        if (args.topics != null) {
            // NOTE(review): topics are stored in the TAGS field, just like
            // the tags -- confirm that this is intended
            var field = new TextField(Fields.TAGS, args.topics, Store.YES);
            // field.setBoost(4.0f);
            doc.add(field);
        }
        if (args.pkg != null) {
            // the package name is indexed with a trailing '$' appended
            var field = new TextField(Fields.PKG, args.pkg + '$', Store.YES);
            // field.setBoost(64.0f);
            doc.add(field);
        }
        // the path up to the first dot is searchable via the default field
        doc.add(new TextField(Fields.DEFAULT,
            path.replaceAll("\\..*$", ""), Store.NO));
        doc.add(new TextField(Fields.DEFAULT, args.title, Store.NO));
        var titleField = new TextField(Fields.TITLE, args.title, Store.YES);
        // field.setBoost(2.0f)
        doc.add(titleField);
        doc.add(new StringField(Fields.PATH, path, Store.YES));

        String display = args.display;
        if (display != null) {
            if (args.clipDisplay) {
                display = clip(display);
            }
            doc.add(new TextField(Fields.DISPLAY, display, Store.YES));
        }
        doc.add(new LongField(Fields.MTIME, args.modified, Store.YES));
        // doc.add(new StringField(Fields.LOCALE, args.locale, Store.YES));
        doc.add(new TextField(Fields.TYPE, type.getKey(), Store.YES));

        getWriter(type, args.locale)
            .updateDocument(new Term(Fields.PATH, path), doc);
    }

    /**
     * The method <code>clip</code> shortens a display text. The text is cut
     * before the first occurrence of ". " or after
     * {@link #MAX_DISPLAY_LENGTH} characters, whichever comes first. An
     * ellipsis "..." is always appended.
     *
     * @param display the text to clip
     * @return the clipped text followed by "..."
     */
    private static String clip(String display) {

        int end = display.indexOf(". ");
        if (end < 0) {
            end = display.length();
        }
        if (end > MAX_DISPLAY_LENGTH) {
            end = MAX_DISPLAY_LENGTH;
            // do not cut a surrogate pair in half
            if (Character.isHighSurrogate(display.charAt(end - 1))
                    && Character.isLowSurrogate(display.charAt(end))) {
                end--;
            }
        }
        return display.substring(0, end) + "...";
    }
}