// SearchResource.java
/*
* Copyright © 2024-2025 The CTAN Team and individual authors
*
* This file is distributed under the 3-clause BSD license.
* See file LICENSE for details.
*/
package org.ctan.site.resources.catalogue.api;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import org.apache.lucene.queryparser.classic.ParseException;
import org.ctan.site.services.search.QueryContainer;
import org.ctan.site.services.search.QueryContainer.HitInfo;
import org.ctan.site.services.search.SearchService;
import org.ctan.site.services.search.base.IndexType;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import jakarta.annotation.security.PermitAll;
import jakarta.ws.rs.DefaultValue;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.PathParam;
import jakarta.ws.rs.Produces;
import jakarta.ws.rs.QueryParam;
import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response.Status;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;
/**
* The class <code>SearchResource</code> contains the controller for the search
* resource.
*
* @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
*/
@Path("/")
public class SearchResource {
/**
* The class <code>HitsPage</code> contains the transport object for one page
* of search results. It carries the paging information, the search phrase,
* and the hits of the page.
*
* The getters and the builder are generated by Lombok. The list of hits is
* handed out by its getter without a defensive copy; the corresponding
* SpotBugs warning (EI_EXPOSE_REP) is suppressed.
*/
@Getter
@Builder
@SuppressFBWarnings(value = "EI_EXPOSE_REP")
protected static class HitsPage {
/**
* The field <code>numberOfHits</code> contains the total number of
* hits.
*/
private long numberOfHits;
/**
* The field <code>offset</code> contains the offset of the first hit.
*/
private long offset;
/**
* The field <code>max</code> contains the page size.
*/
private long max;
/**
* The field <code>phrase</code> contains the search phrase of the query.
*/
private String phrase;
/**
* The field <code>hits</code> contains the list of hits in the page.
*/
private List<HitTo> hits;
}
/**
* The class <code>HitTo</code> contains the transport object for a single
* search hit.
*
* The getters and the builder are generated by Lombok.
*/
@Getter
@Builder
protected static class HitTo {
/**
* The field <code>title</code> contains the title of the hit.
*/
private String title;
/**
* The field <code>path</code> contains the path of the hit.
*/
private String path;
/**
* The field <code>text</code> contains the text of the hit. It is filled
* from the display value of the hit.
*/
private String text;
}
/**
 * The field <code>service</code> contains the underlying search service.
 * It is assigned once in the constructor and never changed afterwards.
 */
private final SearchService service;

/**
 * This is the constructor for the class <code>SearchResource</code>.
 *
 * @param service the underlying search service; a {@code null} value is
 *     rejected by the null check generated for {@code @NonNull}
 */
@SuppressFBWarnings(value = {"CT_CONSTRUCTOR_THROW", "EI_EXPOSE_REP2"})
public SearchResource(@NonNull SearchService service) {
    this.service = service;
}
/**
 * The method <code>collectSections</code> provides means to collect the
 * sections to search through.
 *
 * If <code>ext</code> is not {@code true} then all four sections (authors,
 * packages, topics, and site) are returned and the include flags are
 * ignored. If <code>ext</code> is {@code true} then exactly those sections
 * are returned for which the include flag is {@code true}; this may be the
 * empty set.
 *
 * A {@code null} value for any of the parameters is treated like
 * {@code false}.
 *
 * @param ext This parameter determines whether the sections are requested
 *     explicitly. If the value is {@code false} then all default sections
 *     are searched. Otherwise the sections to be searched have to be
 *     specified with the additional parameters.
 * @param includePkg This parameter determines whether the package section
 *     should be included into the search. The package section contains the
 *     text fields of a package entry in the Catalogue.
 * @param includeAuthors This parameter determines whether the author
 *     section should be included into the search. The author section
 *     contains the names of the contributors in the Catalogue.
 * @param includeTopics This parameter determines whether the topics section
 *     should be included into the search. The topics section contains the
 *     text fields of the topics in the Catalogue.
 * @param includeSite This parameter determines whether the site section
 *     should be included into the search. The site section contains the
 *     text of the site pages.
 * @return the set of requested index types
 */
private HashSet<IndexType> collectSections(Boolean ext, Boolean includePkg,
        Boolean includeAuthors, Boolean includeTopics,
        Boolean includeSite) {
    var sections = new HashSet<IndexType>();
    if (!Boolean.TRUE.equals(ext)) {
        // Sections are not requested explicitly: search everywhere.
        sections.add(IndexType.AUTHORS);
        sections.add(IndexType.PKG);
        sections.add(IndexType.TOPICS);
        sections.add(IndexType.SITE);
        return sections;
    }
    // Explicit mode: only the sections switched on by the caller.
    if (Boolean.TRUE.equals(includeAuthors)) {
        sections.add(IndexType.AUTHORS);
    }
    if (Boolean.TRUE.equals(includePkg)) {
        sections.add(IndexType.PKG);
    }
    if (Boolean.TRUE.equals(includeTopics)) {
        sections.add(IndexType.TOPICS);
    }
    if (Boolean.TRUE.equals(includeSite)) {
        sections.add(IndexType.SITE);
    }
    return sections;
}
/**
* The method <code>encode</code> provides means to escape special XML
* characters.
*
* @param s the string
* @return the encoded string
*/
private String encode(String s) {
return s.replaceAll("&", "&")
.replaceAll("\"", """);
}
/**
 * The method <code>mapHits</code> provides means to convert the hits of a
 * search into the transport objects to be returned. The order of the hits
 * is retained.
 *
 * @param hits the list to transform
 * @return the list of mapped items
 */
private List<HitTo> mapHits(List<HitInfo> hits) {
    return hits.stream()
        .map(this::toHitTo)
        .collect(Collectors.toList());
}

/**
 * The method <code>toHitTo</code> provides means to convert a single hit
 * into its transport object. Title and path are taken over; the text is
 * filled from the display value of the hit.
 *
 * @param hit the hit to transform
 * @return the transport object
 */
private HitTo toHitTo(HitInfo hit) {
    var target = HitTo.builder();
    target.title(hit.getTitle());
    target.path(hit.getPath());
    target.text(hit.getDisplay());
    return target.build();
}
/**
 * The method <code>search</code> provides means to trigger the JSON search
 * end-point.
 *
 * The path of the end-point contains no template variables. Thus all
 * request values are read from the query string. The values of
 * <code>offset</code> and <code>max</code> are passed to the search service
 * and into the returned page as given; no range adjustment is performed in
 * this method.
 *
 * @param phrase The parameter phrase contains the search phrase, i.e. the
 *     words or search expressions to query for. A {@code null} value is
 *     rejected by the null check generated for {@code @NonNull}.
 * @param offset This is the offset for paging. The accompanying parameter
 *     max contains the page size. This parameter contains the first hit to
 *     be returned. It is a number greater or equal to 0. If this parameter
 *     is omitted then the default value 0 will be used.
 * @param max This parameter determines the number of hits maximally
 *     returned. It is a number in the range 1 to 256. If this parameter is
 *     omitted then the default value 16 will be used.
 * @param ext This parameter determines whether the sections are requested
 *     explicitly. The value is either {@code true} or {@code false}.
 *
 *     If the value is {@code false} then all default sections are searched.
 *     Otherwise the sections to be searched have to be specified with
 *     additional parameters.
 *
 *     The default for this parameter is {@code false}.
 * @param lang This parameter contains the language. The value {@code de}
 *     selects German; any other value selects English.
 *
 *     The default for this parameter is {@code en}.
 * @param includePkg This parameter determines whether the package section
 *     should be included into the search. The package section contains the
 *     text fields of a package entry in the Catalogue.
 *
 *     The default for this parameter is {@code false}.
 * @param includeAuthors This parameter determines whether the author
 *     section should be included into the search. The author section
 *     contains the names of the contributors in the Catalogue.
 *
 *     The default for this parameter is {@code false}.
 * @param includeTopics This parameter determines whether the topics section
 *     should be included into the search. The topics section contains the
 *     text fields of the topics in the Catalogue.
 *
 *     The default for this parameter is {@code false}.
 * @param includeSite This parameter determines whether the site section
 *     should be included into the search. The site section contains the
 *     text of the site pages.
 *
 *     The default for this parameter is {@code false}.
 * @return a page
 * @throws WebApplicationException with status 500 if the search service
 *     reports an I/O error and with status 400 if the phrase can not be
 *     parsed or an argument is rejected by the search service
 */
@GET
@Path("/search/json")
@Produces(MediaType.APPLICATION_JSON)
@PermitAll
public HitsPage search(
        @NonNull @QueryParam("phrase") String phrase,
        @QueryParam("offset") @DefaultValue("0") int offset,
        @QueryParam("max") @DefaultValue("16") int max,
        @QueryParam("ext") @DefaultValue("false") Boolean ext,
        @QueryParam("lang") @DefaultValue("en") String lang,
        @QueryParam("PKG")
        @DefaultValue("false") Boolean includePkg,
        @QueryParam("AUTHORS")
        @DefaultValue("false") Boolean includeAuthors,
        @QueryParam("TOPICS")
        @DefaultValue("false") Boolean includeTopics,
        @QueryParam("PORTAL")
        @DefaultValue("false") Boolean includeSite) {
    // Only German is distinguished; everything else, including a missing
    // value, falls back to English.
    Locale locale = "de".equals(lang) ? Locale.GERMAN : Locale.ENGLISH;
    var query = QueryContainer.builder()
        .phrase(phrase)
        .max(max)
        .offset(offset)
        .sections(collectSections(ext, includePkg, includeAuthors,
            includeTopics, includeSite))
        .locale(locale)
        .build();
    try {
        query = service.find(query);
    } catch (IOException e) {
        // The cause is kept to make the failure traceable.
        throw new WebApplicationException(e, Status.INTERNAL_SERVER_ERROR);
    } catch (ParseException e) {
        throw new WebApplicationException(e, Status.BAD_REQUEST);
    } catch (IllegalArgumentException e) {
        throw new WebApplicationException(e.getMessage(), e,
            Status.BAD_REQUEST);
    }
    return HitsPage.builder()
        .phrase(phrase)
        .offset(offset)
        .max(max)
        .numberOfHits(query.getHitNumber())
        .hits(mapHits(query.getHits()))
        .build();
}
/**
 * The method <code>searchXml</code> provides means to trigger the XML
 * search end-point.
 *
 * The path of the end-point contains no template variables. Thus all
 * request values are read from the query string. The search is always
 * performed with the English locale.
 *
 * The result is a <code>search-result</code> element with the attributes
 * <code>numberOfHits</code>, <code>offset</code>, <code>max</code>, and
 * <code>phrase</code>. It contains one empty <code>search-hit</code>
 * element per hit with the attributes <code>title</code>,
 * <code>path</code>, and <code>text</code>. The values of offset, max, and
 * phrase are taken from the query returned by the search service.
 *
 * @param phrase The parameter phrase contains the search phrase, i.e. the
 *     words or search expressions to query for. A {@code null} value is
 *     rejected by the null check generated for {@code @NonNull}.
 * @param offset This is the offset for paging. The accompanying parameter
 *     max contains the page size. This parameter contains the first hit to
 *     be returned. It is a number greater or equal to 0. If this parameter
 *     is omitted then the default value 0 will be used.
 * @param max This parameter determines the number of hits maximally
 *     returned. It is a number in the range 1 to 256. If this parameter is
 *     omitted then the default value 16 will be used.
 * @param ext This parameter determines whether the sections are requested
 *     explicitly. The value is either {@code true} or {@code false}.
 *
 *     If the value is {@code false} then all default sections are searched.
 *     Otherwise the sections to be searched have to be specified with
 *     additional parameters.
 *
 *     The default for this parameter is {@code false}.
 * @param includePkg This parameter determines whether the package section
 *     should be included into the search. The package section contains the
 *     text fields of a package entry in the Catalogue.
 *
 *     The default for this parameter is {@code false}.
 * @param includeAuthors This parameter determines whether the author
 *     section should be included into the search. The author section
 *     contains the names of the contributors in the Catalogue.
 *
 *     The default for this parameter is {@code false}.
 * @param includeTopics This parameter determines whether the topics section
 *     should be included into the search. The topics section contains the
 *     text fields of the topics in the Catalogue.
 *
 *     The default for this parameter is {@code false}.
 * @param includeSite This parameter determines whether the site section
 *     should be included into the search. The site section contains the
 *     text of the site pages.
 *
 *     The default for this parameter is {@code false}.
 * @return the XML document as string
 * @throws WebApplicationException with status 500 if the search service
 *     reports an I/O error and with status 400 if the phrase can not be
 *     parsed or an argument is rejected by the search service
 */
@GET
@Path("/search/xml")
@Produces(MediaType.APPLICATION_XML)
@PermitAll
public String searchXml(
        @NonNull @QueryParam("phrase") String phrase,
        @QueryParam("offset") @DefaultValue("0") int offset,
        @QueryParam("max") @DefaultValue("16") int max,
        @QueryParam("ext") @DefaultValue("false") Boolean ext,
        @QueryParam("PKG")
        @DefaultValue("false") Boolean includePkg,
        @QueryParam("AUTHORS")
        @DefaultValue("false") Boolean includeAuthors,
        @QueryParam("TOPICS")
        @DefaultValue("false") Boolean includeTopics,
        @QueryParam("PORTAL")
        @DefaultValue("false") Boolean includeSite) {
    var query = QueryContainer.builder()
        .phrase(phrase)
        .max(max)
        .offset(offset)
        .sections(collectSections(ext, includePkg, includeAuthors,
            includeTopics, includeSite))
        .locale(Locale.ENGLISH)
        .build();
    try {
        query = service.find(query);
    } catch (IOException e) {
        // The cause is kept to make the failure traceable.
        throw new WebApplicationException(e, Status.INTERNAL_SERVER_ERROR);
    } catch (ParseException e) {
        throw new WebApplicationException(e, Status.BAD_REQUEST);
    } catch (IllegalArgumentException e) {
        throw new WebApplicationException(e.getMessage(), e,
            Status.BAD_REQUEST);
    }
    var foundPhrase = query.getPhrase();
    var xml = new XmlWriter();
    xml.out(
        "<search-result "
            + "numberOfHits=\""
            + Long.toString(query.getHitNumber())
            + "\" offset=\"" + Long.toString(query.getOffset())
            + "\" max=\"" + Long.toString(query.getMax())
            + "\" phrase=\""
            + encode(foundPhrase == null ? "" : foundPhrase)
            + "\">\n");
    for (var hit : query.getHits()) {
        // Each attribute value has to be terminated by its closing quote
        // before the element is closed.
        xml.out("<search-hit"
            + " title=\"" + encode(hit.getTitle())
            + "\" path=\"" + encode(hit.getPath())
            + "\" text=\"" + encode(hit.getDisplay())
            + "\" />\n");
    }
    xml.out("</search-result>\n");
    return xml.toString();
}
}