SearchResource.java

/*
 * Copyright © 2024-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.resources.catalogue.api;

import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import org.apache.lucene.queryparser.classic.ParseException;
import org.ctan.site.services.search.QueryContainer;
import org.ctan.site.services.search.QueryContainer.HitInfo;
import org.ctan.site.services.search.SearchService;
import org.ctan.site.services.search.base.IndexType;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import jakarta.annotation.security.PermitAll;
import jakarta.ws.rs.DefaultValue;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.PathParam;
import jakarta.ws.rs.Produces;
import jakarta.ws.rs.QueryParam;
import jakarta.ws.rs.WebApplicationException;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response.Status;
import lombok.Builder;
import lombok.Getter;
import lombok.NonNull;

/**
 * The class <code>SearchResource</code> contains the controller for the search
 * resource. It offers the same search via a JSON and an XML end-point. Both
 * end-points read their arguments from the query string and delegate the
 * actual search to the {@link SearchService}.
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Path("/")
public class SearchResource {

    /**
     * The class <code>HitsPage</code> contains the transport object for one
     * page of search results.
     */
    @Getter
    @Builder
    @SuppressFBWarnings(value = "EI_EXPOSE_REP")
    protected static class HitsPage {

        /**
         * The field <code>numberOfHits</code> contains the total number of
         * hits.
         */
        private long numberOfHits;

        /**
         * The field <code>offset</code> contains the offset of the first hit.
         */
        private long offset;

        /**
         * The field <code>max</code> contains the page size.
         */
        private long max;

        /**
         * The field <code>phrase</code> contains the query.
         */
        private String phrase;

        /**
         * The field <code>hits</code> contains the list of hits in the page.
         */
        private List<HitTo> hits;
    }

    /**
     * The class <code>HitTo</code> contains the transport object for the search
     * hit.
     */
    @Getter
    @Builder
    protected static class HitTo {

        /**
         * The field <code>title</code> contains the title.
         */
        private String title;

        /**
         * The field <code>path</code> contains the path.
         */
        private String path;

        /**
         * The field <code>text</code> contains the text.
         */
        private String text;
    }

    /**
     * The field <code>service</code> contains the underlying search service.
     */
    private final SearchService service;

    /**
     * This is the constructor for the class <code>SearchResource</code>.
     *
     * @param service the underlying service
     */
    @SuppressFBWarnings(value = {"CT_CONSTRUCTOR_THROW", "EI_EXPOSE_REP2"})
    public SearchResource(@NonNull SearchService service) {

        this.service = service;
    }

    /**
     * The method <code>collectSections</code> provides means to collect the
     * sections to search through.
     *
     * @param ext This parameter determines whether the sections are requested
     *     explicitly.
     *
     *     If the value is {@code false} (or {@code null}) then all default
     *     sections are searched and the include parameters are ignored.
     *     Otherwise only the sections selected with the include parameters
     *     are searched.
     * @param includePkg This parameter determines whether the package section
     *     should be included into the search. The package section contains the
     *     text fields of a package entry in the Catalogue.
     * @param includeAuthors This parameter determines whether the author
     *     section should be included into the search. The author section
     *     contains the names of the contributors in the Catalogue.
     * @param includeTopics This parameter determines whether the topics section
     *     should be included into the search. The topics section contains the
     *     text fields of the topics in the Catalogue.
     * @param includeSite This parameter determines whether the site section
     *     should be included into the search. The site section contains the
     *     text of the site pages.
     * @return the set of requested index types
     */
    private HashSet<IndexType> collectSections(Boolean ext, Boolean includePkg,
        Boolean includeAuthors, Boolean includeTopics,
        Boolean includeSite) {

        var sections = new HashSet<IndexType>();
        // Boolean.TRUE.equals() treats a missing (null) flag like false
        // instead of failing on auto-unboxing.
        if (!Boolean.TRUE.equals(ext)) {
            // No explicit selection requested: search everything.
            sections.add(IndexType.AUTHORS);
            sections.add(IndexType.PKG);
            sections.add(IndexType.TOPICS);
            sections.add(IndexType.SITE);
            return sections;
        }
        if (Boolean.TRUE.equals(includeAuthors)) {
            sections.add(IndexType.AUTHORS);
        }
        if (Boolean.TRUE.equals(includePkg)) {
            sections.add(IndexType.PKG);
        }
        if (Boolean.TRUE.equals(includeTopics)) {
            sections.add(IndexType.TOPICS);
        }
        if (Boolean.TRUE.equals(includeSite)) {
            sections.add(IndexType.SITE);
        }
        return sections;
    }

    /**
     * The method <code>encode</code> provides means to escape special XML
     * characters such that the result can be used as the value of a
     * double-quoted XML attribute.
     *
     * @param s the string; {@code null} is treated like the empty string
     * @return the encoded string
     */
    private static String encode(String s) {

        if (s == null) {
            return "";
        }
        var buffer = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
                case '&' -> buffer.append("&amp;");
                // '<' must never appear literally in an attribute value
                case '<' -> buffer.append("&lt;");
                case '>' -> buffer.append("&gt;");
                case '"' -> buffer.append("&quot;");
                default -> buffer.append(c);
            }
        }
        return buffer.toString();
    }

    /**
     * The method <code>mapHits</code> provides means to map a hit info to a
     * proper return value.
     *
     * @param hits the list to transform
     * @return the list of mapped items
     */
    private List<HitTo> mapHits(List<HitInfo> hits) {

        return hits.stream()
            .map(hit -> HitTo.builder()
                .title(hit.getTitle())
                .path(hit.getPath())
                .text(hit.getDisplay())
                .build())
            .collect(Collectors.toList());
    }

    /**
     * The method <code>toLocale</code> provides means to translate the
     * language code into a locale.
     *
     * @param lang the language code
     * @return {@link Locale#GERMAN} for {@code de} and {@link Locale#ENGLISH}
     *     for anything else including {@code null}
     */
    private static Locale toLocale(String lang) {

        return "de".equals(lang) ? Locale.GERMAN : Locale.ENGLISH;
    }

    /**
     * The method <code>runQuery</code> provides means to build the query,
     * pass it to the search service, and translate failures of the service
     * into HTTP errors.
     *
     * @param phrase the search phrase
     * @param offset the offset of the first hit
     * @param max the page size
     * @param locale the locale
     * @param sections the sections to search in
     * @return the query container returned by the service
     * @throws WebApplicationException with status 500 if the index can not be
     *     read and with status 400 if the service rejects the query
     */
    private QueryContainer runQuery(String phrase, int offset, int max,
        Locale locale, HashSet<IndexType> sections) {

        var query = QueryContainer.builder()
            .phrase(phrase)
            .max(max)
            .offset(offset)
            .sections(sections)
            .locale(locale)
            .build();
        try {
            return service.find(query);
        } catch (IOException e) {
            // keep the cause such that the failure can be diagnosed
            throw new WebApplicationException(e,
                Status.INTERNAL_SERVER_ERROR);
        } catch (ParseException e) {
            throw new WebApplicationException(e, Status.BAD_REQUEST);
        } catch (IllegalArgumentException e) {
            throw new WebApplicationException(e.getMessage(), e,
                Status.BAD_REQUEST);
        }
    }

    /**
     * The method <code>search</code> provides means to trigger the JSON search
     * end-point.
     *
     * All arguments are taken from the query string. The path
     * {@code /search/json} has no template variables a {@link PathParam}
     * could be bound to.
     *
     * @param phrase The parameter phrase contains the search phrase, i.e. the
     *     words or search expressions to query for. If it is omitted then the
     *     empty string is used.
     * @param offset This is the offset for paging. The accompanying parameter
     *     max contains the page size. This parameter contains the first hit to
     *     be returned. It is a number greater or equal to 0. If a negative
     *     number is passed in then it is replaced by 0. If the offset is larger
     *     than the number of actual hits then the list of hits will be empty.
     *     If this parameter is omitted then 0 is used.
     * @param max This parameter determines the number of hits maximally
     *     returned. It is a number in the range 1 to 256. Larger values will be
     *     reduced to 256. Lower values will be replaced by the default value.
     *     If this parameter is omitted then the default value 16 will be used.
     * @param ext This parameter determines whether the sections are requested
     *     explicitly. The value is either {@code true} or {@code false}. For
     *     any other value the behavior is undefined.
     *
     *     If the value is {@code false} then all default sections are searched.
     *     Otherwise the sections to be searched have to be specified with
     *     additional parameters.
     *
     *     The default for this parameter is {@code false}.
     * @param lang This parameter contains the language. The value {@code de}
     *     selects German. Any other value selects English.
     *
     *     The default for this parameter is {@code en}.
     * @param includePkg This parameter determines whether the package section
     *     should be included into the search. The package section contains the
     *     text fields of a package entry in the Catalogue.
     *
     *     The default for this parameter is {@code false}.
     * @param includeAuthors This parameter determines whether the author
     *     section should be included into the search. The author section
     *     contains the names of the contributors in the Catalogue.
     *
     *     The default for this parameter is {@code false}.
     * @param includeTopics This parameter determines whether the topics section
     *     should be included into the search. The topics section contains the
     *     text fields of the topics in the Catalogue.
     *
     *     The default for this parameter is {@code false}.
     * @param includeSite This parameter determines whether the site section
     *     should be included into the search. The site section contains the
     *     text of the site pages.
     *
     *     The default for this parameter is {@code false}.
     * @return a page
     * @throws WebApplicationException with status 500 if the index can not be
     *     read and with status 400 if the query is rejected
     */
    @GET
    @Path("/search/json")
    @Produces(MediaType.APPLICATION_JSON)
    @PermitAll
    public HitsPage search(
        @NonNull @QueryParam("phrase") @DefaultValue("") String phrase,
        @QueryParam("offset") @DefaultValue("0") int offset,
        @QueryParam("max") @DefaultValue("16") int max,
        @QueryParam("ext") @DefaultValue("false") Boolean ext,
        @QueryParam("lang") @DefaultValue("en") String lang,
        @QueryParam("PKG")
        @DefaultValue("false") Boolean includePkg,
        @QueryParam("AUTHORS")
        @DefaultValue("false") Boolean includeAuthors,
        @QueryParam("TOPICS")
        @DefaultValue("false") Boolean includeTopics,
        @QueryParam("PORTAL")
        @DefaultValue("false") Boolean includeSite) {

        var result = runQuery(phrase, offset, max, toLocale(lang),
            collectSections(ext, includePkg, includeAuthors,
                includeTopics, includeSite));
        return HitsPage.builder()
            .phrase(phrase)
            .offset(offset)
            .max(max)
            .numberOfHits(result.getHitNumber())
            .hits(mapHits(result.getHits()))
            .build();
    }

    /**
     * The method <code>searchXml</code> provides means to trigger the XML
     * search end-point. The search is always performed with the English
     * locale.
     *
     * All arguments are taken from the query string. The path
     * {@code /search/xml} has no template variables a {@link PathParam}
     * could be bound to.
     *
     * @param phrase The parameter phrase contains the search phrase, i.e. the
     *     words or search expressions to query for. If it is omitted then the
     *     empty string is used.
     * @param offset This is the offset for paging. The accompanying parameter
     *     max contains the page size. This parameter contains the first hit to
     *     be returned. It is a number greater or equal to 0. If a negative
     *     number is passed in then it is replaced by 0. If the offset is larger
     *     than the number of actual hits then the list of hits will be empty.
     *     If this parameter is omitted then 0 is used.
     * @param max This parameter determines the number of hits maximally
     *     returned. It is a number in the range 1 to 256. Larger values will be
     *     reduced to 256. Lower values will be replaced by the default value.
     *     If this parameter is omitted then the default value 16 will be used.
     * @param ext This parameter determines whether the sections are requested
     *     explicitly. The value is either {@code true} or {@code false}. For
     *     any other value the behavior is undefined.
     *
     *     If the value is {@code false} then all default sections are searched.
     *     Otherwise the sections to be searched have to be specified with
     *     additional parameters.
     *
     *     The default for this parameter is {@code false}.
     * @param includePkg This parameter determines whether the package section
     *     should be included into the search. The package section contains the
     *     text fields of a package entry in the Catalogue.
     *
     *     The default for this parameter is {@code false}.
     * @param includeAuthors This parameter determines whether the author
     *     section should be included into the search. The author section
     *     contains the names of the contributors in the Catalogue.
     *
     *     The default for this parameter is {@code false}.
     * @param includeTopics This parameter determines whether the topics section
     *     should be included into the search. The topics section contains the
     *     text fields of the topics in the Catalogue.
     *
     *     The default for this parameter is {@code false}.
     * @param includeSite This parameter determines whether the site section
     *     should be included into the search. The site section contains the
     *     text of the site pages.
     *
     *     The default for this parameter is {@code false}.
     * @return the XML document with the page of hits
     * @throws WebApplicationException with status 500 if the index can not be
     *     read and with status 400 if the query is rejected
     */
    @GET
    @Path("/search/xml")
    @Produces(MediaType.APPLICATION_XML)
    @PermitAll
    public String searchXml(
        @NonNull @QueryParam("phrase") @DefaultValue("") String phrase,
        @QueryParam("offset") @DefaultValue("0") int offset,
        @QueryParam("max") @DefaultValue("16") int max,
        @QueryParam("ext") @DefaultValue("false") Boolean ext,
        @QueryParam("PKG")
        @DefaultValue("false") Boolean includePkg,
        @QueryParam("AUTHORS")
        @DefaultValue("false") Boolean includeAuthors,
        @QueryParam("TOPICS")
        @DefaultValue("false") Boolean includeTopics,
        @QueryParam("PORTAL")
        @DefaultValue("false") Boolean includeSite) {

        var result = runQuery(phrase, offset, max, Locale.ENGLISH,
            collectSections(ext, includePkg, includeAuthors,
                includeTopics, includeSite));
        var xml = new XmlWriter();
        // The attributes report the values carried by the container which
        // the service returned, not the raw request values.
        xml.out("<search-result numberOfHits=\"" + result.getHitNumber()
            + "\" offset=\"" + result.getOffset()
            + "\" max=\"" + result.getMax()
            + "\" phrase=\"" + encode(result.getPhrase())
            + "\">\n");
        for (var hit : result.getHits()) {
            xml.out("<search-hit"
                + " title=\"" + encode(hit.getTitle())
                + "\" path=\"" + encode(hit.getPath())
                + "\" text=\"" + encode(hit.getDisplay())
                + "\" />\n");
        }
        xml.out("</search-result>\n");
        return xml.toString();
    }
}