TopicsImportService.java

/*
 * Copyright © 2024-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.services.catalogue;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.ctan.site.domain.catalogue.Topic;
import org.ctan.site.domain.catalogue.TopicDetail;
import org.ctan.site.stores.TopicStore;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import lombok.extern.slf4j.Slf4j;

/**
 * This service deals with the topics from a file.
 *
 * <p>
 * The file encoding is UTF-8.
 * </p>
 *
 * <pre>
 * &lt;topics&gt;
 *   &lt;topic name='aaa'&gt;
 *     &lt;en&gt;
 *       &lt;title&gt;aaa-en-title&lt;/title&gt;
 *       &lt;description&gt;aaa-en-desc&lt;/description&gt;
 *       &lt;teaser&gt;aaa-en-teaser&lt;/teaser&gt;
 *       &lt;details&gt;aaa-en-details&lt;/details&gt;
 *     &lt;/en&gt;
 *     &lt;de&gt;
 *       &lt;title&gt;aaa-de-title&lt;/title&gt;
 *       &lt;description&gt;aaa-de-desc&lt;/description&gt;
 *       &lt;teaser&gt;aaa-de-teaser&lt;/teaser&gt;
 *       &lt;details&gt;aaa-de-details&lt;/details&gt;
 *     &lt;/de&gt;
 *   &lt;/topic&gt;
 *   ...
 * &lt;/topics&gt;
 * </pre>
 *
 * <ul>
 * <li>The root node is <code>topics</code>.</li>
 * <li>The first level contains nodes of type <code>topic</code>.
 * <ul>
 * <li>The topic node has a mandatory attribute <code>name</code>.</li>
 * <li>The second level contains nodes of type <code>en</code> or
 * <code>de</code> for the locale.
 * <ul>
 * <li>No attributes of the locale node are evaluated.</li>
 * <li>The third level contains nodes for the texts of type <code>title</code>,
 * <code>description</code>, <code>details</code>, or <code>teaser</code>.</li>
 * </ul>
 * </li>
 * </ul>
 * </li>
 * </ul>
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Slf4j
public class TopicsImportService extends AbstractImportService<TopicStore> {

    /**
     * The field <code>TOPICS_FILE</code> contains the file name for the topics.
     */
    private static final String TOPICS_FILE = "topics";

    /**
     * The field <code>file</code> contains the topics file. It is resolved
     * once in the constructor and never reassigned.
     */
    private final File file;

    /**
     * This is the constructor for <code>TopicsImportService</code>.
     *
     * @param entries the entries directory; the topics file is looked up as
     *     <code>topics</code> directly inside this directory
     * @param store the topics store
     * @throws IllegalArgumentException if the topics file does not exist or is
     *     not a regular file
     */
    @SuppressFBWarnings(value = "CT_CONSTRUCTOR_THROW")
    public TopicsImportService(File entries, TopicStore store) {

        super(entries, store);
        file = new File(entries, TOPICS_FILE);
        if (!file.isFile()) {
            throw new IllegalArgumentException("topics file is not a file");
        }
    }

    /**
     * The method <code>drop</code> provides means to drop some topics. The
     * list is handed to the store for removal unchanged.
     *
     * @param topicsToBeDropped a list of topics to be dropped
     */
    public void drop(List<Topic> topicsToBeDropped) {

        store.remove(topicsToBeDropped);
    }

    /**
     * The method <code>updateDetail</code> provides means to update topic
     * details.
     *
     * <p>
     * The first detail of the topic with the same language as the new data is
     * updated field by field; only fields which differ are written. If the
     * topic has no detail for this language then the new detail is appended.
     * </p>
     *
     * @param t the topic to be updated; its list of details must not be
     *     <code>null</code>
     * @param td the new topic detail data
     * @return <code>true</code> iff the topic details have been modified
     */
    private boolean updateDetail(Topic t, TopicDetail td) {

        final var topicDetails = t.getDetails();
        for (var d : topicDetails) {
            // neq() is not defined in this class; it is used here as a
            // "not equal" test for the two values.
            if (neq(d.getLang(), td.getLang())) {
                continue;
            }
            var changed = false;
            if (neq(d.getTitle(), td.getTitle())) {
                d.setTitle(td.getTitle());
                changed = true;
            }
            if (neq(d.getTeaser(), td.getTeaser())) {
                d.setTeaser(td.getTeaser());
                changed = true;
            }
            if (neq(d.getDescription(), td.getDescription())) {
                d.setDescription(td.getDescription());
                changed = true;
            }
            if (neq(d.getDetail(), td.getDetail())) {
                d.setDetail(td.getDetail());
                changed = true;
            }
            // Only the first detail with a matching language is considered.
            return changed;
        }
        // No detail for this language yet: add the new one.
        topicDetails.add(td);
        return true;
    }

    /**
     * The method <code>updateExisting</code> provides means to parse the
     * topics file and save the topics found to the store. As a side effect a
     * list of topics is collected which are in the store but not in the file.
     * These topics are not removed here; they are just returned.
     *
     * @return the topics to be dropped
     * @throws IOException in case of an I/O error
     */
    public List<Topic> updateExisting()
        throws IOException {

        var doc = Jsoup.parse(file);
        // Start with all stored topics; every topic encountered in the file
        // is taken off this list again.
        var topicsToBeDropped = new ArrayList<>(store.findAll());
        Map<String, Topic> all = topicsToBeDropped.stream()
            .collect(Collectors.toMap(Topic::getKey, topic -> topic));
        for (var el : doc.getElementsByTag("topic")) {
            updateTopic(el, all, topicsToBeDropped);
        }
        return topicsToBeDropped;
    }

    /**
     * The method <code>updateTopic</code> provides means to update a topic from
     * a DOM element and save it to the store.
     *
     * <p>
     * An optional attribute <code>details</code> of the topic element is
     * applied as the detail text for the locale <code>en</code> before the
     * child elements are processed. Each child element is treated as a locale.
     * Details of the topic for locales which are not present in the element
     * are removed.
     * </p>
     *
     * <p>
     * An element without a <code>name</code> attribute is skipped with a
     * warning since the name is the key of the topic.
     * </p>
     *
     * @param el the DOM element
     * @param all the map of all topics
     * @param topicsToBeDropped the topics to be dropped; a topic found in
     *     <code>all</code> is removed from this list
     */
    private void updateTopic(Element el,
        Map<String, Topic> all,
        List<Topic> topicsToBeDropped) {

        // Element.attr() yields the empty string for a missing attribute.
        var key = el.attr("name");
        if (key.trim().isEmpty()) {
            log.warn("topic without name attribute ignored");
            return;
        }
        var theDetails = el.hasAttr("details") ? el.attr("details") : null;
        var t = all.get(key);
        if (t == null) {
            t = Topic.builder()
                .key(key)
                .build();
        } else {
            topicsToBeDropped.remove(t);
        }
        if (t.getDetails() == null) {
            t.setDetails(new ArrayList<TopicDetail>());
        }
        // The keys of this map are the locales seen for this topic.
        Map<String, Boolean> locales = new HashMap<>();
        if (theDetails != null) {
            locales.put("en", Boolean.TRUE);
            updateDetail(t, TopicDetail.builder()
                .lang("en")
                .detail(theDetails)
                .build());
        }
        for (var loc : el.children()) {
            locales.put(loc.normalName(), Boolean.TRUE);
            updateDetail(t, parseDetail(key, loc));
        }
        // Drop the details for all locales which are not contained in the
        // element. The former test on the map value could never succeed since
        // only TRUE is stored in the map.
        t.getDetails().removeIf(d -> !locales.containsKey(d.getLang()));
        store.save(t);
    }

    /**
     * The method <code>parseDetail</code> provides means to collect the texts
     * of a locale element into a topic detail.
     *
     * @param key the name of the enclosing topic; used for logging only
     * @param loc the locale element; its normalized tag name is the language
     * @return the topic detail carrying the texts found
     */
    private TopicDetail parseDetail(String key, Element loc) {

        var builder = TopicDetail.builder()
            .lang(loc.normalName());
        for (var text : loc.children()) {
            switch (text.normalName()) {
                case "title":
                    builder.title(text.text());
                    break;
                case "description":
                    builder.description(text.text());
                    break;
                case "teaser":
                    builder.teaser(text.text());
                    break;
                case "detail":
                case "details":
                    builder.detail(text.text());
                    break;
                default:
                    log.info("{}: unknown type {}", key, text.normalName());
                    break;
            }
        }
        return builder.build();
    }
}