// TopicsImportService.java
/*
* Copyright © 2024-2025 The CTAN Team and individual authors
*
* This file is distributed under the 3-clause BSD license.
* See file LICENSE for details.
*/
package org.ctan.site.services.catalogue;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.ctan.site.domain.catalogue.Topic;
import org.ctan.site.domain.catalogue.TopicDetail;
import org.ctan.site.stores.TopicStore;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import lombok.extern.slf4j.Slf4j;
/**
* This service deals with the topics from a file.
*
* <p>
* The file encoding is UTF-8.
* </p>
*
* <pre>{@code
* <topics>
*   <topic name='aaa'>
*     <en>
*       <title>aaa-en-title</title>
*       <description>aaa-en-desc</description>
*       <teaser>aaa-en-teaser</teaser>
*       <details>aaa-en-details</details>
*     </en>
*     <de>
*       <title>aaa-de-title</title>
*       <description>aaa-de-desc</description>
*       <teaser>aaa-de-teaser</teaser>
*       <details>aaa-de-details</details>
*     </de>
*   </topic>
*   ...
* </topics>
* }</pre>
*
* <ul>
* <li>The root node is <code>topics</code>.</li>
* <li>The first level contains nodes of type <code>topic</code>.
* <ul>
* <li>The topic node has a mandatory attribute <code>name</code>.</li>
* <li>The second level contains nodes of type <code>en</code> or
* <code>de</code> for the locale.
* <ul>
* <li>The name of the locale node is used as the language of the texts it
* contains.</li>
* <li>The third level contains nodes for the texts of type <code>title</code>,
* <code>description</code>, <code>details</code>, or <code>teaser</code>.</li>
* </ul>
* </li>
* </ul>
* </li>
* </ul>
*
* @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
*/
@Slf4j
public class TopicsImportService extends AbstractImportService<TopicStore> {

    /**
     * The field <code>TOPICS_FILE</code> contains the file name for the topics.
     */
    private static final String TOPICS_FILE = "topics";

    /**
     * The field <code>file</code> contains the topics file. It is resolved
     * once in the constructor and never reassigned.
     */
    private final File file;

    /**
     * This is the constructor for <code>TopicsImportService</code>.
     *
     * @param entries the entries directory; the topics file is looked up as
     *     the child named <code>topics</code> of this directory
     * @param store the topics store
     * @throws IllegalArgumentException if the topics file is not an existing
     *     regular file
     */
    @SuppressFBWarnings(value = "CT_CONSTRUCTOR_THROW")
    public TopicsImportService(File entries, TopicStore store) {

        super(entries, store);
        file = new File(entries, TOPICS_FILE);
        if (!file.isFile()) {
            throw new IllegalArgumentException("topics file is not a file");
        }
    }

    /**
     * The method <code>drop</code> provides means to drop some topics. The
     * list is handed to the store for removal.
     *
     * @param topicsToBeDropped a list of topics to be dropped
     */
    public void drop(List<Topic> topicsToBeDropped) {

        store.remove(topicsToBeDropped);
    }

    /**
     * The method <code>updateDetail</code> provides means to update topic
     * details.
     *
     * <p>
     * The first detail of the topic with the same language as <code>td</code>
     * receives the title, teaser, description, and detail of <code>td</code>
     * wherever they differ. If the topic has no detail for this language then
     * <code>td</code> itself is appended to the details of the topic.
     * </p>
     *
     * @param t the topic to be updated; its list of details must not be
     *     <code>null</code>
     * @param td the new topic detail data
     * @return <code>true</code> iff a detail has been modified or added
     */
    private boolean updateDetail(Topic t, TopicDetail td) {

        final var topicDetails = t.getDetails();
        // NOTE(review): neq is not defined in this class; presumably it is an
        // inherited null-safe "not equal" helper -- confirm in the super class.
        for (var d : topicDetails) {
            if (neq(d.getLang(), td.getLang())) {
                continue;
            }
            var changed = false;
            if (neq(d.getTitle(), td.getTitle())) {
                d.setTitle(td.getTitle());
                changed = true;
            }
            if (neq(d.getTeaser(), td.getTeaser())) {
                d.setTeaser(td.getTeaser());
                changed = true;
            }
            if (neq(d.getDescription(), td.getDescription())) {
                d.setDescription(td.getDescription());
                changed = true;
            }
            if (neq(d.getDetail(), td.getDetail())) {
                d.setDetail(td.getDetail());
                changed = true;
            }
            // only the first detail with a matching language is considered
            return changed;
        }
        // no detail for this language yet
        topicDetails.add(td);
        return true;
    }

    /**
     * The method <code>updateExisting</code> provides means to parse an XML
     * file and add the topics found to the database. As a side effect a list of
     * topics is collected which are not in the XML files.
     *
     * <p>
     * The returned list starts out as all topics found in the store. Each
     * stored topic which is matched by a <code>topic</code> element of the file
     * is taken out of this list. The remaining topics are returned; they are
     * not removed from the store by this method.
     * </p>
     *
     * @return the topics to be dropped
     * @throws IOException in case of an I/O error
     */
    public List<Topic> updateExisting()
        throws IOException {

        var doc = Jsoup.parse(file);
        var topicsToBeDropped = new ArrayList<>(store.findAll());
        // index of the stored topics by their key for the lookup per element
        Map<String, Topic> all = topicsToBeDropped.stream()
            .collect(Collectors.toMap(x -> x.getKey(), x -> x));

        for (var el : doc.getElementsByTag("topic")) {
            updateTopic(el, all, topicsToBeDropped);
        }
        return topicsToBeDropped;
    }

    /**
     * The method <code>updateTopic</code> provides means to update a topic from
     * a DOM element.
     *
     * <p>
     * The topic is identified by the attribute <code>name</code>. A topic
     * which is not contained in <code>all</code> is created; otherwise the
     * existing topic is taken out of <code>topicsToBeDropped</code>. An
     * optional attribute <code>details</code> is taken as the detail text for
     * the language <code>en</code>. Each child element contributes the texts
     * for the language given by its name. Finally the details for languages
     * which do not occur in the element are removed from the topic and the
     * topic is saved in the store.
     * </p>
     *
     * @param el the DOM element
     * @param all the map of all topics
     * @param topicsToBeDropped the topics to be dropped
     */
    private void updateTopic(Element el,
        Map<String, Topic> all,
        List<Topic> topicsToBeDropped) {

        var key = el.attr("name");
        var theDetails = el.hasAttr("details") ? el.attr("details") : null;
        var t = all.get(key);
        if (t == null) {
            t = Topic.builder()
                .key(key)
                .build();
        } else {
            topicsToBeDropped.remove(t);
        }
        if (t.getDetails() == null) {
            t.setDetails(new ArrayList<TopicDetail>());
        }

        // the languages encountered for this topic; only the keys are relevant
        var locales = new HashMap<String, Boolean>();
        if (theDetails != null) {
            locales.put("en", Boolean.TRUE);
            updateDetail(t, TopicDetail.builder()
                .lang("en")
                .detail(theDetails)
                .build());
        }
        for (var loc : el.children()) {
            locales.put(loc.normalName(), Boolean.TRUE);
            updateDetail(t, readDetail(key, loc));
        }

        // Remove the details for languages which are not present in the
        // element. The test has to be for the absence of the language: the map
        // never contains the value false and thus a test for false would never
        // remove anything.
        t.getDetails().removeIf(d -> !locales.containsKey(d.getLang()));
        store.save(t);
    }

    /**
     * The method <code>readDetail</code> provides means to collect the texts
     * of one locale element in a topic detail.
     *
     * @param key the key of the topic; it is used for logging only
     * @param loc the locale element; its name is used as the language and its
     *     children <code>title</code>, <code>description</code>,
     *     <code>teaser</code>, and <code>detail</code> or <code>details</code>
     *     provide the texts
     * @return the new topic detail
     */
    private TopicDetail readDetail(String key, Element loc) {

        var builder = TopicDetail.builder()
            .lang(loc.normalName());
        for (var text : loc.children()) {
            switch (text.normalName()) {
                case "title":
                    builder.title(text.text());
                    break;
                case "description":
                    builder.description(text.text());
                    break;
                case "teaser":
                    builder.teaser(text.text());
                    break;
                case "detail":
                case "details":
                    builder.detail(text.text());
                    break;
                default:
                    // unknown children are reported and ignored
                    log.info("{}: unknown type {}", key, text.normalName());
                    break;
            }
        }
        return builder.build();
    }
}