AuthorsImportService.java
/*
* Copyright © 2024-2025 The CTAN Team and individual authors
*
* This file is distributed under the 3-clause BSD license.
* See file LICENSE for details.
*/
package org.ctan.site.services.catalogue;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;
import org.ctan.site.domain.Gender;
import org.ctan.site.domain.catalogue.Author;
import org.ctan.site.domain.catalogue.AuthorEmail;
import org.ctan.site.stores.AuthorStore;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* This service deals with the authors from a file.
*
* <p>
* The file encoding is UTF-8.
* </p>
*
* <pre>
* <authors>
* <author
* id="the-key"
* familyname="..."
* givenname="..."
* title="..."
* von="..."
* junior="..."
* gender="female"
* died="true"
* >
* <email
* inactive="true"
* note="some remarks on the email"
* >
* email@address
* </email>
* </author>
* ...
* </authors>
* </pre>
*
* <ul>
* <li>The root node is <code>authors</code>.</li>
* </ul>
*
* @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
*/
public class AuthorsImportService extends AbstractImportService<AuthorStore> {
/**
* The field <code>AUTHORS_FILE</code> contains the file name for the
* authors.
*/
private static final String AUTHORS_FILE = "authors";
/**
* The field <code>file</code> contains the authors file.
*/
private File file;
/**
* This is the constructor for <code>AuthorsImportService</code>.
*
* @param entries the entries directory
* @param store the authors store
* @throws IllegalArgumentException in case of an I/O error
* @throws NullPointerException in case of a missing value
*/
@SuppressFBWarnings(value = "CT_CONSTRUCTOR_THROW")
public AuthorsImportService(File entries, AuthorStore store) {
super(entries, store);
file = new File(entries, AUTHORS_FILE);
if (!file.isFile()) {
throw new IllegalArgumentException(
"entries file is not a file " + file.getPath());
}
}
/**
* The method <code>drop</code> provides means to drop some authors.
*
* @param authorsToBeDropped a list of authors to be dropped
*/
public void drop(List<Author> authorsToBeDropped) {
store.remove(authorsToBeDropped);
}
/**
* The method <code>extractGender</code> provides means determine the gender
* of an author.
*
* <p>
* If a gender is given then the gender is used.
* </p>
*
* <p>
* If a gender is not given and the female attribute is "true" then the
* gender is "female". Otherwise the gender is "male".
* </p>
*
* @param gender the gender attribute
* @param female the female attribute
* @return the gender value
*/
private Gender extractGender(String gender, String female) {
return switch (gender.toLowerCase()) {
case "f", "female" -> Gender.F;
case "m", "male" -> Gender.M;
case "g", "group" -> Gender.G;
case "o", "x", "other" -> Gender.X;
default -> "true".equals(female)
? Gender.F
: Gender.M;
};
}
/**
* The method <code>updateAuthor</code> provides means to update an author
* from a DOM element.
*
* @param el the DOM element
* @param authors the authors
* @return the authors
*/
private void updateAuthor(Element el, List<Author> authors) {
var key = el.attr("id");
var a = store.getByKey(key);
if (a == null) {
a = Author.builder()
.key(key)
.build();
} else {
authors.remove(a);
a.setKey(key);
}
a.setFamilyname(el.attr("familyname"));
a.setGivenname(el.attr("givenname"));
a.setPseudonym(el.attr("pseudonym"));
a.setJunior(el.attr("junior"));
a.setVon(el.attr("von"));
a.setGender(extractGender(el.attr("gender"), el.attr("female")));
a.setDied("true".equals(el.attr("died")) ? Boolean.TRUE : null);
var emails = a.getEmails();
var emailsToBeDropped = emails.stream().collect(Collectors.toList());
for (var em : el.getElementsByTag("email")) {
var inactive = em.attribute("inactive");
var note = em.attribute("note");
var email = updateEmails(emails, em.text().replaceAll("\\s", ""),
inactive == null ? null : inactive.getValue(),
note == null ? null : note.getValue());
emailsToBeDropped.remove(email);
}
for (var del : emailsToBeDropped) {
emails.remove(del);
}
store.save(a);
}
/**
* The method <code>updateEmails</code> provides means to update the emails
* from the author entity.
*
* @param emails the list of emails present in the author.
* @param address the email address field
* @param inactive the inactivity indicator or {@code null}
* @param note the note field or {@code null}
* @return the updated item in the list
*/
private AuthorEmail updateEmails(List<AuthorEmail> emails, String address,
String inactive, String note) {
for (var email : emails) {
if (!email.getAddress().equals(address)) {
continue;
}
email.setInactive("true".equals(inactive));
email.setNote(note);
return email;
}
var email = AuthorEmail.builder()
.address(address)
.inactive(false)
.note(note)
.build();
emails.add(email);
return email;
}
/**
* The method <code>updateEntriesAuthors</code> provides means to parse an
* XML file and add the authors found to the database. As a side effect a
* list of authors is collected which are not in the XML files.
*
* @return a list of authors to be deleted
* @throws IOException in case of an I/O error
*/
public List<Author> updateEntriesAuthors()
throws IOException {
Document doc = Jsoup.parse(file, null, "", Parser.xmlParser());
var authors = doc.getElementsByTag("authors");
if (authors.size() == 0) {
throw new IOException("missing root node");
}
var authorElements = doc.getElementsByTag("author");
var authorsToBeDropped = store.findAll();
for (var el : authorElements) {
updateAuthor(el, authorsToBeDropped);
}
return authorsToBeDropped;
}
}