AuthorsImportService.java

/*
 * Copyright © 2024-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.services.catalogue;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import org.ctan.site.domain.Gender;
import org.ctan.site.domain.catalogue.Author;
import org.ctan.site.domain.catalogue.AuthorEmail;
import org.ctan.site.stores.AuthorStore;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * This service imports the authors from an XML file into the author store.
 *
 * <p>
 * The file encoding is UTF-8.
 * </p>
 *
 * <pre>
 * &lt;authors&gt;
 *   &lt;author
 *     id="the-key"
 *     familyname="..."
 *     givenname="..."
 *     title="..."
 *     von="..."
 *     junior="..."
 *     gender="female"
 *     died="true"
 *   &gt;
 *     &lt;email
 *       inactive="true"
 *       note="some remarks on the email"
 *     &gt;
 *       email@address
 *     &lt;/email&gt;
 *   &lt;/author&gt;
 *   ...
 * &lt;/authors&gt;
 * </pre>
 *
 * <ul>
 * <li>The root node is <code>authors</code>.</li>
 * <li>The attributes evaluated for an <code>author</code> are
 * <code>id</code>, <code>familyname</code>, <code>givenname</code>,
 * <code>pseudonym</code>, <code>junior</code>, <code>von</code>,
 * <code>gender</code>, <code>female</code>, and <code>died</code>.</li>
 * <li>The attribute <code>title</code> shown above is not evaluated by the
 * code in this class.</li>
 * <li>The attributes evaluated for an <code>email</code> are
 * <code>inactive</code> and <code>note</code>. The address is the text
 * content with all white-space removed.</li>
 * </ul>
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
public class AuthorsImportService extends AbstractImportService<AuthorStore> {

    /**
     * The field <code>AUTHORS_FILE</code> contains the file name for the
     * authors.
     */
    private static final String AUTHORS_FILE = "authors";

    /**
     * The field <code>file</code> contains the authors file.
     */
    private final File file;

    /**
     * This is the constructor for <code>AuthorsImportService</code>.
     *
     * @param entries the entries directory
     * @param store the authors store
     * @throws IllegalArgumentException in case that the authors file in the
     *     entries directory does not exist or is not a regular file
     * @throws NullPointerException in case of a missing value
     */
    @SuppressFBWarnings(value = "CT_CONSTRUCTOR_THROW")
    public AuthorsImportService(File entries, AuthorStore store) {

        super(entries, store);
        file = new File(entries, AUTHORS_FILE);
        if (!file.isFile()) {
            throw new IllegalArgumentException(
                "entries file is not a file " + file.getPath());
        }
    }

    /**
     * The method <code>drop</code> provides means to drop some authors.
     *
     * @param authorsToBeDropped a list of authors to be dropped
     */
    public void drop(List<Author> authorsToBeDropped) {

        store.remove(authorsToBeDropped);
    }

    /**
     * The method <code>extractGender</code> provides means determine the gender
     * of an author.
     *
     * <p>
     * The gender attribute is compared case-insensitively. The values "f" and
     * "female" denote a female author, "m" and "male" a male author, "g" and
     * "group" a group, and "o", "x", and "other" any other gender.
     * </p>
     *
     * <p>
     * If the gender attribute has none of these values and the female
     * attribute is "true" then the gender is "female". Otherwise the gender is
     * "male".
     * </p>
     *
     * @param gender the gender attribute; must not be {@code null}
     * @param female the female attribute
     * @return the gender value
     */
    private Gender extractGender(String gender, String female) {

        // Locale.ROOT keeps the keyword matching independent of the default
        // locale of the JVM.
        return switch (gender.toLowerCase(Locale.ROOT)) {
            case "f", "female" -> Gender.F;
            case "m", "male" -> Gender.M;
            case "g", "group" -> Gender.G;
            case "o", "x", "other" -> Gender.X;
            default -> "true".equals(female)
                ? Gender.F
                : Gender.M;
        };
    }

    /**
     * The method <code>updateAuthor</code> provides means to update an author
     * from a DOM element and save it in the store.
     *
     * <p>
     * The author is looked up in the store by the attribute <code>id</code>.
     * If it is found then it is removed from the list of authors passed in.
     * Otherwise a new author is created. Emails of the author which are not
     * contained in the DOM element are removed from the author.
     * </p>
     *
     * @param el the DOM element
     * @param authors the authors not seen so far; the author found is removed
     *     from this list
     */
    private void updateAuthor(Element el, List<Author> authors) {

        var key = el.attr("id");
        var a = store.getByKey(key);
        if (a == null) {
            a = Author.builder()
                .key(key)
                .build();
        } else {
            // The author is present in the file and thus must not be dropped.
            authors.remove(a);
            a.setKey(key);
        }
        a.setFamilyname(el.attr("familyname"));
        a.setGivenname(el.attr("givenname"));
        a.setPseudonym(el.attr("pseudonym"));
        a.setJunior(el.attr("junior"));
        a.setVon(el.attr("von"));
        a.setGender(extractGender(el.attr("gender"), el.attr("female")));
        a.setDied("true".equals(el.attr("died")) ? Boolean.TRUE : null);
        var emails = a.getEmails();
        // Start with a copy of all known emails and strike out those which
        // are found in the DOM element; the remainder is obsolete.
        var emailsToBeDropped = emails.stream().collect(Collectors.toList());
        for (var em : el.getElementsByTag("email")) {
            var inactive = em.attribute("inactive");
            var note = em.attribute("note");
            var email = updateEmails(emails, em.text().replaceAll("\\s", ""),
                inactive == null ? null : inactive.getValue(),
                note == null ? null : note.getValue());
            emailsToBeDropped.remove(email);
        }
        for (var del : emailsToBeDropped) {
            emails.remove(del);
        }
        store.save(a);
    }

    /**
     * The method <code>updateEmails</code> provides means to update the emails
     * from the author entity.
     *
     * <p>
     * If the list contains an email with the given address then its inactivity
     * indicator and note are overwritten. Otherwise a new email with the given
     * values is appended to the list.
     * </p>
     *
     * @param emails the list of emails present in the author.
     * @param address the email address field
     * @param inactive the inactivity indicator or {@code null}; only the value
     *     "true" marks the email as inactive
     * @param note the note field or {@code null}
     * @return the updated or newly added item in the list
     */
    private AuthorEmail updateEmails(List<AuthorEmail> emails, String address,
        String inactive, String note) {

        var isInactive = "true".equals(inactive);
        for (var email : emails) {
            if (!email.getAddress().equals(address)) {
                continue;
            }
            email.setInactive(isInactive);
            email.setNote(note);
            return email;
        }
        // A new email has to honour the inactivity indicator as well;
        // otherwise it would be stored as active until the next import.
        var email = AuthorEmail.builder()
            .address(address)
            .inactive(isInactive)
            .note(note)
            .build();
        emails.add(email);
        return email;
    }

    /**
     * The method <code>updateEntriesAuthors</code> provides means to parse an
     * XML file and add the authors found to the database. As a side effect a
     * list of authors is collected which are not in the XML files.
     *
     * <p>
     * The list returned is the list obtained from the store with all authors
     * removed which have been found in the XML file.
     * </p>
     *
     * @return a list of authors to be deleted
     * @throws IOException in case of an I/O error or if the file does not
     *     contain an <code>authors</code> element
     */
    public List<Author> updateEntriesAuthors()
        throws IOException {

        Document doc = Jsoup.parse(file, null, "", Parser.xmlParser());
        if (doc.getElementsByTag("authors").isEmpty()) {
            throw new IOException("missing root node");
        }
        var authorsToBeDropped = store.findAll();
        for (var el : doc.getElementsByTag("author")) {
            updateAuthor(el, authorsToBeDropped);
        }
        return authorsToBeDropped;
    }
}