PkgsImportService.java

/*
 * Copyright © 2024-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.services.catalogue;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.ctan.site.domain.catalogue.AuthorRef;
import org.ctan.site.domain.catalogue.License;
import org.ctan.site.domain.catalogue.Pkg;
import org.ctan.site.domain.catalogue.PkgCaption;
import org.ctan.site.domain.catalogue.PkgCopyright;
import org.ctan.site.domain.catalogue.PkgDescription;
import org.ctan.site.domain.catalogue.PkgDoc;
import org.ctan.site.domain.catalogue.Topic;
import org.ctan.site.domain.catalogue.UploaderRef;
import org.ctan.site.stores.AuthorStore;
import org.ctan.site.stores.LicenseStore;
import org.ctan.site.stores.PkgStore;
import org.ctan.site.stores.TopicStore;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;

/**
 * This service deals with the packages from a file.
 *
 * <p>
 * The file encoding is UTF-8.
 * </p>
 *
 * <pre>
 * &lt;entry
 *   id="the-key"
 * &gt;
 *   &lt;name&gt;...&lt;/name&gt;
 *   ...
 * &lt;/entry&gt;
 * </pre>
 *
 * <ul>
 * <li>The root node is <code>entry</code>.</li>
 * </ul>
 *
 * @author <a href="mailto:gene@ctan.org">Gerd Neugebauer</a>
 */
@Slf4j
public class PkgsImportService extends AbstractImportService<PkgStore> {

    /**
     * The class <code>AddAlsoFunction</code> is a delayed import step which
     * links a package to another package named by an also tag.
     */
    private class AddAlsoFunction implements ImportFunction {

        /**
         * The field <code>id</code> contains the key of the package to be
         * linked.
         */
        private String id;

        /**
         * The field <code>pkg</code> contains the package which receives the
         * link.
         */
        private Pkg pkg;

        /**
         * This is the constructor for <code>AddAlsoFunction</code>.
         *
         * @param pkg the package which receives the link
         * @param id the key of the package to be linked
         */
        public AddAlsoFunction(Pkg pkg, String id) {

            this.id = id;
            this.pkg = pkg;
        }

        /**
         * Look up the package with the stored key in the package store and
         * append it to the also list of the package.
         *
         * @return {@code true} if the package has been found and added;
         *     {@code false} if the store does not know the key
         *
         * @see org.ctan.site.services.catalogue.PkgsImportService.ImportFunction#run()
         */
        @Override
        public boolean run() {

            var target = store.getByKey(id);
            if (target != null) {
                pkg.getAlso().add(target);
                return true;
            }
            return false;
        }

        /**
         * Describe the pending action for diagnostic output.
         *
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {

            return pkg.getKey() + ": add also with refid='" + id + "'";
        }
    }

    /**
     * The class <code>AddAuthorFunction</code> is a delayed import step which
     * attaches an author reference to a package.
     */
    private class AddAuthorFunction implements ImportFunction {

        /**
         * The field <code>id</code> contains the key of the author to be
         * attached.
         */
        private String id;

        /**
         * The field <code>pkg</code> contains the package which receives the
         * author reference.
         */
        private Pkg pkg;

        /**
         * The field <code>active</code> contains the active indicator stored
         * in the author reference.
         */
        private boolean active;

        /**
         * This is the constructor for <code>AddAuthorFunction</code>.
         *
         * @param pkg the package which receives the author reference
         * @param id the key of the author to be attached
         * @param active the active indicator
         */
        public AddAuthorFunction(Pkg pkg, String id, boolean active) {

            this.active = active;
            this.id = id;
            this.pkg = pkg;
        }

        /**
         * Look up the author with the stored key in the author store and
         * append a new author reference to the authors of the package.
         *
         * @return {@code true} if the author has been found and the reference
         *     has been added; {@code false} if the store does not know the
         *     key
         *
         * @see org.ctan.site.services.catalogue.PkgsImportService.ImportFunction#run()
         */
        @Override
        public boolean run() {

            var found = authorStore.getByKey(id);
            if (found != null) {
                var ref = AuthorRef.builder()
                    .author(found)
                    .active(active)
                    .pkg(pkg)
                    .build();
                pkg.getAuthors().add(ref);
                return true;
            }
            return false;
        }

        /**
         * Describe the pending action for diagnostic output.
         *
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {

            return pkg.getKey() + ": add author with refid='" + id + "'";
        }
    }

    /**
     * The class <code>AddUploaderFunction</code> contains an uploader item
     * which waits to be processed.
     */
    private class AddUploaderFunction implements ImportFunction {

        /**
         * The field <code>id</code> contains the key of the author acting as
         * uploader.
         */
        private String id;

        /**
         * The field <code>pkg</code> contains the package which receives the
         * uploader reference.
         */
        private Pkg pkg;

        /**
         * The field <code>date</code> contains the date stored in the uploader
         * reference.
         */
        private String date;

        /**
         * The field <code>active</code> contains the active indicator.
         */
        private boolean active;

        /**
         * This is the constructor for <code>AddUploaderFunction</code>.
         *
         * @param pkg the package
         * @param id the key to be added
         * @param active the indicator for active uploaders
         * @param date the date
         */
        public AddUploaderFunction(Pkg pkg, String id, boolean active,
            String date) {

            this.pkg = pkg;
            this.id = id;
            this.active = active;
            this.date = date;
        }

        /**
         * Look up the author with the stored key in the author store and
         * append a new uploader reference to the uploaders of the package.
         *
         * @return {@code true} if the author has been found and the reference
         *     has been added; {@code false} if the store does not know the
         *     key
         *
         * @see org.ctan.site.services.catalogue.PkgsImportService.ImportFunction#run()
         */
        @Override
        public boolean run() {

            var author = authorStore.getByKey(id);
            if (author == null) {
                return false;
            }
            pkg.getUploaders().add(UploaderRef.builder()
                .author(author)
                .active(active)
                .date(date)
                .pkg(pkg)
                .build());
            return true;
        }

        /**
         * Describe the pending action for diagnostic output. The text names
         * the uploader so that it can be told apart from a pending author
         * reference.
         *
         * @see java.lang.Object#toString()
         */
        @Override
        public String toString() {

            return pkg.getKey() + ": add uploader with refid='" + id + "'";
        }
    }

    /**
     * This interface describes the function which is delayed during the import
     * process.
     */
    private interface ImportFunction {

        /**
         * Try to perform the delayed action.
         *
         * @return {@code true} if the action has been carried out. The
         *     implementations in this class return {@code false} when the
         *     referenced item can not be found in its store.
         */
        boolean run();
    }

    /**
     * The field <code>authorStore</code> contains the author repository. It is
     * used to look up authors by key when author and uploader references are
     * added to a package.
     */
    private AuthorStore authorStore;

    /**
     * The field <code>licenseStore</code> contains the license repository. It
     * is used to look up licenses by key when license tags are read.
     */
    private LicenseStore licenseStore;

    /**
     * The field <code>topicsStore</code> contains the topics repository. It is
     * used to look up topics by key when keyval tags are read.
     */
    private TopicStore topicsStore;

    /**
     * This is the constructor for <code>PkgsImportService</code>.
     *
     * <p>
     * The entries directory and the package store are passed on to the super
     * class; the remaining stores are kept in this instance. All arguments are
     * annotated with {@code @NonNull}.
     * </p>
     *
     * @param entries the entries directory
     * @param store the package store
     * @param authorStore the author store
     * @param topicsStore the topic store
     * @param licenseStore the license store
     */
    @SuppressFBWarnings(value = {"CT_CONSTRUCTOR_THROW", "EI_EXPOSE_REP2"})
    public PkgsImportService(@NonNull File entries,
        @NonNull PkgStore store,
        @NonNull AuthorStore authorStore,
        @NonNull TopicStore topicsStore,
        @NonNull LicenseStore licenseStore) {

        super(entries, store);
        this.authorStore = authorStore;
        this.topicsStore = topicsStore;
        this.licenseStore = licenseStore;
    }

    /**
     * The method <code>drop</code> provides means to drop some packages. The
     * list is handed over to the package store for removal.
     *
     * @param pkgsToBeDropped the packages to be dropped
     */
    public void drop(List<Pkg> pkgsToBeDropped) {

        store.remove(pkgsToBeDropped);
    }

    /**
     * The method <code>findAuthorRef</code> searches a list of author
     * references for the first one whose author has the given key.
     *
     * @param list the list to search in
     * @param id the author key to look for
     * @return the first matching element or {@code null} for none
     */
    private AuthorRef findAuthorRef(List<AuthorRef> list, String id) {

        return list.stream()
            .filter(ref -> id.equals(ref.getAuthor().getKey()))
            .findFirst()
            .orElse(null);
    }

    /**
     * The method <code>findCaption</code> searches a list of captions for the
     * first one with the given language.
     *
     * @param list the list to search in
     * @param lang the language code
     * @return the first matching element or {@code null} for none
     */
    private PkgCaption findCaption(List<PkgCaption> list, String lang) {

        return list.stream()
            .filter(caption -> lang.equals(caption.getLang()))
            .findFirst()
            .orElse(null);
    }

    /**
     * The method <code>findCopyright</code> searches a list of copyright notes
     * for the first one with the given owner and year. A {@code null} argument
     * matches only a {@code null} value in the note.
     *
     * @param list the list to search in
     * @param owner the owner; may be {@code null}
     * @param year the year; may be {@code null}
     * @return the first matching element or {@code null} for none
     */
    private PkgCopyright findCopyright(List<PkgCopyright> list,
        String owner, String year) {

        for (var candidate : list) {
            var candidateOwner = candidate.getOwner();
            boolean ownerMatches = owner == null
                ? candidateOwner == null
                : owner.equals(candidateOwner);
            if (!ownerMatches) {
                continue;
            }
            var candidateYear = candidate.getYear();
            boolean yearMatches = year == null
                ? candidateYear == null
                : year.equals(candidateYear);
            if (yearMatches) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * The method <code>findDescription</code> searches a list of descriptions
     * for the first one with the given language.
     *
     * @param list the list to search in
     * @param lang the language code
     * @return the first matching element or {@code null} for none
     */
    private PkgDescription findDescription(List<PkgDescription> list,
        String lang) {

        return list.stream()
            .filter(description -> lang.equals(description.getLang()))
            .findFirst()
            .orElse(null);
    }

    /**
     * The method <code>findDocumentation</code> searches a list of documents
     * for the first one with the given target link.
     *
     * @param list the list to search in
     * @param href the target link
     * @return the first matching element or {@code null} for none
     */
    private PkgDoc findDocumentation(List<PkgDoc> list, String href) {

        return list.stream()
            .filter(doc -> href.equals(doc.getHref()))
            .findFirst()
            .orElse(null);
    }

    /**
     * The method <code>findLicense</code> searches a list of licenses for the
     * first one with the given key.
     *
     * @param list the list to search in
     * @param type the license key
     * @return the first matching element or {@code null} for none
     */
    private License findLicense(List<License> list, String type) {

        return list.stream()
            .filter(license -> type.equals(license.getKey()))
            .findFirst()
            .orElse(null);
    }

    /**
     * The method <code>findTopic</code> searches a list of topics for the
     * first one with the given key.
     *
     * @param list the list to search in
     * @param key the topic key
     * @return the first matching element or {@code null} for none
     */
    private Topic findTopic(List<Topic> list, String key) {

        return list.stream()
            .filter(topic -> key.equals(topic.getKey()))
            .findFirst()
            .orElse(null);
    }

    /**
     * The method <code>findUploaderRef</code> searches a list of uploader
     * references for the first one whose author has the given key.
     *
     * @param list the list to search in
     * @param id the author key to look for
     * @return the first matching element or {@code null} for none
     */
    private UploaderRef findUploaderRef(List<UploaderRef> list,
        String id) {

        return list.stream()
            .filter(ref -> id.equals(ref.getAuthor().getKey()))
            .findFirst()
            .orElse(null);
    }

    /**
     * The method <code>readAlso</code> synchronises the also list of a package
     * with the also tags found in the XML.
     *
     * <pre>
     * &lt;also
     *   refid="the-key"
     * /&gt;
     * </pre>
     *
     * <p>
     * Packages already in the list and named by a tag are kept. For each tag
     * naming a package which is not yet in the list an
     * <code>AddAlsoFunction</code> is appended to the todo list. Packages in
     * the list which are not named by any tag are removed. Tags without a
     * refid attribute are reported and skipped.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @param todo list of unprocessed alsos
     */
    private void readAlso(Element el, Pkg pkg, List<ImportFunction> todo) {

        var current = pkg.getAlso();
        if (current == null) {
            current = new ArrayList<Pkg>();
            pkg.setAlso(current);
        }
        // whatever remains in this copy after the loop is not named any more
        var obsolete = new ArrayList<Pkg>(current);
        for (var tag : el.getElementsByTag("also")) {
            var refid = tag.attribute("refid");
            if (refid == null) {
                log.warn(pkg.getKey() + ": also attribute refid is missing");
                continue;
            }
            var key = refid.getValue();
            var known = obsolete.stream()
                .filter(candidate -> key.equals(candidate.getKey()))
                .findFirst()
                .orElse(null);
            if (known == null) {
                todo.add(new AddAlsoFunction(pkg, key));
            } else {
                obsolete.remove(known);
            }
        }
        current.removeAll(obsolete);
    }

    /**
     * The method <code>readAuthors</code> synchronises the author references
     * of a package with the authorref tags found in the XML.
     *
     * <pre>
     * &lt;authorref
     *   id="the-key"
     *   active="false"
     * /&gt;
     * </pre>
     *
     * <p>
     * A missing active attribute counts as {@code true}. Existing references
     * get their active indicator updated. For each new author an
     * <code>AddAuthorFunction</code> is appended to the todo list. References
     * which are not named by any tag are removed. Tags without an id attribute
     * are reported and skipped.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @param todo list of unprocessed authors
     */
    private void readAuthors(Element el, Pkg pkg, List<ImportFunction> todo) {

        var refs = pkg.getAuthors();
        if (refs == null) {
            refs = new ArrayList<AuthorRef>();
            pkg.setAuthors(refs);
        }
        // whatever remains in this copy after the loop is not named any more
        var stale = new ArrayList<AuthorRef>(refs);
        for (var tag : el.getElementsByTag("authorref")) {
            var idAttribute = tag.attribute("id");
            if (idAttribute == null) {
                log.warn(pkg.getKey() + ": authorref id is missing");
                continue;
            }
            var id = idAttribute.getValue();
            var active = !tag.hasAttr("active")
                || "true".equals(tag.attribute("active").getValue());
            var existing = findAuthorRef(stale, id);
            if (existing != null) {
                existing.setActive(active);
                stale.remove(existing);
            } else {
                todo.add(new AddAuthorFunction(pkg, id, active));
            }
        }
        refs.removeAll(stale);
    }

    /**
     * The method <code>readCaptions</code> synchronises the captions of a
     * package with the caption tags found in the XML.
     *
     * <pre>
     * &lt;caption
     *   lang="locale"
     * &gt;
     *  caption text
     * &lt;/caption&gt;
     * </pre>
     *
     * <p>
     * A missing lang attribute counts as <code>en</code>. A caption which
     * already exists for the language receives the new text; otherwise a new
     * caption is added. Captions for languages without a tag are removed.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     */
    private void readCaptions(Element el, Pkg pkg) {

        var pkgCaptions = pkg.getCaptions();
        if (pkgCaptions == null) {
            pkgCaptions = new ArrayList<PkgCaption>();
            pkg.setCaptions(pkgCaptions);
        }
        // whatever remains in this copy after the loop has no tag any more
        var stale = new ArrayList<PkgCaption>(pkgCaptions);
        for (var tag : el.getElementsByTag("caption")) {
            String lang;
            if (tag.hasAttr("lang")) {
                lang = tag.attr("lang");
            } else {
                lang = "en";
            }
            var text = tag.html();
            // the lookup covers the captions added earlier in this loop, too
            var existing = findCaption(pkgCaptions, lang);
            if (existing == null) {
                pkgCaptions.add(PkgCaption.builder()
                    .pkg(pkg)
                    .lang(lang)
                    .caption(text)
                    .build());
                continue;
            }
            stale.remove(existing);
            existing.setLang(lang);
            existing.setCaption(text);
        }
        pkgCaptions.removeAll(stale);
    }

    /**
     * The method <code>readContacts</code> copies the links of the contact
     * tags into the matching properties of the package.
     *
     * <pre>
     * &lt;contact
     *   type="announce"
     *   href="..."
     * /&gt;
     * &lt;contact
     *   type="bugs"
     *   href="..."
     * /&gt;
     * &lt;contact
     *   type="development"
     *   href="..."
     * /&gt;
     * &lt;contact
     *   type="home"
     *   href="..."
     * /&gt;
     * &lt;contact
     *   type="repository"
     *   href="..."
     * /&gt;
     * &lt;contact
     *   type="support"
     *   href="..."
     * /&gt;
     * </pre>
     *
     * <p>
     * Tags without href or type attribute and tags with an unknown type are
     * reported and skipped. Properties for which no tag is present are left
     * untouched.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     */
    private void readContacts(Element el, Pkg pkg) {

        for (var contact : el.getElementsByTag("contact")) {
            var href = contact.attribute("href");
            if (href == null) {
                log.warn(pkg.getKey() + ": contact href is missing");
                continue;
            }
            var type = contact.attribute("type");
            if (type == null) {
                log.warn(pkg.getKey() + ": contact type is missing");
                continue;
            }
            var kind = type.getValue();
            var target = href.getValue();
            if ("announce".equals(kind)) {
                pkg.setAnnounce(target);
            } else if ("bugs".equals(kind)) {
                pkg.setBugs(target);
            } else if ("home".equals(kind)) {
                pkg.setHome(target);
            } else if ("development".equals(kind)) {
                pkg.setDevelopment(target);
            } else if ("repository".equals(kind)) {
                pkg.setRepository(target);
            } else if ("support".equals(kind)) {
                pkg.setSupport(target);
            } else {
                log.warn(pkg.getKey() + ": unknown contact type " + kind);
            }
        }
    }

    /**
     * The method <code>readCopyrights</code> synchronises the copyright notes
     * of a package with the copyright tags found in the XML.
     *
     * <pre>
     * &lt;copyright
     *   year="the year(s)"
     *   owner="the owner"
     * /&gt;
     * </pre>
     *
     * <p>
     * Notes with the same owner and year as a tag are kept. For the other tags
     * a new note is added. Notes without a matching tag are removed.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     */
    private void readCopyrights(Element el, Pkg pkg) {

        var notes = pkg.getCopy();
        if (notes == null) {
            notes = new ArrayList<PkgCopyright>();
            pkg.setCopy(notes);
        }
        // whatever remains in this copy after the loop has no tag any more
        var stale = new ArrayList<PkgCopyright>(notes);
        for (var tag : el.getElementsByTag("copyright")) {
            var owner = tag.attr("owner");
            var year = tag.attr("year");
            var existing = findCopyright(stale, owner, year);
            if (existing == null) {
                var note = PkgCopyright.builder()
                    .pkg(pkg)
                    .owner(owner)
                    .year(year)
                    .build();
                notes.add(note);
            } else {
                stale.remove(existing);
            }
        }
        notes.removeAll(stale);
    }

    /**
     * The method <code>readCtan</code> reads the path and the file indicator
     * from the single ctan tag.
     *
     * <pre>
     * &lt;ctan
     *   path="the relative path"
     *   file="true"
     * /&gt;
     * </pre>
     *
     * <p>
     * Without a ctan tag the package is left untouched. An attribute which is
     * missing leaves the corresponding property untouched as well.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @throws IllegalArgumentException in case of more than one ctan tag
     */
    private void readCtan(Element el, Pkg pkg) {

        var tags = el.getElementsByTag("ctan");
        if (tags.size() > 1) {
            throw new IllegalArgumentException("multiple ctan tags");
        }
        if (tags.isEmpty()) {
            return;
        }
        var ctan = tags.get(0);
        var path = ctan.attribute("path");
        if (path != null) {
            pkg.setCtanPath(path.getValue());
        }
        var file = ctan.attribute("file");
        if (file != null) {
            pkg.setCtanFile("true".equals(file.getValue()));
        }
    }

    /**
     * The method <code>readDescriptions</code> provides means to read the
     * descriptions and update the database accordingly.
     *
     * <pre>
     * &lt;description
     *   lang="locale"
     * &gt;
     *  description text
     * &lt;/description&gt;
     * </pre>
     *
     * <p>
     * A missing lang attribute counts as <code>en</code>. A description which
     * already exists for the language receives the text from the XML;
     * otherwise a new description is added. Descriptions for languages without
     * a tag are removed.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     */
    private void readDescriptions(Element el, Pkg pkg) {

        var pkgDescription = pkg.getDescriptions();
        if (pkgDescription == null) {
            pkgDescription = new ArrayList<PkgDescription>();
            pkg.setDescriptions(pkgDescription);
        }
        // whatever remains in this copy after the loop has no tag any more
        var descsToBeDeleted = new ArrayList<PkgDescription>(pkgDescription);
        var descriptions = el.getElementsByTag("description");
        for (var desc : descriptions) {
            var lang = desc.hasAttr("lang") ? desc.attr("lang") : "en";
            var text = desc.html();
            var cr = findDescription(descsToBeDeleted, lang);
            if (cr != null) {
                descsToBeDeleted.remove(cr);
                // take over the text from the XML; otherwise a changed
                // description would never reach an existing entry
                cr.setDescription(text);
            } else {
                pkgDescription.add(PkgDescription.builder()
                    .pkg(pkg)
                    .lang(lang)
                    .description(text)
                    .build());
            }
        }
        pkgDescription.removeAll(descsToBeDeleted);
    }

    /**
     * The method <code>readDocumentations</code> provides means to read the
     * documentations and update the database accordingly.
     *
     * <pre>
     * &lt;documentation
     *   details="details"
     *   href="URL"
     *   lang="locale"
     *   title="title"
     *   author="author"
     * /&gt;
     * </pre>
     *
     * <p>
     * Documents are identified by their href. An existing document gets its
     * details, language, and title updated; otherwise a new document is added.
     * Documents without a matching tag are removed. A missing lang attribute
     * counts as <code>en</code>; missing details or title result in
     * {@code null}. Tags without href are reported and skipped. The author
     * attribute is not evaluated by this method.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     */
    private void readDocumentations(Element el, Pkg pkg) {

        var pkgDocs = pkg.getDocs();
        if (pkgDocs == null) {
            pkgDocs = new ArrayList<PkgDoc>();
            pkg.setDocs(pkgDocs);
        }
        // whatever remains in this copy after the loop has no tag any more
        var docsToBeDeleted = new ArrayList<PkgDoc>(pkgDocs);
        var docs = el.getElementsByTag("documentation");
        for (var doc : docs) {
            if (!doc.hasAttr("href")) {
                log.warn(
                    pkg.getKey() + ": documentation attribute href is missing");
                continue;
            }
            var details = doc.hasAttr("details") ? doc.attr("details") : null;
            var href = doc.attr("href");
            var lang = doc.hasAttr("lang") ? doc.attr("lang") : "en";
            // the title has its own attribute; it must not be taken from the
            // details
            var title = doc.hasAttr("title") ? doc.attr("title") : null;
            var d = findDocumentation(docsToBeDeleted, href);
            if (d != null) {
                docsToBeDeleted.remove(d);
                d.setDetails(details);
                d.setLang(lang);
                d.setTitle(title);
            } else {
                pkgDocs.add(PkgDoc.builder()
                    .pkg(pkg)
                    .details(details)
                    .href(href)
                    .lang(lang)
                    .title(title)
                    .build());
            }
        }
        pkgDocs.removeAll(docsToBeDeleted);
    }

    /**
     * The method <code>readInstall</code> reads the install path from the
     * single install tag.
     *
     * <pre>
     * &lt;install
     *   path="the target"
     * /&gt;
     * </pre>
     *
     * <p>
     * Without an install tag the package is left untouched. A tag without path
     * attribute sets the install path to {@code null}.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @throws IllegalArgumentException in case of more than one install tag
     */
    private void readInstall(Element el, Pkg pkg) {

        var tags = el.getElementsByTag("install");
        if (tags.size() > 1) {
            throw new IllegalArgumentException("multiple install tags");
        }
        if (tags.isEmpty()) {
            return;
        }
        var path = tags.get(0).attribute("path");
        String installPath = null;
        if (path != null) {
            installPath = path.getValue();
        }
        pkg.setInstallPath(installPath);
    }

    /**
     * The method <code>readKeyval</code> synchronises the topics of a package
     * with the keyval tags with the key <code>topic</code> found in the XML.
     *
     * <pre>
     * &lt;keyval
     *   key="topic"
     *   value="the-topic-key"
     * /&gt;
     * ...
     * </pre>
     *
     * <p>
     * Tags with another key are ignored. Topics already attached are kept. New
     * topics are looked up in the topic store and attached if found; unknown
     * topics and tags without value attribute are reported. Topics without a
     * matching tag are removed.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     */
    private void readKeyval(Element el, Pkg pkg) {

        var pkgTopics = pkg.getTopics();
        if (pkgTopics == null) {
            pkgTopics = new ArrayList<Topic>();
            pkg.setTopics(pkgTopics);
        }
        // whatever remains in this copy after the loop has no tag any more
        var stale = new ArrayList<Topic>(pkgTopics);
        for (var keyval : el.getElementsByTag("keyval")) {
            var isTopic = keyval.hasAttr("key")
                && "topic".equals(keyval.attribute("key").getValue());
            if (!isTopic) {
                continue;
            }
            if (!keyval.hasAttr("value")) {
                log.warn(pkg.getKey() + ": keyval attribute value is missing");
                continue;
            }
            var topicKey = keyval.attr("value");
            var attached = findTopic(stale, topicKey);
            if (attached != null) {
                stale.remove(attached);
                continue;
            }
            var topic = topicsStore.getByKey(topicKey);
            if (topic == null) {
                log.warn(pkg.getKey() + ": topic not found: " + topicKey);
            } else {
                pkgTopics.add(topic);
            }
        }
        pkgTopics.removeAll(stale);
    }

    /**
     * The method <code>readLicenses</code> synchronises the licenses of a
     * package with the license tags found in the XML.
     *
     * <pre>
     * &lt;license
     *   type="license-key"
     * /&gt;
     * </pre>
     *
     * <p>
     * Licenses already attached are kept. New licenses are looked up in the
     * license store and attached if found; unknown licenses and tags without
     * type attribute are reported. Licenses without a matching tag are
     * removed.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     */
    private void readLicenses(Element el, Pkg pkg) {

        var pkgLicenses = pkg.getLicenses();
        if (pkgLicenses == null) {
            pkgLicenses = new ArrayList<License>();
            pkg.setLicenses(pkgLicenses);
        }
        // whatever remains in this copy after the loop has no tag any more
        var stale = new ArrayList<License>(pkgLicenses);
        for (var tag : el.getElementsByTag("license")) {
            if (!tag.hasAttr("type")) {
                log.warn(pkg.getKey() + ": license attribute type is missing");
                continue;
            }
            var type = tag.attr("type");
            var attached = findLicense(stale, type);
            if (attached != null) {
                stale.remove(attached);
                continue;
            }
            var license = licenseStore.getByKey(type);
            if (license != null) {
                pkgLicenses.add(license);
            } else {
                log.warn(pkg.getKey() + ": license not found: " + type);
            }
        }
        pkgLicenses.removeAll(stale);
    }

    /**
     * The method <code>readMikTex</code> reads the location from the single
     * miktex tag.
     *
     * <pre>
     * &lt;miktex
     *   location="the location"
     * /&gt;
     * </pre>
     *
     * <p>
     * Without a miktex tag the package is left untouched. A tag without
     * location attribute sets the location to {@code null}.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @throws IllegalArgumentException in case of more than one miktex tag
     */
    private void readMikTex(Element el, Pkg pkg) {

        var tags = el.getElementsByTag("miktex");
        if (tags.size() > 1) {
            throw new IllegalArgumentException("multiple miktex tags");
        }
        if (tags.isEmpty()) {
            return;
        }
        var location = tags.get(0).attribute("location");
        String miktexLocation = null;
        if (location != null) {
            miktexLocation = location.getValue();
        }
        pkg.setMiktexLocation(miktexLocation);
    }

    /**
     * The method <code>readName</code> reads the unique name or reverts to the
     * key of the package if no name tag is present.
     *
     * <pre>
     * &lt;name&gt;
     *   the name
     * &lt;/name&gt;
     * </pre>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @throws IllegalArgumentException in case of more than one name tag
     */
    private void readName(Element el, Pkg pkg) {

        var names = el.getElementsByTag("name");
        if (names.size() > 1) {
            throw new IllegalArgumentException("multiple name tags");
        }
        if (names.isEmpty()) {
            pkg.setName(pkg.getKey());
        } else {
            pkg.setName(names.get(0).text());
        }
    }

    /**
     * The method <code>readTexLive</code> reads the location from the single
     * texlive tag.
     *
     * <pre>
     * &lt;texlive
     *   location="the location"
     * /&gt;
     * </pre>
     *
     * <p>
     * Without a texlive tag the package is left untouched. A tag without
     * location attribute sets the location to {@code null}.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @throws IllegalArgumentException in case of more than one texlive tag
     */
    private void readTexLive(Element el, Pkg pkg) {

        var tags = el.getElementsByTag("texlive");
        if (tags.size() > 1) {
            throw new IllegalArgumentException("multiple texlive tags");
        }
        if (tags.isEmpty()) {
            return;
        }
        var location = tags.get(0).attribute("location");
        String texliveLocation = null;
        if (location != null) {
            texliveLocation = location.getValue();
        }
        pkg.setTexliveLocation(texliveLocation);
    }

    /**
     * The method <code>readTlContrib</code> reads the location from the single
     * tlcontrib tag.
     *
     * <pre>
     * &lt;tlcontrib
     *   location="the location"
     * /&gt;
     * </pre>
     *
     * <p>
     * Without a tlcontrib tag the package is left untouched. A tag without
     * location attribute sets the location to {@code null}.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @throws IllegalArgumentException in case of more than one tlcontrib tag
     */
    private void readTlContrib(Element el, Pkg pkg) {

        var tags = el.getElementsByTag("tlcontrib");
        if (tags.size() > 1) {
            throw new IllegalArgumentException("multiple tlcontrib tags");
        }
        if (tags.isEmpty()) {
            return;
        }
        var location = tags.get(0).attribute("location");
        String tlContribLocation = null;
        if (location != null) {
            tlContribLocation = location.getValue();
        }
        pkg.setTlContribLocation(tlContribLocation);
    }

    /**
     * The method <code>readUploaders</code> synchronises the uploader
     * references of a package with the uploaderref tags found in the XML.
     *
     * <pre>
     * &lt;uploaderref
     *   id="the-key"
     *   active="false"
     *   date="..."
     * /&gt;
     * </pre>
     *
     * <p>
     * A missing active attribute counts as {@code true}; a missing date
     * results in the empty string. Existing references get their active
     * indicator updated. For each new uploader an
     * <code>AddUploaderFunction</code> is appended to the todo list.
     * References which are not named by any tag are removed. Tags without an
     * id attribute are reported and skipped.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @param todo list of unprocessed uploaders
     */
    private void readUploaders(Element el, Pkg pkg, List<ImportFunction> todo) {

        var uploaders = pkg.getUploaders();
        if (uploaders == null) {
            uploaders = new ArrayList<UploaderRef>();
            pkg.setUploaders(uploaders);
        }
        // whatever remains in this copy after the loop is not named any more
        var stale = new ArrayList<UploaderRef>(uploaders);
        for (var tag : el.getElementsByTag("uploaderref")) {
            var idAttribute = tag.attribute("id");
            if (idAttribute == null) {
                log.warn(pkg.getKey() + ": uploaderref id is missing");
                continue;
            }
            var id = idAttribute.getValue();
            var active = !tag.hasAttr("active")
                || "true".equals(tag.attribute("active").getValue());
            var existing = findUploaderRef(stale, id);
            if (existing != null) {
                // NOTE(review): the date of an existing reference is not
                // refreshed here -- confirm that this is intended
                existing.setActive(active);
                stale.remove(existing);
                continue;
            }
            var date = tag.hasAttr("date")
                ? tag.attribute("date").getValue()
                : "";
            todo.add(new AddUploaderFunction(pkg, id, active, date));
        }
        uploaders.removeAll(stale);
    }

    /**
     * The method <code>readVersion</code> transfers the version number and
     * the version date given in the <code>version</code> tag to the package.
     * At most one such tag may be found for the element.
     *
     * <pre>
     * &lt;version
     *   number="the number"
     *   date="the date"
     * /&gt;
     * </pre>
     *
     * <p>
     * The package is left untouched if no <code>version</code> tag is found.
     * An attribute which is missing on the tag leads to the value
     * <code>null</code> in the package.
     * </p>
     *
     * @param el the current element node
     * @param pkg the package to store the values in
     * @throws IllegalArgumentException if more than one version tag is found
     */
    private void readVersion(Element el, Pkg pkg) {

        var candidates = el.getElementsByTag("version");
        if (candidates.size() > 1) {
            throw new IllegalArgumentException("multiple version tags");
        }
        if (candidates.isEmpty()) {
            return;
        }
        var versionTag = candidates.get(0);
        var numberAttr = versionTag.attribute("number");
        var dateAttr = versionTag.attribute("date");
        pkg.setVersionNumber(numberAttr == null ? null : numberAttr.getValue());
        pkg.setVersionDate(dateAttr == null ? null : dateAttr.getValue());
    }

    /**
     * The method <code>updateExisting</code> walks through the
     * sub-directories of the entries directory and imports every regular file
     * with the extension <code>.xml</code> found directly in one of them.
     *
     * <p>
     * The result list is initialized with all packages known to the store.
     * It is handed to the import of each file. What remains are the packages
     * which are not in the XML files.
     * </p>
     *
     * @return the packages to be dropped
     * @throws FileNotFoundException in case that the entries directory or one
     *     of its sub-directories can not be listed
     * @throws IOException in case of an I/O error
     */
    public List<Pkg> updateExisting()
        throws IOException {

        var pkgsToBeDropped = new ArrayList<Pkg>(store.findAll());
        File[] dirs = entries.listFiles(File::isDirectory);
        if (dirs == null) {
            // listFiles() signals a non-directory or an I/O error with null
            throw new FileNotFoundException(
                "unable to list directory: " + entries);
        }
        for (var dir : dirs) {
            File[] xmlFiles = dir
                .listFiles(f -> f.getName().endsWith(".xml") && f.isFile());
            if (xmlFiles == null) {
                throw new FileNotFoundException(
                    "unable to list directory: " + dir);
            }
            for (var f : xmlFiles) {
                updateFile(f, pkgsToBeDropped);
            }
        }
        return pkgsToBeDropped;
    }

    /**
     * The method <code>updateFile</code> parses a single file as UTF-8
     * encoded XML and passes each <code>entry</code> element found in it on to
     * the package update.
     *
     * @param file the XML file
     * @param pkgsToBeDropped the list of the packages to be dropped
     * @throws IOException in case of an I/O error
     */
    private void updateFile(File file, List<Pkg> pkgsToBeDropped)
        throws IOException {

        var xml = Jsoup.parse(file, "UTF-8", "", Parser.xmlParser());
        for (var entry : xml.getElementsByTag("entry")) {
            updatePkg(entry, pkgsToBeDropped);
        }
    }

    /**
     * The method <code>updatePkg</code> provides means to create or update a
     * package from an <code>entry</code> DOM element and save it in the
     * store.
     *
     * <ul>
     * <li>The attribute <code>id</code> is used as key of the package. An
     * element without id is reported with a warning and skipped.</li>
     * <li>If the store has no package for the key then a new one is built.
     * Otherwise the package found is removed from the list of the packages to
     * be dropped.</li>
     * <li>The readers for the sub-elements are run on the package. Some of
     * them collect deferred functions which are run after all readers have
     * finished. A deferred function returning <code>false</code> is reported
     * with a warning.</li>
     * <li>Finally the package is saved.</li>
     * </ul>
     *
     * @param el the DOM element
     * @param pkgsToBeDropped the list of the packages to be dropped
     */
    private void updatePkg(Element el, List<Pkg> pkgsToBeDropped) {

        var key = el.attr("id");
        if (key.isEmpty()) {
            // attr() yields the empty string for a missing attribute; a
            // package with an empty key must not reach the store
            log.warn("entry without id is skipped");
            return;
        }
        var pkg = store.getByKey(key);
        if (pkg == null) {
            pkg = Pkg.builder()
                .key(key)
                .build();
        } else {
            pkgsToBeDropped.remove(pkg);
            pkg.setKey(key);
        }
        List<ImportFunction> todo = new ArrayList<>();
        readAlso(el, pkg, todo);
        readAuthors(el, pkg, todo);
        readCaptions(el, pkg);
        readContacts(el, pkg);
        readCopyrights(el, pkg);
        readCtan(el, pkg);
        readDescriptions(el, pkg);
        readDocumentations(el, pkg);
        readInstall(el, pkg);
        readKeyval(el, pkg);
        readLicenses(el, pkg);
        readMikTex(el, pkg);
        readName(el, pkg);
        readTexLive(el, pkg);
        readTlContrib(el, pkg);
        readUploaders(el, pkg, todo);
        readVersion(el, pkg);
        for (var t : todo) {
            if (!t.run()) {
                log.warn("{} failed", t);
            }
        }
        store.save(pkg);
    }
}