// MirrMonService.java

/*
 * Copyright © 2024-2025 The CTAN Team and individual authors
 *
 * This file is distributed under the 3-clause BSD license.
 * See file LICENSE for details.
 */
package org.ctan.site.services.mirrors;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.commons.io.IOUtils;
import org.ctan.site.CtanConfiguration.MirrMonConfig;
import org.ctan.site.services.util.NullCheck;
import org.jsoup.Jsoup;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import lombok.NonNull;

/**
 * The class <code>MirrMonService</code> contains the mirror monitor service.
 * It downloads the page found at the configured URL and rewrites its markup
 * so that the body can be embedded into another page.
 */
public class MirrMonService {

    /**
     * The field <code>config</code> contains the configuration. It is set
     * once in the constructor and never changed afterwards.
     */
    private final MirrMonConfig config;

    /**
     * This is the constructor for <code>MirrMonService</code>.
     *
     * @param config the configuration; must not be <code>null</code>
     */
    @SuppressFBWarnings(value = "CT_CONSTRUCTOR_THROW")
    public MirrMonService(@NonNull MirrMonConfig config) {

        this.config = config;
    }

    /**
     * The method <code>get</code> provides means to retrieve the summary HTML
     * page. The page is read as UTF-8 from the configured URL, parsed, the
     * element with the id <code>logo</code> is dropped if present, and the
     * inner HTML of the body is passed through a fixed list of textual
     * substitutions.
     *
     * @return the transformed HTML page
     * @throws URISyntaxException in case of an error in the URL
     * @throws MalformedURLException in case of an error in the URL
     * @throws IOException in case of an I/O error
     */
    public String get()
        throws MalformedURLException,
            IOException,
            URISyntaxException {

        var configUrl = config.getUrl();
        NullCheck.isNotNull(configUrl, "config.url");
        String page = IOUtils.toString(new URI(configUrl).toURL(), "UTF-8");
        var doc = Jsoup.parse(page);
        var logo = doc.getElementById("logo");
        if (logo != null) {
            logo.remove();
        }
        return rewrite(doc.body().html());
    }

    /**
     * The method <code>rewrite</code> applies the textual substitutions to
     * the body markup. The substitutions are applied in the given order;
     * later ones operate on the result of earlier ones. Literal search texts
     * use <code>replace</code>; only genuine regular expressions use
     * <code>replaceAll</code>.
     *
     * @param html the inner HTML of the body
     * @return the rewritten HTML
     */
    private static String rewrite(String html) {

        return html
            .replace("\"icons/", "\"/mirrmon/")
            .replaceAll("border=\"[0-9]+\"", "")
            .replace("--", "–")
            .replace("<td>:</td>", "")
            .replace("<h2>the status of", "<h2>The status of")
            .replace("<h2>re", "<h2>Re")
            .replace("<h3>legend", "<h3>Legend")
            .replace(">probe results", ">Probe results")
            .replace(">software", ">Software")
            .replace("<h4>last", "<h4>Last")
            .replace("\">age histogram", "\">Age histogram")
            .replaceAll("table +cellpadding=\"5\"", "table class=\"mm-5\"")
            // NOTE(review): this renames every "mm-5" produced by the line
            // above to "mm-5a"; if only the first table was meant,
            // replaceFirst would be needed -- confirm the intent.
            .replace("table class=\"mm-5\"", "table class=\"mm-5a\"")
            .replaceAll("table +cellspacing=\"0\" cellpadding=\"1\"",
                "table class=\"mm-1\"")
            .replaceAll("<p>.*img.*</p>", "")
            .replace("bgcolor=\"YELLOW\"", "class=\"bg-YELLOW\"")
            .replace("bgcolor=\"AQUA\"", "class=\"bg-AQUA\"")
            .replace("bgcolor=\"LIME\"", "class=\"bg-LIME\"")
            // The double space after "<img" is what remains after the
            // border attribute has been removed above. The text is matched
            // literally so that the dots in the URL are not wildcards.
            .replace(
                "<th><a href=\"http://www.staff.science.uu.nl/~penni101/mirmon/\"><img  alt=\"mirmon\" src=\"/mirrmon/mirmon.gif\"></a></th>",
                "")
            // The question mark and the dots of the URL are escaped; an
            // unescaped "?" would make the preceding "k" optional and the
            // pattern would never match the literal validator link.
            .replaceAll(
                "<p><a href=\"http://validator\\.w3\\.org/check\\?uri=referer\"><img .*",
                "");
    }
}