/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.law.warc.tool;

import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import hep.aida.bin.StaticBin1D;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.law.warc.io.GZWarcRecord;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.logging.ProgressLogger;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.log4j.Logger;

/**
 * Command-line tool that scans a gzipped WARC file record by record and reports
 * minimum, maximum, mean, standard deviation and sum of the per-record compressed
 * and uncompressed sizes, together with the per-record compression ratio (percent).
 *
 * <p>Output is either plain text or a single HTML table row (optionally wrapped in
 * table headers), selected with command-line switches.
 */
public class GZWarcStats {
    private static final Logger LOGGER = Logger.getLogger(GZWarcStats.class);

    /** Size, in bytes, of the buffer used to read the input stream. */
    static final int IO_BUFFER_SIZE = 65536;

    /**
     * Reads records from {@code in} until {@link GZWarcRecord#read} returns {@code -1},
     * adding one sample per record to each of the three bins.
     *
     * @param in the stream to read gzipped WARC records from; it is not closed by this method.
     * @param uncompressedSize receives {@code gzheader.uncompressedSkipLength} of each record.
     * @param compressedSize receives {@code gzheader.compressedSkipLength} of each record.
     * @param compressionRatio receives, for each record, {@code 100 * compressed / uncompressed}
     *        truncated to an integer.
     * @return the number of records read (the progress logger's final count).
     * @throws IOException if reading a record fails.
     * @throws WarcRecord.FormatException if reading a record fails because of its format.
     */
    public static long run(FastBufferedInputStream in, StaticBin1D uncompressedSize, StaticBin1D compressedSize, StaticBin1D compressionRatio) throws IOException, WarcRecord.FormatException {
        GZWarcRecord record = new GZWarcRecord();
        ProgressLogger pl = new ProgressLogger(LOGGER, "records");
        pl.logInterval = 10000L;
        pl.start("Analyzing...");
        while (record.read(in) != -1L) {
            GZWarcRecord.GZHeader gzheader = record.gzheader;
            double compressed = (double)gzheader.compressedSkipLength;
            double uncompressed = (double)gzheader.uncompressedSkipLength;
            compressedSize.add(compressed);
            uncompressedSize.add(uncompressed);
            // Ratio is stored as a whole percentage (fraction discarded by the int cast).
            // NOTE(review): if uncompressedSkipLength can be 0 the division yields
            // Infinity/NaN, which the cast turns into Integer.MAX_VALUE/0 -- confirm
            // whether such records can occur.
            compressionRatio.add((double)((int)(100.0 * compressed / uncompressed)));
            pl.update();
        }
        pl.done();
        return pl.count;
    }

    /**
     * Truncates {@code value} to two decimal places (towards zero).
     *
     * <p>A {@code long} intermediate is used so that values above
     * {@code Integer.MAX_VALUE / 100} are not clamped by the narrowing cast.
     */
    private static double twoDecimals(double value) {
        return (double)((long)(100.0 * value)) / 100.0;
    }

    /**
     * Parses the command line, runs {@link #run} on the selected input and prints the
     * statistics to standard output.
     *
     * <p>The input is {@code <warcFile>.warc.gz}, or standard input when the basename
     * is {@code -} (the default).
     *
     * @param arg the command-line arguments.
     * @throws Exception if the arguments cannot be parsed or the input cannot be read.
     */
    public static void main(String[] arg) throws Exception {
        SimpleJSAP jsap = new SimpleJSAP(GZWarcStats.class.getName(), "Compute some statistics about a gzipped warc file.", new Parameter[]{
            new Switch("html", 'h', "html", "Generate output in HTML format."),
            new Switch("headers", 'H', "header", "Generate HTML table headers format."),
            new UnflaggedOption("warcFile", (StringParser)JSAP.STRING_PARSER, "-", true, false, "The gzipped Warc file basename (if not present, or -, stdin/stdout will be used).")
        });
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        String warcFile = jsapResult.getString("warcFile");
        boolean html = jsapResult.getBoolean("html");
        boolean headers = jsapResult.getBoolean("headers");

        StaticBin1D uncompressedSize = new StaticBin1D();
        StaticBin1D compressedSize = new StaticBin1D();
        StaticBin1D compressionRatio = new StaticBin1D();

        FastBufferedInputStream in = new FastBufferedInputStream(warcFile.equals("-") ? System.in : new FileInputStream(new File(warcFile + ".warc.gz")), IO_BUFFER_SIZE);
        long n;
        try {
            n = GZWarcStats.run(in, uncompressedSize, compressedSize, compressionRatio);
        } finally {
            // Release the underlying file even when a record fails to parse.
            in.close();
        }

        if (html) {
            if (headers) {
                System.out.println("<TABLE border='1'>");
                System.out.println("<TR><TH rowspan='2'>Name<TH rowspan='2'>Num.<br>Records<TH colspan='5'>Compressed byte size<TH colspan='5'>Uncompressed byte size<TH colspan='5'>Compression ratio (%)");
                System.out.println("<TR><TH>min<TH>max<TH>average<TH>stdev<TH>sum<TH>min<TH>max<TH>average<TH>stdev<TH>sum<TH>min<TH>max<TH>average<TH>stdev");
            }
            System.out.print("<tr><td>" + warcFile + "<td>" + n);
            System.out.print("<td>" + (long)compressedSize.min() + "<td>" + (long)compressedSize.max() + "<td>" + twoDecimals(compressedSize.mean()) + "<td>" + twoDecimals(compressedSize.standardDeviation()) + "<td>" + (long)compressedSize.sum());
            System.out.print("<td>" + (long)uncompressedSize.min() + "<td>" + (long)uncompressedSize.max() + "<td>" + twoDecimals(uncompressedSize.mean()) + "<td>" + twoDecimals(uncompressedSize.standardDeviation()) + "<td>" + (long)uncompressedSize.sum());
            // The ratio columns have no "sum" cell, matching the header row above.
            System.out.print("<td>" + (long)compressionRatio.min() + "<td>" + (long)compressionRatio.max() + "<td>" + twoDecimals(compressionRatio.mean()) + "<td>" + twoDecimals(compressionRatio.standardDeviation()));
            System.out.println();
            if (headers) {
                System.out.println("</TABLE>");
            }
        } else {
            System.out.println("Records: " + n);
            System.out.println("Compressed size: min = " + (long)compressedSize.min() + ", max = " + (long)compressedSize.max() + ", avg = " + compressedSize.mean() + ", sd = " + compressedSize.standardDeviation() + ", sum = " + (long)compressedSize.sum());
            System.out.println("Uncompressed size: min = " + (long)uncompressedSize.min() + ", max = " + (long)uncompressedSize.max() + ", avg = " + uncompressedSize.mean() + ", sd = " + uncompressedSize.standardDeviation() + ", sum = " + (long)uncompressedSize.sum());
            System.out.println("Compression ratio: min = " + (long)compressionRatio.min() + "%, max = " + (long)compressionRatio.max() + "%, avg = " + compressionRatio.mean() + "%, sd = " + compressionRatio.standardDeviation() + "%");
        }
    }
}

