/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.law.warc.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.law.warc.filters.Filter;
import it.unimi.dsi.law.warc.filters.parser.FilterParser;
import it.unimi.dsi.law.warc.io.GZWarcRecord;
import it.unimi.dsi.law.warc.io.HttpResponseFilteredIterator;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.law.warc.util.HttpResponse;
import it.unimi.dsi.law.warc.util.WarcHttpResponse;
import it.unimi.dsi.logging.ProgressLogger;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Command-line tool that lists, for every HTTP response record of a WARC file
 * accepted by a filter, one tab-separated line made of the record digest, the
 * record subject URI and a duplicate marker.
 */
public class ExtractDigestUrls {
    private static final Logger LOGGER = Logger.getLogger(ExtractDigestUrls.class);

    /** Default value of the {@code bufferSize} command-line option, in the syntax accepted by the JSAP size parser. */
    public static final String DEFAULT_BUFFER_SIZE = "64Ki";

    /**
     * Iterates over the filtered HTTP response records read from {@code in} and
     * prints one line per record to {@code pw}.
     *
     * <p>Each line has the form {@code digest \t subjectUri \t marker}, where
     * {@code digest} is the value of the {@code BUbiNG-content-digest} ANVL field
     * (or the record id when that field is absent) and {@code marker} is {@code D}
     * when the {@code BUbiNG-isduplicate} ANVL field parses as {@code true} and
     * {@code N} otherwise.
     *
     * <p>This method neither flushes nor closes {@code pw} or {@code in}; that is
     * left to the caller.
     *
     * @param in the stream the records are read from.
     * @param isGZipped whether records must be read as gzipped records.
     * @param filter the filter passed to the record iterator.
     * @param pw the writer receiving the output lines.
     * @throws IOException declared for I/O failures of the underlying calls.
     * @throws RuntimeException rethrown unchanged after the stream position and
     *         the last header seen have been printed to standard error.
     */
    public static void run(FastBufferedInputStream in, boolean isGZipped, Filter<HttpResponse> filter, PrintWriter pw) throws IOException {
        WarcRecord record = isGZipped ? new GZWarcRecord() : new WarcRecord();
        ProgressLogger pl = new ProgressLogger(LOGGER, 60000L, "records");
        WarcHttpResponse response = new WarcHttpResponse();
        HttpResponseFilteredIterator it = new HttpResponseFilteredIterator(in, record, response, filter);
        pl.start("Listing...");
        // Kept outside the try block so that the catch clause can report them.
        long pos = -1L;
        WarcRecord.Header header = null;
        try {
            while (it.hasNext()) {
                // Sampled before advancing the iterator; used for diagnostics only.
                pos = in.position();
                it.next();
                header = record.header;
                String digest = header.anvlFields.get("BUbiNG-content-digest");
                if (digest == null) {
                    // No content digest available: fall back to the record id.
                    digest = header.recordId.toString();
                }
                // parseBoolean yields false for null, i.e. when the field is missing.
                boolean isDuplicate = Boolean.parseBoolean(header.anvlFields.get("BUbiNG-isduplicate"));
                pw.println(digest + "\t" + header.subjectUri + "\t" + (isDuplicate ? "D" : "N"));
                pl.update();
            }
        }
        catch (RuntimeException e) {
            System.err.println("Got " + e);
            System.err.println("Position: " + pos + ", last url header:\n" + header);
            throw e;
        }
        pl.done();
    }

    /**
     * Parses the command line, opens the input and output streams and invokes
     * {@link #run(FastBufferedInputStream, boolean, Filter, PrintWriter)}.
     *
     * <p>The input is standard input when the {@code warcFile} argument is
     * {@code -}; otherwise it is the file named by the argument followed by
     * {@code .warc} (and {@code .gz} when the gzip switch is set). Output goes to
     * standard output, encoded as ASCII. Both streams are closed even when the
     * extraction fails, so that buffered output is not lost.
     *
     * @param arg the command-line arguments.
     * @throws Exception if option parsing, filter parsing or I/O fails.
     */
    public static void main(String[] arg) throws Exception {
        SimpleJSAP jsap = new SimpleJSAP(
            ExtractDigestUrls.class.getName(),
            "Extracts digests and URLs from http response records of a WARC file.",
            new Parameter[]{
                new Switch("gzip", 'z', "gzip", "Tells if the warc is compressed."),
                new FlaggedOption("filter", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'f', "filter", "The filter."),
                new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, false, 'b', "buffer-size", "The size of an I/O buffer."),
                new UnflaggedOption("warcFile", JSAP.STRING_PARSER, "-", true, false, "The Warc input file basename (if not present, or -, stdin will be used).")
            });
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        boolean isGZipped = jsapResult.getBoolean("gzip");
        String requestedFilter = jsapResult.getString("filter");
        // Without an explicit filter every record is accepted.
        String filterString = requestedFilter == null ? "TRUE" : requestedFilter;
        int bufferSize = jsapResult.getInt("bufferSize");
        String warcFile = jsapResult.getString("warcFile");
        // The filter is parsed before any stream is opened, so a syntax error cannot leak a file handle.
        Filter<HttpResponse> filter = new FilterParser<HttpResponse>(HttpResponse.class).parse(filterString);
        FastBufferedInputStream in = new FastBufferedInputStream(warcFile.equals("-") ? System.in : new FileInputStream(new File(warcFile + ".warc" + (isGZipped ? ".gz" : ""))), bufferSize);
        try {
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(System.out, bufferSize), "ASCII"));
            try {
                ExtractDigestUrls.run(in, isGZipped, filter, pw);
            }
            finally {
                // Closing also flushes the lines produced before a failure.
                pw.close();
            }
        }
        finally {
            in.close();
        }
    }
}

