/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.document;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.document.DocumentCollection;
import it.unimi.dsi.mg4j.document.DocumentFactory;
import it.unimi.dsi.mg4j.document.DocumentIterator;
import it.unimi.dsi.mg4j.document.DocumentSequence;
import it.unimi.dsi.mg4j.document.IdentityDocumentFactory;
import it.unimi.dsi.mg4j.document.ZipDocumentCollection;
import it.unimi.dsi.mg4j.tool.Scan;
import it.unimi.dsi.mg4j.util.MG4JClassParser;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.lang.reflect.InvocationTargetException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Incrementally writes a sequence of documents into a zip file, one zip entry
 * per document, and finally returns a {@link ZipDocumentCollection} describing
 * the file that was written.
 *
 * <p>The builder can be driven manually through the callback-style methods
 * ({@link #startDocument(CharSequence, CharSequence)}, {@link #startTextField()},
 * {@link #add(MutableString, MutableString)}, {@link #endTextField()},
 * {@link #nonTextField(Object)}, {@link #virtualField(ObjectList)},
 * {@link #endDocument()}, {@link #close()}), or in one shot through
 * {@link #build(DocumentSequence)}.
 *
 * <p>Instances hold an open output stream and mutable state; nothing in this
 * class synchronizes access to them.
 */
public class ZipDocumentCollectionBuilder {
    private static final Logger LOGGER = Util.getLogger(ZipDocumentCollectionBuilder.class);
    private static final boolean DEBUG = false;
    /** The zip stream all entries are written to; opened by the constructor. */
    private ZipOutputStream zipOut;
    /** Number of documents completed so far; also used as the name of the next zip entry. */
    private int numberOfDocuments;
    /** If true, non-words (and empty words) are stored too, not just non-empty words. */
    private boolean exact;
    private final ProgressLogger progressLogger;
    private final String zipFilename;
    private final DocumentFactory factory;
    /** True between {@link #startTextField()} and {@link #endTextField()}. */
    private boolean inTextField;

    /**
     * Creates a builder and immediately opens (creating or truncating) the zip file.
     *
     * @param zipFilename the name of the zip file to write.
     * @param factory the factory of the documents that will be added; it is handed to the
     *        resulting {@link ZipDocumentCollection}.
     * @param exact whether non-words must be stored along with words.
     * @param progressLogger the logger used by {@link #build(DocumentSequence)} to report progress.
     * @throws FileNotFoundException if the zip file cannot be opened for writing.
     */
    public ZipDocumentCollectionBuilder(String zipFilename, DocumentFactory factory, boolean exact, ProgressLogger progressLogger) throws FileNotFoundException {
        this.zipFilename = zipFilename;
        this.factory = factory;
        this.zipOut = new ZipOutputStream(new FileOutputStream(zipFilename));
        this.exact = exact;
        this.progressLogger = progressLogger;
        this.inTextField = false;
    }

    /**
     * Opens a new zip entry for the next document.
     *
     * <p>The entry is named after the current document index, the title is stored as the
     * entry comment, and the URI is written as the first item of the entry.
     *
     * @param title the document title (must not be {@code null}).
     * @param uri the document URI.
     * @throws IOException if the underlying zip stream fails.
     */
    public void startDocument(CharSequence title, CharSequence uri) throws IOException {
        ZipEntry currEntry = new ZipEntry(Integer.toString(this.numberOfDocuments));
        currEntry.setComment(((Object)title).toString());
        this.zipOut.putNextEntry(currEntry);
        new MutableString(uri).writeSelfDelimUTF8((OutputStream)this.zipOut);
    }

    /**
     * Closes the zip entry of the current document and advances the document counter.
     *
     * @throws IOException if the underlying zip stream fails.
     */
    public void endDocument() throws IOException {
        this.zipOut.closeEntry();
        ++this.numberOfDocuments;
    }

    /** Marks the beginning of a text field; enables {@link #add(MutableString, MutableString)}. */
    public void startTextField() {
        this.inTextField = true;
    }

    /**
     * Writes the content of a non-text field by Java serialization.
     *
     * @param o the field content to serialize.
     * @throws IOException if serialization or the underlying zip stream fails.
     */
    public void nonTextField(Object o) throws IOException {
        // The object stream is flushed but deliberately NOT closed: closing it
        // would close the shared zip stream as well.
        ObjectOutputStream oos = new ObjectOutputStream(this.zipOut);
        oos.writeObject(o);
        oos.flush();
    }

    /**
     * Writes the content of a virtual field: the number of fragments (as a decimal string)
     * followed by the document specifier and the text of each fragment.
     *
     * @param fragments the fragments making up the virtual field.
     * @throws IOException if the underlying zip stream fails.
     */
    public void virtualField(ObjectList<Scan.VirtualDocumentFragment> fragments) throws IOException {
        new MutableString().append(String.valueOf(fragments.size())).writeSelfDelimUTF8((OutputStream)this.zipOut);
        for (Scan.VirtualDocumentFragment fragment : fragments) {
            fragment.documentSpecifier().writeSelfDelimUTF8((OutputStream)this.zipOut);
            fragment.text().writeSelfDelimUTF8((OutputStream)this.zipOut);
        }
    }

    /**
     * Terminates the current text field.
     *
     * <p>One zero byte is written, plus a second one in exact mode (presumably an empty
     * word and an empty non-word acting as terminator; confirm against the reader side).
     *
     * @throws IllegalStateException if no text field is currently open.
     * @throws IOException if the underlying zip stream fails.
     */
    public void endTextField() throws IOException {
        if (!this.inTextField) {
            throw new IllegalStateException("endTextField() called without a matching startTextField()");
        }
        this.inTextField = false;
        this.zipOut.write(0);
        if (this.exact) {
            this.zipOut.write(0);
        }
    }

    /**
     * Appends a word/non-word pair to the current text field.
     *
     * <p>Outside of a text field the call is silently ignored. In exact mode both the word
     * (even if empty) and the non-word are written; otherwise only non-empty words are.
     *
     * @param word the word.
     * @param nonWord the non-word following it.
     * @throws IOException if the underlying zip stream fails.
     */
    public void add(MutableString word, MutableString nonWord) throws IOException {
        if (!this.inTextField) {
            return;
        }
        if (this.exact || word.length() > 0) {
            word.writeSelfDelimUTF8((OutputStream)this.zipOut);
        }
        if (this.exact) {
            nonWord.writeSelfDelimUTF8((OutputStream)this.zipOut);
        }
    }

    /**
     * Closes the zip file and returns the collection describing it.
     *
     * @return a {@link ZipDocumentCollection} over the file just written.
     * @throws IOException if closing the zip stream fails.
     */
    public ZipDocumentCollection close() throws IOException {
        this.zipOut.close();
        return new ZipDocumentCollection(this.zipFilename, this.factory, this.numberOfDocuments, this.exact);
    }

    /**
     * Zips every document of the given sequence and closes the builder.
     *
     * <p>The document counter is reset to zero first. The iterator, each document and, on
     * failure, the zip stream are closed even when an exception interrupts the process.
     *
     * @param inputSequence the sequence to be zipped; its factory must be the very same
     *        instance that was passed to the constructor.
     * @return the resulting collection, as returned by {@link #close()}.
     * @throws IllegalStateException if the factory of {@code inputSequence} is not the
     *         factory given to the constructor.
     * @throws IOException if reading the sequence or writing the zip file fails.
     */
    public ZipDocumentCollection build(DocumentSequence inputSequence) throws IOException {
        // Validate before acquiring the iterator, so nothing is leaked on mismatch.
        if (this.factory != inputSequence.factory()) {
            throw new IllegalStateException("The factory provided by the constructor does not correspond to the factory of the input sequence");
        }
        this.progressLogger.start((CharSequence)"Zipping collection...");
        this.numberOfDocuments = 0;
        final int numberOfFields = this.factory.numberOfFields();
        final MutableString word = new MutableString();
        final MutableString nonWord = new MutableString();

        ZipDocumentCollection result = null;
        try {
            final DocumentIterator docIt = inputSequence.iterator();
            try {
                Document document;
                while ((document = docIt.nextDocument()) != null) {
                    try {
                        this.startDocument(document.title(), document.uri());
                        for (int field = 0; field < numberOfFields; ++field) {
                            this.writeField(document, field, word, nonWord);
                        }
                    } finally {
                        document.close();
                    }
                    this.endDocument();
                    // Count only documents actually written, so that the number of
                    // updates matches the number of documents.
                    this.progressLogger.update();
                }
                this.progressLogger.done();
            } finally {
                docIt.close();
            }
            result = this.close();
            return result;
        } finally {
            if (result == null) {
                // Something failed: release the file handle without masking the cause.
                this.closeZipAfterFailure();
            }
        }
    }

    /** Writes a single field of a document, dispatching on the field type declared by the factory. */
    private void writeField(Document document, int field, MutableString word, MutableString nonWord) throws IOException {
        final Object content = document.content(field);
        final DocumentFactory.FieldType type = this.factory.fieldType(field);
        if (type == DocumentFactory.FieldType.TEXT) {
            this.startTextField();
            final WordReader wordReader = document.wordReader(field);
            wordReader.setReader((Reader)content);
            while (wordReader.next(word, nonWord)) {
                this.add(word, nonWord);
            }
            this.endTextField();
        } else if (type == DocumentFactory.FieldType.VIRTUAL) {
            // The content of a virtual field is handled as a list of fragments;
            // the cast cannot be checked at runtime because of erasure.
            @SuppressWarnings("unchecked")
            final ObjectList<Scan.VirtualDocumentFragment> fragments = (ObjectList<Scan.VirtualDocumentFragment>)((ObjectList)content);
            this.virtualField(fragments);
        } else {
            this.nonTextField(content);
        }
    }

    /** Closes the zip stream after a failed build, logging (not throwing) any further error. */
    private void closeZipAfterFailure() {
        try {
            this.zipOut.close();
        } catch (IOException e) {
            // Intentionally not rethrown: the exception that aborted the build is
            // already propagating and is the one the caller needs to see.
            LOGGER.warn((Object)("Could not close " + this.zipFilename + " after a failed build"), (Throwable)e);
        }
    }

    /**
     * Command-line entry point: reads a document sequence, zips it and serializes the
     * resulting collection.
     *
     * @param arg the command-line arguments; run with no arguments for usage.
     */
    public static void main(String[] arg) throws JSAPException, IOException, ClassNotFoundException, InvocationTargetException, NoSuchMethodException, IllegalAccessException, InstantiationException {
        SimpleJSAP jsap = new SimpleJSAP(
            ZipDocumentCollectionBuilder.class.getName(),
            "Produces a zip document collection from an existing document sequence.",
            new Parameter[]{
                new FlaggedOption("sequence", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'S', "sequence", "A serialised document sequence that will be used instead of stdin."),
                new FlaggedOption("factory", (StringParser)MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), false, 'f', "factory", "A document factory with a standard constructor."),
                new FlaggedOption("property", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'p', "property", "A 'key=value' specification, or the name of a property file").setAllowMultipleDeclarations(true),
                new FlaggedOption("delimiter", (StringParser)JSAP.INTEGER_PARSER, Integer.toString(10), false, 'd', "delimiter", "The document delimiter."),
                new Switch("approximated", 'a', "approximated", "If specified, non-words will not be copied."),
                new FlaggedOption("logInterval", (StringParser)JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds."),
                new UnflaggedOption("collection", (StringParser)JSAP.STRING_PARSER, true, "The filename for the output document collection."),
                new UnflaggedOption("zipfile", (StringParser)JSAP.STRING_PARSER, true, "The filename for the output zip file.")
            }
        );
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        DocumentSequence documentSequence = Scan.getSequence(jsapResult.getString("sequence"), jsapResult.getClass("factory"), jsapResult.getStringArray("property"), jsapResult.getInt("delimiter"), LOGGER);
        ProgressLogger progressLogger = new ProgressLogger(LOGGER, "documents");
        // Honour the --log-interval option, which was parsed but previously ignored.
        progressLogger.logInterval = jsapResult.getLong("logInterval");
        if (documentSequence instanceof DocumentCollection) {
            progressLogger.expectedUpdates = ((DocumentCollection)documentSequence).size();
        }
        ZipDocumentCollectionBuilder builder = new ZipDocumentCollectionBuilder(jsapResult.getString("zipfile"), documentSequence.factory(), !jsapResult.getBoolean("approximated"), progressLogger);
        BinIO.storeObject((Object)builder.build(documentSequence), (CharSequence)jsapResult.getString("collection"));
    }
}

