/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.tool;

import cern.colt.GenericSorting;
import cern.colt.Sorting;
import cern.colt.Swapper;
import cern.colt.function.IntComparator;
import cern.colt.function.LongComparator;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.ParseException;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import com.martiansoftware.jsap.stringparsers.LongSizeStringParser;
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
import it.unimi.dsi.fastutil.objects.Object2ReferenceOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectList;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.lang.ObjectParser;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.document.DocumentCollection;
import it.unimi.dsi.mg4j.document.DocumentFactory;
import it.unimi.dsi.mg4j.document.DocumentIterator;
import it.unimi.dsi.mg4j.document.DocumentSequence;
import it.unimi.dsi.mg4j.document.IdentityDocumentFactory;
import it.unimi.dsi.mg4j.document.InputStreamDocumentSequence;
import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory;
import it.unimi.dsi.mg4j.document.ZipDocumentCollectionBuilder;
import it.unimi.dsi.mg4j.index.BitStreamIndexWriter;
import it.unimi.dsi.mg4j.index.CompressionFlags;
import it.unimi.dsi.mg4j.index.DowncaseTermProcessor;
import it.unimi.dsi.mg4j.index.FileIndex;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.NullTermProcessor;
import it.unimi.dsi.mg4j.index.TermProcessor;
import it.unimi.dsi.mg4j.index.cluster.ContiguousDocumentalStrategy;
import it.unimi.dsi.mg4j.index.cluster.DocumentalConcatenatedCluster;
import it.unimi.dsi.mg4j.index.cluster.DocumentalMergedCluster;
import it.unimi.dsi.mg4j.index.cluster.IdentityDocumentalStrategy;
import it.unimi.dsi.mg4j.index.cluster.IndexCluster;
import it.unimi.dsi.mg4j.index.payload.DatePayload;
import it.unimi.dsi.mg4j.index.payload.IntegerPayload;
import it.unimi.dsi.mg4j.index.payload.Payload;
import it.unimi.dsi.mg4j.io.ByteArrayPostingList;
import it.unimi.dsi.mg4j.tool.VirtualDocumentResolver;
import it.unimi.dsi.mg4j.util.MG4JClassParser;
import it.unimi.dsi.util.Properties;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.Map;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class Scan {
    /** Logger shared by all instances of this class. */
    private static final Logger LOGGER = Util.getLogger(Scan.class);
    // Compile-time switch; not referenced in the visible part of this file (decompiled code has the guarded checks stripped).
    private static final boolean ASSERTS = false;
    /** Extension of the cluster strategy file removed by {@code cleanup()}. */
    private static final String CLUSTER_STRATEGY_EXTENSION = ".cluster.strategy";
    /** Extension of the cluster properties file removed by {@code cleanup()}. */
    public static final String CLUSTER_PROPERTIES_EXTENSION = ".cluster.properties";
    // Same value as the literal used by processDocument() to decide when to log the term count.
    private static final int TERM_REPORT_STEP = 1000000;
    // Same value as the literal passed as initial size of the term map in the constructor.
    private static final int INITIAL_TERM_MAP_SIZE = 1000;
    /** Term processor applied to each word in {@code processDocument()}; also recorded in batch properties. */
    private final TermProcessor termProcessor;
    /** Basename from which batch basenames are derived (see {@code batchBasename()}). */
    private final String basename;
    /** Field name stored in the batch properties when non-null. */
    private final String field;
    /** Buffer size used for the buffered stream that writes the {@code .terms} file. */
    private final int bufferSize;
    /** Directory in which batch files are created, or {@code null} to use the basename as is. */
    private final File batchDir;
    /** Compression flags handed to the index writer for non-standard (remapped/virtual) batches. */
    final Map<CompressionFlags.Component, CompressionFlags.Coding> flags;
    /** Maps each term seen in the current batch to its in-memory posting list. */
    private Object2ReferenceOpenHashMap<MutableString, ByteArrayPostingList> termMap;
    /** Output stream of the {@code .sizes} file of the current batch; only opened when indexing is not virtual. */
    private OutputBitStream sizes;
    /** Occurrences accumulated over all batches dumped so far. */
    private long totOccurrences;
    /** Postings accumulated over all batches dumped so far. */
    private long totPostings;
    /** Documents accumulated over all batches dumped so far. */
    private int totDocuments;
    // Not written in the visible part of this file (dumpBatch() uses a local of the same name).
    private int maxCount;
    /** Maximum of {@code maxDocSize} over all batches dumped so far. */
    private int globMaxDocSize;
    /** Document count of the current batch; written as the DOCUMENTS property of a standard batch. */
    private int documentCount;
    /** Number of distinct terms in the current batch. */
    private int numTerms;
    /** Largest document size (final position reached) seen in the current batch. */
    int maxDocSize;
    /** Index of the current batch; incremented by {@code dumpBatch()}. */
    private int batch;
    /** Number of occurrences indexed in the current batch. */
    private int numOccurrences;
    /** Set when a posting list reports an out-of-memory condition; cleared by {@code dumpBatch()}. */
    public boolean outOfMemoryError;
    /** The kind of indexing performed by this instance. */
    private final IndexingType indexingType;
    /** Cached {@code indexingType == IndexingType.STANDARD}. */
    private final boolean indexingIsStandard;
    /** Cached {@code indexingType == IndexingType.REMAPPED}. */
    private final boolean indexingIsRemapped;
    /** Cached {@code indexingType == IndexingType.VIRTUAL}. */
    private final boolean indexingIsVirtual;
    /** Occurrences indexed in the document currently being processed. */
    private int occsInCurrDoc;
    /** For virtual indexing only: per virtual document, the position at which the next fragment starts. */
    protected int[] currMaxPos;
    // Reset to -1 by the constructor and dumpBatch(); maxDocInBatch + 1 is passed to the index writer
    // of non-standard batches. NOTE(review): the code updating it is outside the visible part of the file.
    private int maxDocInBatch;
    /** Gap added after each fragment of a virtual document; must be nonzero for virtual indexing. */
    protected int virtualDocumentGap;
    /** Optional builder that receives every word/non-word pair read; may be {@code null}. */
    private final ZipDocumentCollectionBuilder builder;
    /** Cut points between batches; starts with 0 and gets one entry per dumped batch when indexing is standard. */
    protected final IntArrayList cutPoints;
    /** Reusable buffer for the word returned by the word reader. */
    final MutableString word = new MutableString();
    /** Reusable buffer for the non-word returned by the word reader. */
    final MutableString nonWord = new MutableString();
    // The DEFAULT_* constants below are not referenced in the visible part of this file;
    // presumably they are defaults for the command-line interface — TODO confirm.
    public static final int DEFAULT_DELIMITER = 10;
    public static final int DEFAULT_BATCH_SIZE = 100000;
    public static final int DEFAULT_BUFFER_SIZE = 65536;
    public static final int DEFAULT_VIRTUAL_DOCUMENT_GAP = 64;

    /**
     * Creates a new scanner, choosing the indexing type from a boolean.
     *
     * @param basename the basename from which batch basenames are derived.
     * @param field the name of the indexed field (stored in batch properties when non-null).
     * @param termProcessor the term processor applied to each word.
     * @param documentsAreInOrder if true, standard indexing is used; otherwise
     * remapped indexing is used.
     * @param bufferSize the buffer size used when writing the term file.
     * @param builder an optional collection builder, or {@code null}.
     * @param batchDir the directory for batch files, or {@code null}.
     * @throws FileNotFoundException if the sizes file of the first batch cannot be created.
     */
    public Scan(String basename, String field, TermProcessor termProcessor, boolean documentsAreInOrder, int bufferSize, ZipDocumentCollectionBuilder builder, File batchDir) throws FileNotFoundException {
        // Out-of-order documents must map to REMAPPED: this overload supplies no virtual
        // document gap, and the delegate constructor rejects VIRTUAL with a zero gap, so
        // the previous VIRTUAL mapping made documentsAreInOrder == false always throw.
        this(basename, field, termProcessor, documentsAreInOrder ? IndexingType.STANDARD : IndexingType.REMAPPED, 0, 0, bufferSize, builder, batchDir);
    }

    /**
     * Creates a new scanner with the given indexing type, passing zero both as the number of
     * virtual documents and as the virtual document gap.
     *
     * <p>Because the delegate constructor rejects virtual indexing with a zero gap, this
     * overload cannot be used with {@code IndexingType.VIRTUAL}.
     *
     * @param basename the basename from which batch basenames are derived.
     * @param field the name of the indexed field (stored in batch properties when non-null).
     * @param termProcessor the term processor applied to each word.
     * @param indexingType the type of indexing to perform.
     * @param bufferSize the buffer size used when writing the term file.
     * @param builder an optional collection builder, or {@code null}.
     * @param batchDir the directory for batch files, or {@code null}.
     * @throws FileNotFoundException if the sizes file of the first batch cannot be created.
     */
    public Scan(String basename, String field, TermProcessor termProcessor, IndexingType indexingType, int bufferSize, ZipDocumentCollectionBuilder builder, File batchDir) throws FileNotFoundException {
        this(basename, field, termProcessor, indexingType, 0, 0, bufferSize, builder, batchDir);
    }

    /**
     * Creates a new scanner instance.
     *
     * @param basename the basename from which batch basenames are derived.
     * @param field the name of the indexed field (stored in batch properties when non-null).
     * @param termProcessor the term processor applied to each word.
     * @param indexingType the type of indexing to perform.
     * @param numVirtualDocs the number of virtual documents; only used for virtual indexing,
     * where it sizes the per-document position array.
     * @param virtualDocumentGap the gap left after each fragment of a virtual document; must
     * be nonzero for virtual indexing.
     * @param bufferSize the buffer size used when writing the term file.
     * @param builder an optional collection builder, or {@code null}.
     * @param batchDir the directory for batch files, or {@code null}.
     * @throws IllegalArgumentException if indexing is virtual and the gap is zero.
     * @throws FileNotFoundException if the sizes file of the first batch cannot be created.
     */
    public Scan(String basename, String field, TermProcessor termProcessor, IndexingType indexingType, int numVirtualDocs, int virtualDocumentGap, int bufferSize, ZipDocumentCollectionBuilder builder, File batchDir) throws FileNotFoundException {
        this.basename = basename;
        this.field = field;
        this.indexingType = indexingType;
        this.termProcessor = termProcessor;
        this.bufferSize = bufferSize;
        this.builder = builder;
        this.batchDir = batchDir;
        this.virtualDocumentGap = virtualDocumentGap;

        this.indexingIsStandard = indexingType == IndexingType.STANDARD;
        this.indexingIsRemapped = indexingType == IndexingType.REMAPPED;
        this.indexingIsVirtual = indexingType == IndexingType.VIRTUAL;
        if (this.indexingIsVirtual && virtualDocumentGap == 0) {
            throw new IllegalArgumentException("Illegal virtual document gap: " + virtualDocumentGap);
        }

        // The first cut point is always document 0.
        this.cutPoints = new IntArrayList();
        this.cutPoints.add(0);
        this.termMap = new Object2ReferenceOpenHashMap<MutableString, ByteArrayPostingList>(INITIAL_TERM_MAP_SIZE, 0.5f);
        this.flags = new EnumMap<CompressionFlags.Component, CompressionFlags.Coding>(CompressionFlags.DEFAULT_STANDARD_INDEX);
        this.maxDocInBatch = -1;

        if (this.indexingIsVirtual) {
            this.currMaxPos = new int[numVirtualDocs];
        }
        // Must come last: it reads the fields initialised above.
        this.openSizeBitStream();
    }

    /**
     * Deletes the cluster files and all per-batch files produced for a basename.
     *
     * <p>The cluster strategy and cluster properties files are looked up next to the
     * canonical path of {@code basename}, independently of {@code batchDir}; the batch
     * files are looked up through {@code batchBasename()}. Deletion is best effort: the
     * outcome of each individual deletion is ignored.
     *
     * @param basename the index basename.
     * @param batches the number of batches whose files must be removed.
     * @param batchDir the directory containing the batches, or {@code null}.
     * @throws IOException if the canonical path of the basename cannot be computed.
     */
    public static void cleanup(String basename, int batches, File batchDir) throws IOException {
        final String canonicalBase = new File(basename).getCanonicalPath();
        new File(canonicalBase + CLUSTER_STRATEGY_EXTENSION).delete();
        new File(canonicalBase + CLUSTER_PROPERTIES_EXTENSION).delete();

        final String[] batchExtensions = {
            ".frequencies", ".globcounts", ".index", ".offsets", ".sizes",
            ".stats", ".properties", ".terms", ".terms.unsorted"
        };
        for (int batch = 0; batch < batches; batch++) {
            final String prefix = Scan.batchBasename(batch, basename, batchDir);
            for (String extension : batchExtensions) {
                new File(prefix + extension).delete();
            }
        }
    }

    /**
     * Returns the basename of a batch.
     *
     * @param batch the batch index.
     * @param basename the index basename.
     * @param batchDir the directory in which the batch lives, or {@code null}.
     * @return {@code basename + "@" + batch}, resolved against {@code batchDir} when that
     * is non-null.
     */
    protected static String batchBasename(int batch, String basename, File batchDir) {
        final String name = basename + "@" + batch;
        if (batchDir == null) {
            return name;
        }
        return new File(batchDir, name).toString();
    }

    /**
     * Writes the postings accumulated in memory as a new on-disk batch and resets the
     * per-batch state.
     *
     * <p>The sorted term list is written to {@code .terms}; frequencies and global counts
     * go to {@code .frequencies} and {@code .globcounts}. For standard indexing the posting
     * lists are copied directly into {@code .index}/{@code .offsets}; otherwise postings are
     * reordered by document pointer and written through a {@link BitStreamIndexWriter}. For
     * remapped indexing the {@code .sizes} file is rewritten in document order; for virtual
     * indexing it is written from the per-document position counters.
     *
     * @return the number of occurrences contained in the batch just written.
     * @throws IOException if an I/O error occurs (logged as fatal and rethrown when it
     * happens after the term file has been written).
     * @throws ConfigurationException if the batch properties cannot be saved.
     */
    protected long dumpBatch() throws IOException, ConfigurationException {
        this.outOfMemoryError = false;
        final String batchBasename = Scan.batchBasename(this.batch, this.basename, this.batchDir);
        LOGGER.debug("Generating index " + batchBasename + "; documents: " + this.documentCount + "; terms:" + this.numTerms + "; occurrences: " + this.numOccurrences);
        final MutableString[] termArray = this.termMap.keySet().toArray(new MutableString[this.numTerms]);
        if (!this.indexingIsVirtual) {
            this.sizes.close();
        }

        // Terms are written in sorted order; the i-th term corresponds to the i-th inverted list.
        Sorting.quickSort(termArray);
        final PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(new FileOutputStream(batchBasename + ".terms"), this.bufferSize), "UTF-8"));
        try {
            for (MutableString t : termArray) {
                t.println(pw);
            }
        }
        finally {
            pw.close();
        }

        try {
            final OutputBitStream frequencies = new OutputBitStream(batchBasename + ".frequencies");
            final OutputBitStream globCounts = new OutputBitStream(batchBasename + ".globcounts");
            if (this.indexingIsStandard) {
                final OutputBitStream index = new OutputBitStream(batchBasename + ".index");
                final OutputBitStream offsets = new OutputBitStream(batchBasename + ".offsets");
                int maxCount = 0;
                long postings = 0L;
                long prevOffset = 0L;
                offsets.writeGamma(0);
                for (int i = 0; i < this.numTerms; ++i) {
                    final ByteArrayPostingList baobs = this.termMap.get(termArray[i]);
                    final int frequency = baobs.frequency;
                    baobs.flush();
                    if (maxCount < baobs.maxCount) {
                        maxCount = baobs.maxCount;
                    }
                    // The bit length must be taken before aligning.
                    final long bitLength = baobs.writtenBits();
                    baobs.align();
                    postings += (long)frequency;
                    index.writeGamma(frequency - 1);
                    // Terms appearing in every document of the batch are written without pointers.
                    if (frequency == this.documentCount) {
                        baobs.stripPointers(index, bitLength);
                    } else {
                        index.write(baobs.buffer, bitLength);
                    }
                    frequencies.writeGamma(frequency);
                    globCounts.writeLongGamma(baobs.globCount);
                    offsets.writeLongGamma(index.writtenBits() - prevOffset);
                    prevOffset = index.writtenBits();
                }
                this.totPostings += postings;
                final Properties properties = new Properties();
                properties.setProperty(Index.PropertyKeys.DOCUMENTS, this.documentCount);
                properties.setProperty(Index.PropertyKeys.TERMS, this.numTerms);
                properties.setProperty(Index.PropertyKeys.POSTINGS, postings);
                properties.setProperty(Index.PropertyKeys.MAXCOUNT, maxCount);
                properties.setProperty(Index.PropertyKeys.INDEXCLASS, FileIndex.class.getName());
                properties.addProperty(Index.PropertyKeys.CODING, "FREQUENCIES:GAMMA");
                properties.addProperty(Index.PropertyKeys.CODING, "POINTERS:DELTA");
                properties.addProperty(Index.PropertyKeys.CODING, "COUNTS:GAMMA");
                properties.addProperty(Index.PropertyKeys.CODING, "POSITIONS:DELTA");
                properties.setProperty(Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec(this.termProcessor));
                properties.setProperty(Index.PropertyKeys.OCCURRENCES, this.numOccurrences);
                properties.setProperty(Index.PropertyKeys.MAXDOCSIZE, this.maxDocSize);
                properties.setProperty(Index.PropertyKeys.SIZE, index.writtenBits());
                if (this.field != null) {
                    properties.setProperty(Index.PropertyKeys.FIELD, this.field);
                }
                properties.save(batchBasename + ".properties");
                index.close();
                offsets.close();
            } else {
                final BitStreamIndexWriter indexWriter = new BitStreamIndexWriter(batchBasename, this.maxDocInBatch + 1, true, this.flags);
                int maxCount = 0;
                int maxFrequency = 0;
                // Flush every list and compute the sizes of the scratch arrays.
                for (ByteArrayPostingList b : this.termMap.values()) {
                    b.flush();
                    b.align();
                    if (maxFrequency < b.frequency) {
                        maxFrequency = b.frequency;
                    }
                    if (maxCount < b.maxCount) {
                        maxCount = b.maxCount;
                    }
                }
                final long[] bitPos = new long[maxFrequency];
                final int[] pointer = new int[maxFrequency];
                int[] pos = new int[maxCount];
                for (int i = 0; i < this.numTerms; ++i) {
                    final ByteArrayPostingList baobs = this.termMap.get(termArray[i]);
                    final InputBitStream ibs = new InputBitStream(baobs.buffer);
                    final int frequency = baobs.frequency;
                    // Record, for each posting, where it starts and which document it refers to.
                    for (int j = 0; j < frequency; ++j) {
                        bitPos[j] = ibs.readBits();
                        pointer[j] = ibs.readDelta();
                        int p = ibs.readGamma() + 1;
                        while (p-- != 0) {
                            ibs.readDelta();
                        }
                    }
                    // Sort postings by pointer; ties are broken by bit position so that the
                    // original order of postings for the same document is preserved.
                    GenericSorting.quickSort(0, frequency, new IntComparator(){

                        public int compare(int i0, int i1) {
                            int t = pointer[i0] - pointer[i1];
                            if (t != 0) {
                                return t;
                            }
                            long u = bitPos[i0] - bitPos[i1];
                            return u < 0L ? -1 : (u > 0L ? 1 : 0);
                        }
                    }, new Swapper(){

                        public void swap(int i0, int i1) {
                            long t = bitPos[i0];
                            bitPos[i0] = bitPos[i1];
                            bitPos[i1] = t;
                            int p = pointer[i0];
                            pointer[i0] = pointer[i1];
                            pointer[i1] = p;
                        }
                    });
                    // In virtual indices the same document may appear in several postings:
                    // the actual frequency is the number of distinct pointers.
                    int actualFrequency = frequency;
                    if (this.indexingIsVirtual) {
                        actualFrequency = 1;
                        for (int j = 1; j < frequency; ++j) {
                            if (pointer[j] != pointer[j - 1]) {
                                ++actualFrequency;
                            }
                        }
                    }
                    indexWriter.newInvertedList();
                    indexWriter.writeFrequency(actualFrequency);
                    for (int j = 0; j < frequency; ++j) {
                        ibs.position(bitPos[j]);
                        final OutputBitStream obs = indexWriter.newDocumentRecord();
                        final int currPointer = ibs.readDelta();
                        indexWriter.writeDocumentPointer(obs, currPointer);
                        int count = ibs.readGamma() + 1;
                        // Positions are stored as gaps (minus one) after the first.
                        pos[0] = ibs.readDelta();
                        for (int p = 1; p < count; ++p) {
                            pos[p] = pos[p - 1] + 1 + ibs.readDelta();
                        }
                        if (this.indexingIsVirtual) {
                            // Merge the positions of all following postings for the same document.
                            while (j < frequency - 1) {
                                ibs.position(bitPos[j + 1]);
                                if (currPointer != ibs.readDelta()) {
                                    break;
                                }
                                ++j;
                                final int moreCount = ibs.readGamma() + 1;
                                pos = IntArrays.grow(pos, count + moreCount, count);
                                pos[count] = ibs.readDelta();
                                for (int p = 1; p < moreCount; ++p) {
                                    pos[count + p] = pos[count + p - 1] + 1 + ibs.readDelta();
                                }
                                count += moreCount;
                            }
                            if (maxCount < count) {
                                maxCount = count;
                            }
                        }
                        indexWriter.writePositionCount(obs, count);
                        indexWriter.writeDocumentPositions(obs, pos, 0, count, -1);
                    }
                    frequencies.writeGamma(actualFrequency);
                    globCounts.writeLongGamma(baobs.globCount);
                }
                indexWriter.close();
                final Properties properties = indexWriter.properties();
                this.totPostings += properties.getLong("postings");
                properties.setProperty(Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec(this.termProcessor));
                properties.setProperty(Index.PropertyKeys.OCCURRENCES, this.numOccurrences);
                properties.setProperty(Index.PropertyKeys.MAXDOCSIZE, this.maxDocSize);
                properties.setProperty(Index.PropertyKeys.SIZE, indexWriter.writtenBits());
                if (this.field != null) {
                    properties.setProperty(Index.PropertyKeys.FIELD, this.field);
                }
                properties.save(batchBasename + ".properties");
                if (this.indexingIsRemapped) {
                    // The sizes file contains (document, size) pairs in arrival order:
                    // read them back, sort by document and rewrite the file in document order.
                    final int[] document = new int[this.documentCount];
                    final int[] size = new int[this.documentCount];
                    final InputBitStream sizesIn = new InputBitStream(batchBasename + ".sizes");
                    try {
                        for (int i = 0; i < this.documentCount; ++i) {
                            document[i] = sizesIn.readGamma();
                            size[i] = sizesIn.readGamma();
                        }
                    }
                    finally {
                        // Release the file before it is reopened for writing below.
                        sizesIn.close();
                    }
                    GenericSorting.quickSort(0, this.documentCount, new IntComparator(){

                        public int compare(int x, int y) {
                            return document[x] - document[y];
                        }
                    }, new Swapper(){

                        public void swap(int x, int y) {
                            int t = document[x];
                            document[x] = document[y];
                            document[y] = t;
                            t = size[x];
                            size[x] = size[y];
                            size[y] = t;
                        }
                    });
                    final OutputBitStream permutedSizes = new OutputBitStream(Scan.batchBasename(this.batch, this.basename, this.batchDir) + ".sizes");
                    int d = 0;
                    for (int i = 0; i < this.documentCount; ++i) {
                        // Documents missing from this batch get size zero.
                        while (d++ < document[i]) {
                            permutedSizes.writeGamma(0);
                        }
                        permutedSizes.writeGamma(size[i]);
                    }
                    permutedSizes.close();
                }
            }
            if (this.indexingIsVirtual) {
                final OutputBitStream virtualSizes = new OutputBitStream(Scan.batchBasename(this.batch, this.basename, this.batchDir) + ".sizes");
                for (int i = 0; i < this.currMaxPos.length; ++i) {
                    virtualSizes.writeGamma(this.currMaxPos[i]);
                }
                virtualSizes.close();
                IntArrays.fill(this.currMaxPos, 0);
            }
            globCounts.close();
            frequencies.close();

            // Reset the per-batch state and update the global counters.
            this.termMap.clear();
            this.numTerms = 0;
            this.totOccurrences += (long)this.numOccurrences;
            this.totDocuments += this.documentCount;
            final long result = this.numOccurrences;
            this.numOccurrences = 0;
            this.globMaxDocSize = Math.max(this.maxDocSize, this.globMaxDocSize);
            this.maxDocSize = 0;
            this.maxDocInBatch = -1;
            if (this.indexingIsStandard) {
                // The new cut point needs the size of this batch, so it must be computed
                // before documentCount is reset.
                this.cutPoints.add(this.cutPoints.getInt(this.cutPoints.size() - 1) + this.documentCount);
            }
            this.documentCount = 0;
            ++this.batch;
            System.gc();
            return result;
        }
        catch (IOException e) {
            LOGGER.fatal("I/O Error on batch " + this.batch);
            throw e;
        }
    }

    /**
     * Opens the {@code .sizes} output stream of the current batch.
     *
     * <p>Does nothing for virtual indexing, where sizes are written by {@code dumpBatch()}.
     *
     * @throws FileNotFoundException if the sizes file cannot be created.
     */
    protected void openSizeBitStream() throws FileNotFoundException {
        if (this.indexingIsVirtual) {
            return;
        }
        final String sizesFile = Scan.batchBasename(this.batch, this.basename, this.batchDir) + ".sizes";
        this.sizes = new OutputBitStream(sizesFile);
    }

    /**
     * Runs the scan without virtual document resolvers or virtual gaps.
     *
     * <p>Delegates to the twelve-argument overload, passing {@code null} for both the
     * resolver array and the gap array and {@code renumberingFile} as the map file.
     *
     * @param basename the index basename.
     * @param documentSequence the documents to index.
     * @param termProcessor the term processor.
     * @param zipCollectionBasename the basename of a zip collection to build, or {@code null}.
     * @param bufferSize the buffer size.
     * @param documentsPerBatch the number of documents per batch.
     * @param indexedField the indices of the fields to index.
     * @param renumberingFile the name of a file of integers remapping document pointers, or {@code null}.
     * @param logInterval the progress logger interval.
     * @param tempDirName the directory for batches, or {@code null}.
     */
    public static void run(String basename, DocumentSequence documentSequence, TermProcessor termProcessor, String zipCollectionBasename, int bufferSize, int documentsPerBatch, int[] indexedField, String renumberingFile, long logInterval, String tempDirName) throws ConfigurationException, IOException {
        final VirtualDocumentResolver[] noResolvers = null;
        final int[] noGaps = null;
        Scan.run(basename, documentSequence, termProcessor, zipCollectionBasename, bufferSize, documentsPerBatch, indexedField, noResolvers, noGaps, renumberingFile, logInterval, tempDirName);
    }

    /**
     * Scans a document sequence, feeding each indexed field to its own {@code Scan}
     * (text and virtual fields) or {@code PayloadAccumulator} (other field types), and
     * dumping a batch whenever the batch is full or memory runs low.
     *
     * <p>A batch is written after a document when an out-of-memory condition was flagged
     * by a scanner, when {@code documentsPerBatch} documents have been accumulated, or when
     * the available memory is below 10% (after at most one compaction attempt per batch
     * once compaction has proved ineffective).
     *
     * @param basename the index basename; each field gets {@code basename + '-' + fieldName}.
     * @param documentSequence the documents to index.
     * @param termProcessor the term processor handed to each scanner.
     * @param zipCollectionBasename if non-null, a zip collection is built alongside and
     * stored as {@code zipCollectionBasename + ".collection"}.
     * @param bufferSize the buffer size handed to each scanner.
     * @param documentsPerBatch the number of documents after which a batch is written.
     * @param indexedField the indices of the fields to index; must not be empty.
     * @param virtualDocumentResolver one resolver per indexed field (entries for non-virtual
     * fields are ignored); may be {@code null} if no field is virtual.
     * @param virtualGap one gap per indexed field, read only for virtual fields
     * (NOTE(review): not null-checked — assumed non-null whenever a virtual field is indexed).
     * @param mapFile if non-null, a file of integers loaded with {@code BinIO.loadInts()} and
     * used to remap document pointers.
     * @param logInterval the progress logger interval.
     * @param tempDirName the directory for batches, or {@code null}.
     * @throws IllegalArgumentException if no field is specified, or a virtual field has no resolver.
     */
    public static void run(String basename, DocumentSequence documentSequence, TermProcessor termProcessor, String zipCollectionBasename, int bufferSize, int documentsPerBatch, int[] indexedField, VirtualDocumentResolver[] virtualDocumentResolver, int[] virtualGap, String mapFile, long logInterval, String tempDirName) throws ConfigurationException, IOException {
        Document document;
        // Never reassigned in this method: the consistency check on it at the end cannot fire.
        int numDocuments = 0;
        int numberOfIndexedFields = indexedField.length;
        if (numberOfIndexedFields == 0) {
            throw new IllegalArgumentException("You must specify at least one field");
        }
        DocumentFactory factory = documentSequence.factory();
        File tempDir = tempDirName == null ? null : new File(tempDirName);
        // Every virtual field needs a resolver.
        for (int i = 0; i < indexedField.length; ++i) {
            if (factory.fieldType(indexedField[i]) != DocumentFactory.FieldType.VIRTUAL || virtualDocumentResolver != null && virtualDocumentResolver[i] != null) continue;
            throw new IllegalArgumentException("No resolver was associated with virtual field " + factory.fieldName(indexedField[i]));
        }
        int[] map = mapFile != null ? BinIO.loadInts((CharSequence)mapFile) : null;
        // For each indexed field exactly one of scan[i] / accumulator[i] is set, depending on the field type.
        Scan[] scan = new Scan[numberOfIndexedFields];
        PayloadAccumulator[] accumulator = new PayloadAccumulator[numberOfIndexedFields];
        ProgressLogger pl = new ProgressLogger(LOGGER, logInterval, "documents");
        if (documentSequence instanceof DocumentCollection) {
            pl.expectedUpdates = ((DocumentCollection)documentSequence).size();
        }
        boolean zipping = zipCollectionBasename != null;
        ZipDocumentCollectionBuilder builder = zipping ? new ZipDocumentCollectionBuilder(zipCollectionBasename + ".zip", documentSequence.factory(), true, pl) : null;
        // Create one scanner or accumulator per field; text and payload fields are remapped when a map is given.
        block17: for (int i = 0; i < numberOfIndexedFields; ++i) {
            switch (factory.fieldType(indexedField[i])) {
                case TEXT: {
                    scan[i] = new Scan(basename + '-' + factory.fieldName(indexedField[i]), factory.fieldName(indexedField[i]), termProcessor, map != null ? IndexingType.REMAPPED : IndexingType.STANDARD, 0, 0, bufferSize, builder, tempDir);
                    continue block17;
                }
                case VIRTUAL: {
                    scan[i] = new Scan(basename + '-' + factory.fieldName(indexedField[i]), factory.fieldName(indexedField[i]), termProcessor, IndexingType.VIRTUAL, virtualDocumentResolver[i].numberOfDocuments(), virtualGap[i], bufferSize, builder, tempDir);
                    continue block17;
                }
                case DATE: {
                    accumulator[i] = new PayloadAccumulator(basename + '-' + factory.fieldName(indexedField[i]), new DatePayload(), factory.fieldName(indexedField[i]), map != null ? IndexingType.REMAPPED : IndexingType.STANDARD, documentsPerBatch, tempDir);
                    continue block17;
                }
                case INT: {
                    accumulator[i] = new PayloadAccumulator(basename + '-' + factory.fieldName(indexedField[i]), new IntegerPayload(), factory.fieldName(indexedField[i]), map != null ? IndexingType.REMAPPED : IndexingType.STANDARD, documentsPerBatch, tempDir);
                    continue block17;
                }
            }
        }
        pl.displayFreeMemory = true;
        pl.start((CharSequence)"Indexing documents...");
        DocumentIterator iterator = documentSequence.iterator();
        int documentPointer = 0;
        int documentsInBatch = 0;
        long batchStartTime = System.currentTimeMillis();
        boolean outOfMemoryError = false;
        // Once set, no further compaction is attempted until the next batch has been written.
        boolean stopCompaction = false;
        while ((document = iterator.nextDocument()) != null) {
            if (zipping) {
                builder.startDocument(document.title(), document.uri());
            }
            for (int i = 0; i < numberOfIndexedFields; ++i) {
                switch (factory.fieldType(indexedField[i])) {
                    case TEXT: {
                        Reader reader = (Reader)document.content(indexedField[i]);
                        WordReader wordReader = document.wordReader(indexedField[i]);
                        wordReader.setReader(reader);
                        if (zipping) {
                            builder.startTextField();
                        }
                        scan[i].processDocument(map != null ? map[documentPointer] : documentPointer, wordReader);
                        if (!zipping) break;
                        builder.endTextField();
                        break;
                    }
                    case VIRTUAL: {
                        ObjectList fragments = (ObjectList)document.content(indexedField[i]);
                        WordReader wordReader = document.wordReader(indexedField[i]);
                        virtualDocumentResolver[i].context(document);
                        // Each fragment is indexed as part of the document its specifier resolves to;
                        // fragments that resolve to a negative pointer are skipped.
                        for (VirtualDocumentFragment fragment : fragments) {
                            int virtualDocumentPointer = virtualDocumentResolver[i].resolve((CharSequence)fragment.documentSpecifier());
                            if (virtualDocumentPointer < 0) continue;
                            if (map != null) {
                                virtualDocumentPointer = map[virtualDocumentPointer];
                            }
                            wordReader.setReader((Reader)new FastBufferedReader(fragment.text()));
                            scan[i].processDocument(virtualDocumentPointer, wordReader);
                        }
                        if (!zipping) break;
                        builder.virtualField((ObjectList<VirtualDocumentFragment>)fragments);
                        break;
                    }
                    default: {
                        // Payload fields (no scanner): hand the raw content to the accumulator.
                        Object o = document.content(indexedField[i]);
                        accumulator[i].processData(map != null ? map[documentPointer] : documentPointer, o);
                        if (!zipping) break;
                        builder.nonTextField(o);
                    }
                }
                // Propagate an out-of-memory condition flagged by the scanner of this field.
                if (scan[i] == null || !scan[i].outOfMemoryError) continue;
                outOfMemoryError = true;
                stopCompaction = true;
            }
            if (zipping) {
                builder.endDocument();
            }
            ++documentPointer;
            ++documentsInBatch;
            document.close();
            pl.update();
            long percAvailableMemory = Util.percAvailableMemory();
            // Below 10% available memory, try a compaction first; if it does not bring us
            // back to at least 20%, give up compacting for this batch.
            if (percAvailableMemory < 10L && !stopCompaction) {
                LOGGER.info((Object)("Trying compaction... (" + percAvailableMemory + "% available)"));
                Util.compactMemory();
                percAvailableMemory = Util.percAvailableMemory();
                if (percAvailableMemory < 20L) {
                    stopCompaction = true;
                }
                LOGGER.info((Object)("Compaction completed (" + percAvailableMemory + "% available" + (stopCompaction ? ")" : ", will try again)")));
            }
            // Keep accumulating unless the batch is full or memory is short.
            if (!outOfMemoryError && documentsInBatch != documentsPerBatch && percAvailableMemory >= 10L) continue;
            if (outOfMemoryError) {
                LOGGER.warn((Object)("OutOfMemoryError during buffer reallocation: writing a batch of " + documentsInBatch + " documents"));
            } else if (percAvailableMemory < 10L) {
                LOGGER.warn((Object)("Available memory below 10%: writing a batch of " + documentsInBatch + " documents"));
            }
            long occurrences = 0L;
            // Write a batch for every field, reopening the sizes stream of each scanner.
            block21: for (int i = 0; i < numberOfIndexedFields; ++i) {
                switch (factory.fieldType(indexedField[i])) {
                    case TEXT: 
                    case VIRTUAL: {
                        occurrences += scan[i].dumpBatch();
                        scan[i].openSizeBitStream();
                        continue block21;
                    }
                    default: {
                        accumulator[i].writeData();
                    }
                }
            }
            LOGGER.info((Object)("Last set of batches indexed at " + Util.format((double)(1000.0 * (double)occurrences / (double)(System.currentTimeMillis() - batchStartTime))) + " occurrences/s"));
            batchStartTime = System.currentTimeMillis();
            documentsInBatch = 0;
            outOfMemoryError = false;
            stopCompaction = false;
        }
        iterator.close();
        if (builder != null) {
            BinIO.storeObject((Object)builder.close(), (CharSequence)(zipCollectionBasename + ".collection"));
        }
        // Close every scanner and accumulator.
        block22: for (int i = 0; i < numberOfIndexedFields; ++i) {
            switch (factory.fieldType(indexedField[i])) {
                case TEXT: 
                case VIRTUAL: {
                    scan[i].close();
                    continue block22;
                }
                default: {
                    accumulator[i].close();
                }
            }
        }
        pl.done();
        // Dead check: numDocuments is always 0 here (see its declaration).
        if (numDocuments > 0 && documentPointer != numDocuments) {
            LOGGER.error((Object)("The document sequence contains " + documentPointer + " documents, but the ZerothPass property file claims that there are " + numDocuments + " documents"));
        }
        if (map != null && documentPointer != map.length) {
            LOGGER.warn((Object)("The document sequence contains " + documentPointer + " documents, but the map contains " + map.length + " integers"));
        }
    }

    /**
     * Scans the words of one document and records a posting for every word
     * accepted by the term processor.
     *
     * <p>Positions start from zero, except under virtual indexing, where they
     * resume from the value stored in {@code currMaxPos} for the given pointer.
     * Empty words are skipped without consuming a position; words rejected by
     * the term processor consume a position but produce no posting. Every
     * word/non-word pair is forwarded to the collection builder, if there is one.
     *
     * @param documentPointer the pointer of the document; under standard indexing
     * postings are recorded against the running document count instead.
     * @param wordReader the reader from which word/non-word pairs are pulled.
     * @throws IOException if reading words or writing document sizes fails.
     */
    public void processDocument(int documentPointer, WordReader wordReader) throws IOException {
        int position = 0;
        if (this.indexingIsVirtual) {
            position = this.currMaxPos[documentPointer];
        }
        final int pointer = this.indexingIsStandard ? this.documentCount : documentPointer;

        this.word.length(0);
        this.nonWord.length(0);
        while (wordReader.next(this.word, this.nonWord)) {
            if (this.builder != null) {
                this.builder.add(this.word, this.nonWord);
            }
            if (this.word.length() == 0) {
                continue;
            }
            if (this.termProcessor.processTerm(this.word)) {
                ByteArrayPostingList postings = (ByteArrayPostingList)this.termMap.get((Object)this.word);
                if (postings == null) {
                    // First occurrence of this term in the current batch: the key must be a
                    // copy, because this.word is reused on every iteration.
                    postings = new ByteArrayPostingList(new byte[32], this.indexingIsStandard);
                    this.termMap.put((Object)this.word.copy(), (Object)postings);
                    ++this.numTerms;
                    if (this.numTerms % 1000000 == 0) {
                        LOGGER.info((Object)("[" + Util.format((long)this.numTerms) + " term(s), " + Util.format((long)(this.totOccurrences + 1L)) + " occ(s)]"));
                    }
                }
                postings.setDocumentPointer(pointer);
                postings.addPosition(position);
                if (postings.outOfMemoryError) {
                    this.outOfMemoryError = true;
                }
                ++this.occsInCurrDoc;
                ++this.numOccurrences;
            }
            // Accepted and rejected words both occupy a position.
            ++position;
        }

        if (this.maxDocSize < position) {
            this.maxDocSize = position;
        }
        if (this.indexingIsStandard || this.indexingIsRemapped) {
            // Remapped indexing prefixes each size with the pointer it refers to.
            if (!this.indexingIsStandard) {
                this.sizes.writeGamma(pointer);
            }
            this.sizes.writeGamma(position);
        }
        if (this.indexingIsVirtual) {
            this.currMaxPos[documentPointer] += this.occsInCurrDoc + this.virtualDocumentGap;
        }

        this.occsInCurrDoc = 0;
        ++this.documentCount;
        if (this.maxDocInBatch < pointer) {
            this.maxDocInBatch = pointer;
        }
    }

    /**
     * Leaves a file with the given name on disk, removing any previous file of
     * that name first and then asking the file system to create a new one.
     *
     * @param filename the name of the file to (re)create.
     * @throws IOException if an existing file cannot be deleted, or creation fails.
     */
    private static void makeEmpty(String filename) throws IOException {
        final File target = new File(filename);
        if (target.exists()) {
            final boolean removed = target.delete();
            if (!removed) {
                throw new IOException("Cannot delete file " + target);
            }
        }
        target.createNewFile();
    }

    /**
     * Finishes the scan: dumps any pending batch, creates an empty batch if none
     * was ever produced, and saves the global property file (plus, for
     * non-virtual indexing, the cluster property file and the cluster strategy).
     *
     * @throws ConfigurationException if a property file cannot be saved.
     * @throws IOException if a batch, an empty file or the strategy cannot be written.
     */
    public void close() throws ConfigurationException, IOException {
        if (this.numOccurrences > 0) {
            this.dumpBatch();
        }

        if (this.batch == 0 && this.numOccurrences == 0) {
            // Nothing was ever indexed: fabricate batch 0 as an empty index.
            final String emptyBasename = Scan.batchBasename(0, this.basename, this.batchDir);
            LOGGER.debug((Object)("Generating empty index " + emptyBasename));
            for (String extension : new String[]{".terms", ".frequencies", ".globcounts", ".sizes"}) {
                Scan.makeEmpty(emptyBasename + extension);
            }
            final BitStreamIndexWriter emptyWriter = new BitStreamIndexWriter(emptyBasename, this.totDocuments, true, this.flags);
            emptyWriter.close();
            final Properties batchProperties = emptyWriter.properties();
            batchProperties.setProperty((Enum)Index.PropertyKeys.TERMPROCESSOR, (Object)ObjectParser.toSpec((Object)this.termProcessor));
            batchProperties.setProperty((Enum)Index.PropertyKeys.OCCURRENCES, 0);
            batchProperties.setProperty((Enum)Index.PropertyKeys.MAXCOUNT, 0);
            batchProperties.setProperty((Enum)Index.PropertyKeys.MAXDOCSIZE, this.maxDocSize);
            batchProperties.setProperty((Enum)Index.PropertyKeys.SIZE, 0);
            if (this.field != null) {
                batchProperties.setProperty((Enum)Index.PropertyKeys.FIELD, (Object)this.field);
            }
            batchProperties.save(emptyBasename + ".properties");
            this.batch = 1;
        }

        // Drop the reference to the term map; it is not used past this point in this method.
        this.termMap = null;

        final Properties globalProperties = new Properties();
        if (this.field != null) {
            globalProperties.setProperty((Enum)Index.PropertyKeys.FIELD, (Object)this.field);
        }
        globalProperties.setProperty((Enum)Index.PropertyKeys.BATCHES, this.batch);
        globalProperties.setProperty((Enum)Index.PropertyKeys.DOCUMENTS, this.totDocuments);
        globalProperties.setProperty((Enum)Index.PropertyKeys.MAXDOCSIZE, this.globMaxDocSize);
        globalProperties.setProperty((Enum)Index.PropertyKeys.MAXCOUNT, this.maxCount);
        globalProperties.setProperty((Enum)Index.PropertyKeys.OCCURRENCES, this.totOccurrences);
        globalProperties.setProperty((Enum)Index.PropertyKeys.POSTINGS, this.totPostings);
        globalProperties.setProperty((Enum)Index.PropertyKeys.TERMPROCESSOR, (Object)this.termProcessor.getClass().getName());

        if (!this.indexingIsVirtual) {
            // Describe the set of batches as a documental cluster.
            final String strategyFilename = this.basename + CLUSTER_STRATEGY_EXTENSION;
            final Properties clusterProperties = new Properties();
            clusterProperties.addAll((Configuration)globalProperties);
            clusterProperties.setProperty((Enum)Index.PropertyKeys.TERMS, -1);
            clusterProperties.setProperty((Enum)IndexCluster.PropertyKeys.BLOOM, false);
            clusterProperties.setProperty((Enum)IndexCluster.PropertyKeys.FLAT, false);
            if (!this.indexingIsStandard) {
                clusterProperties.setProperty((Enum)Index.PropertyKeys.INDEXCLASS, (Object)DocumentalMergedCluster.class.getName());
                BinIO.storeObject((Object)new IdentityDocumentalStrategy(this.batch, this.totDocuments), (CharSequence)strategyFilename);
            } else {
                clusterProperties.setProperty((Enum)Index.PropertyKeys.INDEXCLASS, (Object)DocumentalConcatenatedCluster.class.getName());
                BinIO.storeObject((Object)new ContiguousDocumentalStrategy(this.cutPoints.toIntArray()), (CharSequence)strategyFilename);
            }
            clusterProperties.setProperty((Enum)IndexCluster.PropertyKeys.STRATEGY, (Object)strategyFilename);
            for (int b = 0; b < this.batch; ++b) {
                clusterProperties.addProperty((Enum)IndexCluster.PropertyKeys.LOCALINDEX, (Object)Scan.batchBasename(b, this.basename, this.batchDir));
            }
            clusterProperties.save(this.basename + CLUSTER_PROPERTIES_EXTENSION);
        }

        globalProperties.save(this.basename + ".properties");
    }

    /**
     * Returns a short description of the form
     * <code>SimpleClassName(basename:field)</code>.
     *
     * @return the simple class name followed by basename and field in parentheses.
     */
    @Override
    public String toString() {
        final String simpleName = this.getClass().getSimpleName();
        return simpleName + "(" + this.basename + ":" + this.field + ")";
    }

    /**
     * Turns a list of size specifications of the form <code>[field:]size</code>
     * into one size per indexed field.
     *
     * <p>The last unqualified specification (if any) replaces {@code defaultSize}
     * as the size of every field; qualified specifications then override the
     * size of the named field. Parsed sizes are narrowed to {@code int}.
     *
     * @param qualifiedSizes the specifications, each optionally prefixed by a field name and a colon.
     * @param defaultSize the size used when no unqualified specification is present.
     * @param indexedField the indices of the fields being indexed.
     * @param factory the factory used to resolve field names.
     * @return an array parallel to {@code indexedField} containing the sizes.
     * @throws ParseException if a size cannot be parsed.
     * @throws IllegalArgumentException if a named field is unknown to the factory or is not being indexed.
     */
    public static int[] parseQualifiedSizes(String[] qualifiedSizes, String defaultSize, int[] indexedField, DocumentFactory factory) throws ParseException {
        final int[] size = new int[indexedField.length];
        final IntArrayList indexedFields = IntArrayList.wrap((int[])indexedField);

        // The last specification without a field prefix becomes the default.
        String fallbackSpec = defaultSize;
        for (String spec : qualifiedSizes) {
            if (spec.indexOf(':') == -1) {
                fallbackSpec = spec;
            }
        }
        if (size.length > 0) {
            Arrays.fill(size, (int)LongSizeStringParser.parseSize((CharSequence)fallbackSpec));
        }

        // Field-qualified specifications override the default.
        for (String spec : qualifiedSizes) {
            final int colon = spec.indexOf(':');
            if (colon < 0) {
                continue;
            }
            final String fieldName = spec.substring(0, colon);
            final int field = factory.fieldIndex(fieldName);
            if (field < 0) {
                throw new IllegalArgumentException("Field " + fieldName + " is not part of factory " + factory.getClass().getName());
            }
            final int slot = indexedFields.indexOf((int)field);
            if (slot < 0) {
                throw new IllegalArgumentException("Field " + factory.fieldName(field) + " is not being indexed");
            }
            size[slot] = (int)LongSizeStringParser.parseSize((CharSequence)spec.substring(colon + 1));
        }
        return size;
    }

    /**
     * Loads the virtual document resolvers described by a list of
     * specifications of the form <code>[field:]filename</code>.
     *
     * <p>The last unqualified specification (if any) provides the resolver used
     * for every indexed field ({@code null} when there is none); qualified
     * specifications then override the resolver of the named virtual field.
     * Resolvers are read from disk with {@code BinIO.loadObject}.
     *
     * @param virtualDocumentSpec the specifications, each optionally prefixed by a field name and a colon.
     * @param indexedField the indices of the fields being indexed.
     * @param factory the factory used to resolve field names and types.
     * @return an array parallel to {@code indexedField} containing the resolvers.
     * @throws IllegalArgumentException if a named field is unknown, not indexed, or not virtual.
     * @throws RuntimeException if a resolver cannot be loaded.
     */
    public static VirtualDocumentResolver[] parseVirtualDocumentResolver(String[] virtualDocumentSpec, int[] indexedField, DocumentFactory factory) {
        final VirtualDocumentResolver[] virtualDocumentResolver = new VirtualDocumentResolver[indexedField.length];
        final IntArrayList indexedFields = IntArrayList.wrap((int[])indexedField);

        // The last specification without a field prefix provides the default resolver.
        VirtualDocumentResolver fallback = null;
        for (String spec : virtualDocumentSpec) {
            if (spec.indexOf(':') != -1) {
                continue;
            }
            try {
                fallback = (VirtualDocumentResolver)BinIO.loadObject((CharSequence)spec);
            }
            catch (IOException e) {
                throw new RuntimeException("An I/O error occurred while loading " + spec, e);
            }
            catch (ClassNotFoundException e) {
                throw new RuntimeException("Cannot load " + spec, e);
            }
        }
        Arrays.fill(virtualDocumentResolver, fallback);

        // Field-qualified specifications override the default.
        for (String spec : virtualDocumentSpec) {
            final int colon = spec.indexOf(':');
            if (colon < 0) {
                continue;
            }
            final String fieldName = spec.substring(0, colon);
            final int field = factory.fieldIndex(fieldName);
            if (field < 0) {
                throw new IllegalArgumentException("Field " + fieldName + " is not part of factory " + factory.getClass().getName());
            }
            final int slot = indexedFields.indexOf((int)field);
            if (slot < 0) {
                throw new IllegalArgumentException("Field " + factory.fieldName(field) + " is not being indexed");
            }
            if (factory.fieldType(field) != DocumentFactory.FieldType.VIRTUAL) {
                throw new IllegalArgumentException("Field " + factory.fieldName(field) + " is not virtual");
            }
            final String filename = spec.substring(colon + 1);
            try {
                virtualDocumentResolver[slot] = (VirtualDocumentResolver)BinIO.loadObject((CharSequence)filename);
            }
            catch (IOException e) {
                throw new RuntimeException("An I/O error occurred while loading " + filename, e);
            }
            catch (ClassNotFoundException e) {
                throw new RuntimeException("Cannot load " + filename, e);
            }
        }
        return virtualDocumentResolver;
    }

    /**
     * Parses a list of virtual document gap specifications of the form
     * <code>[field:]gap</code> into one gap per indexed field.
     *
     * <p>The gap of every field starts at 64 unless an unqualified specification
     * is present (the last one wins); qualified specifications then override the
     * gap of the named virtual field. Gaps must be non-negative integers.
     *
     * @param virtualDocumentGapSpec the specifications, each optionally prefixed by a field name and a colon.
     * @param indexedField the indices of the fields being indexed.
     * @param factory the factory used to resolve field names and types.
     * @return an array parallel to {@code indexedField} containing the gaps.
     * @throws IllegalArgumentException if a named field is unknown, not indexed, or not virtual.
     * @throws RuntimeException if a gap is not a non-negative integer.
     */
    public static int[] parseVirtualDocumentGap(String[] virtualDocumentGapSpec, int[] indexedField, DocumentFactory factory) {
        final int[] virtualDocumentGap = new int[indexedField.length];
        final IntArrayList indexedFields = IntArrayList.wrap((int[])indexedField);

        // The last specification without a field prefix provides the default gap.
        int fallbackGap = 64;
        for (String spec : virtualDocumentGapSpec) {
            if (spec.indexOf(':') != -1) {
                continue;
            }
            try {
                fallbackGap = Integer.parseInt(spec);
                if (fallbackGap < 0) {
                    // Thrown on purpose inside the try so that it is wrapped like a syntax error.
                    throw new NumberFormatException("Gap can't be negative");
                }
            }
            catch (NumberFormatException e) {
                throw new RuntimeException("Cannot parse gap correctly " + spec, e);
            }
        }
        Arrays.fill(virtualDocumentGap, fallbackGap);

        // Field-qualified specifications override the default.
        for (String spec : virtualDocumentGapSpec) {
            final int colon = spec.indexOf(':');
            if (colon < 0) {
                continue;
            }
            final String fieldName = spec.substring(0, colon);
            final int field = factory.fieldIndex(fieldName);
            if (field < 0) {
                throw new IllegalArgumentException("Field " + fieldName + " is not part of factory " + factory.getClass().getName());
            }
            final int slot = indexedFields.indexOf((int)field);
            if (slot < 0) {
                throw new IllegalArgumentException("Field " + factory.fieldName(field) + " is not being indexed");
            }
            if (factory.fieldType(field) != DocumentFactory.FieldType.VIRTUAL) {
                throw new IllegalArgumentException("Field " + factory.fieldName(field) + " is not virtual");
            }
            try {
                final int gap = Integer.parseInt(spec.substring(colon + 1));
                virtualDocumentGap[slot] = gap;
                if (gap < 0) {
                    throw new NumberFormatException("Gap can't be negative");
                }
            }
            catch (NumberFormatException e) {
                throw new RuntimeException("Cannot parse gap correctly " + spec, e);
            }
        }
        return virtualDocumentGap;
    }

    /**
     * Resolves the names of the fields to be indexed into sorted field indices.
     *
     * <p>When no name is given, every field of the factory is selected, except
     * that virtual fields are left out (with a warning) unless
     * {@code allSupported} is true. When names are given, exactly those fields
     * are selected and {@code allSupported} is ignored.
     *
     * @param indexedFieldName the names of the fields to index; may be empty.
     * @param factory the factory used to resolve field names and types.
     * @param allSupported whether virtual fields are included when no name is given.
     * @return the indices of the selected fields, in increasing order.
     * @throws IllegalArgumentException if a name is not a field of the factory.
     */
    public static int[] parseFieldNames(String[] indexedFieldName, DocumentFactory factory, boolean allSupported) {
        final IntArrayList selected = new IntArrayList();
        if (indexedFieldName.length != 0) {
            for (String name : indexedFieldName) {
                final int field = factory.fieldIndex(name);
                if (field < 0) {
                    throw new IllegalArgumentException("Field " + name + " is not part of factory " + factory.getClass().getName());
                }
                selected.add(field);
            }
        } else {
            for (int f = 0; f < factory.numberOfFields(); ++f) {
                final DocumentFactory.FieldType type = factory.fieldType(f);
                if (allSupported || type != DocumentFactory.FieldType.VIRTUAL) {
                    selected.add(f);
                } else {
                    LOGGER.warn((Object)("Virtual field " + factory.fieldName(f) + " is not being indexed; use -a or explicitly add field among the indexed ones"));
                }
            }
        }
        final int[] indexedField = selected.toIntArray();
        Arrays.sort(indexedField);
        return indexedField;
    }

    /**
     * Returns the document sequence to be indexed: either a serialised sequence
     * loaded from a file, or a sequence reading documents from standard input.
     *
     * @param sequenceName the filename of a serialised document sequence, or {@code null} to read from standard input.
     * @param factoryClass the class of the property-based factory used for standard input.
     * @param property the properties passed to the factory.
     * @param delimiter the code of the character separating documents on standard input.
     * @param logger a logger that receives a debug message when standard input is used.
     * @return the document sequence.
     */
    public static DocumentSequence getSequence(String sequenceName, Class<?> factoryClass, String[] property, int delimiter, Logger logger) throws IllegalAccessException, InvocationTargetException, NoSuchMethodException, IOException, ClassNotFoundException, InstantiationException {
        if (sequenceName == null) {
            logger.debug((Object)("Documents will be separated by the Unicode character " + delimiter));
            final PropertyBasedDocumentFactory stdinFactory = PropertyBasedDocumentFactory.getInstance(factoryClass, property);
            return new InputStreamDocumentSequence(System.in, delimiter, stdinFactory);
        }
        return (DocumentSequence)BinIO.loadObject((CharSequence)sequenceName);
    }

    public static void main(String[] arg) throws JSAPException, InvocationTargetException, NoSuchMethodException, ConfigurationException, ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
        SimpleJSAP jsap = new SimpleJSAP(Scan.class.getName(), "Builds a set of batches from a sequence of documents.", new Parameter[]{new FlaggedOption("sequence", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'S', "sequence", "A serialised document sequence that will be used instead of stdin."), new FlaggedOption("factory", (StringParser)MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), false, 'f', "factory", "A document factory with a standard constructor."), new FlaggedOption("property", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'p', "property", "A 'key=value' specification, or the name of a property file").setAllowMultipleDeclarations(true), new FlaggedOption("termProcessor", (StringParser)JSAP.STRING_PARSER, NullTermProcessor.class.getName(), false, 't', "term-processor", "Sets the term processor to the given class."), new Switch("downcase", '\u0000', "downcase", "A shortcut for setting the term processor to the downcasing processor."), new FlaggedOption("indexedField", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'I', "indexed-field", "The field(s) of the document factory that will be indexed. (default: all non-virtual fields)").setAllowMultipleDeclarations(true), new Switch("allFields", 'a', "all-fields", "Index also all virtual fields; has no effect if indexedField has been used at least once."), new FlaggedOption("zipCollection", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'z', "zip", "Creates a support ZipDocumentCollection with given basename."), new FlaggedOption("batchSize", (StringParser)JSAP.INTSIZE_PARSER, Integer.toString(100000), false, 's', "batch-size", "The size of a batch, in documents. (default: 100000)"), new FlaggedOption("virtualDocumentResolver", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'v', "virtual-document-resolver", "The virtual document resolver. It can be specified several times in the form [<field>:]<filename>. 
If the field is omitted, it sets the document resolver for all virtual fields.").setAllowMultipleDeclarations(true), new FlaggedOption("virtualDocumentGap", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'g', "virtual-document-gap", "The virtual document gap. It can be specified several times in the form [<field>:]<gap>. If the field is omitted, it sets the document gap for all virtual fields; the default gap is 64").setAllowMultipleDeclarations(true), new FlaggedOption("bufferSize", (StringParser)JSAP.INTSIZE_PARSER, Util.formatBinarySize((long)65536L), false, 'b', "buffer-size", "The size of an I/O buffer."), new FlaggedOption("delimiter", (StringParser)JSAP.INTEGER_PARSER, Integer.toString(10), false, 'd', "delimiter", "The document delimiter."), new FlaggedOption("renumber", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'r', "renumber", "The filename of a document renumbering."), new Switch("keepUnsorted", 'u', "keep-unsorted", "Keep the unsorted term file."), new FlaggedOption("logInterval", (StringParser)JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds."), new FlaggedOption("tempDir", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'T', "temp-dir", "A directory for all temporary files (e.g., batches)."), new UnflaggedOption("basename", (StringParser)JSAP.STRING_PARSER, true, "The basename of the resulting index.")});
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        DocumentSequence documentSequence = Scan.getSequence(jsapResult.getString("sequence"), jsapResult.getClass("factory"), jsapResult.getStringArray("property"), jsapResult.getInt("delimiter"), LOGGER);
        DocumentFactory factory = documentSequence.factory();
        int[] indexedField = Scan.parseFieldNames(jsapResult.getStringArray("indexedField"), factory, jsapResult.getBoolean("allFields"));
        int batchSize = jsapResult.getInt("batchSize");
        VirtualDocumentResolver[] virtualDocumentResolver = Scan.parseVirtualDocumentResolver(jsapResult.getStringArray("virtualDocumentResolver"), indexedField, factory);
        int[] virtualDocumentGap = Scan.parseVirtualDocumentGap(jsapResult.getStringArray("virtualDocumentGap"), indexedField, factory);
        Scan.run(jsapResult.getString("basename"), documentSequence, jsapResult.getBoolean("downcase") ? DowncaseTermProcessor.getInstance() : (TermProcessor)ObjectParser.fromSpec((String)jsapResult.getString("termProcessor"), TermProcessor.class, (String[])MG4JClassParser.PACKAGE, (String[])new String[]{"getInstance"}), jsapResult.getString("zipCollection"), jsapResult.getInt("bufferSize"), batchSize, indexedField, virtualDocumentResolver, virtualDocumentGap, jsapResult.getString("renumber"), jsapResult.getLong("logInterval"), jsapResult.getString("tempDir"));
    }

    /**
     * Accumulates the payloads of a payload-based field in an in-memory bit
     * stream and writes them out as a sequence of batches.
     *
     * <p>Each batch is written by {@link #writeData()} as an index containing a
     * single inverted list with one document record (pointer plus payload) per
     * accumulated document. {@link #close()} writes the last pending batch and
     * the property and strategy files describing the batches as a cluster.
     */
    protected static class PayloadAccumulator {
        /** The basename of the global (cluster) files. */
        private final String basename;
        /** The name of the indexed field, or {@code null}; written to property files when non-null. */
        private final String field;
        /** The sum of the <code>postings</code> property over all batches written so far. */
        private long totPostings;
        /** The directory passed to {@code Scan.batchBasename} when naming batches. */
        private final File batchDir;
        /** The compression flags passed to every index writer; initialised from {@code CompressionFlags.DEFAULT_PAYLOAD_INDEX}. */
        final Map<CompressionFlags.Component, CompressionFlags.Coding> flags;
        /** The number of documents processed overall. */
        private int totDocuments;
        /** The number of documents accumulated since the last batch was written. */
        private int documentCount;
        /** The number of the next batch to be written. */
        private int batch;
        /** The indexing type; only standard and remapped are accepted by the constructor. */
        private final IndexingType indexingType;
        /** For remapped indexing, the bit offset in the accumulator of each document of the current batch; otherwise unallocated. */
        private long[] position;
        /** The byte array stream backing {@link #accumulator}. */
        private FastByteArrayOutputStream accumulatorStream;
        /** The bit stream into which pointers (remapped indexing only) and payloads are written. */
        private OutputBitStream accumulator;
        /** The cumulative document counts at batch boundaries, starting with 0. */
        protected final IntArrayList cutPoints;
        /** The payload instance used to serialise and deserialise contents; it is reused for every document. */
        private final Payload payload;
        /** The largest document pointer seen in the current batch. */
        private int maxDocInBatch;

        /**
         * Creates a new accumulator.
         *
         * @param basename the basename of the global files.
         * @param payload the payload instance used to (de)serialise contents.
         * @param field the name of the field, or {@code null}.
         * @param indexingType the indexing type; must be standard or remapped.
         * @param documentsPerBatch the number of offsets allocated for remapped indexing.
         * @param batchDir the directory passed to {@code Scan.batchBasename}.
         * @throws UnsupportedOperationException if the indexing type is neither standard nor remapped.
         */
        public PayloadAccumulator(String basename, Payload payload, String field, IndexingType indexingType, int documentsPerBatch, File batchDir) {
            this.basename = basename;
            this.payload = payload;
            this.field = field;
            this.indexingType = indexingType;
            if (indexingType != IndexingType.STANDARD && indexingType != IndexingType.REMAPPED) {
                throw new UnsupportedOperationException("Non-standard payload-based indices support only standard and remapped indexing");
            }
            if (indexingType == IndexingType.REMAPPED) {
                // One bit offset per document, so that records can be sorted by pointer later.
                this.position = new long[documentsPerBatch];
            }
            this.batchDir = batchDir;
            this.cutPoints = new IntArrayList();
            this.cutPoints.add(0);
            this.flags = new EnumMap<CompressionFlags.Component, CompressionFlags.Coding>(CompressionFlags.DEFAULT_PAYLOAD_INDEX);
            this.accumulatorStream = new FastByteArrayOutputStream();
            this.accumulator = new OutputBitStream((OutputStream)this.accumulatorStream);
        }

        /**
         * Writes the documents accumulated so far as a new batch and resets the
         * accumulator.
         *
         * <p>Under standard indexing, records are written in accumulation order
         * with pointers 0, 1, &hellip;; otherwise records are sorted by the
         * pointer stored in front of each payload. Besides the index, a property
         * file and a one-line <code>.terms</code> file are written.
         *
         * @throws IOException if writing fails; the error is logged before being rethrown.
         * @throws ConfigurationException if the batch property file cannot be saved.
         */
        protected void writeData() throws IOException, ConfigurationException {
            String batchBasename = Scan.batchBasename(this.batch, this.basename, this.batchDir);
            LOGGER.debug((Object)("Generating index " + batchBasename + "; documents: " + this.documentCount));
            try {
                int i;
                this.accumulator.flush();
                // Read back directly from the array backing the accumulator.
                final InputBitStream ibs = new InputBitStream(this.accumulatorStream.array);
                // Standard batches are numbered locally; otherwise the batch must span up to the largest pointer seen.
                BitStreamIndexWriter indexWriter = new BitStreamIndexWriter(batchBasename, this.indexingType == IndexingType.STANDARD ? this.documentCount : this.maxDocInBatch + 1, false, this.flags);
                indexWriter.newInvertedList();
                indexWriter.writeFrequency(this.documentCount);
                if (this.indexingType == IndexingType.STANDARD) {
                    for (i = 0; i < this.documentCount; ++i) {
                        OutputBitStream obs = indexWriter.newDocumentRecord();
                        indexWriter.writeDocumentPointer(obs, i);
                        this.payload.read(ibs);
                        indexWriter.writePayload(obs, this.payload);
                    }
                } else {
                    // Sort the recorded bit offsets by the document pointer found at each offset.
                    Sorting.quickSort((long[])this.position, (int)0, (int)this.documentCount, (LongComparator)new LongComparator(){

                        public int compare(long position0, long position1) {
                            try {
                                ibs.position(position0);
                                int d0 = ibs.readDelta();
                                ibs.position(position1);
                                // NOTE(review): comparison by subtraction; assumes pointers are non-negative so it cannot overflow - confirm.
                                return d0 - ibs.readDelta();
                            }
                            catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                    });
                    for (i = 0; i < this.documentCount; ++i) {
                        OutputBitStream obs = indexWriter.newDocumentRecord();
                        ibs.position(this.position[i]);
                        indexWriter.writeDocumentPointer(obs, ibs.readDelta());
                        this.payload.read(ibs);
                        indexWriter.writePayload(obs, this.payload);
                    }
                    this.maxDocInBatch = 0;
                }
                indexWriter.close();
                Properties properties = indexWriter.properties();
                this.totPostings += properties.getLong("postings");
                properties.setProperty((Enum)Index.PropertyKeys.OCCURRENCES, -1);
                properties.setProperty((Enum)Index.PropertyKeys.MAXDOCSIZE, -1);
                properties.setProperty((Enum)Index.PropertyKeys.SIZE, indexWriter.writtenBits());
                properties.setProperty((Enum)Index.PropertyKeys.TERMPROCESSOR, (Object)NullTermProcessor.class.getName());
                properties.setProperty((Enum)Index.PropertyKeys.PAYLOADCLASS, (Object)this.payload.getClass().getName());
                if (this.field != null) {
                    properties.setProperty((Enum)Index.PropertyKeys.FIELD, (Object)this.field);
                }
                properties.save(batchBasename + ".properties");
                // The term file of a batch contains the single line "#".
                PrintWriter termWriter = new PrintWriter(new FileWriter(batchBasename + ".terms"));
                termWriter.println("#");
                termWriter.close();
                // Record the new batch boundary and reset the per-batch state.
                this.cutPoints.add(this.cutPoints.getInt(this.cutPoints.size() - 1) + this.documentCount);
                this.accumulatorStream.reset();
                this.accumulator.writtenBits(0L);
                this.documentCount = 0;
                this.maxDocInBatch = 0;
                ++this.batch;
            }
            catch (IOException e) {
                LOGGER.fatal((Object)("I/O Error on batch " + this.batch));
                throw e;
            }
        }

        /**
         * Accumulates the payload of one document.
         *
         * <p>Under non-standard indexing the current bit offset is recorded and
         * the document pointer is written (in delta coding) in front of the payload.
         *
         * @param documentPointer the pointer of the document.
         * @param content the content from which the payload is set.
         * @throws IOException if writing to the accumulator fails.
         */
        public void processData(int documentPointer, Object content) throws IOException {
            if (this.indexingType != IndexingType.STANDARD) {
                // NOTE(review): position has documentsPerBatch entries; presumably the caller
                // invokes writeData() before more documents than that are accumulated - confirm.
                this.position[this.documentCount] = this.accumulator.writtenBits();
                this.accumulator.writeDelta(documentPointer);
            }
            this.payload.set(content);
            this.payload.write(this.accumulator);
            if (documentPointer > this.maxDocInBatch) {
                this.maxDocInBatch = documentPointer;
            }
            ++this.documentCount;
            ++this.totDocuments;
        }

        /**
         * Writes the pending batch, if any, then the global property file, the
         * cluster property file and the cluster strategy.
         *
         * <p>If no document was ever processed, an empty batch 0 (index,
         * property file and empty <code>.terms</code> file) is generated first.
         * The accumulator, its backing stream and the offset array are released.
         *
         * @throws ConfigurationException if a property file cannot be saved.
         * @throws IOException if writing fails.
         */
        public void close() throws ConfigurationException, IOException {
            if (this.documentCount > 0) {
                this.writeData();
            }
            if (this.totDocuments == 0) {
                String batchBasename = Scan.batchBasename(0, this.basename, this.batchDir);
                LOGGER.debug((Object)("Generating empty index " + batchBasename));
                BitStreamIndexWriter indexWriter = new BitStreamIndexWriter(batchBasename, 0, false, this.flags);
                indexWriter.close();
                Properties properties = indexWriter.properties();
                properties.setProperty((Enum)Index.PropertyKeys.SIZE, 0);
                properties.setProperty((Enum)Index.PropertyKeys.OCCURRENCES, -1);
                properties.setProperty((Enum)Index.PropertyKeys.MAXCOUNT, -1);
                properties.setProperty((Enum)Index.PropertyKeys.MAXDOCSIZE, -1);
                properties.setProperty((Enum)Index.PropertyKeys.TERMPROCESSOR, (Object)NullTermProcessor.class.getName());
                properties.setProperty((Enum)Index.PropertyKeys.PAYLOADCLASS, (Object)this.payload.getClass().getName());
                if (this.field != null) {
                    properties.setProperty((Enum)Index.PropertyKeys.FIELD, (Object)this.field);
                }
                properties.save(batchBasename + ".properties");
                // Creates (or truncates) an empty term file.
                new FileOutputStream(batchBasename + ".terms").close();
                this.batch = 1;
            }
            this.accumulator = null;
            this.accumulatorStream = null;
            this.position = null;
            // Global properties; the cluster properties below start as a copy of them.
            Properties properties = new Properties();
            if (this.field != null) {
                properties.setProperty((Enum)Index.PropertyKeys.FIELD, (Object)this.field);
            }
            properties.setProperty((Enum)Index.PropertyKeys.BATCHES, this.batch);
            properties.setProperty((Enum)Index.PropertyKeys.DOCUMENTS, this.totDocuments);
            properties.setProperty((Enum)Index.PropertyKeys.POSTINGS, this.totPostings);
            properties.setProperty((Enum)Index.PropertyKeys.OCCURRENCES, -1);
            properties.setProperty((Enum)Index.PropertyKeys.MAXCOUNT, -1);
            properties.setProperty((Enum)Index.PropertyKeys.MAXDOCSIZE, -1);
            properties.setProperty((Enum)Index.PropertyKeys.TERMPROCESSOR, (Object)NullTermProcessor.class.getName());
            properties.setProperty((Enum)Index.PropertyKeys.PAYLOADCLASS, (Object)this.payload.getClass().getName());
            Properties clusterProperties = new Properties();
            clusterProperties.addAll((Configuration)properties);
            clusterProperties.setProperty((Enum)Index.PropertyKeys.TERMS, 1);
            clusterProperties.setProperty((Enum)IndexCluster.PropertyKeys.BLOOM, false);
            clusterProperties.setProperty((Enum)IndexCluster.PropertyKeys.FLAT, true);
            if (this.indexingType == IndexingType.STANDARD) {
                // Standard batches cover contiguous document ranges delimited by the cut points.
                clusterProperties.setProperty((Enum)Index.PropertyKeys.INDEXCLASS, (Object)DocumentalConcatenatedCluster.class.getName());
                BinIO.storeObject((Object)new ContiguousDocumentalStrategy(this.cutPoints.toIntArray()), (CharSequence)(this.basename + Scan.CLUSTER_STRATEGY_EXTENSION));
            } else {
                clusterProperties.setProperty((Enum)Index.PropertyKeys.INDEXCLASS, (Object)DocumentalMergedCluster.class.getName());
                BinIO.storeObject((Object)new IdentityDocumentalStrategy(this.batch, this.totDocuments), (CharSequence)(this.basename + Scan.CLUSTER_STRATEGY_EXTENSION));
            }
            clusterProperties.setProperty((Enum)IndexCluster.PropertyKeys.STRATEGY, (Object)(this.basename + Scan.CLUSTER_STRATEGY_EXTENSION));
            for (int i = 0; i < this.batch; ++i) {
                clusterProperties.addProperty((Enum)IndexCluster.PropertyKeys.LOCALINDEX, (Object)Scan.batchBasename(i, this.basename, this.batchDir));
            }
            clusterProperties.save(this.basename + Scan.CLUSTER_PROPERTIES_EXTENSION);
            properties.save(this.basename + ".properties");
        }
    }

    /**
     * A serialisable fragment of a virtual document: a piece of text paired
     * with a specifier of the document it belongs to.
     */
    public interface VirtualDocumentFragment extends Serializable {
        /**
         * Returns the specifier of the document this fragment refers to.
         *
         * @return the document specifier.
         */
        MutableString documentSpecifier();

        /**
         * Returns the text of this fragment.
         *
         * @return the text.
         */
        MutableString text();
    }

    /**
     * The ways in which documents can be indexed.
     *
     * <p>As used by {@code PayloadAccumulator}: with {@link #STANDARD} indexing,
     * documents of a batch are numbered in arrival order and batches are
     * concatenated; with {@link #REMAPPED} indexing, each document carries an
     * explicit pointer and batches are merged; {@link #VIRTUAL} indexing is
     * rejected by payload-based accumulators.
     */
    public enum IndexingType {
        /** Documents are numbered in the order in which they are processed. */
        STANDARD,
        /** Documents are indexed under an explicitly supplied pointer. */
        REMAPPED,
        /** Virtual indexing; not supported by payload-based accumulators. */
        VIRTUAL
    }
}

