/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.bits.TransformationStrategy;
import it.unimi.dsi.bits.Utf16TransformationStrategy;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.FileLinesCollection;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.tool.VirtualDocumentResolver;
import it.unimi.dsi.sux4j.mph.LcpMinimalPerfectMonotoneHash;
import it.unimi.dsi.sux4j.util.ShiftAddXorSignedStringMap;
import it.unimi.dsi.util.BloomFilter;
import it.unimi.dsi.util.StringMap;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * A {@link VirtualDocumentResolver} that turns a URI specification into a document
 * pointer by looking the (normalised, possibly context-resolved) URI up in a
 * {@link StringMap}.
 *
 * <p>The context document set with {@link #context(Document)} provides the base
 * against which relative specifications are resolved. The context is transient:
 * it is not part of the serialised form of the resolver.
 *
 * <p>{@link #main(String[])} builds such a resolver from a list of URIs (one per
 * line, UTF-8 encoded) and stores it on disk.
 */
public class URLMPHVirtualDocumentResolver
implements VirtualDocumentResolver {
    private static final long serialVersionUID = 1L;
    private static final Logger LOGGER = Logger.getLogger(URLMPHVirtualDocumentResolver.class);
    /** Character encoding used for every URI stream read or written by {@link #main(String[])}. */
    private static final String URI_ENCODING = "UTF-8";
    /** Number of random alphanumeric characters appended to a URI that looks like a duplicate. */
    private static final int RANDOM_SUFFIX_LENGTH = 32;
    /** The map from URIs (as strings) to document pointers. */
    private final StringMap<? extends CharSequence> url2DocumentPointer;
    /** The normalised URI of the current context document, or {@code null} if there is none. */
    private transient URI documentURI;

    /**
     * Creates a resolver backed by the given map.
     *
     * @param url2DocumentPointer a map from URIs to document pointers.
     */
    public URLMPHVirtualDocumentResolver(StringMap<? extends CharSequence> url2DocumentPointer) {
        this.url2DocumentPointer = url2DocumentPointer;
    }

    /**
     * Sets the context document whose URI is used to resolve relative specifications.
     *
     * <p>If the document reports a {@code null} URI, or a URI that cannot be parsed,
     * the context is cleared, so that relative specifications will not be resolved.
     *
     * @param document the new context document.
     */
    @Override
    public void context(Document document) {
        CharSequence uri = document.uri();
        if (uri == null) {
            // No URI to resolve against: treat it like an unparseable one instead of failing.
            this.documentURI = null;
            return;
        }
        try {
            this.documentURI = new URI(uri.toString()).normalize();
        }
        catch (URISyntaxException e) {
            this.documentURI = null;
        }
    }

    /**
     * Resolves a virtual document specification to a document pointer.
     *
     * <p>The specification is parsed as a URI and normalised; if it is not absolute
     * it is resolved against the URI of the current context document. The resulting
     * string is then looked up in the underlying map.
     *
     * @param virtualDocumentSpec the specification (a URI, possibly relative).
     * @return the value returned by the map for the resolved URI, narrowed to an
     * {@code int}; -1 if the specification is relative and there is no usable
     * context, or if any runtime failure occurs (e.g., the specification is not a
     * syntactically valid URI).
     */
    @Override
    public int resolve(CharSequence virtualDocumentSpec) {
        try {
            URI virtualURI = URI.create(virtualDocumentSpec.toString()).normalize();
            if (!virtualURI.isAbsolute()) {
                if (this.documentURI == null) {
                    return -1;
                }
                virtualURI = this.documentURI.resolve(virtualURI);
            }
            return (int)this.url2DocumentPointer.getLong(virtualURI.toString());
        }
        catch (RuntimeException e) {
            // Deliberately best-effort: nothing in the block throws a checked exception,
            // and any malformed input simply means "cannot be resolved".
            return -1;
        }
    }

    /**
     * Returns the number of documents known to this resolver.
     *
     * @return the size of the underlying map.
     */
    @Override
    public int numberOfDocuments() {
        return this.url2DocumentPointer.size();
    }

    /**
     * Modifies a URI in place until the filter accepts it as new.
     *
     * <p>As long as {@code filter.add()} returns {@code false}, a slash followed by
     * {@value #RANDOM_SUFFIX_LENGTH} random alphanumeric characters is appended.
     * NOTE(review): since the filter is a Bloom filter, a URI that was never seen
     * may presumably be reported as a duplicate too; confirm against BloomFilter.
     *
     * @param filter the filter recording the URIs seen so far.
     * @param uri the URI to be made unique (modified in place).
     */
    private static void makeUnique(BloomFilter filter, MutableString uri) {
        while (!filter.add(uri)) {
            LOGGER.debug("Duplicate URI " + uri);
            uri.append('/').append(RandomStringUtils.randomAlphanumeric(RANDOM_SUFFIX_LENGTH));
        }
    }

    /**
     * Reads all URIs from standard input (UTF-8, one per line) into memory.
     *
     * @param bufferSize the size of the I/O buffer.
     * @param filter if not {@code null}, each URI is passed through
     * {@link #makeUnique(BloomFilter, MutableString)} before being stored.
     * @return the list of URIs, in input order.
     * @throws IOException if the reader cannot be set up.
     */
    private static ArrayList<MutableString> readUrisFromStandardInput(int bufferSize, BloomFilter filter) throws IOException {
        ArrayList<MutableString> uris = new ArrayList<MutableString>();
        ProgressLogger pl = new ProgressLogger();
        pl.itemsName = "URIs";
        LineIterator lines = new LineIterator(new FastBufferedReader(new InputStreamReader(System.in, URI_ENCODING), bufferSize), pl);
        pl.start("Reading URIs...");
        while (lines.hasNext()) {
            MutableString uri = lines.next();
            if (filter != null) {
                makeUnique(filter, uri);
            }
            // A copy is stored; presumably the iterator recycles the string it returns.
            uris.add(uri.copy());
        }
        pl.done();
        return uris;
    }

    /**
     * Copies a file of URIs to a temporary file, making each URI unique on the way.
     *
     * <p>Both the source and the copy are treated as UTF-8, so that the copy can be
     * read back with the same encoding it was written in. The temporary file is
     * registered for deletion on JVM exit. Both streams are closed even on failure.
     *
     * @param termFile the name of the file containing the original URIs.
     * @param bufferSize the size of the I/O buffers.
     * @param filter the filter used to detect duplicates.
     * @return the name of the temporary file containing the unique URIs.
     * @throws IOException if reading or writing fails.
     */
    private static String copyWithUniqueUris(String termFile, int bufferSize, BloomFilter filter) throws IOException {
        ProgressLogger pl = new ProgressLogger();
        pl.itemsName = "URIs";
        pl.start("Copying URIs...");
        File temp = File.createTempFile(URLMPHVirtualDocumentResolver.class.getName(), ".uniqueuris");
        temp.deleteOnExit();
        String uniqueTermFile = temp.toString();
        FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(new FileInputStream(termFile), URI_ENCODING), bufferSize);
        try {
            FastBufferedOutputStream outputStream = new FastBufferedOutputStream(new FileOutputStream(uniqueTermFile), bufferSize);
            try {
                LineIterator lines = new LineIterator(reader, pl);
                while (lines.hasNext()) {
                    MutableString uri = lines.next();
                    makeUnique(filter, uri);
                    uri.writeUTF8(outputStream);
                    outputStream.write('\n');
                }
                pl.done();
            }
            finally {
                outputStream.close();
            }
        }
        finally {
            reader.close();
        }
        return uniqueTermFile;
    }

    /**
     * Command-line entry point: builds a resolver from a list of URIs and stores it.
     *
     * <p>URIs are read from standard input, or from the file given with
     * {@code --offline}; with {@code --unique-uris} apparent duplicates are altered
     * so that all URIs are distinct. The resulting resolver is serialised to the
     * file named by the {@code resolver} argument.
     *
     * @param arg the command-line arguments.
     * @throws JSAPException if the command-line parser cannot be configured.
     * @throws IOException if reading the URIs or storing the resolver fails.
     */
    public static void main(String[] arg) throws JSAPException, IOException {
        SimpleJSAP jsap = new SimpleJSAP(URLMPHVirtualDocumentResolver.class.getName(), "Builds a URL document resolver from a sequence of URIs, extracted typically using ScanMetadata.", new Parameter[]{
            new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, "64Ki", false, 'b', "buffer-size", "The size of the I/O buffer used to read terms."),
            new FlaggedOption("termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'o', "offline", "Read terms from this file (without loading them into core memory) instead of standard input."),
            new FlaggedOption("uniqueUris", JSAP.INTSIZE_PARSER, JSAP.NO_DEFAULT, false, 'U', "unique-uris", "Force URIs to be unique by adding random garbage at the end of duplicates; the argument is an upper bound for the number of URIs that will be read, and will be used to create a Bloom filter."),
            new UnflaggedOption("resolver", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The filename for the resolver.")
        });
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        int bufferSize = jsapResult.getInt("bufferSize");
        String resolverName = jsapResult.getString("resolver");
        String termFile = jsapResult.getString("termFile");
        boolean uniqueURIs = jsapResult.userSpecified("uniqueUris");
        BloomFilter filter = uniqueURIs ? new BloomFilter(jsapResult.getInt("uniqueUris")) : null;

        // The URIs are traversed twice below (once per data structure), hence an Iterable.
        Iterable<? extends CharSequence> collection;
        if (termFile == null) {
            collection = readUrisFromStandardInput(bufferSize, filter);
        } else {
            if (uniqueURIs) {
                termFile = copyWithUniqueUris(termFile, bufferSize, filter);
            }
            collection = new FileLinesCollection(termFile, URI_ENCODING);
        }

        LOGGER.debug("Building minimal perfect hash table...");
        // Raw casts are retained from the original code to avoid depending on the exact
        // generic signatures of the hash and map constructors.
        BinIO.storeObject(new URLMPHVirtualDocumentResolver((StringMap<? extends CharSequence>)new ShiftAddXorSignedStringMap(collection.iterator(), (Object2LongFunction)new LcpMinimalPerfectMonotoneHash((Iterable)collection, (TransformationStrategy)new Utf16TransformationStrategy()))), (CharSequence)resolverName);
        LOGGER.debug(" done.");
    }
}

