/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.law.warc.parser;

import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.MeasurableInputStream;
import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet;
import it.unimi.dsi.law.warc.io.InspectableBufferedInputStream;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.law.warc.parser.Digester;
import it.unimi.dsi.law.warc.tool.ExtractLinks;
import it.unimi.dsi.law.warc.util.AbstractHttpResponse;
import it.unimi.dsi.law.warc.util.BURL;
import it.unimi.dsi.law.warc.util.ByteArrayCharSequence;
import it.unimi.dsi.law.warc.util.HttpResponse;
import it.unimi.dsi.parser.BulletParser;
import it.unimi.dsi.parser.callback.Callback;
import it.unimi.dsi.parser.callback.ComposedCallbackBuilder;
import it.unimi.dsi.parser.callback.LinkExtractor;
import it.unimi.dsi.util.TextPattern;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class Parser {
    /** Class-wide logger. */
    private static final Logger LOGGER = Logger.getLogger(Parser.class);
    /** Charset used for decoding when the guessed charset name cannot be resolved. */
    private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
    /** Buffer receiving the decoded response body; its length bounds how many characters are parsed. */
    public char[] buffer;
    /** The underlying HTML parser. */
    private final BulletParser parser;
    /** Callback collecting the links (and meta refresh/location values) found while parsing. */
    private final LinkExtractor linkExtractor;
    /** URL of the response most recently passed to {@link #parse(HttpResponse)}. */
    private BURL responseUrl;
    /** Charset name guessed for the response most recently parsed. */
    private String guessedCharset;
    /** Digest callback, or {@code null} if no digest algorithm was requested at construction. */
    private Digester digester;
    /** Parsed value of the <code>location</code> header of the last response, or {@code null}. */
    private BURL location;
    /** Locates the start of a <code>&lt;meta</code> tag, ignoring case. */
    private static final TextPattern META_PATTERN = new TextPattern("<meta", TextPattern.CASE_INSENSITIVE);
    /*
     * The patterns below are applied with Matcher.matches() and rely on ".*" to skip
     * the rest of the tag/value. DOTALL is required so that attributes separated by
     * line breaks (perfectly legal in HTML) are still recognised; without it "." stops
     * at the first line terminator and the whole match fails.
     */
    /** Matches the attributes of a meta tag declaring <code>http-equiv=content-type</code>. */
    private static final Pattern HTTP_EQUIV_PATTERN = Pattern.compile(".*http-equiv\\s*=\\s*('|\")?content-type('|\")?.*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    /** Captures (group 2) the quoted value of the <code>content</code> attribute of a meta tag. */
    private static final Pattern CONTENT_PATTERN = Pattern.compile(".*content\\s*=\\s*('|\")([^'\"]*)('|\").*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
    /** Captures (group 1) the possibly quoted value of a <code>charset=</code> parameter. */
    private static final Pattern CHARSET_PATTERN = Pattern.compile(".*charset\\s*=\\s*(([\\041-\\0176&&[^<>\\{\\}\\\\/:,;@?=]])+|\"[^\"]*\").*", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Creates a parser that extracts links and, optionally, computes a digest.
     *
     * @param maxResponseBodyLength the size, in characters, of the decoding buffer.
     * @param digestAlgorithm the algorithm name handed to {@link Digester}, or
     * {@code null} to extract links only.
     * @throws NoSuchAlgorithmException declared for the {@link Digester} construction.
     */
    public Parser(int maxResponseBodyLength, String digestAlgorithm) throws NoSuchAlgorithmException {
        this.buffer = new char[maxResponseBodyLength];
        this.parser = new BulletParser();
        this.linkExtractor = new LinkExtractor();

        // Without a digest the link extractor is the only callback.
        if (digestAlgorithm == null) {
            this.parser.setCallback((Callback)this.linkExtractor);
            return;
        }

        // Otherwise chain the link extractor and the digester, in this order.
        ComposedCallbackBuilder callbacks = new ComposedCallbackBuilder();
        callbacks.add((Callback)this.linkExtractor);
        this.digester = new Digester(digestAlgorithm);
        callbacks.add((Callback)this.digester);
        this.parser.setCallback(callbacks.compose());
    }

    /**
     * Creates a parser that only extracts links (no digest is computed).
     *
     * <p>Delegates to the two-argument constructor with a {@code null} digest algorithm.
     *
     * @param maxResponseBodyLength the size, in characters, of the decoding buffer.
     * @throws NoSuchAlgorithmException declared because the delegated-to constructor declares it.
     */
    public Parser(int maxResponseBodyLength) throws NoSuchAlgorithmException {
        this(maxResponseBodyLength, null);
    }

    /**
     * Decodes the content of a response into {@link #buffer} and runs the HTML parser on it.
     *
     * <p>The charset is guessed as follows: it starts as <code>ISO-8859-1</code>, is replaced
     * by the charset found in the <code>Content-Type</code> header (if any), and is finally
     * replaced by the charset declared in an <code>http-equiv</code> meta tag (if any) when
     * the content stream is an {@link InspectableBufferedInputStream}. If the resulting name
     * is illegal or unsupported, a warning is logged and ISO-8859-1 is used for decoding.
     *
     * <p>At most {@code buffer.length} characters are read. The content stream is not closed
     * by this method.
     *
     * @param response the response to parse.
     * @throws IOException if reading the content stream fails.
     */
    public void parse(HttpResponse response) throws IOException {
        this.responseUrl = response.url();
        this.guessedCharset = "ISO-8859-1";

        String contentTypeHeader = response.headers().get("Content-Type");
        if (contentTypeHeader != null) {
            String headerCharset = Parser.getCharsetNameFromHeader(contentTypeHeader);
            if (headerCharset != null) {
                this.guessedCharset = headerCharset;
            }
        }

        // A charset declared in the document itself takes precedence over the header.
        MeasurableInputStream contentStream = response.contentAsStream();
        if (contentStream instanceof InspectableBufferedInputStream) {
            InspectableBufferedInputStream inspectableStream = (InspectableBufferedInputStream)contentStream;
            String metaCharset = Parser.getCharsetName(inspectableStream.buffer, inspectableStream.inspectable);
            if (metaCharset != null) {
                this.guessedCharset = metaCharset;
            }
        }
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Guessing charset " + this.guessedCharset + " for URL " + this.responseUrl);
        }

        // Fall back to ISO-8859-1 when the guessed name cannot be resolved.
        Charset charset = ISO_8859_1;
        try {
            charset = Charset.forName(this.guessedCharset);
        }
        catch (IllegalCharsetNameException e) {
            LOGGER.warn("Response for " + this.responseUrl + " contained an illegal charset name: " + this.guessedCharset);
        }
        catch (UnsupportedCharsetException e) {
            LOGGER.warn("Response for " + this.responseUrl + " contained an unsupported charset: " + this.guessedCharset);
        }

        // Look the header up once; a missing header resets the location of a previous parse.
        String locationHeader = response.headers().get("location");
        this.location = locationHeader != null ? BURL.parse(locationHeader) : null;

        // The reader is deliberately left open: closing it would close the caller's stream.
        Reader reader = new InputStreamReader(contentStream, charset);
        int length = 0;
        int result;
        while ((result = reader.read(this.buffer, length, this.buffer.length - length)) > 0) {
            length += result;
        }
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Response for URL " + response.url() + " produced " + length + " characters");
        }

        if (this.digester != null) {
            this.digester.url(this.responseUrl);
        }
        this.parser.parse(this.buffer, 0, length);
    }

    /**
     * Returns the charset name guessed during the last call to {@link #parse(HttpResponse)}.
     *
     * <p>This is the guessed <em>name</em>: if it turned out to be illegal or unsupported,
     * decoding fell back to ISO-8859-1 but the name returned here is left unchanged.
     *
     * @return the guessed charset name, or {@code null} if nothing has been parsed yet.
     */
    public String guessedCharset() {
        return this.guessedCharset;
    }

    /**
     * Returns the digest produced by the digester callback.
     *
     * @return the value of {@link Digester#digest()}.
     * @throws IllegalStateException if this parser was created without a digest algorithm.
     */
    public byte[] digest() {
        final Digester current = this.digester;
        if (current != null) {
            return current.digest();
        }
        throw new IllegalStateException("No digester has been set.");
    }

    /**
     * Returns the URLs gathered from the last parsed response, in insertion order.
     *
     * <p>The set contains, in this order: the value of {@link #location()} (if any), the
     * meta refresh target (if any and parseable), and every parseable link found by the
     * link extractor, resolved against the response URL. Strings that {@link BURL#parse}
     * rejects (returns {@code null} for) are skipped.
     *
     * @return a newly created set of URLs.
     */
    public Set<BURL> urls() {
        final Set<BURL> urls = new ObjectLinkedOpenHashSet<BURL>(this.linkExtractor.urls.size());

        // NOTE(review): unlike the links below, the location and meta refresh targets are
        // added without being resolved against the response URL; confirm this is intended.
        final BURL loc = this.location();
        if (loc != null) {
            urls.add(loc);
        }

        final String metaRefresh = this.linkExtractor.metaRefresh();
        if (metaRefresh != null) {
            final BURL refreshUrl = BURL.parse(metaRefresh);
            if (refreshUrl != null) {
                urls.add(refreshUrl);
            }
        }

        for (String link : this.linkExtractor.urls) {
            final BURL linkUrl = BURL.parse(link);
            if (linkUrl != null) {
                urls.add(this.responseUrl.resolve(linkUrl));
            }
        }
        return urls;
    }

    /**
     * Returns the redirect location of the last parsed response.
     *
     * <p>The <code>location</code> header recorded by {@link #parse(HttpResponse)} wins;
     * otherwise the meta location reported by the link extractor is parsed and returned.
     *
     * @return the location, or {@code null} if neither source provides one.
     */
    public BURL location() {
        if (this.location != null) {
            return this.location;
        }
        final String metaLocation = this.linkExtractor.metaLocation();
        return metaLocation == null ? null : BURL.parse(metaLocation);
    }

    /**
     * Looks for a charset declared through an <code>http-equiv</code> content-type meta tag.
     *
     * <p>Meta tags are scanned in order. The first one whose attributes match both the
     * http-equiv and the content patterns decides the result (which may be {@code null}
     * if its content carries no usable charset). Scanning also stops, returning
     * {@code null}, at a meta tag that is not closed by <code>&gt;</code> within
     * {@code length} bytes.
     *
     * @param buffer the bytes to inspect.
     * @param length the number of valid bytes in {@code buffer}.
     * @return the charset name, or {@code null} if none was found.
     */
    public static String getCharsetName(byte[] buffer, int length) {
        int from = 0;
        for (;;) {
            final int tagStart = META_PATTERN.search(buffer, from, length);
            if (tagStart == -1) {
                return null;
            }

            // Find the '>' closing this tag.
            int tagEnd = tagStart;
            while (tagEnd < length && buffer[tagEnd] != '>') {
                tagEnd++;
            }
            if (tagEnd == length) {
                return null;
            }

            // Attribute text: everything between "<meta" and the closing '>'.
            final int prefix = META_PATTERN.length();
            final ByteArrayCharSequence attributes = new ByteArrayCharSequence(buffer, tagStart + prefix, tagEnd - tagStart - prefix);
            if (HTTP_EQUIV_PATTERN.matcher(attributes).matches()) {
                final Matcher content = CONTENT_PATTERN.matcher(attributes);
                if (content.matches()) {
                    return Parser.getCharsetNameFromHeader(content.group(2));
                }
            }
            from = tagEnd + 1;
        }
    }

    /**
     * Extracts the charset name from a <code>Content-Type</code>-style value.
     *
     * <p>The value captured after <code>charset=</code> has one leading and one trailing
     * quote character (single or double) removed, if present.
     *
     * @param headerValue the header (or meta content) value to inspect.
     * @return the charset name, or {@code null} if there is no charset parameter or
     * nothing is left once the quotes are removed.
     */
    public static String getCharsetNameFromHeader(String headerValue) {
        final Matcher matcher = CHARSET_PATTERN.matcher(headerValue);
        if (!matcher.matches()) {
            return null;
        }
        final String raw = matcher.group(1);
        final int rawLength = raw.length();
        if (rawLength == 0) {
            return null;
        }
        final int from = isQuote(raw.charAt(0)) ? 1 : 0;
        final int to = isQuote(raw.charAt(rawLength - 1)) ? rawLength - 1 : rawLength;
        return from < to ? raw.substring(from, to) : null;
    }

    /** Tells whether the given character is a single or a double quote. */
    private static boolean isQuote(char c) {
        return c == '"' || c == '\'';
    }

    /**
     * Command-line entry point: parses a local file as if it had been downloaded from the
     * given URL and prints the extracted URLs to standard output.
     *
     * @param arg the command-line arguments: the filename and the URL.
     * @throws IllegalArgumentException if the URL argument cannot be parsed.
     * @throws Exception if argument parsing, file access or page parsing fails.
     */
    public static void main(String[] arg) throws Exception {
        // The usage banner names this class (it used to name ExtractLinks).
        SimpleJSAP jsap = new SimpleJSAP(Parser.class.getName(), "Extract links from a given page.", new Parameter[]{
            new UnflaggedOption("file", (StringParser)JSAP.STRING_PARSER, true, "The filename."),
            new UnflaggedOption("url", (StringParser)JSAP.STRING_PARSER, true, "The URL from which the file was downloaded.")
        });
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        String file = jsapResult.getString("file");
        String urlString = jsapResult.getString("url");

        // BURL.parse returns null for unparseable input; fail here with a clear message
        // instead of a NullPointerException later, when links are resolved.
        BURL url = BURL.parse(urlString);
        if (url == null) {
            throw new IllegalArgumentException("Cannot parse URL: " + urlString);
        }

        FastBufferedInputStream in = new FastBufferedInputStream((InputStream)new FileInputStream(file));
        try {
            Parser parser = new Parser(0x100000);
            parser.parse(new FakeHttpResponse(url, (MeasurableInputStream)in));
            System.out.println(parser.urls());
        }
        finally {
            // Release the file handle even when parsing fails.
            in.close();
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * A minimal, read-only response wrapping an arbitrary stream, used by
     * {@link Parser#main(String[])}.
     *
     * <p>It always reports status 200 and no headers; every mutator throws
     * {@link UnsupportedOperationException}.
     */
    static class FakeHttpResponse extends AbstractHttpResponse {
        /** The stream returned as content. */
        MeasurableInputStream in;

        /**
         * Creates a fake response.
         *
         * @param url the URL reported by {@link #url()}.
         * @param in the stream returned by {@link #contentAsStream()}.
         */
        protected FakeHttpResponse(final BURL url, final MeasurableInputStream in) {
            this.in = in;
            this.url = url;
        }

        // ---- Read accessors ----

        /** Returns the URL given at construction. */
        @Override
        public BURL url() {
            return this.url;
        }

        /** Always returns 200. */
        @Override
        public int status() {
            return 200;
        }

        /** Always returns the fixed status line. */
        @Override
        public String statusLine() {
            return "200 Ok";
        }

        /** Returns a new, empty header map on every call. */
        @Override
        public Map<String, String> headers() {
            return new HashMap<String, String>();
        }

        /** Returns the stream given at construction. */
        @Override
        public MeasurableInputStream contentAsStream() throws IOException {
            return this.in;
        }

        // ---- Unsupported mutators ----

        /** Not supported. */
        @Override
        public void url(final BURL url) {
            throw new UnsupportedOperationException();
        }

        /** Not supported. */
        @Override
        public void status(final int status) {
            throw new UnsupportedOperationException();
        }

        /** Not supported. */
        @Override
        public void statusLine(final String statusLine) {
            throw new UnsupportedOperationException();
        }

        /** Not supported. */
        @Override
        public boolean fromWarcRecord(final WarcRecord wr) throws IOException {
            throw new UnsupportedOperationException();
        }
    }
}

