/*
 * Decompiled with CFR 0.152.
 */
package test.it.unimi.dsi.mg4j.document;

import cern.colt.GenericSorting;
import cern.colt.Swapper;
import cern.colt.function.IntComparator;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.io.WordReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.document.DocumentCollection;
import it.unimi.dsi.mg4j.document.DocumentIterator;
import it.unimi.dsi.mg4j.document.DocumentSequence;
import it.unimi.dsi.mg4j.document.FileSetDocumentCollection;
import it.unimi.dsi.mg4j.document.HtmlDocumentFactory;
import it.unimi.dsi.mg4j.document.IdentityDocumentFactory;
import it.unimi.dsi.mg4j.document.InputStreamDocumentSequence;
import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory;
import it.unimi.dsi.mg4j.document.ZipDocumentCollection;
import it.unimi.dsi.mg4j.document.ZipDocumentCollectionBuilder;
import it.unimi.dsi.util.Properties;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.util.StringTokenizer;
import junit.framework.TestCase;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.io.FileUtils;

public class DocumentCollectionTest
extends TestCase {
    private static final String[][] document = new String[][]{{"xxx yyy zzz xxx", "xxx yyy zzz xxx aaa xxx aaa yyy aaa yyy aaa zzz aaa www aaa"}};
    private static final Properties DEFAULT_PROPERTIES = new Properties();
    private static final int ndoc;
    private File tempDir;
    private String[] htmlFileSet;

    private String getHTMLDocument(String[] document) {
        MutableString res = new MutableString();
        res.append("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Strict//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n");
        res.append("<HTML>\n<HEAD>\n<TITLE>" + document[0] + "</TITLE>\n");
        res.append("<BODY>\n" + document[1].substring(document[0].length()));
        res.append("\n</BODY>\n");
        res.append("</HTML>");
        return res.toString();
    }

    private String getMboxDocument(String[] document) {
        MutableString res = new MutableString();
        res.append("From MAILER-DAEMON Fri Apr 15 16:22:32 2005\n");
        res.append("Date: 15 Apr 2005 16:22:32 +0200\n");
        res.append("From: Mail System Internal Data <MAILER-DAEMON@sliver.usr.dsi.unimi.it>\n");
        res.append("Subject: " + document[0] + "\n");
        res.append("Message-ID: <1113574952@sliver.usr.dsi.unimi.it>\n");
        res.append("X-IMAP: 1102967122 0000138458\n");
        res.append("Return-Path: <matteo.xxx@unimi.it>\n");
        res.append("Received: from localhost (localhost.localdomain [127.0.0.1])\n");
        res.append("\tby sliver.usr.dsi.unimi.it (8.12.11/8.12.11) with ESMTP id iAUNtadn007305\n");
        res.append("\tfor <vigna@localhost>; Wed, 1 Dec 2004 00:55:36 +0100\n");
        res.append("Received: from law5.usr.dsi.unimi.it [159.149.146.241]\n");
        res.append("\tby localhost with IMAP (fetchmail-6.2.5)\n");
        res.append("\tfor vigna@localhost (single-drop); Wed, 01 Dec 2004 00:55:36 +0100 (CET)\n");
        res.append("To: vigna@dsi.unimi.it\n");
        res.append("Message-id: <Pine.WNT.4.33.0412010051240.-209505@p233-mmx>\n");
        res.append("Content-type: TEXT/PLAIN; charset=iso-8859-15\n");
        res.append("X-Warning: UNAuthenticated Sender\n");
        res.append("Content-Transfer-Encoding: 8bit\n");
        res.append("Content-Length: " + document[1].length() + "\n");
        res.append("\n");
        res.append(document[1] + "\n");
        return res.toString();
    }

    private void checkSameWords(WordReader wordReader, StringTokenizer tok) throws IOException {
        MutableString word = new MutableString();
        MutableString nonWord = new MutableString();
        boolean firstTime = true;
        while (true) {
            boolean aWordInDocum = wordReader.next(word, nonWord);
            if (firstTime) {
                firstTime = false;
                if (word.equals("")) continue;
            }
            DocumentCollectionTest.assertFalse((aWordInDocum && word.equals("") ? 1 : 0) != 0);
            boolean aWordInDocument = tok.hasMoreElements();
            DocumentCollectionTest.assertEquals((boolean)aWordInDocum, (boolean)aWordInDocument);
            if (!aWordInDocum) break;
            DocumentCollectionTest.assertEquals((Object)tok.nextElement(), (Object)word.toString());
        }
    }

    private void checkAllDocuments(DocumentCollection coll, final String[] fieldName, String[][] document) throws IOException {
        int nfields = fieldName.length;
        final int[] fieldNumber = new int[nfields];
        final int[] arrayIndex = new int[nfields];
        for (int i = 0; i < nfields; ++i) {
            int j;
            arrayIndex[i] = i;
            for (j = 0; j < coll.factory().numberOfFields(); ++j) {
                if (!coll.factory().fieldName(j).equals(fieldName[i])) continue;
                fieldNumber[i] = j;
                break;
            }
            assert (j < coll.factory().numberOfFields());
        }
        GenericSorting.quickSort((int)0, (int)nfields, (IntComparator)new IntComparator(){

            public int compare(int x, int y) {
                return fieldNumber[x] - fieldNumber[y];
            }
        }, (Swapper)new Swapper(){

            public void swap(int x, int y) {
                int t = fieldNumber[x];
                fieldNumber[x] = fieldNumber[y];
                fieldNumber[y] = t;
                t = arrayIndex[x];
                arrayIndex[x] = arrayIndex[y];
                arrayIndex[y] = t;
                String q = fieldName[x];
                fieldName[x] = fieldName[y];
                fieldName[y] = q;
            }
        });
        for (int doc = 0; doc < coll.size(); ++doc) {
            Document docum = coll.document(doc);
            for (int i = 0; i < nfields; ++i) {
                int field = fieldNumber[i];
                Reader content = (Reader)docum.content(field);
                WordReader wordReader = docum.wordReader(field);
                wordReader.setReader(content);
                StringTokenizer tok = new StringTokenizer(document[doc][arrayIndex[i]]);
                System.err.println("Checking document " + doc + " field " + fieldName[i] + " (" + field + ")");
                this.checkSameWords(wordReader, tok);
            }
            docum.close();
        }
    }

    private void checkAllDocumentsSeq(DocumentSequence seq, final String[] fieldName, String[][] document) throws IOException {
        Document docum;
        int nfields = fieldName.length;
        final int[] fieldNumber = new int[nfields];
        final int[] arrayIndex = new int[nfields];
        for (int i = 0; i < nfields; ++i) {
            int j;
            arrayIndex[i] = i;
            for (j = 0; j < seq.factory().numberOfFields(); ++j) {
                if (!seq.factory().fieldName(j).equals(fieldName[i])) continue;
                fieldNumber[i] = j;
                break;
            }
            assert (j < seq.factory().numberOfFields());
        }
        GenericSorting.quickSort((int)0, (int)nfields, (IntComparator)new IntComparator(){

            public int compare(int x, int y) {
                return fieldNumber[x] - fieldNumber[y];
            }
        }, (Swapper)new Swapper(){

            public void swap(int x, int y) {
                int t = fieldNumber[x];
                fieldNumber[x] = fieldNumber[y];
                fieldNumber[y] = t;
                t = arrayIndex[x];
                arrayIndex[x] = arrayIndex[y];
                arrayIndex[y] = t;
                String q = fieldName[x];
                fieldName[x] = fieldName[y];
                fieldName[y] = q;
            }
        });
        DocumentIterator iterator = seq.iterator();
        int doc = 0;
        while ((docum = iterator.nextDocument()) != null) {
            for (int i = 0; i < nfields; ++i) {
                int field = fieldNumber[i];
                Reader content = (Reader)docum.content(field);
                WordReader wordReader = docum.wordReader(field);
                wordReader.setReader(content);
                StringTokenizer tok = new StringTokenizer(document[doc][arrayIndex[i]]);
                System.err.println("Checking sequentially document " + doc + " field " + fieldName[i] + " (" + field + ")");
                this.checkSameWords(wordReader, tok);
            }
            docum.close();
            ++doc;
        }
        iterator.close();
    }

    protected void setUp() throws IOException, ClassNotFoundException, ConfigurationException {
        this.tempDir = File.createTempFile("mg4jtest", null);
        this.tempDir.delete();
        this.tempDir.mkdir();
        File htmlDir = new File(this.tempDir, "html");
        htmlDir.mkdir();
        System.err.println("Temporary directory: " + this.tempDir);
        this.htmlFileSet = new String[ndoc];
        for (int i = 0; i < ndoc; ++i) {
            String docFile;
            this.htmlFileSet[i] = docFile = new File(htmlDir, "doc" + i + ".html").toString();
            OutputStreamWriter docWriter = new OutputStreamWriter((OutputStream)new FileOutputStream(docFile), "ISO-8859-1");
            docWriter.write(this.getHTMLDocument(document[i]));
            ((Writer)docWriter).close();
        }
        OutputStreamWriter mboxWriter = new OutputStreamWriter((OutputStream)new FileOutputStream(new File(this.tempDir, "mbox")), "ISO-8859-1");
        for (int i = 0; i < ndoc; ++i) {
            mboxWriter.write(this.getMboxDocument(document[i]));
        }
        ((Writer)mboxWriter).close();
        FileSetDocumentCollection fileSetDocumentCollection = new FileSetDocumentCollection(this.htmlFileSet, new HtmlDocumentFactory(DEFAULT_PROPERTIES));
        ZipDocumentCollectionBuilder collBuilder = new ZipDocumentCollectionBuilder(new File(this.tempDir, "zip").toString(), fileSetDocumentCollection.factory(), true, new ProgressLogger());
        ZipDocumentCollection zipDocumentCollection = collBuilder.build(fileSetDocumentCollection);
        BinIO.storeObject((Object)zipDocumentCollection, (CharSequence)new File(this.tempDir, "zip.collection").toString());
        zipDocumentCollection.close();
        ZipDocumentCollectionBuilder apprCollBuilder = new ZipDocumentCollectionBuilder(new File(this.tempDir, "azip").toString(), fileSetDocumentCollection.factory(), false, new ProgressLogger());
        zipDocumentCollection = apprCollBuilder.build(fileSetDocumentCollection);
        BinIO.storeObject((Object)zipDocumentCollection, (CharSequence)new File(this.tempDir, "azip.collection").toString());
        zipDocumentCollection.close();
        fileSetDocumentCollection.close();
    }

    public void testFileSetDocumentCollection() throws IOException, ConfigurationException {
        System.err.println("Checking fileset collection");
        FileSetDocumentCollection coll = new FileSetDocumentCollection(this.htmlFileSet, new HtmlDocumentFactory(DEFAULT_PROPERTIES));
        DocumentCollectionTest.assertEquals((int)coll.size(), (int)ndoc);
        this.checkAllDocuments(coll, new String[]{"title", "text"}, document);
        coll.close();
    }

    public void testFileSetDocumentCollectionSeq() throws IOException, ConfigurationException {
        System.err.println("Checking fileset collection sequentially");
        FileSetDocumentCollection coll = new FileSetDocumentCollection(this.htmlFileSet, new HtmlDocumentFactory(DEFAULT_PROPERTIES));
        this.checkAllDocumentsSeq(coll, new String[]{"title", "text"}, document);
        coll.close();
    }

    public void testZipDocumentCollection() throws IOException, ClassNotFoundException {
        System.err.println("Checking zipped collection");
        ZipDocumentCollection coll = (ZipDocumentCollection)BinIO.loadObject((CharSequence)new File(this.tempDir, "zip.collection").toString());
        this.checkAllDocuments(coll, new String[]{"title", "text"}, document);
        coll.close();
    }

    public void testZipDocumentCollectionSeq() throws IOException, ClassNotFoundException {
        System.err.println("Checking zipped collection sequentially");
        ZipDocumentCollection coll = (ZipDocumentCollection)BinIO.loadObject((CharSequence)new File(this.tempDir, "zip.collection").toString());
        this.checkAllDocumentsSeq(coll, new String[]{"title", "text"}, document);
        coll.close();
    }

    public void testZipDocumentCollectionAppr() throws IOException, ClassNotFoundException {
        System.err.println("Checking approximated zipped collection");
        ZipDocumentCollection coll = (ZipDocumentCollection)BinIO.loadObject((CharSequence)new File(this.tempDir, "azip.collection").toString());
        this.checkAllDocuments(coll, new String[]{"title", "text"}, document);
        coll.close();
    }

    public void testZipDocumentCollectionApprSeq() throws IOException, ClassNotFoundException {
        System.err.println("Checking approximated zipped collection sequentially");
        ZipDocumentCollection coll = (ZipDocumentCollection)BinIO.loadObject((CharSequence)new File(this.tempDir, "azip.collection").toString());
        this.checkAllDocumentsSeq(coll, new String[]{"title", "text"}, document);
        coll.close();
    }

    public void testInputStreamSequence() throws IOException, ConfigurationException {
        System.err.println("Checking input stream (text field only)");
        MutableString res = new MutableString();
        String[][] justSecondField = new String[ndoc][1];
        for (int i = 0; i < ndoc; ++i) {
            res.append(document[i][1] + "\u0000");
            justSecondField[i][0] = document[i][1];
        }
        String resString = res.toString();
        OutputStreamWriter resWriter = new OutputStreamWriter((OutputStream)new FileOutputStream(new File(this.tempDir, "stream")), "UTF-8");
        resWriter.write(resString);
        ((Writer)resWriter).close();
        FileInputStream is = new FileInputStream(new File(this.tempDir, "stream"));
        InputStreamDocumentSequence seq = new InputStreamDocumentSequence(is, 0, new IdentityDocumentFactory(DEFAULT_PROPERTIES));
        this.checkAllDocumentsSeq(seq, new String[]{"text"}, justSecondField);
        seq.close();
    }

    protected void tearDown() throws IOException {
        FileUtils.forceDeleteOnExit((File)this.tempDir);
    }

    static {
        DEFAULT_PROPERTIES.setProperty((Enum)PropertyBasedDocumentFactory.MetadataKeys.ENCODING, (Object)"ASCII");
        ndoc = document.length;
    }
}

