/*
 * Decompiled with CFR 0.152.
 */
package uk.ac.bham.clg.cue.corpus;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UTFDataFormatException;
import java.util.Hashtable;
import java.util.Stack;
import uk.ac.bham.clg.cue.corpus.CueAdmin;
import uk.ac.bham.clg.cue.corpus.annotation.Tag;
import uk.ac.bham.clg.cue.io.BitOutputStream;
import uk.ac.bham.clg.cue.utils.WordListResource;
import uk.ac.bham.clg.util.ISO2UTF;

/**
 * Encodes the words of a {@link WordListResource} into a bit stream using a
 * previously generated code table, and records markup tags in a separate
 * tag stream.
 *
 * <p>All work happens in the constructor: the output files are opened, the
 * code table is loaded from the {@code .wrd}/{@code .cdw}/{@code .clg}/
 * {@code .frq} files, every word is encoded, and all files are closed again.
 * Afterwards {@link #getSize()} reports how many words were encoded.
 */
public class TextEncoder {
    static final boolean DEBUG = true;

    /** A bit offset is written to the sync file every this many words. */
    private static final int granularity = 128;

    /** Initial code table size used when the {@code .clg} file length is unusable. */
    private static final int DEFAULT_CODE_TABLE_CAPACITY = 100000;

    /** Upper bound for the pre-sized code table, so the size hint cannot overflow an int. */
    private static final long MAX_PRESIZED_CAPACITY = 1L << 30;

    /** Tag characters below this value are replaced by it before being written. */
    private static final int MIN_TAG_CHAR = 32;

    /** Amount subtracted from every tag character before it is written. */
    private static final int TAG_CHAR_OFFSET = 30;

    private CueAdmin admin = CueAdmin.getInstance();
    private Hashtable<String, HashEntry> codeWords = null;
    private int textSize = 0;
    private Stack<OpenTag> tagStack = new Stack<OpenTag>();
    private BufferedInputStream wordFile = null;
    private DataInputStream codeWordFile = null;
    private DataInputStream codeLengthFile = null;
    private DataInputStream freqFile = null;
    private DataOutputStream syncFile = null;
    private DataOutputStream freqOutFile = null;
    private BitOutputStream dataOut = null;
    private BitOutputStream tagOut = null;
    private ISO2UTF isoConv = null;

    /**
     * Runs a complete encoding pass.
     *
     * @param wordListResource source of the words and tags to encode
     * @param string           base name of the code files inside the admin's code directory
     * @param string2          suffix appended (after a {@code '-'}) to {@code string3} to name
     *                         the {@code .sync}, {@code .freq} and {@code .data} files
     * @param string3          path prefix of the output files; its last path component also
     *                         names the {@code .tags} file in the admin's temp directory
     * @throws IOException if a file cannot be opened, read, written or closed, or if a word
     *                     is missing from the code table
     */
    public TextEncoder(WordListResource wordListResource, String string, String string2, String string3) throws IOException {
        boolean encoded = false;
        try {
            this.openOutputFiles(string2, string3, this.admin.getTmpDir());
            this.initCodeFiles(string);
            this.isoConv = ISO2UTF.getInstance();
            this.encodeData(wordListResource);
            encoded = true;
        } finally {
            if (!encoded) {
                // Something failed part-way: release whatever was opened so far
                // without hiding the exception that is already propagating.
                this.closeFilesQuietly();
            }
        }
        this.closeFiles();
        if (!this.tagStack.empty()) {
            System.err.println("non-matching tags left over:");
            while (!this.tagStack.empty()) {
                OpenTag open = this.tagStack.pop();
                System.err.println("-> " + open.text + " (" + open.position + ")");
            }
        }
    }

    /**
     * Closes every stream that is currently open. A failure to close one of
     * the plain streams does not prevent the remaining streams from being
     * closed; the first such failure is rethrown at the end.
     *
     * @throws IOException if closing any stream fails
     */
    private void closeFiles() throws IOException {
        IOException firstFailure = null;
        Closeable[] plainStreams = {
            this.wordFile, this.codeWordFile, this.codeLengthFile,
            this.freqFile, this.syncFile, this.freqOutFile
        };
        for (Closeable stream : plainStreams) {
            if (stream == null) {
                continue;
            }
            try {
                stream.close();
            } catch (IOException closeFailure) {
                if (firstFailure == null) {
                    firstFailure = closeFailure;
                }
            }
        }
        this.wordFile = null;
        // BitOutputStream is closed directly (not via Closeable); try/finally
        // makes sure the tag stream is closed even if the data stream fails.
        try {
            if (this.dataOut != null) {
                this.dataOut.close();
            }
        } finally {
            if (this.tagOut != null) {
                this.tagOut.close();
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }

    /** Closes all streams on a failure path, where the original exception is the one to report. */
    private void closeFilesQuietly() {
        try {
            this.closeFiles();
        } catch (IOException ignored) {
            // Deliberately ignored: an earlier exception is already being propagated.
        }
    }

    /** Builds an {@link IOException} with the given message that keeps {@code cause} attached. */
    private static IOException ioFailure(String message, Throwable cause) {
        IOException failure = new IOException(message);
        failure.initCause(cause);
        return failure;
    }

    /**
     * Encodes every word delivered by the resource. Tokens starting with
     * {@code '<'} are handed to {@link #writeTag(int, String)} and do not
     * count as words. Every {@code granularity} words the current bit offset
     * is written to the sync file.
     *
     * @param wordListResource source of tokens; it is rewound first
     * @throws IOException if writing fails or a word has no entry in the code table
     */
    private void encodeData(WordListResource wordListResource) throws IOException {
        wordListResource.rewind();
        int bitOffset = 0;
        int wordCount = 0;
        // True once the per-word-count housekeeping ran; tags do not advance
        // wordCount, so without this it would repeat for every tag in a run.
        boolean housekeepingDone = false;
        StringBuffer token = new StringBuffer();
        while (wordListResource.hasMoreWords()) {
            if (!housekeepingDone) {
                if (wordCount % 100 == 0) {
                    System.err.print("done " + wordCount + " words\r");
                }
                if (wordCount % 10000 == 0) {
                    System.gc();
                }
                housekeepingDone = true;
            }
            wordListResource.getNextWord(token);
            String raw = token.toString();
            if (raw.startsWith("<")) {
                this.writeTag(wordCount, raw);
                continue;
            }
            // Tokens that look like they contain an entity ("&...;") are
            // converted before the code table lookup.
            if (raw.indexOf("&") > -1 && raw.indexOf(";") > -1) {
                token.setLength(0);
                token.append(this.isoConv.convertString(raw));
            }
            HashEntry entry = this.codeWords.get(token.toString());
            if (entry == null) {
                throw new IOException("TextEncoder: `" + token + "' (" + wordCount + ") not in code!");
            }
            if (wordCount % granularity == 0) {
                this.syncFile.writeInt(bitOffset);
            }
            this.dataOut.writeInt(entry.word, entry.length);
            bitOffset += entry.length;
            ++wordCount;
            housekeepingDone = false;
        }
        this.textSize = wordCount;
    }

    /**
     * Returns the number of words that were encoded.
     *
     * @return the word count of the encoded text (tags are not counted)
     */
    public int getSize() {
        return this.textSize;
    }

    /**
     * Opens the code files and loads the code table. For every word read
     * from the {@code .wrd} file one code length, one code word and one
     * frequency are read from the parallel files; the frequency is copied to
     * the frequency output file.
     *
     * <p>A read or write error while loading is reported on stderr and stops
     * the loading early; the entries read so far are kept.
     *
     * @param codeName base name of the code files inside the admin's code directory
     * @throws IOException if one of the code files cannot be opened
     */
    private void initCodeFiles(String codeName) throws IOException, UTFDataFormatException {
        System.err.println("initCodeFiles(" + codeName + ")");
        long codeLengthFileSize = 0L;
        int step = 0;
        try {
            String base = String.valueOf(this.admin.getCodeDir()) + File.separatorChar + codeName;
            step = 1;
            this.wordFile = new BufferedInputStream(new FileInputStream(base + ".wrd"));
            step = 2;
            this.codeWordFile = new DataInputStream(new BufferedInputStream(new FileInputStream(base + ".cdw")));
            step = 3;
            this.codeLengthFile = new DataInputStream(new BufferedInputStream(new FileInputStream(base + ".clg")));
            step = 4;
            codeLengthFileSize = new File(base + ".clg").length();
            step = 5;
            this.freqFile = new DataInputStream(new BufferedInputStream(new FileInputStream(base + ".frq")));
            step = 6;
        } catch (FileNotFoundException missingFile) {
            throw ioFailure("TextEncoder (open code files): " + missingFile, missingFile);
        } catch (NullPointerException nullFailure) {
            // Kept so that callers continue to see only IOException from this phase.
            throw ioFailure("TextEncoder (open code files, " + step + "): " + nullFailure, nullFailure);
        }

        // The table is pre-sized from the .clg file length (one byte is read
        // from that file per code word below).
        int initialCapacity;
        if (codeLengthFileSize <= 0L) {
            System.err.println("TextEncoder: cannot determine # of code words");
            initialCapacity = DEFAULT_CODE_TABLE_CAPACITY;
        } else {
            initialCapacity = (int) Math.min(2L * codeLengthFileSize, MAX_PRESIZED_CAPACITY);
        }
        this.codeWords = new Hashtable<String, HashEntry>(initialCapacity);

        System.err.println("loading codewords");
        int loaded = 0;
        String word = ISO2UTF.readUTF(this.wordFile);
        while (word != null) {
            // 'step' identifies the operation that failed in the stderr report.
            step = 0;
            try {
                byte codeLength = this.codeLengthFile.readByte();
                step = 1;
                int codeWord = this.codeWordFile.readInt();
                step = 2;
                int frequency = this.freqFile.readInt();
                step = 3;
                this.codeWords.put(word, new HashEntry(codeWord, codeLength));
                ++loaded;
                step = 6;
                this.freqOutFile.writeInt(frequency);
            } catch (IOException ioFailure) {
                reportLoadFailure(ioFailure, step);
                break;
            } catch (NullPointerException nullFailure) {
                reportLoadFailure(nullFailure, step);
                break;
            }
            word = ISO2UTF.readUTF(this.wordFile);
        }
        System.err.println("done loading " + loaded + " codewords");
    }

    /** Prints the stderr report for a failure while loading code words. */
    private static void reportLoadFailure(Exception failure, int step) {
        System.err.println("Loading code words: " + failure + ", error = " + step);
    }

    /**
     * Opens the four output files: {@code <string2>-<string>.sync},
     * {@code .freq} and {@code .data}, plus a {@code .tags} file in
     * {@code string3} named after the last path component of {@code string2}.
     *
     * @param string  suffix placed after the {@code '-'} in the output base name
     * @param string2 path prefix of the output files
     * @param string3 directory that receives the {@code .tags} file
     * @throws IOException if any of the files cannot be created; streams opened
     *                     before the failure stay assigned to their fields
     */
    public void openOutputFiles(String string, String string2, String string3) throws IOException {
        try {
            String base = String.valueOf(string2) + "-" + string;
            this.syncFile = new DataOutputStream(new FileOutputStream(base + ".sync"));
            this.freqOutFile = new DataOutputStream(new FileOutputStream(base + ".freq"));
            this.dataOut = new BitOutputStream(new DataOutputStream(new FileOutputStream(base + ".data")));
            String tagFileName = string2.substring(string2.lastIndexOf(File.separatorChar) + 1) + ".tags";
            this.tagOut = new BitOutputStream(new DataOutputStream(new FileOutputStream(String.valueOf(string3) + File.separatorChar + tagFileName)));
        } catch (IOException openFailure) {
            throw ioFailure("TextEncoder (open output files): " + openFailure, openFailure);
        }
    }

    /**
     * Processes one markup token.
     *
     * <ul>
     *   <li>Tokens starting with {@code "<?"} are skipped.</li>
     *   <li>Tokens that neither start with {@code "</"} nor end with
     *       {@code "/>"} are remembered as open tags.</li>
     *   <li>Tokens ending with {@code "/>"} are written immediately with a span of 1.</li>
     *   <li>Any other token is a closing tag: if its label matches the most
     *       recent open tag, that open tag is written together with its span;
     *       otherwise the mismatch is reported and the open tag is kept.</li>
     * </ul>
     *
     * @param wordIndex number of words encoded before this tag
     * @param tag       the tag text, including the angle brackets
     * @throws IOException if writing to the tag stream fails
     */
    private void writeTag(int wordIndex, String tag) throws IOException {
        int position = wordIndex + 1;
        if (tag.startsWith("<?")) {
            System.err.println("skipping " + tag);
            return;
        }
        boolean selfClosing = tag.endsWith("/>");
        if (!tag.startsWith("</") && !selfClosing) {
            this.tagStack.push(new OpenTag(position, tag));
            return;
        }
        if (selfClosing) {
            this.writeTagRecord(position, 1, tag);
            return;
        }
        if (this.tagStack.empty()) {
            System.err.println("Tag Stack empty; did not expect " + tag + " at " + position);
            return;
        }
        OpenTag open = this.tagStack.pop();
        if (Tag.label(open.text).equals(Tag.label(tag))) {
            // NOTE(review): the start written here is the stored position + 1,
            // whereas self-closing tags write their position unchanged. Kept
            // as-is because the reader of the tag file defines the format --
            // confirm whether the asymmetry is intended.
            this.writeTagRecord(open.position + 1, position - open.position + 1, open.text);
        } else {
            System.err.println("Position " + position + ": " + Tag.label(tag) + " encountered\n" + "expected: " + Tag.label(open.text) + " (" + open.position + ")");
            this.tagStack.push(open);
        }
    }

    /**
     * Writes one tag record: start, span, text length + 1, then one value per
     * character of the text.
     *
     * <p>Characters below {@code MIN_TAG_CHAR} are raised to it so that the
     * value written ({@code char - TAG_CHAR_OFFSET}) is always at least 2.
     * Presumably {@code writeGamma} is an Elias-gamma style code that cannot
     * represent non-positive values -- confirm against BitOutputStream.
     */
    private void writeTagRecord(int start, int span, String text) throws IOException {
        this.tagOut.writeGamma(start);
        this.tagOut.writeGamma(span);
        this.tagOut.writeGamma(text.length() + 1);
        for (int i = 0; i < text.length(); ++i) {
            int ch = text.charAt(i);
            if (ch < MIN_TAG_CHAR) {
                ch = MIN_TAG_CHAR;
            }
            this.tagOut.writeGamma(ch - TAG_CHAR_OFFSET);
        }
    }

    /** An opening tag waiting for its closing counterpart. */
    private static final class OpenTag {
        /** Word index at which the tag was seen, plus one. */
        final int position;
        /** Full text of the opening tag. */
        final String text;

        OpenTag(int position, String text) {
            this.position = position;
            this.text = text;
        }
    }

    /** Code table entry: the code word bits and how many of them are written. */
    private static final class HashEntry {
        final int word;
        final byte length;

        public HashEntry(int n, byte by) {
            this.word = n;
            this.length = by;
        }
    }
}

