/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.util;

import cc.mallet.pipe.Noop;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;

/**
 * Command-line tool that rebuilds an {@link InstanceList} from a gzipped,
 * space-separated LDA state file containing one row per token.
 *
 * <p>Of each row, this tool reads field 0 (document index), field 2 (token
 * position within the document), field 3 (type index) and field 4 (the type's
 * string form). Leading lines starting with {@code #} are treated as header
 * lines and skipped.
 */
public class StateToInstances {
    /** Required {@code --input} option: the gzipped state file to read. */
    static CommandOption.File inputFile = new CommandOption.File(StateToInstances.class, "input", "FILE", true, null, "The gzipped state file containing one row per token", null);
    /** {@code --output} option: where the instance list is saved (default {@code mallet.data}). */
    static CommandOption.File outputFile = new CommandOption.File(StateToInstances.class, "output", "FILE", true, new File("mallet.data"), "Write the instance list to this file", null);

    /**
     * Reads the state file named by {@code --input}, reconstructs one
     * {@link FeatureSequence} instance per document, and saves the resulting
     * instance list to the file named by {@code --output}.
     *
     * <p>Document indices missing from the state file are filled in with empty
     * instances so that instance positions keep matching document indices.
     * Mismatches between the type index recorded in the file and the index
     * assigned by the freshly built alphabet, and unexpected token positions,
     * are reported on standard error but do not abort the run.
     *
     * @param args command-line arguments, handled by {@link CommandOption#process}
     * @throws Exception if the input cannot be read or parsed, or the output
     *         cannot be written
     */
    public static void main(String[] args) throws Exception {
        CommandOption.setSummary(StateToInstances.class, "Tool for recovering an instance list file from an LDA state file.");
        CommandOption.process(StateToInstances.class, args);
        Alphabet alphabet = new Alphabet();
        // NOTE(review): the second Noop argument is presumably the target alphabet (none here) - confirm.
        Noop pipe = new Noop(alphabet, null);
        InstanceList instances = new InstanceList(pipe);
        BufferedReader reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(StateToInstances.inputFile.value))));
        try {
            String line = reader.readLine();
            // readLine() returns null at end of stream, so guard against a file
            // that is empty or consists solely of header lines.
            while (line != null && line.startsWith("#")) {
                line = reader.readLine();
            }
            int[] tokenBuffer = new int[10000];
            int documentLength = 0;
            int currentDocument = 0;
            boolean sawToken = false;
            while (line != null) {
                String[] fields = line.split(" ");
                int document = Integer.parseInt(fields[0]);
                int position = Integer.parseInt(fields[2]);
                int type = Integer.parseInt(fields[3]);
                int alphabetType = alphabet.lookupIndex(fields[4]);
                if (type != alphabetType) {
                    System.err.println("Expecting " + type + " for " + fields[4] + ", got " + alphabetType);
                }
                if (document != currentDocument) {
                    // Flush the document that just ended.
                    addDocument(instances, alphabet, tokenBuffer, documentLength);
                    documentLength = 0;
                    ++currentDocument;
                    // Emit empty instances for document indices that have no rows.
                    while (currentDocument < document) {
                        addDocument(instances, alphabet, tokenBuffer, 0);
                        ++currentDocument;
                    }
                    currentDocument = document;
                }
                if (tokenBuffer.length <= position) {
                    // Grow to at least position + 1; a single doubling is not
                    // enough when the position jumps far past the buffer end.
                    int newLength = Math.max(tokenBuffer.length * 2, position + 1);
                    int[] biggerBuffer = new int[newLength];
                    System.arraycopy(tokenBuffer, 0, biggerBuffer, 0, tokenBuffer.length);
                    tokenBuffer = biggerBuffer;
                }
                if (documentLength != position) {
                    System.err.println("Expecting position " + documentLength + ", got " + position);
                }
                tokenBuffer[position] = type;
                ++documentLength;
                sawToken = true;
                line = reader.readLine();
            }
            if (sawToken) {
                // Flush the final document, which has no following row to trigger it.
                addDocument(instances, alphabet, tokenBuffer, documentLength);
            } else {
                System.err.println("No token rows found in " + StateToInstances.inputFile.value + "; writing an empty instance list");
            }
        } finally {
            // Closing the outermost reader also closes the wrapped gzip and file streams.
            reader.close();
        }
        instances.save(StateToInstances.outputFile.value);
    }

    /** Adds an instance holding a copy of the first {@code length} type indices in {@code buffer}. */
    private static void addDocument(InstanceList instances, Alphabet alphabet, int[] buffer, int length) {
        int[] types = new int[length];
        System.arraycopy(buffer, 0, types, 0, length);
        instances.addThruPipe(new Instance(new FeatureSequence(alphabet, types), null, null, null));
    }
}

