package is2.lemmatizer;

import is2.data.Cluster;
import is2.data.F2SF;
import is2.data.FV;
import is2.data.Instances;
import is2.data.InstancesTagger;
import is2.data.Long2Int;
import is2.data.ParametersFloat;
import is2.data.PipeGen;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.io.CONLLWriter09;
import is2.tools.IPipe;
import is2.tools.Tool;
import is2.tools.Train;
import is2.util.DB;
import is2.util.OptionsSuper;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
import org.apache.uima.pear.tools.InstallationController;

/* loaded from: input_file:is2/lemmatizer/Lemmatizer.class */
public class Lemmatizer implements Tool, Train {
    public Pipe pipe;
    public ParametersFloat params;
    private Long2Int li;
    private boolean doUppercase;
    private long[] vs;

    public Lemmatizer(String str) {
        this.doUppercase = false;
        this.vs = new long[40];
        try {
            Options options = new Options(new String[]{"-model", str});
            this.li = new Long2Int(options.hsize);
            readModel(options);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates a lemmatizer without loading a model; {@code pipe},
     * {@code params} and the feature index are left unset by this constructor.
     *
     * @param z initial value of the upper-case handling flag
     */
    public Lemmatizer(boolean z) {
        this.vs = new long[40];
        this.doUppercase = z;
    }

    /**
     * Command-line entry point. Depending on the parsed options it trains and
     * stores a model, applies a stored model to the test file, and/or runs
     * the evaluator, then prints the elapsed time in seconds (one decimal).
     *
     * @param strArr command-line arguments, parsed by {@link Options}
     * @throws FileNotFoundException declared by the original signature
     * @throws Exception any failure propagated from the training, tagging or evaluation steps
     */
    public static void main(String[] strArr) throws FileNotFoundException, Exception {
        Options opts = new Options(strArr);
        Lemmatizer tool = new Lemmatizer(opts.upper);
        long startedAt = System.currentTimeMillis();

        if (opts.train) {
            Long2Int featureIndex = new Long2Int(opts.hsize);
            tool.li = featureIndex;
            tool.pipe = new Pipe(opts, featureIndex);
            InstancesTagger trainingData = tool.pipe.createInstances(opts.trainfile);
            DB.println("Features: " + tool.pipe.mf.size() + " Operations " + tool.pipe.mf.getFeatureCounter().get(Pipe.OPERATION));
            ParametersFloat weights = new ParametersFloat(tool.li.size());
            tool.train(opts, tool.pipe, weights, trainingData);
            tool.writeModel(opts, tool.pipe, weights);
        }

        if (opts.test) {
            // Always reloads the model from disk, even directly after training.
            tool.readModel(opts);
            tool.out(opts, tool.pipe, tool.params);
        }

        System.out.println();

        if (opts.eval) {
            System.out.println("\nEVALUATION PERFORMANCE:");
            Evaluator.evaluate(opts.goldfile, opts.outfile, opts.format);
        }

        // Truncate to tenths of a second before converting to seconds.
        long elapsedTenths = (System.currentTimeMillis() - startedAt) / 100;
        System.out.println("used time " + (((float) elapsedTenths) / 10.0f));
    }

    /**
     * Writes the model to {@code optionsSuper.modelName} as a zip archive with
     * a single entry. The entry contains, in this order: the feature mapping
     * ({@code MFO.writeData}), the parameters, the data written by
     * {@code iPipe.write}, and the upper-case flag.
     *
     * <p>Failures are printed to standard error and not rethrown. The output
     * stream is closed on both the success and the failure path.
     *
     * @param optionsSuper    supplies the model file name
     * @param iPipe           pipe whose data is stored in the model
     * @param parametersFloat trained parameters to store
     */
    @Override // is2.tools.Train
    public void writeModel(OptionsSuper optionsSuper, IPipe iPipe, ParametersFloat parametersFloat) {
        try {
            ZipOutputStream zipOutputStream = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(optionsSuper.modelName)));
            try {
                // NOTE(review): the entry name comes from an unrelated UIMA constant; this looks
                // like a decompiler substitution for a plain string literal - confirm.
                zipOutputStream.putNextEntry(new ZipEntry(InstallationController.PACKAGE_DATA_DIR));
                DataOutputStream dataOutputStream = new DataOutputStream(new BufferedOutputStream(zipOutputStream));
                MFO.writeData(dataOutputStream);
                dataOutputStream.flush();
                parametersFloat.write(dataOutputStream);
                iPipe.write(dataOutputStream);
                dataOutputStream.writeBoolean(this.doUppercase);
                // Push buffered bytes into the zip stream before it is finished and closed.
                dataOutputStream.flush();
            } finally {
                // Closing the zip stream finishes the archive and releases the file handle,
                // including when one of the writes above failed.
                zipOutputStream.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads a model from the zip archive at {@code optionsSuper.modelName}.
     *
     * <p>Reads, in this order: the feature mapping, the parameters, the pipe
     * map, the cluster data and - if more bytes are reported as available -
     * the upper-case flag. On success {@code params}, {@code li} and
     * {@code pipe} are replaced by freshly created objects.
     *
     * <p>Failures are printed to standard error and not rethrown; fields
     * assigned before the failure keep their new values. The input stream is
     * closed on both the success and the failure path.
     *
     * @param optionsSuper supplies the model file name and is passed on to the new {@link Pipe}
     */
    @Override // is2.tools.Train
    public void readModel(OptionsSuper optionsSuper) {
        try {
            ZipInputStream zipInputStream = new ZipInputStream(new BufferedInputStream(new FileInputStream(optionsSuper.modelName)));
            try {
                // Position the stream on the first (and only expected) entry.
                zipInputStream.getNextEntry();
                DataInputStream dataInputStream = new DataInputStream(new BufferedInputStream(zipInputStream));
                MFO mfo = new MFO();
                mfo.read(dataInputStream);
                this.params = new ParametersFloat(0);
                this.params.read(dataInputStream);
                this.li = new Long2Int(this.params.size());
                this.pipe = new Pipe(optionsSuper, this.li);
                this.pipe.mf = mfo;
                this.pipe.initFeatures();
                this.pipe.initValues();
                this.pipe.readMap(dataInputStream);
                // Rebuild the index -> operation-name table from the feature mapping.
                for (Map.Entry<String, Integer> entry : MFO.getFeatureSet().get(Pipe.OPERATION).entrySet()) {
                    this.pipe.types[entry.getValue().intValue()] = entry.getKey();
                }
                this.pipe.cl = new Cluster(dataInputStream);
                // The flag is only read when the stream reports remaining bytes.
                if (dataInputStream.available() > 0) {
                    this.doUppercase = dataInputStream.readBoolean();
                }
            } finally {
                // Releases the file handle even when the model is truncated or corrupt.
                zipInputStream.close();
            }
            DB.println("Loading data finished. ");
            DB.println("number of params  " + this.params.parameters.length);
            DB.println("number of classes " + this.pipe.types.length);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Trains the parameters online over {@code optionsSuper.numIters} passes
     * through the training instances and averages them at the end.
     *
     * <p>For every token:
     * <ul>
     *   <li>If the lower-cased form has an entry in {@code pipe.opse}, the token
     *       counts as correct and no operation is scored.</li>
     *   <li>Otherwise every operation in {@code pipe.types} is scored; if the
     *       best one differs from {@code Pipe.getOperation(...)}, the parameters
     *       are updated towards the latter and away from the former.</li>
     *   <li>If upper-case handling is enabled, two extra class indices beyond the
     *       operation classes are trained from the case of the gold lemma's
     *       first character.</li>
     * </ul>
     *
     * <p>Features are taken from {@code this.pipe}; the {@code iPipe} argument
     * is not used.
     *
     * @param optionsSuper    supplies the number of iterations
     * @param iPipe           unused by this implementation
     * @param parametersFloat parameters that are updated in place and finally averaged
     * @param instances       training data; must be an {@link InstancesTagger}
     */
    @Override // is2.tools.Train
    public void train(OptionsSuper optionsSuper, IPipe iPipe, ParametersFloat parametersFloat, Instances instances) {
        InstancesTagger instancesTagger = (InstancesTagger) instances;
        // Progress-output state handed back by Pipe.outValueErr.
        int del = 0;
        FV goldFv = new FV();
        FV predFv = new FV();
        // Class indices used for the case decision; they lie beyond the operation classes.
        int caseClass = this.pipe.types.length + 1;
        int caseClassAlt = caseClass + 1;
        String[] words = MFO.reverse(MFO.getFeatureSet().get(PipeGen.WORD));
        F2SF scorer = parametersFloat.getFV();
        double upd = 0.0d;
        int iter = 0;
        while (iter < optionsSuper.numIters) {
            System.out.print("Iteration " + iter + ": ");
            long iterStart = System.currentTimeMillis();
            int numInstances = instancesTagger.size();
            int correct = 0;
            int count = 0;
            long last = System.currentTimeMillis();
            int wrongOps = 0;
            int correctOps = 0;
            int caseCorrect = 0;
            int caseWrong = 0;
            // NOTE(review): nothing in this method ever puts entries into this map, so the
            // report printed after the instance loop is always empty - confirm whether the
            // code that filled it was lost.
            HashMap<String, Integer> tally = new HashMap<String, Integer>();
            for (int n = 0; n < numInstances; n++) {
                if ((n + 1) % 500 == 0) {
                    // NOTE(review): correct / count is an integer division and therefore 0 unless
                    // every token was correct; this looks like a lost float cast - confirm against
                    // the parameter type of Pipe.outValueErr. The guard avoids division by zero.
                    del = Pipe.outValueErr(n + 1, count - correct, count == 0 ? 0 : correct / count, del, last, upd);
                }
                // Counts down from the total number of remaining updates as training proceeds.
                upd = ((optionsSuper.numIters * numInstances) - ((numInstances * iter) + (n + 1))) + 1;
                for (int w = 0; w < instancesTagger.length(n); w++) {
                    double best = -1000.0d;
                    String bestOp = "";
                    count++;
                    this.pipe.addCoreFeatures(instancesTagger, n, w, 0, words[instancesTagger.forms[n][w]], this.vs);
                    String known = this.pipe.opse.get(words[instancesTagger.forms[n][w]].toLowerCase());
                    if (known == null) {
                        // Score every operation and keep the best one.
                        for (int t = 0; t < this.pipe.types.length; t++) {
                            scorer.clear();
                            for (int k = this.vs.length - 1; k >= 0; k--) {
                                if (this.vs[k] > 0) {
                                    scorer.add(this.li.l2i(this.vs[k] + (t * Pipe.s_type)));
                                }
                            }
                            float score = (float) scorer.getScore();
                            if (score > best) {
                                bestOp = this.pipe.types[t];
                                best = score;
                            }
                        }
                    }
                    if (this.doUppercase) {
                        String gold = words[instancesTagger.glemmas[n][w]];
                        scorer.clear();
                        for (int k = this.vs.length - 1; k >= 0; k--) {
                            if (this.vs[k] > 0) {
                                scorer.add(this.li.l2i(this.vs[k] + (caseClass * Pipe.s_type)));
                            }
                        }
                        // A non-positive score is treated as "upper case" (see lemmatize); an update
                        // is needed only when that disagrees with the gold lemma's first character.
                        int promote = -1;
                        int demote = -1;
                        if (gold.length() > 0 && Character.isUpperCase(gold.charAt(0)) && scorer.score > 0.0f) {
                            promote = caseClassAlt;
                            demote = caseClass;
                        } else if (gold.length() > 0 && Character.isLowerCase(gold.charAt(0)) && scorer.score <= 0.0f) {
                            promote = caseClass;
                            demote = caseClassAlt;
                        }
                        if (promote == -1 || gold.length() <= 0) {
                            caseCorrect++;
                        } else {
                            caseWrong++;
                            predFv.clear();
                            for (int k = this.vs.length - 1; k >= 0; k--) {
                                if (this.vs[k] > 0) {
                                    predFv.add(this.li.l2i(this.vs[k] + (demote * Pipe.s_type)));
                                }
                            }
                            goldFv.clear();
                            for (int k = this.vs.length - 1; k >= 0; k--) {
                                if (this.vs[k] > 0) {
                                    goldFv.add(this.li.l2i(this.vs[k] + (promote * Pipe.s_type)));
                                }
                            }
                            double loss = 1.0d - (parametersFloat.getScore(goldFv) - parametersFloat.getScore(predFv));
                            FV dist = goldFv.getDistVector(predFv);
                            dist.update(parametersFloat.parameters, parametersFloat.total, parametersFloat.update(dist, loss), upd, false);
                        }
                    }
                    if (known != null) {
                        correct++;
                        correctOps++;
                    } else {
                        String operation = Pipe.getOperation(instancesTagger, n, w, words);
                        if (operation.equals(bestOp)) {
                            correct++;
                            correctOps++;
                        } else {
                            wrongOps++;
                            // Features of the wrongly predicted operation.
                            predFv.clear();
                            int predicted = this.pipe.mf.getValue(Pipe.OPERATION, bestOp);
                            for (int k = this.vs.length - 1; k >= 0; k--) {
                                if (this.vs[k] > 0) {
                                    predFv.add(this.li.l2i(this.vs[k] + (predicted * Pipe.s_type)));
                                }
                            }
                            // Features of the operation derived from the training data.
                            goldFv.clear();
                            int expected = this.pipe.mf.getValue(Pipe.OPERATION, operation);
                            for (int k = this.vs.length - 1; k >= 0; k--) {
                                if (this.vs[k] > 0) {
                                    goldFv.add(this.li.l2i(this.vs[k] + (expected * Pipe.s_type)));
                                }
                            }
                            double loss = 1.0d - (parametersFloat.getScore(goldFv) - parametersFloat.getScore(predFv));
                            FV dist = goldFv.getDistVector(predFv);
                            dist.update(parametersFloat.parameters, parametersFloat.total, parametersFloat.update(dist, loss), upd, false);
                        }
                    }
                }
            }
            ArrayList<Map.Entry<String, Integer>> frequent = new ArrayList<Map.Entry<String, Integer>>();
            for (Map.Entry<String, Integer> entry : tally.entrySet()) {
                if (entry.getValue().intValue() > 1) {
                    frequent.add(entry);
                }
            }
            Collections.sort(frequent, new Comparator<Map.Entry<String, Integer>>() {
                @Override // java.util.Comparator
                public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                    // Compare by value, not by reference: == on boxed Integers is only reliable
                    // inside the small-value cache and broke the comparator contract otherwise.
                    return left.getValue().compareTo(right.getValue());
                }
            });
            if (frequent.size() > 0) {
                System.out.println();
            }
            for (Map.Entry<String, Integer> entry : frequent) {
                System.out.println(entry.getKey() + "  " + entry.getValue());
            }
            tally.clear();
            // Guarded like the in-loop call: count is 0 when there were no training tokens.
            Pipe.outValueErr(numInstances, count - correct, count == 0 ? 0 : correct / count, del, last, upd, "time " + (System.currentTimeMillis() - iterStart) + " corr/wrong " + correctOps + " " + wrongOps + " uppercase corr/wrong  " + caseCorrect + " " + caseWrong);
            del = 0;
            System.out.println();
            iter++;
        }
        parametersFloat.average(iter * instancesTagger.size());
    }

    /**
     * Lemmatizes every sentence of {@code optionsSuper.testfile} and writes the
     * result to {@code optionsSuper.outfile}, printing progress and timing
     * information to standard output.
     *
     * <p>Predicted lemmas are optionally normalized ({@code optionsSuper.normalize})
     * and optionally copied over the gold lemmas ({@code optionsSuper.overwritegold}).
     * An exception stops processing and is printed to standard error; the writer
     * is still finalised in that case so that sentences already written are not
     * left in an unfinished output.
     *
     * <p>The model held by this instance is used; the {@code iPipe} and
     * {@code parametersFloat} arguments are not read.
     *
     * @param optionsSuper    input/output file names, format and post-processing switches
     * @param iPipe           unused by this implementation
     * @param parametersFloat unused by this implementation
     */
    @Override // is2.tools.Train
    public void out(OptionsSuper optionsSuper, IPipe iPipe, ParametersFloat parametersFloat) {
        long start = System.currentTimeMillis();
        CONLLReader09 reader = new CONLLReader09(optionsSuper.testfile, false);
        reader.setInputFormat(optionsSuper.formatTask);
        CONLLWriter09 writer = new CONLLWriter09(optionsSuper.outfile);
        writer.setOutputFormat(optionsSuper.formatTask);
        System.out.print("Processing Sentence: ");
        int count = 0;
        int del = 0;
        // Set just before the regular finishWriting() call so the writer is never finished twice.
        boolean finishAttempted = false;
        try {
            while (true) {
                InstancesTagger instancesTagger = new InstancesTagger();
                instancesTagger.init(1, new MFO());
                SentenceData09 next = reader.getNext(instancesTagger);
                if (next == null) {
                    break;
                }
                instancesTagger.fillChars(next, 0, Pipe._CEND);
                count++;
                SentenceData09 lemmatized = lemmatize(instancesTagger, next, this.li);
                if (optionsSuper.normalize) {
                    // Force normalization on for the predicted lemmas, then restore the reader's setting.
                    boolean previous = reader.normalizeOn;
                    reader.normalizeOn = true;
                    try {
                        for (int i = 0; i < lemmatized.length(); i++) {
                            lemmatized.plemmas[i] = reader.normalize(lemmatized.plemmas[i]);
                        }
                    } finally {
                        reader.normalizeOn = previous;
                    }
                }
                if (optionsSuper.overwritegold) {
                    lemmatized.lemmas = lemmatized.plemmas;
                }
                writer.write(lemmatized);
                if (count % 100 == 0) {
                    del = Pipe.outValue(count, del);
                }
            }
            finishAttempted = true;
            writer.finishWriting();
            Pipe.outValue(count, del);
            long end = System.currentTimeMillis();
            System.out.println(PipeGen.getSecondsPerInstnace(count, end - start));
            System.out.println(PipeGen.getUsedTime(end - start));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (!finishAttempted) {
                // Error path: still finalise the writer (presumably flushes/closes the output file).
                try {
                    writer.finishWriting();
                } catch (Exception finishFailure) {
                    finishFailure.printStackTrace();
                }
            }
        }
    }

    /**
     * Fills {@code plemmas} of the given sentence with predicted lemmas and
     * returns a copy of the sentence.
     *
     * <p>For each token the lemma is taken from {@code pipe.opse} when the
     * lower-cased form has an entry there (and the form id is not -1);
     * otherwise the best-scoring operation in {@code pipe.types} is applied to
     * the form via {@code StringEdit.change}. An empty result becomes
     * {@code "_"}. With upper-case handling enabled, the score of an extra
     * class decides whether the first character is upper-cased or the whole
     * lemma is lower-cased.
     *
     * @param instancesTagger instance container holding the sentence at index 0
     * @param sentenceData09  sentence to lemmatize; its {@code plemmas} are overwritten
     * @param long2Int        mapping from feature values to parameter indices
     * @return a new {@link SentenceData09} created from the updated sentence
     */
    private SentenceData09 lemmatize(InstancesTagger instancesTagger, SentenceData09 sentenceData09, Long2Int long2Int) {
        final int caseClass = this.pipe.types.length + 1;
        instancesTagger.feats[0] = new short[sentenceData09.length()][11];
        instancesTagger.fillChars(sentenceData09, 0, Pipe._CEND);
        final int tokenCount = sentenceData09.length();
        F2SF scorer = new F2SF(this.params.parameters);

        for (int w = 0; w < tokenCount; w++) {
            sentenceData09.plemmas[w] = "_";
            this.pipe.addCoreFeatures(instancesTagger, 0, w, 0, sentenceData09.forms[w], this.vs);

            // Lemma stored for this exact (lower-cased) form, if any.
            String known = null;
            if (instancesTagger.forms[0][w] != -1) {
                known = this.pipe.opse.get(sentenceData09.forms[w].toLowerCase());
            }
            if (known != null) {
                sentenceData09.plemmas[w] = known;
            }

            // Pick the operation with the highest score; ties go to the later one.
            double bestScore = -1000.0d;
            int bestType = 0;
            for (int t = 0; t < this.pipe.types.length; t++) {
                scorer.clear();
                for (int k = this.vs.length - 1; k >= 0; k--) {
                    if (this.vs[k] <= 0) {
                        continue;
                    }
                    scorer.add(long2Int.l2i(this.vs[k] + (t * Pipe.s_type)));
                }
                if (scorer.score >= bestScore) {
                    bestScore = scorer.score;
                    bestType = t;
                }
            }

            if (known == null) {
                String base;
                if (this.doUppercase) {
                    base = sentenceData09.forms[w];
                } else {
                    base = sentenceData09.forms[w].toLowerCase();
                }
                sentenceData09.plemmas[w] = StringEdit.change(base, this.pipe.types[bestType]);
            }
            if (sentenceData09.plemmas[w].isEmpty()) {
                sentenceData09.plemmas[w] = "_";
            }

            if (!this.doUppercase) {
                continue;
            }

            // Score the extra case class: non-positive means "capitalise", positive means "lower-case".
            scorer.clear();
            for (int k = this.vs.length - 1; k >= 0; k--) {
                if (this.vs[k] <= 0) {
                    continue;
                }
                scorer.add(long2Int.l2i(this.vs[k] + (caseClass * Pipe.s_type)));
            }
            try {
                String lemma = sentenceData09.plemmas[w];
                if (scorer.score <= 0.0f) {
                    if (lemma.length() > 0) {
                        // substring(1) is empty for a one-character lemma, so this covers both lengths.
                        sentenceData09.plemmas[w] = Character.toUpperCase(lemma.charAt(0)) + lemma.substring(1);
                    }
                } else if (scorer.score > 0.0f) {
                    sentenceData09.plemmas[w] = lemma.toLowerCase();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        SentenceData09 result = new SentenceData09(sentenceData09);
        result.createSemantic(sentenceData09);
        return result;
    }

    /**
     * Lemmatizes a single sentence.
     *
     * <p>An empty sentence is returned unchanged. Otherwise a new sentence is
     * built from the input via {@code createWithRoot}, registered in a fresh
     * one-sentence instance container, and passed to {@code lemmatize}.
     *
     * @param sentenceData09 sentence to process
     * @return the input itself when it has length 0, otherwise the lemmatized sentence
     */
    @Override // is2.tools.Tool
    public SentenceData09 apply(SentenceData09 sentenceData09) {
        InstancesTagger container = new InstancesTagger();
        if (sentenceData09.length() != 0) {
            SentenceData09 rooted = new SentenceData09();
            rooted.createWithRoot(sentenceData09);

            container.init(1, new MFO());
            container.createInstance09(rooted.length());
            container.fillChars(rooted, 0, Pipe._CEND);

            // Register every form at sentence index 0.
            int position = 0;
            while (position < rooted.length()) {
                container.setForm(0, position, rooted.forms[position]);
                position++;
            }
            return lemmatize(container, rooted, this.li);
        }
        return sentenceData09;
    }
}
