package is2.lemmatizer;

import is2.data.Cluster;
import is2.data.D4;
import is2.data.Instances;
import is2.data.InstancesTagger;
import is2.data.Long2Int;
import is2.data.PipeGen;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.tools.IPipe;
import is2.util.DB;
import is2.util.OptionsSuper;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:is2/lemmatizer/Pipe.class */
public final class Pipe extends PipeGen implements IPipe {
    // Thresholds for edit-script selection. createInstances compares against the
    // literals 1 and 4 directly; these constants are not referenced by name in this file.
    private static final int _MIN_WORDS_MAPPED_BY_SCRIPT = 1;
    private static final int _MIN_OCCURENT_FOR_SCRIPT_USE = 4;
    // Names of the feature templates. initFeatures registers _F0.._F38 under "TYPE";
    // _F39.._F41 are not referenced by name anywhere in this file.
    private static final String _F0 = "F0";
    private static final String _F1 = "F1";
    private static final String _F2 = "F2";
    private static final String _F3 = "F3";
    private static final String _F4 = "F4";
    private static final String _F5 = "F5";
    private static final String _F6 = "F6";
    private static final String _F7 = "F7";
    private static final String _F8 = "F8";
    private static final String _F9 = "F9";
    private static final String _F10 = "F10";
    private static final String _F11 = "F11";
    private static final String _F12 = "F12";
    private static final String _F13 = "F13";
    private static final String _F14 = "F14";
    private static final String _F15 = "F15";
    private static final String _F16 = "F16";
    private static final String _F17 = "F17";
    private static final String _F18 = "F18";
    private static final String _F19 = "F19";
    private static final String _F20 = "F20";
    private static final String _F21 = "F21";
    private static final String _F22 = "F22";
    private static final String _F23 = "F23";
    private static final String _F24 = "F24";
    private static final String _F25 = "F25";
    private static final String _F26 = "F26";
    private static final String _F27 = "F27";
    private static final String _F28 = "F28";
    private static final String _F29 = "F29";
    private static final String _F30 = "F30";
    private static final String _F31 = "F31";
    private static final String _F32 = "F32";
    private static final String _F33 = "F33";
    private static final String _F34 = "F34";
    private static final String _F35 = "F35";
    private static final String _F36 = "F36";
    private static final String _F37 = "F37";
    private static final String _F38 = "F38";
    private static final String _F39 = "F39";
    private static final String _F40 = "F40";
    private static final String _F41 = "F41";
    // Values returned by mf.register("TYPE", _Fn) in initFeatures. Only _f0.._f38 are
    // assigned there; _f39 and _f41 are never assigned in this file (there is no _f40).
    private static int _f0;
    private static int _f1;
    private static int _f2;
    private static int _f3;
    private static int _f4;
    private static int _f5;
    private static int _f6;
    private static int _f7;
    private static int _f8;
    private static int _f9;
    private static int _f10;
    private static int _f11;
    private static int _f12;
    private static int _f13;
    private static int _f14;
    private static int _f15;
    private static int _f16;
    private static int _f17;
    private static int _f18;
    private static int _f19;
    private static int _f20;
    private static int _f21;
    private static int _f22;
    private static int _f23;
    private static int _f24;
    private static int _f25;
    private static int _f26;
    private static int _f27;
    private static int _f28;
    private static int _f29;
    private static int _f30;
    private static int _f31;
    private static int _f32;
    private static int _f33;
    private static int _f34;
    private static int _f35;
    private static int _f36;
    private static int _f37;
    private static int _f38;
    private static int _f39;
    private static int _f41;
    // Values returned by initFeatures when registering "END" as a CHAR and
    // "STR"/"END" as WORDs.
    public static int _CEND;
    public static int _swrd;
    public static int _ewrd;
    public static final String MID = "MID";
    public static final String END = "END";
    public static final String STR = "STR";
    // Feature class under which the accepted edit scripts are registered.
    public static final String OPERATION = "OP";
    // Corpus reader; (re)created at the start of createInstances.
    private CONLLReader09 depReader;
    // Operation strings indexed by their registered value; filled by initValues
    // and again at the end of createInstances.
    public String[] types;
    // Feature encoders created in initValues: z is configured with character-sized
    // slots (a2..a7 = s_char), x with word-sized slots (a2..a4 = s_word).
    private D4 z;
    private D4 x;
    // Set in createInstances; written out by write().
    Cluster cl;
    OptionsSuper options;
    // Passed to both D4 encoders in initValues.
    Long2Int li;
    // Per-feature-class counts read from mf.getFeatureCounter() in initValues.
    // s_dir and s_dist are not assigned anywhere in this file.
    public static int s_pos;
    public static int s_word;
    public static int s_type;
    public static int s_dir;
    public static int s_dist;
    public static int s_char;
    public static int s_oper;
    // Lower-cased word form -> lemma. Filled in createInstances, pruned there for forms
    // handled by an accepted edit script, and (de)serialised by writeMap/readMap.
    public HashMap<String, String> opse = new HashMap<>();
    public MFO mf = new MFO();

    /**
     * Creates a lemmatizer pipe.
     *
     * @param optionsSuper run options; createInstances reads {@code clusterFile},
     *                     {@code trainfile} and {@code count} from it
     * @param long2Int     handed to the two {@code D4} encoders built in initValues
     */
    public Pipe(OptionsSuper optionsSuper, Long2Int long2Int) {
        this.li = long2Int;
        this.options = optionsSuper;
    }

    /**
     * Builds the training instances for the lemmatizer in two passes over a CoNLL-09 corpus.
     *
     * <p>Pass 1 reads {@code str}, registers labels, word forms, lemmas, predicted lemmas,
     * their characters and POS tags with {@link #mf}, records a lower-cased form -> lemma
     * entry in {@link #opse} for every form not yet known to {@code mf}, and counts how often
     * each edit script (see {@link #getOperation(SentenceData09, int)}) occurs and on which
     * forms. A script is registered under {@link #OPERATION} only if it occurred more than
     * 4 times and on more than 1 distinct form (the values of
     * {@code _MIN_OCCURENT_FOR_SCRIPT_USE} and {@code _MIN_WORDS_MAPPED_BY_SCRIPT}); forms
     * that are covered by such scripts are removed from {@code opse} again.
     *
     * <p>Pass 2 re-reads the corpus and fills the character arrays of the returned instances.
     *
     * @param str path of the corpus used for the first (registration) pass
     * @return the populated instances
     */
    @Override // is2.tools.IPipe
    public InstancesTagger createInstances(String str) {
        InstancesTagger instancesTagger = new InstancesTagger();
        this.depReader = new CONLLReader09(false);
        this.depReader.startReading(str);
        this.mf.register("REL", "<root-type>");
        this.mf.register("POS", "<root-POS>");
        System.out.print("Registering feature parts ");

        // edit script -> number of tokens it was observed on
        // (kept as HashMap so the registration order of accepted scripts is unchanged)
        HashMap<String, Integer> scriptCounts = new HashMap<>();
        // edit script -> distinct lower-cased forms it was observed on
        HashMap<String, HashSet<String>> scriptForms = new HashMap<>();
        // forms seen on a script once that script had already exceeded the frequency threshold
        HashSet<String> scriptCoveredForms = new HashSet<>();

        int sentenceCount = 0;
        int progress = 0;
        SentenceData09 sentence;
        while ((sentence = this.depReader.getNext()) != null) {
            sentenceCount++;
            if (sentenceCount % 100 == 0) {
                progress = outValue(sentenceCount, progress);
            }
            for (String label : sentence.labels) {
                this.mf.register("REL", label);
            }
            String[] forms = sentence.forms;
            for (int w = 0; w < forms.length; w++) {
                String lowerForm = forms[w].toLowerCase();
                // remember the lemma only the first time a (lower-cased) form is seen
                if (this.mf.getValue(PipeGen.WORD, lowerForm) == -1) {
                    this.opse.put(lowerForm, sentence.lemmas[w]);
                }
                this.mf.register(PipeGen.WORD, lowerForm);
            }
            for (String form : forms) {
                this.mf.register(PipeGen.WORD, form);
            }
            String[] lemmas = sentence.lemmas;
            for (String lemma : lemmas) {
                this.mf.register(PipeGen.WORD, lemma);
            }
            for (String lemma : lemmas) {
                this.mf.register(PipeGen.WORD, lemma.toLowerCase());
            }
            String[] predictedLemmas = sentence.plemmas;
            for (String plemma : predictedLemmas) {
                this.mf.register(PipeGen.WORD, plemma);
            }
            for (String plemma : predictedLemmas) {
                this.mf.register(PipeGen.WORD, plemma.toLowerCase());
            }
            for (String plemma : predictedLemmas) {
                registerChars(PipeGen.CHAR, plemma);
            }
            for (String pos : sentence.ppos) {
                this.mf.register("POS", pos);
            }
            String[] goldPos = sentence.gpos;
            for (String pos : goldPos) {
                this.mf.register("POS", pos);
            }
            // index 0 is skipped, as in the original loop
            for (int w = 1; w < goldPos.length; w++) {
                String script = getOperation(sentence, w);
                String lowerForm = sentence.forms[w].toLowerCase();

                Integer seen = scriptCounts.get(script);
                if (seen == null) {
                    scriptCounts.put(script, 1);
                } else {
                    int updated = seen.intValue() + 1;
                    scriptCounts.put(script, updated);
                    if (updated > 4) {
                        scriptCoveredForms.add(lowerForm);
                    }
                }

                HashSet<String> formsOfScript = scriptForms.get(script);
                if (formsOfScript == null) {
                    formsOfScript = new HashSet<>();
                    scriptForms.put(script, formsOfScript);
                }
                formsOfScript.add(lowerForm);
            }
        }

        int singleWordMappings = 0;
        int usedScripts = 0;
        for (Map.Entry<String, Integer> entry : scriptCounts.entrySet()) {
            HashSet<String> formsOfScript = scriptForms.get(entry.getKey());
            boolean tooRare = entry.getValue().intValue() <= 4;
            boolean tooFewForms = formsOfScript.size() <= 1;
            if (tooRare || tooFewForms) {
                // rejected script: its forms must stay in the direct form -> lemma map
                scriptCoveredForms.removeAll(formsOfScript);
                if (tooFewForms) {
                    singleWordMappings += formsOfScript.size();
                }
            } else {
                this.mf.register(OPERATION, entry.getKey());
                usedScripts++;
            }
        }
        // forms that are handled by an accepted script do not need a direct mapping
        for (String form : scriptCoveredForms) {
            this.opse.remove(form);
        }
        // The previous version sorted the accepted scripts and then looped with
        // it.hasNext() without ever calling next(), which never terminates when at
        // least one script was accepted. The sorted list had no other use, so both
        // the sort and the loop are gone.

        if (this.options.clusterFile == null) {
            this.cl = new Cluster();
        } else {
            this.cl = new Cluster(this.options.clusterFile, this.mf, 6);
        }
        System.out.println("\nfound scripts " + scriptCounts.size() + " used scripts " + usedScripts);
        System.out.println("found mappings of single words " + singleWordMappings);
        System.out.println("use word maps instead of scripts " + this.opse.size());
        System.out.println("" + this.mf.toString());
        initFeatures();
        this.mf.calculateBits();
        initValues();

        // NOTE(review): the second pass reads options.trainfile, not the 'str' argument
        // used for the first pass — confirm that callers always pass the same file.
        this.depReader.startReading(this.options.trainfile);
        int filled = 0;
        long startMillis = System.currentTimeMillis();
        System.out.print("Creating Features: ");
        instancesTagger.init(sentenceCount, this.mf);
        int progress2 = 0;
        while (true) {
            // The whole step lives inside the try so that a failing sentence is reported
            // and skipped instead of re-processing a stale (or unassigned) sentence.
            try {
                if (filled % 100 == 0) {
                    progress2 = outValue(filled, progress2);
                }
                SentenceData09 next = this.depReader.getNext(instancesTagger);
                if (next == null) {
                    break;
                }
                instancesTagger.fillChars(next, filled, _CEND);
                if (filled > this.options.count) {
                    break;
                }
                filled++;
            } catch (Exception e) {
                DB.println("error in sentnence " + filled);
                e.printStackTrace();
            }
        }
        long endMillis = System.currentTimeMillis();
        System.gc();
        long usedKb = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1024L;
        System.out.print("  time " + (endMillis - startMillis) + " mem " + usedKb + " kb");

        // index -> operation string, for decoding predicted operation ids
        this.types = new String[this.mf.getFeatureCounter().get(OPERATION).intValue()];
        for (Map.Entry<String, Integer> op : MFO.getFeatureSet().get(OPERATION).entrySet()) {
            this.types[op.getValue().intValue()] = op.getKey();
        }
        System.out.println("Num Features: " + this.mf.size());
        return instancesTagger;
    }

    /**
     * Computes the edit script between the form and the gold lemma of one token.
     * Both strings are looked up in {@code strArr}, lower-cased and reversed before
     * being handed to {@code getOperation2}.
     *
     * @param instances instance store providing {@code forms} and {@code glemmas} ids
     * @param i         first index into {@code forms}/{@code glemmas}
     * @param i2        second index into {@code forms}/{@code glemmas}
     * @param strArr    table mapping the stored ids to strings
     * @return the edit script, or "0" when the two lower-cased strings are equal
     */
    public static String getOperation(Instances instances, int i, int i2, String[] strArr) {
        String form = strArr[instances.forms[i][i2]].toLowerCase();
        String reversedForm = new StringBuilder(form).reverse().toString();
        String lemma = strArr[instances.glemmas[i][i2]].toLowerCase();
        String reversedLemma = new StringBuilder(lemma).reverse().toString();
        return getOperation2(reversedForm, reversedLemma);
    }

    /**
     * Computes the edit script between the form and the lemma of token {@code i}
     * of a sentence. Both strings are lower-cased and reversed first.
     *
     * @param sentenceData09 sentence providing {@code forms} and {@code lemmas}
     * @param i              token index
     * @return the edit script, or "0" when the two lower-cased strings are equal
     */
    public static String getOperation(SentenceData09 sentenceData09, int i) {
        String form = sentenceData09.forms[i].toLowerCase();
        String reversedForm = new StringBuilder(form).reverse().toString();
        String lemma = sentenceData09.lemmas[i].toLowerCase();
        String reversedLemma = new StringBuilder(lemma).reverse().toString();
        return getOperation2(reversedForm, reversedLemma);
    }

    /**
     * Computes the edit script between two strings after lower-casing and
     * reversing each of them.
     *
     * @param str  first string (passed as the first argument to getOperation2)
     * @param str2 second string (passed as the second argument to getOperation2)
     * @return the edit script, or "0" when the two lower-cased strings are equal
     */
    public static String getOperation(String str, String str2) {
        String reversedFirst = new StringBuilder(str.toLowerCase()).reverse().toString();
        String reversedSecond = new StringBuilder(str2.toLowerCase()).reverse().toString();
        return getOperation2(reversedFirst, reversedSecond);
    }

    /**
     * Returns "0" for identical inputs; otherwise lets {@code StringEdit.searchPath}
     * write the edit path for the distance {@code StringEdit.LD(str, str2)} into a
     * buffer and returns that buffer's content.
     */
    private static String getOperation2(String str, String str2) {
        if (str.equals(str2)) {
            return "0";
        }
        StringBuffer script = new StringBuffer();
        StringEdit.searchPath(str, str2, StringEdit.LD(str, str2), script, false);
        return script.toString();
    }

    /**
     * Registers every character of {@code str2}, as a one-character string,
     * with {@link #mf} under the feature class {@code str}.
     */
    private void registerChars(String str, String str2) {
        for (char c : str2.toCharArray()) {
            this.mf.register(str, String.valueOf(c));
        }
    }

    /**
     * Reads the per-class feature counts from {@link #mf} into the static
     * {@code s_*} fields, rebuilds the {@link #types} lookup table and creates the
     * two {@code D4} encoders with their slot sizes.
     */
    @Override // is2.tools.IPipe
    public void initValues() {
        // feature-class sizes
        s_pos = this.mf.getFeatureCounter().get("POS").intValue();
        s_word = this.mf.getFeatureCounter().get(PipeGen.WORD).intValue();
        s_type = this.mf.getFeatureCounter().get("TYPE").intValue();
        s_char = this.mf.getFeatureCounter().get(PipeGen.CHAR).intValue();
        s_oper = this.mf.getFeatureCounter().get(OPERATION).intValue();

        // operation id -> operation string
        String[] operationNames = new String[this.mf.getFeatureCounter().get(OPERATION).intValue()];
        for (Map.Entry<String, Integer> operation : MFO.getFeatureSet().get(OPERATION).entrySet()) {
            operationNames[operation.getValue().intValue()] = operation.getKey();
        }
        this.types = operationNames;

        // z: template type, operation, then six character-sized slots
        D4 charEncoder = new D4(this.li);
        charEncoder.a0 = s_type;
        charEncoder.a1 = s_oper;
        charEncoder.a2 = s_char;
        charEncoder.a3 = s_char;
        charEncoder.a4 = s_char;
        charEncoder.a5 = s_char;
        charEncoder.a6 = s_char;
        charEncoder.a7 = s_char;
        this.z = charEncoder;

        // x: template type, operation, three word-sized slots, three character-sized slots
        D4 wordEncoder = new D4(this.li);
        wordEncoder.a0 = s_type;
        wordEncoder.a1 = s_oper;
        wordEncoder.a2 = s_word;
        wordEncoder.a3 = s_word;
        wordEncoder.a4 = s_word;
        wordEncoder.a5 = s_char;
        wordEncoder.a6 = s_char;
        wordEncoder.a7 = s_char;
        this.x = wordEncoder;
    }

    /**
     * Registers the feature-template names and the boundary markers with {@link #mf}
     * and stores the values returned by {@code register} in the static {@code _f*},
     * {@code _swrd}, {@code _ewrd} and {@code _CEND} fields.
     *
     * <p>The statements are order-sensitive: every call goes through the same
     * {@code mf} registry, so do not reorder them.
     */
    @Override // is2.tools.IPipe
    public void initFeatures() {
        // Register "F0".."F49" under TYPE up front, before the individual lookups below.
        for (int i = 0; i < 50; i++) {
            this.mf.register("TYPE", "F" + i);
        }
        // Each name was already registered by the loop above; presumably register()
        // returns the existing value for a known name — confirm in MFO.
        _f0 = this.mf.register("TYPE", _F0);
        _f1 = this.mf.register("TYPE", _F1);
        _f2 = this.mf.register("TYPE", _F2);
        _f3 = this.mf.register("TYPE", _F3);
        _f4 = this.mf.register("TYPE", _F4);
        _f5 = this.mf.register("TYPE", _F5);
        _f6 = this.mf.register("TYPE", _F6);
        _f7 = this.mf.register("TYPE", _F7);
        _f8 = this.mf.register("TYPE", _F8);
        _f9 = this.mf.register("TYPE", _F9);
        _f10 = this.mf.register("TYPE", _F10);
        _f11 = this.mf.register("TYPE", _F11);
        _f12 = this.mf.register("TYPE", _F12);
        _f13 = this.mf.register("TYPE", _F13);
        _f14 = this.mf.register("TYPE", _F14);
        _f15 = this.mf.register("TYPE", _F15);
        _f16 = this.mf.register("TYPE", _F16);
        _f17 = this.mf.register("TYPE", _F17);
        _f18 = this.mf.register("TYPE", _F18);
        _f19 = this.mf.register("TYPE", _F19);
        _f20 = this.mf.register("TYPE", _F20);
        _f21 = this.mf.register("TYPE", _F21);
        _f22 = this.mf.register("TYPE", _F22);
        _f23 = this.mf.register("TYPE", _F23);
        _f24 = this.mf.register("TYPE", _F24);
        _f25 = this.mf.register("TYPE", _F25);
        _f26 = this.mf.register("TYPE", _F26);
        _f27 = this.mf.register("TYPE", _F27);
        _f28 = this.mf.register("TYPE", _F28);
        _f29 = this.mf.register("TYPE", _F29);
        _f30 = this.mf.register("TYPE", _F30);
        _f31 = this.mf.register("TYPE", _F31);
        _f32 = this.mf.register("TYPE", _F32);
        _f33 = this.mf.register("TYPE", _F33);
        _f34 = this.mf.register("TYPE", _F34);
        _f35 = this.mf.register("TYPE", _F35);
        _f36 = this.mf.register("TYPE", _F36);
        _f37 = this.mf.register("TYPE", _F37);
        _f38 = this.mf.register("TYPE", _F38);
        // _f39 and _f41 are declared on the class but are not assigned here.
        // Boundary markers as POS values.
        this.mf.register("POS", "MID");
        this.mf.register("POS", "STR");
        this.mf.register("POS", "END");
        this.mf.register("TYPE", PipeGen.CHAR);
        // Start/end markers for words and the end marker for characters.
        _swrd = this.mf.register(PipeGen.WORD, "STR");
        _ewrd = this.mf.register(PipeGen.WORD, "END");
        _CEND = this.mf.register(PipeGen.CHAR, "END");
    }

    /**
     * Fills {@code jArr} with the encoded feature values for one token and one
     * candidate operation, using the shared encoders {@code x} (word slots) and
     * {@code z} (character slots).
     *
     * <p>The method mutates the fields of {@code x} and {@code z} step by step and
     * relies on values left in them by earlier steps, so statement order matters.
     *
     * @param instancesTagger instance store providing {@code forms}, {@code chars} and {@code length(i)}
     * @param i               first index into {@code forms}/{@code chars} (passed to {@code length})
     * @param i2              token position within that entry
     * @param i3              value stored in slot {@code v1} of both encoders
     *                        (presumably the operation id — confirm against callers)
     * @param str             string whose leading characters are scanned for upper-case
     *                        letters and digits (presumably the word form — confirm)
     * @param jArr            output array; it is zeroed first. Index 27 is written
     *                        unconditionally when three following tokens exist, so it
     *                        needs at least 28 elements in that case.
     */
    public final void addCoreFeatures(InstancesTagger instancesTagger, int i, int i2, int i3, String str, long[] jArr) {
        long j;
        // clear the output array
        for (int length = jArr.length - 1; length >= 0; length--) {
            jArr[length] = 0;
        }
        int i4 = instancesTagger.forms[i][i2];
        // chars[..][11] bounds the scan over str below and is also the value of the _f19
        // feature (presumably the word length — TODO confirm in InstancesTagger.fillChars)
        short s = instancesTagger.chars[i][i2][11];
        // position bucket: 1 + min(i2, 3)
        int i5 = 1 + (i2 < 3 ? i2 : 3);
        // chars[0..4] and chars[6..10] are used as two groups of five below;
        // s7 (chars[5]) is read but not used for the current token
        short s2 = instancesTagger.chars[i][i2][0];
        short s3 = instancesTagger.chars[i][i2][1];
        short s4 = instancesTagger.chars[i][i2][2];
        short s5 = instancesTagger.chars[i][i2][3];
        short s6 = instancesTagger.chars[i][i2][4];
        short s7 = instancesTagger.chars[i][i2][5];
        short s8 = instancesTagger.chars[i][i2][6];
        short s9 = instancesTagger.chars[i][i2][7];
        short s10 = instancesTagger.chars[i][i2][8];
        short s11 = instancesTagger.chars[i][i2][9];
        short s12 = instancesTagger.chars[i][i2][10];
        int length2 = instancesTagger.length(i);
        // _f0: current form; jArr[1] additionally combines the position bucket via csa
        this.x.v1 = i3;
        this.x.v0 = _f0;
        this.x.v2 = i4;
        this.x.cz3();
        jArr[0] = this.x.getVal();
        jArr[1] = this.x.csa(3, i5);
        // _f1: current form + next form (or _ewrd when this is the last token)
        this.x.v0 = _f1;
        this.x.v2 = i4;
        D4 d4 = this.x;
        if (i2 + 1 >= length2) {
            D4 d42 = this.x;
            long j2 = _ewrd;
            j = j2;
            d42.v3 = j2;
        } else {
            j = instancesTagger.forms[i][i2 + 1];
        }
        d4.v3 = j;
        this.x.cz4();
        jArr[2] = this.x.getVal();
        // i6 (capitalisation): 0 = no upper-case char, 1 = only the first char,
        // 2 = only later chars, 3 = first char and at least one later char.
        // i7 (digits): 1 = no digit, 2 = first char is a digit, 3 = digit only later.
        int i6 = 0;
        int i7 = 1;
        for (int i8 = 0; i8 < s; i8++) {
            char charAt = str.charAt(i8);
            if (Character.isUpperCase(charAt)) {
                if (i8 == 0) {
                    i6 = 1;
                } else if (i6 == 1) {
                    i6 = 3;
                } else if (i6 == 0) {
                    i6 = 2;
                }
            }
            if (Character.isDigit(charAt) && i8 == 0) {
                i7 = 2;
            } else if (Character.isDigit(charAt) && i7 == 1) {
                i7 = 3;
            }
        }
        // _f21: digit code. z.v1 has not been set in this call yet at this point.
        this.z.v0 = _f21;
        this.z.v2 = i7;
        this.z.cz3();
        jArr[3] = this.z.getVal();
        // _f4 / _f5: single characters chars[0] and chars[6]
        this.z.v0 = _f4;
        this.z.v1 = i3;
        this.z.v2 = s2;
        this.z.cz3();
        jArr[4] = this.z.getVal();
        this.z.v0 = _f5;
        this.z.v2 = s8;
        this.z.cz3();
        jArr[5] = this.z.getVal();
        // _f6.._f9: growing prefixes of the chars[0..4] group (cz4..cz7)
        this.z.v2 = s2;
        this.z.v3 = s3;
        this.z.v4 = s4;
        this.z.v5 = s5;
        this.z.v6 = s6;
        this.z.v0 = _f6;
        this.z.cz4();
        jArr[6] = this.z.getVal();
        this.z.v0 = _f7;
        this.z.cz5();
        jArr[7] = this.z.getVal();
        this.z.v0 = _f8;
        this.z.cz6();
        jArr[8] = this.z.getVal();
        this.z.v0 = _f9;
        this.z.cz7();
        jArr[9] = this.z.getVal();
        // _f10.._f13: growing prefixes of the chars[6..10] group, each followed by a
        // variant combined with the capitalisation code; fills jArr[10..17]
        this.z.v2 = s8;
        this.z.v3 = s9;
        this.z.v4 = s10;
        this.z.v5 = s11;
        this.z.v6 = s12;
        this.z.v0 = _f10;
        this.z.cz4();
        int i9 = 10 + 1;
        jArr[10] = this.z.getVal();
        int i10 = i9 + 1;
        jArr[i9] = this.z.csa(3, i6);
        this.z.v0 = _f11;
        this.z.cz5();
        int i11 = i10 + 1;
        jArr[i10] = this.z.getVal();
        int i12 = i11 + 1;
        jArr[i11] = this.z.csa(3, i6);
        this.z.v0 = _f12;
        this.z.cz6();
        int i13 = i12 + 1;
        jArr[i12] = this.z.getVal();
        int i14 = i13 + 1;
        jArr[i13] = this.z.csa(3, i6);
        this.z.v0 = _f13;
        this.z.cz7();
        int i15 = i14 + 1;
        jArr[i14] = this.z.getVal();
        int i16 = i15 + 1;
        jArr[i15] = this.z.csa(3, i6);
        // features over the following tokens; i16 is the next free slot (18 here)
        if (length2 > i2 + 1) {
            // _f14 / _f15: chars[0] and chars[5] of the next token
            this.z.v0 = _f14;
            this.z.v2 = instancesTagger.chars[i][i2 + 1][0];
            this.z.cz3();
            int i17 = i16 + 1;
            jArr[i16] = this.z.getVal();
            this.z.v0 = _f15;
            this.z.v2 = instancesTagger.chars[i][i2 + 1][5];
            this.z.cz3();
            int i18 = i17 + 1;
            jArr[i17] = this.z.getVal();
            // character pairs of the next token, only when its chars[11] exceeds 1
            if (instancesTagger.chars[i][i2 + 1][11] > 1) {
                this.z.v0 = _f16;
                this.z.v2 = instancesTagger.chars[i][i2 + 1][0];
                this.z.v3 = instancesTagger.chars[i][i2 + 1][2];
                this.z.cz4();
                int i19 = i18 + 1;
                jArr[i18] = this.z.getVal();
                this.z.v0 = _f17;
                this.z.v2 = instancesTagger.chars[i][i2 + 1][1];
                this.z.v3 = instancesTagger.chars[i][i2 + 1][6];
                this.z.cz4();
                i18 = i19 + 1;
                jArr[i19] = this.z.getVal();
            }
            // _f18: next form
            this.x.v0 = _f18;
            this.x.v2 = instancesTagger.forms[i][i2 + 1];
            this.x.cz3();
            int i20 = i18;
            i16 = i18 + 1;
            jArr[i20] = this.x.getVal();
            // _f32: form at +2 with form at +1, then form at +2 alone
            if (length2 > i2 + 2) {
                this.x.v0 = _f32;
                this.x.v2 = instancesTagger.forms[i][i2 + 2];
                this.x.v3 = instancesTagger.forms[i][i2 + 1];
                this.x.cz4();
                int i21 = i16 + 1;
                jArr[i16] = this.x.getVal();
                this.x.cz3();
                i16 = i21 + 1;
                jArr[i21] = this.x.getVal();
            }
            // _f33: form at +3 with form at +2, then form at +3 alone
            if (length2 > i2 + 3) {
                this.x.v0 = _f33;
                this.x.v2 = instancesTagger.forms[i][i2 + 3];
                this.x.v3 = instancesTagger.forms[i][i2 + 2];
                this.x.cz4();
                int i22 = i16;
                i16++;
                jArr[i22] = this.x.getVal();
                this.x.cz3();
                // NOTE(review): fixed index 27 instead of the running slot i16. Later
                // writes through the running counter can reach index 27 and overwrite
                // this value — looks unintended; confirm before changing, since moving
                // it alters the feature layout.
                jArr[27] = this.x.getVal();
            }
        }
        // _f19: chars[11] of the current token
        this.z.v0 = _f19;
        this.z.v1 = i3;
        this.z.v2 = s;
        this.z.cz3();
        int i23 = i16;
        int i24 = i16 + 1;
        jArr[i23] = this.z.getVal();
        // features over the preceding tokens; stop as soon as there are none left
        if (i2 < 1) {
            return;
        }
        // _f27: previous form
        this.x.v0 = _f27;
        this.x.v1 = i3;
        this.x.v2 = instancesTagger.forms[i][i2 - 1];
        this.x.cz3();
        int i25 = i24 + 1;
        jArr[i24] = this.x.getVal();
        if (i2 < 2) {
            return;
        }
        // _f28: form at -2
        this.x.v0 = _f28;
        this.x.v2 = instancesTagger.forms[i][i2 - 2];
        this.x.cz3();
        int i26 = i25 + 1;
        jArr[i25] = this.x.getVal();
        if (i2 < 3) {
            return;
        }
        // _f31: form at -3 with form at -2
        this.x.v0 = _f31;
        this.x.v1 = i3;
        this.x.v2 = instancesTagger.forms[i][i2 - 3];
        this.x.v3 = instancesTagger.forms[i][i2 - 2];
        this.x.cz4();
        // i27 is the next free slot; it is not used afterwards
        int i27 = i26 + 1;
        jArr[i26] = this.x.getVal();
    }

    /**
     * Writes {@link #opse} to the stream: the entry count as an int, followed by
     * each key and value as modified-UTF strings. An {@code IOException} is printed
     * and otherwise ignored.
     */
    private void writeMap(DataOutputStream dataOutputStream) {
        try {
            dataOutputStream.writeInt(this.opse.size());
            Iterator<Map.Entry<String, String>> entries = this.opse.entrySet().iterator();
            while (entries.hasNext()) {
                Map.Entry<String, String> mapping = entries.next();
                dataOutputStream.writeUTF(mapping.getKey());
                dataOutputStream.writeUTF(mapping.getValue());
            }
        } catch (IOException ioException) {
            ioException.printStackTrace();
        }
    }

    /**
     * Reads form/lemma pairs in the layout produced by {@code writeMap} (an int
     * count, then that many key/value UTF string pairs) and adds them to
     * {@link #opse}. An {@code IOException} is printed and otherwise ignored;
     * entries read before the failure stay in the map.
     *
     * @param dataInputStream stream positioned at the entry count
     */
    public void readMap(DataInputStream dataInputStream) {
        try {
            int remaining = dataInputStream.readInt();
            while (remaining-- > 0) {
                String form = dataInputStream.readUTF();
                String lemma = dataInputStream.readUTF();
                this.opse.put(form, lemma);
            }
        } catch (IOException ioException) {
            ioException.printStackTrace();
        }
    }

    /**
     * Serialises this pipe: first the form -> lemma map (via {@code writeMap}),
     * then the cluster. An {@code IOException} from the cluster is printed and
     * otherwise ignored.
     *
     * @param dataOutputStream destination stream
     */
    @Override // is2.tools.IPipe
    public void write(DataOutputStream dataOutputStream) {
        writeMap(dataOutputStream);
        Cluster cluster = this.cl;
        try {
            cluster.write(dataOutputStream);
        } catch (IOException ioException) {
            ioException.printStackTrace();
        }
    }
}
