package wordlistGeneration;

import coreferenceResources.CompoundSplit;
import coreferenceResources.CoreferenceResourceResolver;
import coreferenceResources.CoreferenceUtil;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Stream;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import typesystmutil.Util_impl;

/* loaded from: input_file:wordlistGeneration/MainGenerateWordLists.class */
/**
 * Command-line tool that derives word lists from a folder of XMI documents.
 *
 * <p>The tool first collects every distinct covered text of the POS annotations found in the
 * input documents, together with its lemma and stem. For each reference word set (GermaNet
 * "Mensch" entries, professions, spiritual, military and noble titles) it then writes five
 * text files into {@link #outListFolder}: {@code <name>Lemma.txt}, {@code <name>Stem.txt},
 * {@code <name>Token.txt}, {@code <name>Suffix.txt} and {@code <name>W2V.txt}.
 *
 * <p>Input and output locations are hard-coded in the static fields below.
 */
public class MainGenerateWordLists {
    /** Folder that is scanned (non-recursively) for {@code .xmi} input documents. */
    static File in = new File("\\\\hastur\\scratch\\kallimachos\\kalimachos Doks\\PreprocessedData\\kernkorpusXMI-NoBig");

    /** Folder into which the generated list files are written. */
    static File outListFolder = new File("C:\\Users\\mkrug\\git\\CoreferenceResolution\\ProjectResources\\src\\main\\resources\\listsJournal");

    /** Minimum value of {@link #getW2vClusterRatio} for a word2vec cluster to be kept. */
    static double keepW2VClusterAbove = 0.8d;

    /**
     * Collects the word forms of all XMI documents in {@link #in} and writes the word lists
     * for every reference set.
     *
     * @param strArr command-line arguments (unused)
     * @throws IllegalStateException if the input folder cannot be listed
     */
    public static void main(String[] strArr) {
        File[] xmiFiles = in.listFiles(file -> file.getName().endsWith(".xmi"));
        if (xmiFiles == null) {
            // File.listFiles returns null when the path is not a directory or an I/O error occurs.
            throw new IllegalStateException("Input folder cannot be listed: " + in.getAbsolutePath());
        }

        ConcurrentHashMap<String, WordWithForms> wordForms = new ConcurrentHashMap<>();
        Arrays.stream(xmiFiles).parallel().forEach(xmiFile -> {
            collectWordForms(xmiFile, wordForms);
            // Progress output: number of distinct tokens seen so far.
            System.out.println(wordForms.size());
        });

        Set<String> gnMenschEntries = getGNMenschEntries();

        // NOTE(review): the results of these calls are discarded; presumably they only force the
        // resolver to load the static resources that are read directly below - TODO confirm.
        CoreferenceResourceResolver.getW2VClusterFor("Bier");
        CoreferenceResourceResolver.isInBerufeList("Bier");
        CoreferenceResourceResolver.isAdelstitel("Bier");
        CoreferenceResourceResolver.isMilitTitle("Bier");
        CoreferenceResourceResolver.isSpiritualtitel("Bier");

        createLists(gnMenschEntries, wordForms, "GermaNet");
        createLists(CoreferenceResourceResolver.berufe, wordForms, "Profession");
        createLists(CoreferenceResourceResolver.geistTitel, wordForms, "Spiritual");
        createLists(CoreferenceResourceResolver.militTitle, wordForms, "Military");
        createLists(CoreferenceResourceResolver.adelsTitel, wordForms, "Noble");
    }

    /**
     * Adds one {@link WordWithForms} entry per distinct POS-annotation text of the given
     * document to {@code wordForms}. Texts that are already present are left untouched.
     *
     * @param xmiFile   the XMI document to read
     * @param wordForms shared target map, keyed by covered text; written from several threads
     */
    private static void collectWordForms(File xmiFile, ConcurrentHashMap<String, WordWithForms> wordForms) {
        CAS cas = CoreferenceUtil.createCASAndDeserialize(xmiFile);
        Util_impl util = new Util_impl(cas);
        Iterator<?> annotations = cas.getAnnotationIndex(util.getPOSType()).iterator();
        while (annotations.hasNext()) {
            AnnotationFS annotation = (AnnotationFS) annotations.next();
            String coveredText = annotation.getCoveredText();
            // Cheap pre-check avoids stemming tokens that are already known; putIfAbsent keeps
            // the insertion itself atomic when two threads meet the same new token.
            if (!wordForms.containsKey(coveredText)) {
                String lemma = annotation.getFeatureValueAsString(util.getLemmaFeature());
                String stem = CoreferenceResourceResolver.stem(coveredText);
                wordForms.putIfAbsent(coveredText, new WordWithForms(coveredText, lemma, stem));
            }
        }
    }

    /**
     * Writes the five list files for one reference word set.
     *
     * <ul>
     *   <li>Lemma: corpus tokens whose lemma is in {@code set}</li>
     *   <li>Stem: corpus tokens whose stem is in {@code set}</li>
     *   <li>Token: the entries of {@code set} themselves</li>
     *   <li>Suffix: capitalised corpus tokens whose last compound component is in {@code set}
     *       and has either no GermaNet category or the category "Mensch"</li>
     *   <li>W2V: all words of every word2vec cluster whose ratio reaches
     *       {@link #keepW2VClusterAbove}</li>
     * </ul>
     *
     * @param set reference words
     * @param map corpus tokens with their lemma and stem, keyed by token
     * @param str list name, used as file-name prefix
     */
    private static void createLists(Set<String> set, Map<String, WordWithForms> map, String str) {
        Set<String> lemmaMatches = new HashSet<>();
        Set<String> stemMatches = new HashSet<>();
        Set<String> suffixMatches = new HashSet<>();
        Set<String> tokens = new HashSet<>(set);
        Set<String> w2vMatches = new HashSet<>();

        for (Set<String> clusterWords : groupWordsByCluster().values()) {
            if (getW2vClusterRatio(set, clusterWords) >= keepW2VClusterAbove) {
                System.out.println("W2V: " + clusterWords);
                w2vMatches.addAll(clusterWords);
            }
        }

        for (Map.Entry<String, WordWithForms> entry : map.entrySet()) {
            String token = entry.getKey();
            WordWithForms forms = entry.getValue();
            if (set.contains(forms.stem)) {
                stemMatches.add(token);
            }
            // Guard against empty tokens before inspecting the first character.
            if (!token.isEmpty() && Character.isUpperCase(token.charAt(0))) {
                String suffix = testForSuffix(set, token);
                if (suffix != null) {
                    Set<String> categories = CoreferenceResourceResolver.getGNCategories(suffix);
                    if (categories.size() == 0 || categories.contains("Mensch")) {
                        suffixMatches.add(token);
                    }
                }
            }
            if (set.contains(forms.lemma)) {
                lemmaMatches.add(token);
            }
        }

        writeToFile("Lemma", str, lemmaMatches);
        writeToFile("Stem", str, stemMatches);
        writeToFile("Token", str, tokens);
        writeToFile("Suffix", str, suffixMatches);
        writeToFile("W2V", str, w2vMatches);
    }

    /**
     * Inverts {@code CoreferenceResourceResolver.word2VecClusterMapping}: every word is filed
     * under the first element (in iteration order) of the set it is mapped to. Words mapped to
     * a missing or empty set are skipped.
     *
     * @return map from that first element to all words filed under it
     */
    private static Map<String, Set<String>> groupWordsByCluster() {
        Map<String, Set<String>> wordsByCluster = new HashMap<>();
        for (Map.Entry<String, Set<String>> entry : CoreferenceResourceResolver.word2VecClusterMapping.entrySet()) {
            Set<String> clusterIds = entry.getValue();
            if (clusterIds == null || clusterIds.isEmpty()) {
                continue;
            }
            String clusterId = clusterIds.iterator().next();
            wordsByCluster.computeIfAbsent(clusterId, id -> new HashSet<>()).add(entry.getKey());
        }
        return wordsByCluster;
    }

    /**
     * Computes the ratio used to decide whether a word2vec cluster is kept.
     *
     * <p>Numerator: number of words of {@code set2} that are contained in {@code set}.
     * Denominator: number of words of {@code set2} whose GermaNet categories do not contain
     * "Mensch". The division is done in floating point, so a zero denominator yields
     * {@code Infinity} (positive numerator) or {@code NaN} (zero numerator) rather than an
     * exception.
     *
     * <p>NOTE(review): dividing by the count of non-"Mensch" words looks unusual; confirm that
     * this is the intended denominator.
     *
     * @param set  reference words
     * @param set2 words of one cluster
     * @return numerator divided by denominator as described above
     */
    private static double getW2vClusterRatio(Set<String> set, Set<String> set2) {
        double shared = 0.0d;
        double nonMensch = 0.0d;
        for (String word : set2) {
            if (set.contains(word)) {
                shared += 1.0d;
            }
            if (!CoreferenceResourceResolver.getGNCategories(word).contains("Mensch")) {
                nonMensch += 1.0d;
            }
        }
        return shared / nonMensch;
    }

    /**
     * Writes the given words, one per line, to {@code <str2><str>.txt} inside
     * {@link #outListFolder}. I/O failures are reported on stderr and otherwise ignored so
     * that the remaining lists are still written.
     *
     * @param str  list kind, second part of the file name
     * @param str2 list name, first part of the file name
     * @param set  words to write
     */
    private static void writeToFile(String str, String str2, Set<String> set) {
        File target = Paths.get(outListFolder.getAbsolutePath(), str2 + str + ".txt").toFile();
        // try-with-resources closes the writer even when a write fails.
        // NOTE(review): FileWriter encodes with the platform default charset.
        try (FileWriter writer = new FileWriter(target)) {
            for (String word : set) {
                writer.append(word).append("\n");
            }
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Looks for a dictionary split of {@code str} whose last component is in {@code set}.
     *
     * @param set reference words
     * @param str word to split
     * @return the last component of the first matching split, or {@code null} if none matches
     */
    private static String testForSuffix(Set<String> set, String str) {
        for (CompoundSplit split : CoreferenceResourceResolver.splitWordByDictionary(str, true)) {
            int componentCount = split.getComponents().size();
            if (componentCount == 0) {
                continue;
            }
            String lastComponent = split.getComponents().get(componentCount - 1);
            if (set.contains(lastComponent)) {
                return lastComponent;
            }
        }
        return null;
    }

    /**
     * Returns all keys of {@code CoreferenceResourceResolver.gnCategoriesMap} whose category
     * set contains "Mensch".
     *
     * @return a new mutable set of those keys
     */
    private static Set<String> getGNMenschEntries() {
        if (CoreferenceResourceResolver.gnCategoriesMap == null) {
            // NOTE(review): presumably this lookup populates gnCategoriesMap as a side effect -
            // TODO confirm.
            CoreferenceResourceResolver.getGNCategories("Bier");
        }
        Set<String> menschEntries = new HashSet<>();
        for (Map.Entry<String, Set<String>> entry : CoreferenceResourceResolver.gnCategoriesMap.entrySet()) {
            if (entry.getValue().contains("Mensch")) {
                menschEntries.add(entry.getKey());
            }
        }
        return menschEntries;
    }
}
