package de.uniwue.dw.segmentation.abbreviations;

import com.ziclix.python.sql.pipe.csv.CSVString;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:libs/padawan-ie-algorithm-0.5.1-jar-with-dependencies.jar:de/uniwue/dw/segmentation/abbreviations/Abbreviations.class */
/**
 * Utility for deriving a list of abbreviations from raw text and for loading such a list from disk.
 *
 * <p>A token is a run of word characters followed by a dot. It is treated as an abbreviation when
 * it occurs often enough with the dot and comparatively rarely without it.
 */
public class Abbreviations {
    /** Charset used for both writing and reading the abbreviation file, so the two stay in sync. */
    private static final Charset FILE_CHARSET = Charset.forName("utf-8");

    /** A progress line is printed every time this many input strings have been processed. */
    private static final int PROGRESS_INTERVAL = 1000;

    /** Index of the "occurrences with trailing dot" counter inside a count array. */
    private static final int WITH_DOT = 0;

    /** Index of the "occurrences without trailing dot" counter inside a count array. */
    private static final int WITHOUT_DOT = 1;

    /**
     * Extracts abbreviation candidates from the given texts and filters them by frequency.
     *
     * <p>Steps:
     * <ol>
     *   <li>Count every match of {@code \b\w{0,i2}\.} in the input strings.</li>
     *   <li>Keep the tokens whose count is strictly greater than {@code i}.</li>
     *   <li>For each kept token, count how often the same token without the dot occurs followed
     *       by a space or a newline.</li>
     *   <li>Accept the token if {@code withDot / (withDot + withoutDot)} is strictly greater than
     *       {@code d}. Accepted tokens (including the trailing dot) are printed together with both
     *       counts and added to the result.</li>
     * </ol>
     *
     * <p>Progress and results are printed to {@code System.out}. The order of the returned list is
     * the iteration order of the internal hash map and should not be relied upon.
     *
     * @param list input strings to scan
     * @param i    minimum number of dotted occurrences; a token must occur more often than this
     * @param i2   maximum number of word characters allowed in front of the dot
     * @param d    minimum share of dotted occurrences among all counted occurrences (exclusive)
     * @param str  path of a file to write the result to, one token per line, encoded as UTF-8;
     *             {@code null} or the empty string skips writing
     * @return the accepted abbreviations, each including its trailing dot
     * @throws IOException if writing the result file fails
     * @throws java.util.regex.PatternSyntaxException if {@code i2} is negative (the candidate
     *         pattern is compiled up front, so this is thrown even for an empty input list)
     */
    public static List<String> generateAbbrevsFromLetters(List<String> list, int i, int i2, double d, String str) throws IOException {
        System.out.println("generation abbreviations from input data...");
        System.out.println("looking for candidates and calculating raw counts ...");
        HashMap<String, int[]> rawCounts = countDottedTokens(list, i2);
        System.out.println("... done!");

        // Only sufficiently frequent tokens go through the expensive second pass.
        // The count arrays are shared with rawCounts on purpose; they are updated in place below.
        HashMap<String, int[]> candidates = new HashMap<String, int[]>();
        for (String token : rawCounts.keySet()) {
            int[] counts = rawCounts.get(token);
            if (counts[WITH_DOT] > i) {
                candidates.put(token, counts);
            }
        }

        System.out.println("calculating counts without dot ...");
        countUndottedOccurrences(list, candidates);
        System.out.println("... done!");

        List<String> abbreviations = new LinkedList<String>();
        for (String token : candidates.keySet()) {
            int[] counts = candidates.get(token);
            // Widen to double before adding so the ratio is computed in floating point.
            double withDot = counts[WITH_DOT];
            double withoutDot = counts[WITHOUT_DOT];
            if (withDot / (withDot + withoutDot) > d) {
                System.out.println(token + CSVString.DELIMITER + counts[WITH_DOT] + CSVString.DELIMITER + counts[WITHOUT_DOT]);
                abbreviations.add(token);
            }
        }

        if (str != null && !str.equals("")) {
            // Write with the same charset readAbbrevsFromDisk uses instead of the platform default.
            FileUtils.writeLines(new File(str), FILE_CHARSET.name(), abbreviations);
        }
        return abbreviations;
    }

    /**
     * Reads a list of abbreviations from a UTF-8 encoded file, one entry per line.
     *
     * @param str path of the file to read
     * @return the lines of the file
     * @throws IOException if the file cannot be read
     */
    public static List<String> readAbbrevsFromDisk(String str) throws IOException {
        return FileUtils.readLines(new File(str), FILE_CHARSET);
    }

    /**
     * Counts every token consisting of at most {@code maxLength} word characters plus a dot.
     *
     * @return map from token (including the dot) to a two-element count array whose
     *         {@link #WITH_DOT} slot holds the number of matches and whose {@link #WITHOUT_DOT}
     *         slot is still zero
     */
    private static HashMap<String, int[]> countDottedTokens(List<String> inputs, int maxLength) {
        // The pattern depends only on maxLength, so compile it once rather than once per input.
        Pattern dottedToken = Pattern.compile("\\b\\w{0," + maxLength + "}\\.");
        HashMap<String, int[]> counts = new HashMap<String, int[]>();
        int processed = 0;
        for (String input : inputs) {
            Matcher matcher = dottedToken.matcher(input);
            while (matcher.find()) {
                String token = matcher.group();
                int[] entry = counts.get(token);
                if (entry == null) {
                    counts.put(token, new int[] {1, 0});
                } else {
                    entry[WITH_DOT]++;
                }
            }
            processed++;
            if (processed % PROGRESS_INTERVAL == 0) {
                System.out.println("input string " + processed + "/" + inputs.size());
            }
        }
        return counts;
    }

    /**
     * For each candidate, counts occurrences of the token without its dot that are directly
     * followed by a space or newline, and stores the result in the {@link #WITHOUT_DOT} slot.
     */
    private static void countUndottedOccurrences(List<String> inputs, HashMap<String, int[]> candidates) {
        int candidateIndex = 0;
        for (String token : candidates.keySet()) {
            candidateIndex++;
            String withoutDot = token.substring(0, token.length() - 1);
            // Quote the token so it is always matched literally, whatever characters it contains.
            Pattern undotted = Pattern.compile("\\b" + Pattern.quote(withoutDot) + "( |\\n)");
            int[] counts = candidates.get(token);
            int processed = 0;
            for (String input : inputs) {
                Matcher matcher = undotted.matcher(input);
                while (matcher.find()) {
                    counts[WITHOUT_DOT]++;
                }
                processed++;
                if (processed % PROGRESS_INTERVAL == 0) {
                    System.out.println("input " + processed + "/" + inputs.size() + "@ candidate " + candidateIndex + "/" + candidates.size());
                }
            }
        }
    }
}
