package de.uniwue.dw.segmentation;

import de.uniwue.dw.uima.types.Types;
import de.uniwue.dw.uima.util.UIMAUtils;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.InvalidXMLException;

/* loaded from: input_file:libs/padawan-ie-algorithm-0.5.1-jar-with-dependencies.jar:de/uniwue/dw/segmentation/Sectionization.class */
public class Sectionization {
    /**
     * Convenience overload of
     * {@link #sectionizeLetter(Letter, String, SectionType, boolean)} that passes
     * {@code null} for both the output directory and the forced section type.
     *
     * @param letter letter to sectionize
     * @param z      flag forwarded unchanged to the four-argument overload
     * @return whatever the four-argument overload returns
     */
    public static List<AnnotationFS> sectionizeLetter(Letter letter, boolean z)
            throws IOException, AnalysisEngineProcessException, InvalidXMLException, ResourceInitializationException, ResourceConfigurationException, URISyntaxException {
        final String noOutputDirectory = null;
        final SectionType noForcedSectionType = null;
        return sectionizeLetter(letter, noOutputDirectory, noForcedSectionType, z);
    }

    /**
     * Makes sure the letter's CAS carries section annotations and returns the indexed
     * {@code Types.SECTION} annotations.
     *
     * <p>If {@code sectionType} is given, the whole document is annotated as a single section
     * of that category and no detection runs ({@code str} is ignored on this path).
     * Otherwise, unless the letter is already flagged as sectionized, the detection pipeline
     * runs; afterwards the sections are written below {@code str} when it is not {@code null}.
     *
     * <p>NOTE(review): the returned list is always read from the {@code Types.SECTION} index,
     * even when {@code z} is {@code true} and the sections were created as
     * {@code Types.SECTIONG} - confirm that this is intended.
     *
     * @param letter      letter whose CAS is annotated; flagged as sectionized afterwards
     * @param str         output directory passed to {@code writeSectionsToDisk}, or
     *                    {@code null} to skip writing
     * @param sectionType category to force onto the whole document, or {@code null} to detect
     * @param z           when {@code true}, the {@code HEADLINEG}/{@code SECTIONG} types are
     *                    used instead of {@code HEADLINE}/{@code SECTION}
     * @return the annotations found in the {@code Types.SECTION} index, in index order
     */
    public static List<AnnotationFS> sectionizeLetter(Letter letter, String str, SectionType sectionType, boolean z)
            throws IOException, AnalysisEngineProcessException, InvalidXMLException, ResourceInitializationException, ResourceConfigurationException, URISyntaxException {
        CAS cas = letter.getCas();
        if (sectionType != null) {
            Type sectionAnnotationType = Types.getType(cas, Types.SECTION);
            if (z) {
                sectionAnnotationType = Types.getType(cas, Types.SECTIONG);
            }
            // UIMA end offsets are exclusive: the whole document is [0, length), not
            // [0, length - 1), which would cut the last character and yield -1 for "".
            String documentText = cas.getDocumentText();
            int documentEnd = documentText == null ? 0 : documentText.length();
            AnnotationFS wholeDocument = cas.createAnnotation(sectionAnnotationType, 0, documentEnd);
            wholeDocument.setFeatureValueFromString(sectionAnnotationType.getFeatureByBaseName("category"), sectionType.getName());
            cas.addFsToIndexes(wholeDocument);
            letter.setSectionized(true);
        } else {
            if (!letter.isSectionized()) {
                // Categories of the headlines found so far; shared between the pipeline steps.
                List<SectionType> foundCategories = new LinkedList<>();
                addParagraphAnnotations(cas);
                generateHeadlineCandidates(cas);
                detectHeadlinesWithLocalFormat(cas, detectLocalHeadlineFormat(cas), z);
                categorizeFoundHeadlines(cas, z, foundCategories);
                detectHeadlinesByScoring(cas, z, foundCategories);
                detectHeadlinesByWordlist(cas, z, foundCategories);
                filterWeakHeadlines(cas, z, foundCategories);
                filterDoubleHeadlines(cas, z);
                addSectionAnnotations(cas, z);
                removeSingleUnknownHeadlines(cas, z);
                letter.setSectionized(true);
            }
            if (str != null) {
                writeSectionsToDisk(letter, str);
            }
        }
        List<AnnotationFS> sections = new LinkedList<>();
        FSIterator<AnnotationFS> sectionIterator = cas.getAnnotationIndex(Types.getType(cas, Types.SECTION)).iterator();
        while (sectionIterator.hasNext()) {
            sections.add(sectionIterator.next());
        }
        return sections;
    }

    /** Keywords whose presence marks an uncategorized headline as weak. */
    private static final Pattern WEAK_HEADLINE_PATTERN = Pattern.compile(".*(Befund|Modell|Grenzfrequenz|Atrium|Batterieimpen?danz|Sinusrhythmus|Elektroden|Ventrikel|Modus|aerob).*");

    /**
     * Removes headlines from the index whose category is {@code UNKNOWN} and whose text
     * contains one of the {@link #WEAK_HEADLINE_PATTERN} keywords. Does nothing unless
     * {@code list} already contains {@code SectionType.PROCEDERE}.
     *
     * @param cas  CAS holding the headline annotations
     * @param z    when {@code true}, {@code Types.HEADLINEG} is filtered instead of
     *             {@code Types.HEADLINE}
     * @param list categories of the headlines found so far (read only)
     */
    private static void filterWeakHeadlines(CAS cas, boolean z, List<SectionType> list) {
        if (!list.contains(SectionType.PROCEDERE)) {
            return;
        }
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
        }
        Feature categoryFeature = headlineType.getFeatureByBaseName("category");
        String unknownName = SectionType.UNKNOWN.getName();

        // Collect first and remove afterwards so the index is not modified while iterating.
        List<AnnotationFS> weakHeadlines = new LinkedList<>();
        FSIterator<AnnotationFS> headlines = cas.getAnnotationIndex(headlineType).iterator();
        while (headlines.hasNext()) {
            AnnotationFS headline = headlines.next();
            if (!WEAK_HEADLINE_PATTERN.matcher(headline.getCoveredText()).matches()) {
                continue;
            }
            // A headline without a category value is simply not UNKNOWN (no NPE).
            String category = headline.getFeatureValueAsString(categoryFeature);
            if (category != null && category.equals(unknownName)) {
                weakHeadlines.add(headline);
            }
        }
        for (AnnotationFS weakHeadline : weakHeadlines) {
            cas.removeFsFromIndexes(weakHeadline);
        }
    }

    /**
     * Removes every headline together with its section when the first section returned by
     * {@code UIMAUtils.getCovered} for that headline has exactly the headline's span and both
     * annotations carry the category {@code UNKNOWN}.
     *
     * @param cas CAS holding headline and section annotations
     * @param z   when {@code true}, the {@code HEADLINEG}/{@code SECTIONG} types are used
     *            instead of {@code HEADLINE}/{@code SECTION}
     */
    private static void removeSingleUnknownHeadlines(CAS cas, boolean z) {
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        Type sectionAnnotationType = Types.getType(cas, Types.SECTION);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
            sectionAnnotationType = Types.getType(cas, Types.SECTIONG);
        }
        Feature headlineCategory = headlineType.getFeatureByBaseName("category");
        Feature sectionCategory = sectionAnnotationType.getFeatureByBaseName("category");

        // Collect first and remove afterwards so the index is not modified while iterating.
        List<AnnotationFS> toRemove = new LinkedList<>();
        FSIterator<AnnotationFS> headlines = cas.getAnnotationIndex(headlineType).iterator();
        while (headlines.hasNext()) {
            AnnotationFS headline = headlines.next();
            List<AnnotationFS> sections = UIMAUtils.getCovered(cas, headline, sectionAnnotationType);
            if (sections.isEmpty()) {
                continue;
            }
            AnnotationFS section = sections.get(0);
            SectionType headlineKind = SectionType.getTypeForString(headline.getFeatureValueAsString(headlineCategory));
            SectionType sectionKind = SectionType.getTypeForString(section.getFeatureValueAsString(sectionCategory));
            boolean sameSpan = section.getBegin() == headline.getBegin() && section.getEnd() == headline.getEnd();
            if (sameSpan && headlineKind == SectionType.UNKNOWN && sectionKind == SectionType.UNKNOWN) {
                toRemove.add(section);
                toRemove.add(headline);
            }
        }
        for (AnnotationFS annotation : toRemove) {
            cas.removeFsFromIndexes(annotation);
        }
    }

    /**
     * Promotes headline candidates to headlines when the text before their first colon maps
     * to a known category that is not yet contained in {@code list}. Promoted candidates are
     * removed from the candidate index.
     *
     * <p>{@code list} is not updated here, so several candidates of the same new category
     * are all promoted by this pass.
     *
     * @param cas  CAS holding the headline candidates
     * @param z    when {@code true}, headlines are created as {@code Types.HEADLINEG} instead
     *             of {@code Types.HEADLINE}
     * @param list categories of the headlines found so far (read only)
     */
    private static void detectHeadlinesByWordlist(CAS cas, boolean z, List<SectionType> list) {
        Type candidateType = Types.getType(cas, Types.HEADLINECANDIDATE);
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
        }
        Feature categoryFeature = headlineType.getFeatureByBaseName("category");

        // Collect first and remove afterwards so the candidate index is not modified while iterating.
        List<AnnotationFS> promotedCandidates = new LinkedList<>();
        FSIterator<AnnotationFS> candidates = cas.getAnnotationIndex(candidateType).iterator();
        while (candidates.hasNext()) {
            AnnotationFS candidate = candidates.next();
            String label = candidate.getCoveredText();
            // Only the part in front of the first colon is categorized. The plain ":" literal
            // replaces a reference to an unrelated UIMA CPM constant holding the same value.
            int colonIndex = label.indexOf(":");
            if (colonIndex >= 0) {
                label = label.substring(0, colonIndex);
            }
            SectionType category = getCategoryFromString(label);
            if (category == SectionType.UNKNOWN || list.contains(category)) {
                continue;
            }
            AnnotationFS headline = cas.createAnnotation(headlineType, candidate.getBegin(), candidate.getEnd());
            headline.setFeatureValueFromString(categoryFeature, category.getName());
            cas.addFsToIndexes(headline);
            promotedCandidates.add(candidate);
        }
        for (AnnotationFS promoted : promotedCandidates) {
            cas.removeFsFromIndexes(promoted);
        }
    }

    /**
     * Removes a headline from the index when its category equals the category of the headline
     * visited immediately before it, unless that category is {@code UNKNOWN}.
     *
     * @param cas CAS holding the headline annotations
     * @param z   when {@code true}, {@code Types.HEADLINEG} is filtered instead of
     *            {@code Types.HEADLINE}
     */
    private static void filterDoubleHeadlines(CAS cas, boolean z) {
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
        }
        Feature categoryFeature = headlineType.getFeatureByBaseName("category");

        SectionType previousCategory = null;
        // Collect first and remove afterwards so the index is not modified while iterating.
        List<AnnotationFS> repeatedHeadlines = new LinkedList<>();
        FSIterator<AnnotationFS> headlines = cas.getAnnotationIndex(headlineType).iterator();
        while (headlines.hasNext()) {
            AnnotationFS headline = headlines.next();
            SectionType category = SectionType.getTypeForString(headline.getFeatureValueAsString(categoryFeature));
            if (category == previousCategory && previousCategory != SectionType.UNKNOWN) {
                repeatedHeadlines.add(headline);
            }
            // A removed headline still counts as "previous" for the next comparison.
            previousCategory = category;
        }
        for (AnnotationFS repeated : repeatedHeadlines) {
            cas.removeFsFromIndexes(repeated);
        }
    }

    /**
     * Derives a category from the covered text of every indexed headline, stores it in the
     * headline's {@code category} feature and appends it to {@code list}.
     *
     * @param cas  CAS holding the headline annotations
     * @param z    when {@code true}, {@code Types.HEADLINEG} is processed instead of
     *             {@code Types.HEADLINE}
     * @param list receives one entry per visited headline, in iteration order
     */
    private static void categorizeFoundHeadlines(CAS cas, boolean z, List<SectionType> list) {
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
        }
        Feature categoryFeature = headlineType.getFeatureByBaseName("category");
        for (FSIterator<AnnotationFS> headlines = cas.getAnnotationIndex(headlineType).iterator(); headlines.hasNext(); ) {
            AnnotationFS headline = headlines.next();
            SectionType category = getCategoryFromString(headline.getCoveredText());
            headline.setFeatureValueFromString(categoryFeature, category.getName());
            list.add(category);
        }
    }

    /**
     * Promotes headline candidates to headlines when their layout score reaches 10 and their
     * text maps to a known category that is not yet contained in {@code list}. Promoted
     * candidates are removed from the candidate index.
     *
     * @param cas  CAS holding paragraphs and headline candidates
     * @param z    when {@code true}, headlines are created as {@code Types.HEADLINEG} instead
     *             of {@code Types.HEADLINE}
     * @param list categories of the headlines found so far (read only)
     */
    private static void detectHeadlinesByScoring(CAS cas, boolean z, List<SectionType> list) {
        Type paragraphType = Types.getType(cas, Types.PARAGRAPH);
        Type candidateType = Types.getType(cas, Types.HEADLINECANDIDATE);
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
        }
        Feature categoryFeature = headlineType.getFeatureByBaseName("category");

        // Collect first and remove afterwards so the candidate index is not modified while iterating.
        List<AnnotationFS> promotedCandidates = new LinkedList<>();
        FSIterator<AnnotationFS> candidates = cas.getAnnotationIndex(candidateType).iterator();
        while (candidates.hasNext()) {
            AnnotationFS candidate = candidates.next();
            if (scoreHeadlineCandidate(cas, candidate, paragraphType) < 10) {
                continue;
            }
            // Categorize before creating the headline so rejected candidates allocate nothing.
            SectionType category = getCategoryFromString(candidate.getCoveredText());
            if (category == SectionType.UNKNOWN || list.contains(category)) {
                continue;
            }
            AnnotationFS headline = cas.createAnnotation(headlineType, candidate.getBegin(), candidate.getEnd());
            headline.setFeatureValueFromString(categoryFeature, category.getName());
            cas.addFsToIndexes(headline);
            promotedCandidates.add(candidate);
        }
        for (AnnotationFS promoted : promotedCandidates) {
            cas.removeFsFromIndexes(promoted);
        }
    }

    /**
     * Computes the layout score of a headline candidate: +2 each for sharing begin / end with
     * the first paragraph returned by {@code UIMAUtils.getCovered}, +2 for at most 50
     * characters, +2 bold, +1 italic, +1 underlined, +5 for a trailing colon.
     */
    private static int scoreHeadlineCandidate(CAS cas, AnnotationFS candidate, Type paragraphType) {
        int score = 0;
        List<AnnotationFS> paragraphs = UIMAUtils.getCovered(cas, candidate, paragraphType);
        if (!paragraphs.isEmpty()) {
            AnnotationFS paragraph = paragraphs.get(0);
            if (paragraph.getBegin() == candidate.getBegin()) {
                score += 2;
            }
            if (paragraph.getEnd() == candidate.getEnd()) {
                score += 2;
            }
        }
        if (candidate.getCoveredText().length() <= 50) {
            score += 2;
        }
        if (isBold(candidate)) {
            score += 2;
        }
        if (isItalic(candidate)) {
            score += 1;
        }
        if (isUnderlined(candidate)) {
            score += 1;
        }
        if (endsWithColon(candidate)) {
            score += 5;
        }
        return score;
    }

    /**
     * Maps a headline text to a section category by testing it against an ordered cascade of
     * regular expressions; the first rule that applies decides the category.
     *
     * <p>The cascade is written as nested negated checks: a block of the form
     * {@code if (!matchesX) { ...later rules... } return X;} means "X wins over all later
     * rules". The order of the rules is therefore significant and must not be changed.
     *
     * <p>Fixes compared with the previous revision (each accepts a superset of what was
     * accepted before):
     * <ul>
     *   <li>{@code null} input yields {@code UNKNOWN} instead of {@code null}, because all
     *       callers dereference the result.</li>
     *   <li>Colonoscopy: {@code olos(c|k)okop} could never match "Koloskopie".</li>
     *   <li>Immunology: {@code immonolog} was a typo for "immunolog".</li>
     *   <li>ICD: {@code .*icd|schrittmacher.*} only matched texts ending in "icd" or
     *       starting with "schrittmacher" because the alternation was not grouped.</li>
     * </ul>
     *
     * @param str headline text, possibly containing formatting tags; may be {@code null}
     * @return the category of the first applicable rule, or {@code SectionType.UNKNOWN}
     */
    private static SectionType getCategoryFromString(String str) {
        if (str == null) {
            return SectionType.UNKNOWN;
        }
        if (str.matches("(?i).*(diagnose|vorerkrankung(en)?).*")) {
            return SectionType.DIAGNOSE;
        }
        if (!str.matches("(?i).*(sonogra(f|ph)ie|sonogramm).*") && !str.matches("(?i).*sono\\b.*")) {
            if (str.matches("(?i).*szintigra.*")) {
                return SectionType.SZINTIGRAFIE;
            }
            if (str.matches("(?i).*ergo.*")) {
                return SectionType.ERGOMETRIE;
            }
            if (str.matches("(?i).*viro.*")) {
                return SectionType.VIROLOGIE;
            }
            if (str.matches("(?i).*lungenfu.*")) {
                return SectionType.LUNGENFUNKTION;
            }
            if (!str.matches("(?i).*((ekg\\b)|(elektro.ardio)).*") && !str.matches(".*EKG.*")) {
                if (!str.matches("(?i).*(CVRF).*") && !str.matches("(?i).*[ck]ardiovaskul.*risiko.*") && !str.matches("(?i).*tumorstadium.*")) {
                    if (str.matches("(?i).*eeg\\b.*")) {
                        return SectionType.EEG;
                    }
                    if (str.matches("(?i).*(r(.|oe)ntgen|durchleuchtung).*")) {
                        return str.matches("(?i).*thorax.*") ? SectionType.ROENTGEN_THORAX : SectionType.ROENTGEN;
                    }
                    if (str.matches("(?i).*r(oe|ö).*") && str.matches("(?i).*thorax.*")) {
                        return SectionType.ROENTGEN_THORAX;
                    }
                    if (str.matches(".*Thorax.*[A-Za-z].*") && !str.matches("(?i).*(ct|mrt|sono|echo).*")) {
                        return SectionType.ROENTGEN_THORAX;
                    }
                    if (str.matches("(?i).*neurolog.*")) {
                        return SectionType.NEUROLOGIE;
                    }
                    if (str.matches("(?i).*anamnes.*")) {
                        return SectionType.ANAMNESE;
                    }
                    if (!str.matches("(?i).*mrt.*") && !str.matches("(?i).*kardio-mr.*")) {
                        if (str.matches("(?i).*chirurg.*")) {
                            return SectionType.CHIRURGIE;
                        }
                        if (str.matches("(?i).*densitometri.*")) {
                            return SectionType.DENSITOMETRIE;
                        }
                        if (str.matches("(?i).*medika[tm].*")) {
                            return SectionType.MEDIKATION;
                        }
                        if (!str.matches("(?i)(.*labor.*|Lab)") && !str.matches("(?i)(.*klinisch-chemisch.*)") && !str.matches("(?i).*sammelurin.*") && !str.matches("(?i).*crh.*test.*")) {
                            if (str.matches("(?i).*blutbild.*") && !str.matches("(?i).*(wir|aktuell).*")) {
                                return SectionType.LABOR;
                            }
                            if (!str.matches("(?i).*\\b(Kontroll)?echo.*") && !str.matches(".*(TTE|TEE|Echokardiogra(ph|f)i).*")) {
                                if (str.matches("(?i).*(herzkath|angiogra(f|ph)i).*")) {
                                    return SectionType.HERZKATHETER;
                                }
                                if (str.matches("Allergi.*")) {
                                    return SectionType.ALLERGIEN;
                                }
                                if (!str.matches("(?i).{0,10}((^|\\s|-)(c|hr)?ct\\b|tomog).*") && !str.matches(".*\\bC?CT\\b.*")) {
                                    if (str.matches(".{0,4}PET.*")) {
                                        return SectionType.PET;
                                    }
                                    if (str.matches("(?i).*radiolog.*")) {
                                        return SectionType.RADIOLOGIE;
                                    }
                                    if (str.matches("(?i).*(gastro|oegd|ögd).*")) {
                                        return SectionType.GASTRO;
                                    }
                                    if (str.matches("(?i).*(langzeit.*blut).*")) {
                                        return SectionType.LANGZEIT_BLUTDRUCK;
                                    }
                                    if (str.matches("(?i).*broncho.*") && !str.matches("(?i).*lavage.*")) {
                                        return SectionType.BRONCHO;
                                    }
                                    // "(ok)?" keeps the old spelling accepted while matching "Koloskopie"/"Coloscopie".
                                    if (str.matches("(?i).*olos(c|k)(ok)?op.*")) {
                                        return SectionType.KOLOSKOPIE;
                                    }
                                    if (str.matches("(?i).*knochen.*") && str.matches("(?i).*(knochenmark|zyto|biops).*")) {
                                        return SectionType.ZYTOLOGIE;
                                    }
                                    if (str.matches("(?i).*knochen.*") && str.matches("(?i).*knochendichte.*")) {
                                        return SectionType.KNOCHENDICHTE;
                                    }
                                    if (str.matches("(?i).*endokrino.*")) {
                                        return SectionType.ENDOKRINOLOGIE;
                                    }
                                    if (str.matches("(?i).*mi(c|k)robio.*")) {
                                        return SectionType.MIKROBIOLOGIE;
                                    }
                                    if (str.matches("(?i).*(histologie|zytologie).*")) {
                                        return SectionType.ZYTOLOGIE;
                                    }
                                    if (str.matches("(?i).*lymphozyten.*(diff|ana).*")) {
                                        return SectionType.LYMPHOZYTENANALYSE;
                                    }
                                    if (str.matches("(?i).*ercp.*")) {
                                        return SectionType.ERCP;
                                    }
                                    if (str.matches("(?i).*ele(c|k)trophorese.*")) {
                                        return SectionType.LABOR;
                                    }
                                    if (str.matches("(?i).*(ele(c|k)trophysio|epu).*")) {
                                        return SectionType.ELEKTROPHYSIOLOGIE;
                                    }
                                    if (str.matches("(?i).*patholog.*")) {
                                        return SectionType.PATHOLOGIE;
                                    }
                                    if (str.matches("(?i).{0,3}(auto.*(immun|anti)|immun.*diag).*")) {
                                        return SectionType.AUTOIMMUN_DIAGNOSTIK;
                                    }
                                    // "[ou]" keeps the old misspelling accepted while matching "Immunologie".
                                    if (str.matches("(?i).*imm[ou]nolog.*")) {
                                        return SectionType.IMMUNOLOGIE;
                                    }
                                    if (str.matches("(?i).*serolog.*")) {
                                        return SectionType.LABOR;
                                    }
                                    if (str.matches("(?i).*schellong.*")) {
                                        return SectionType.SCHELLONG;
                                    }
                                    if (str.matches("(?i).*gerinnung.*")) {
                                        return SectionType.LABOR;
                                    }
                                    if (str.matches("(?i).*mrcp.*")) {
                                        return SectionType.MRCP;
                                    }
                                    // The alternation is grouped so that ".*" applies on both sides of both keywords.
                                    if (str.matches("(?i).*(icd|schrittmacher).*") && !str.matches("(?i).*icd.?10.*")) {
                                        return SectionType.ICD;
                                    }
                                    if (str.matches("(?i).*(fkds|farbdoppler|tccd|eccd).*")) {
                                        return SectionType.FARBDOPPLER;
                                    }
                                    if (str.matches("(?i).*gyn(ae|ä)kologi.*")) {
                                        return SectionType.GYNAEKOLOGIE;
                                    }
                                    if (str.matches("(?i).*(transplan).*")) {
                                        return SectionType.TRANSPLANTATION;
                                    }
                                    if (str.matches("(?i).*(nephrologi).*")) {
                                        return SectionType.NEPHROLOGIE;
                                    }
                                    if (!str.matches("(?i).*(k.rperl|aufnahme).*(befund|unters).*") && !str.matches("(?i)(.*klin(\\.|isch).*befund.*|Befund:|Lokalbefund:\\s?)")) {
                                        if (str.matches("(?i).*\\bvorstellung\\b.*")) {
                                            return SectionType.VORSTELLUNG;
                                        }
                                        if (str.matches("(?i).{0,10}chemo.*")) {
                                            return SectionType.CHEMO;
                                        }
                                        if (!str.matches("(?i).*(pro[cz]edere|\\bbeurteilung|diskussion|vorgehen|verlauf|epikrise|zusammenfassung|schlagen?\\svor).*") && !str.matches(".{0,10}\\bTherapie.*")) {
                                            return str.matches("(?i).{0,10}entlass(ungs)?(befund)?.*") ? SectionType.ENTLASSUNG : str.matches("(?i).*verleg(ungs)?befund.*") ? SectionType.VERLEGUNG : str.matches(".*\\bR.\\.-.*") ? SectionType.ROENTGEN : str.matches("(?i).*(op|operations)-?bericht.*") ? SectionType.OPBERICHT : str.matches("(?i)(sehr\\s*geehrt|wir\\s*(berichten|(be)?danken|verbleiben)|mit\\s*freundl).*") ? SectionType.DISCARD : (!str.matches("(?i).*ak...ll.*") || str.matches("(?i).*medikation.*") || str.matches("(?i).*status.*")) ? SectionType.UNKNOWN : SectionType.DIAGNOSE;
                                        }
                                        return SectionType.PROCEDERE;
                                    }
                                    return SectionType.KU;
                                }
                                return SectionType.CT;
                            }
                            return SectionType.ECHO;
                        }
                        return SectionType.LABOR;
                    }
                    return SectionType.MRT;
                }
                return SectionType.DIAGNOSE;
            }
            return SectionType.EKG;
        }
        return SectionType.SONOGRAFIE;
    }

    /**
     * Creates one section annotation per indexed headline. A section starts at its headline
     * and ends at the end of the paragraph preceding the next headline; the section of the
     * last headline runs to the end of the document with trailing whitespace removed. The
     * section inherits the headline's {@code category} value.
     *
     * <p>The previous revision looped forever (or ran into a
     * {@code StringIndexOutOfBoundsException}) while trimming the last section because its
     * {@code while (true)} loop had no exit. The end offset is now the exclusive offset right
     * after the last non-whitespace character, as UIMA annotation offsets require.
     *
     * @param cas CAS holding paragraph and headline annotations
     * @param z   when {@code true}, the {@code HEADLINEG}/{@code SECTIONG} types are used
     *            instead of {@code HEADLINE}/{@code SECTION}
     */
    private static void addSectionAnnotations(CAS cas, boolean z) {
        Type paragraphType = Types.getType(cas, Types.PARAGRAPH);
        Type sectionAnnotationType = Types.getType(cas, Types.SECTION);
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
            sectionAnnotationType = Types.getType(cas, Types.SECTIONG);
        }
        Feature headlineCategory = headlineType.getFeatureByBaseName("category");
        Feature sectionCategory = sectionAnnotationType.getFeatureByBaseName("category");

        FSIterator<AnnotationFS> headlines = cas.getAnnotationIndex(headlineType).iterator();
        while (headlines.hasNext()) {
            AnnotationFS headline = headlines.next();
            int begin = headline.getBegin();
            int end;
            List<AnnotationFS> nextHeadlines = CasUtil.selectFollowing(cas, headlineType, headline, 1);
            if (!nextHeadlines.isEmpty()) {
                AnnotationFS nextHeadline = nextHeadlines.get(0);
                List<AnnotationFS> precedingParagraphs = CasUtil.selectPreceding(cas, paragraphType, nextHeadline, 1);
                // Without a preceding paragraph, fall back to the start of the next headline.
                end = precedingParagraphs.isEmpty() ? nextHeadline.getBegin() : precedingParagraphs.get(0).getEnd();
            } else {
                end = endOfDocumentContent(cas, begin);
            }
            // Never create a section that ends before its own headline does.
            end = Math.max(end, headline.getEnd());
            AnnotationFS section = cas.createAnnotation(sectionAnnotationType, begin, end);
            section.setFeatureValueFromString(sectionCategory, headline.getFeatureValueAsString(headlineCategory));
            cas.addFsToIndexes(section);
        }
    }

    /**
     * Returns the exclusive offset right after the last character of the document that is not
     * whitespace in the sense of the regex class {@code \s}, but never less than
     * {@code lowerBound}.
     */
    private static int endOfDocumentContent(CAS cas, int lowerBound) {
        String text = cas.getDocumentText();
        int end = Math.min(cas.getDocumentAnnotation().getEnd(), text.length());
        while (end > lowerBound) {
            char c = text.charAt(end - 1);
            boolean whitespace = c == ' ' || c == '\t' || c == '\n' || c == 0x0B || c == '\f' || c == '\r';
            if (!whitespace) {
                break;
            }
            end--;
        }
        return end;
    }

    /**
     * Promotes every headline candidate whose format code equals {@code i} and whose text
     * contains at least one ASCII letter to a headline of category {@code UNKNOWN}. Promoted
     * candidates are removed from the candidate index.
     *
     * @param cas CAS holding the headline candidates
     * @param i   format code to look for, as produced by
     *            {@code detectFormatOfHeadlineCandidate}
     * @param z   when {@code true}, headlines are created as {@code Types.HEADLINEG} instead
     *            of {@code Types.HEADLINE}
     */
    private static void detectHeadlinesWithLocalFormat(CAS cas, int i, boolean z) {
        Type candidateType = Types.getType(cas, Types.HEADLINECANDIDATE);
        Type headlineType = Types.getType(cas, Types.HEADLINE);
        if (z) {
            headlineType = Types.getType(cas, Types.HEADLINEG);
        }
        Feature categoryFeature = headlineType.getFeatureByBaseName("category");

        // Collect first and remove afterwards so the candidate index is not modified while iterating.
        List<AnnotationFS> promotedCandidates = new LinkedList<>();
        FSIterator<AnnotationFS> candidates = cas.getAnnotationIndex(candidateType).iterator();
        while (candidates.hasNext()) {
            AnnotationFS candidate = candidates.next();
            if (detectFormatOfHeadlineCandidate(candidate) != i) {
                continue;
            }
            if (!candidate.getCoveredText().matches(".*[A-Za-z].*")) {
                continue;
            }
            AnnotationFS headline = cas.createAnnotation(headlineType, candidate.getBegin(), candidate.getEnd());
            headline.setFeatureValueFromString(categoryFeature, SectionType.UNKNOWN.getName());
            cas.addFsToIndexes(headline);
            promotedCandidates.add(candidate);
        }
        for (AnnotationFS promoted : promotedCandidates) {
            cas.removeFsFromIndexes(promoted);
        }
    }

    /** Headline texts that are used as reference points for the document's headline format. */
    private static final Pattern REFERENCE_HEADLINE_PATTERN = Pattern.compile("(?i).*(labor|diagnose|medikation|epikrise|k.rperliche untersuchung|aufnahmebefund).*");

    /**
     * Determines the format code (0..15) that occurs most often among the headline candidates
     * whose text matches {@link #REFERENCE_HEADLINE_PATTERN}. On ties the highest code wins;
     * consequently 15 is returned when no candidate matches at all.
     *
     * @param cas CAS holding the headline candidates
     * @return the most frequent format code
     */
    private static int detectLocalHeadlineFormat(CAS cas) {
        Type candidateType = Types.getType(cas, Types.HEADLINECANDIDATE);
        List<Integer> observedFormats = new LinkedList<>();
        for (FSIterator<AnnotationFS> candidates = cas.getAnnotationIndex(candidateType).iterator(); candidates.hasNext(); ) {
            AnnotationFS candidate = candidates.next();
            if (REFERENCE_HEADLINE_PATTERN.matcher(candidate.getCoveredText()).matches()) {
                observedFormats.add(detectFormatOfHeadlineCandidate(candidate));
            }
        }

        int bestCount = 0;
        int bestFormat = 0;
        for (int format = 0; format <= 15; format++) {
            int count = Collections.frequency(observedFormats, format);
            // ">=" on purpose: later (higher) codes replace earlier ones with the same count.
            if (count >= bestCount) {
                bestCount = count;
                bestFormat = format;
            }
        }
        return bestFormat;
    }

    /**
     * Encodes the formatting of a headline candidate as a bit set:
     * 1 = italic, 2 = underlined, 4 = ends with colon, 8 = bold.
     *
     * @param annotationFS candidate to inspect
     * @return a value between 0 and 15
     */
    private static int detectFormatOfHeadlineCandidate(AnnotationFS annotationFS) {
        int format = 0;
        // The four weights are distinct bits, so OR-ing them equals adding them.
        format |= isUnderlined(annotationFS) ? 2 : 0;
        format |= isItalic(annotationFS) ? 1 : 0;
        format |= isBold(annotationFS) ? 8 : 0;
        format |= endsWithColon(annotationFS) ? 4 : 0;
        return format;
    }

    /** Text that contains at least one ASCII letter between {@code <u>} and {@code </u>}. */
    private static final Pattern UNDERLINED_PATTERN = Pattern.compile(".*<u>.*[A-Za-z].*</u>.*");

    /**
     * Tells whether the covered text of the annotation contains an underline tag pair with at
     * least one ASCII letter in between.
     *
     * @param annotationFS annotation whose covered text is inspected
     * @return {@code true} if the whole covered text matches the underline pattern
     */
    public static boolean isUnderlined(AnnotationFS annotationFS) {
        String text = annotationFS.getCoveredText();
        return UNDERLINED_PATTERN.matcher(text).matches();
    }

    /** Text that contains at least one ASCII letter between {@code <i>} and {@code </i>}. */
    private static final Pattern ITALIC_PATTERN = Pattern.compile(".*<i>.*[A-Za-z].*</i>.*");

    /**
     * Tells whether the covered text of the annotation contains an italic tag pair with at
     * least one ASCII letter in between.
     *
     * @param annotationFS annotation whose covered text is inspected
     * @return {@code true} if the whole covered text matches the italic pattern
     */
    public static boolean isItalic(AnnotationFS annotationFS) {
        String text = annotationFS.getCoveredText();
        return ITALIC_PATTERN.matcher(text).matches();
    }

    /** Text that contains at least one ASCII letter between {@code <b>} and {@code </b>}. */
    private static final Pattern BOLD_PATTERN = Pattern.compile(".*<b>.*[A-Za-z].*</b>.*");

    /**
     * Tells whether the covered text of the annotation contains a bold tag pair with at least
     * one ASCII letter in between.
     *
     * @param annotationFS annotation whose covered text is inspected
     * @return {@code true} if the whole covered text matches the bold pattern
     */
    public static boolean isBold(AnnotationFS annotationFS) {
        String text = annotationFS.getCoveredText();
        return BOLD_PATTERN.matcher(text).matches();
    }

    /**
     * Tells whether the covered text ends with a colon, either literally or as the last
     * content (apart from spaces) inside a text wrapped in one to three tags on each side.
     *
     * @param annotationFS annotation whose covered text is inspected
     * @return {@code true} if one of the two colon patterns matches the whole covered text
     */
    public static boolean endsWithColon(AnnotationFS annotationFS) {
        String text = annotationFS.getCoveredText();
        if (text.matches(".*?:")) {
            return true;
        }
        return text.matches("(<.*?>){1,3}.*: *?(<.*?>){1,3}");
    }

    /** One to three opening b/i/u tags, any text, one to three closing tags, optional colon. */
    private static final Pattern HEADLINE_MARKUP_PATTERN = Pattern.compile("(<[biu]>){1,3}(.*?)(</[biu]>){1,3}:?");

    /**
     * Creates at most one headline candidate per paragraph. A paragraph that starts with a
     * formatted span (see {@link #HEADLINE_MARKUP_PATTERN}) yields that span as candidate;
     * paragraphs without such a span, or with the span further inside the text, are handed to
     * {@code findCandidateWithHeuristics}. Paragraphs whose formatted span looks like a
     * "Z. n." remark, an "Aktuell:" label or an "ED" date are skipped entirely.
     *
     * @param cas CAS holding the paragraph annotations; receives the candidates
     */
    private static void generateHeadlineCandidates(CAS cas) {
        FSIterator<AnnotationFS> paragraphs = cas.getAnnotationIndex(Types.getType(cas, Types.PARAGRAPH)).iterator();
        while (paragraphs.hasNext()) {
            AnnotationFS paragraph = paragraphs.next();
            String paragraphText = paragraph.getCoveredText();
            Matcher markup = HEADLINE_MARKUP_PATTERN.matcher(paragraphText);
            if (!markup.find()) {
                findCandidateWithHeuristics(cas, paragraph);
                continue;
            }
            String formattedSpan = markup.group();
            boolean excluded = formattedSpan.matches(".*[Zz]\\. ?[Nn]\\..*")
                    || formattedSpan.matches("(?i).*Aktuell:.*")
                    || formattedSpan.matches(".*ED ?(\\d{1,2}.)?\\d{2,4}.*");
            if (excluded) {
                continue;
            }
            if (paragraphText.startsWith(formattedSpan)) {
                int candidateBegin = paragraph.getBegin() + markup.start();
                int candidateEnd = paragraph.getBegin() + markup.end();
                cas.addFsToIndexes(cas.createAnnotation(Types.getType(cas, Types.HEADLINECANDIDATE), candidateBegin, candidateEnd));
            } else {
                findCandidateWithHeuristics(cas, paragraph);
            }
        }
    }

    /**
     * Creates a headline candidate at the start of a paragraph that has no leading formatted
     * span. The candidate runs from the paragraph start up to and including the first
     * {@code ':'} or {@code ';'}; if that delimiter is at index 0 or at index 100 or later,
     * the candidate is limited to at most 61 characters instead. Nothing is created when the
     * paragraph contains no delimiter, starts with a list marker, looks like a "Z. n." remark
     * or an "ED" date, or when the candidate would be a single capital letter plus colon.
     *
     * <p>The colon is now the plain literal {@code ":"} instead of a reference to an unrelated
     * UIMA CPM constant that merely holds the same value.
     *
     * @param cas          CAS that receives the candidate
     * @param annotationFS paragraph to inspect
     */
    private static void findCandidateWithHeuristics(CAS cas, AnnotationFS annotationFS) {
        String text = annotationFS.getCoveredText();
        if (text.startsWith("-?") || text.startsWith("??") || text.startsWith("-") || text.matches(".*[Zz]\\. ?[Nn]\\..*") || text.matches(".*ED ?(\\d{1,2}.)?\\d{2,4}.*")) {
            return;
        }
        int searchFrom = 0;
        // NOTE(review): this marker starts with "-", so the guard above has already returned
        // and the offset is never applied; kept unchanged pending confirmation of the intent.
        if (text.startsWith("--- begin table ---\n")) {
            searchFrom = 20;
        }
        int colonIndex = text.indexOf(":", searchFrom);
        int semicolonIndex = text.indexOf(";", searchFrom);
        // Position of whichever delimiter comes first; -1 if neither occurs.
        int delimiterIndex = (colonIndex <= -1 || semicolonIndex <= -1) ? Math.max(colonIndex, semicolonIndex) : Math.min(colonIndex, semicolonIndex);
        if (delimiterIndex <= -1) {
            return;
        }
        int begin = annotationFS.getBegin();
        int length;
        if (delimiterIndex <= 0 || delimiterIndex >= 100) {
            length = Math.min(61, annotationFS.getEnd() - (begin + searchFrom));
        } else {
            length = delimiterIndex + 1;
        }
        AnnotationFS candidate = cas.createAnnotation(Types.getType(cas, Types.HEADLINECANDIDATE), begin, begin + searchFrom + length);
        if (candidate.getCoveredText().matches("[A-Z]:")) {
            return;
        }
        cas.addFsToIndexes(candidate);
    }

    /**
     * Splits the trimmed document text at double line breaks and indexes one paragraph
     * annotation of category {@code UNKNOWN} per trimmed chunk. Chunks that consist solely of
     * a bold "Medizinische Universit.tsklinik" line are located but not annotated.
     *
     * @param cas CAS whose document text is split; receives the paragraph annotations
     */
    private static void addParagraphAnnotations(CAS cas) {
        String text = cas.getDocumentText();
        String[] chunks = text.trim().split("\\n\\n");
        int searchFrom = 0;
        for (int k = 0; k < chunks.length; k++) {
            String chunk = chunks[k].trim();
            int begin = text.indexOf(chunk, searchFrom);
            int end = begin + chunk.length();
            // Continue the search behind this chunk so repeated texts map to later offsets.
            searchFrom = end + 1;
            if (chunk.matches("<b>.*?Medizinische Universit.tsklinik.*?</b>")) {
                continue;
            }
            Type paragraphType = Types.getType(cas, Types.PARAGRAPH);
            AnnotationFS paragraph = cas.createAnnotation(paragraphType, begin, end);
            Type categoryOwner = Types.getType(cas, Types.PARAGRAPH);
            paragraph.setFeatureValueFromString(categoryOwner.getFeatureByBaseName("category"), SectionType.UNKNOWN.getName());
            cas.addFsToIndexes(paragraph);
        }
    }

    /**
     * Writes the covered text of every indexed {@code Types.SECTION} annotation to
     * {@code <str>/<category>/<letterId>.txt}. If that file already exists, the first free
     * name of the form {@code <letterId>_<n>.txt} (n = 1, 2, ...) is used instead, so existing
     * files are never overwritten.
     *
     * <p>The category is read via {@code getFeatureByBaseName("category")}, like everywhere
     * else in this class, instead of relying on the feature's position in the type's
     * feature list.
     *
     * <p>NOTE(review): the text is written with the two-argument
     * {@code FileUtils.writeStringToFile}; which charset that uses is not visible here -
     * confirm, and consider passing an explicit charset.
     *
     * @param letter letter that provides the CAS and the id used as file name
     * @param str    root output directory
     * @throws IOException if a section file cannot be written
     */
    private static void writeSectionsToDisk(Letter letter, String str) throws IOException {
        File outputRoot = new File(str);
        CAS cas = letter.getCas();
        Type sectionAnnotationType = Types.getType(cas, Types.SECTION);
        Feature categoryFeature = sectionAnnotationType.getFeatureByBaseName("category");
        FSIterator<AnnotationFS> sections = cas.getAnnotationIndex(sectionAnnotationType).iterator();
        while (sections.hasNext()) {
            AnnotationFS section = sections.next();
            String category = section.getFeatureValueAsString(categoryFeature);
            String sectionText = section.getCoveredText();
            File categoryDirectory = new File(outputRoot, category);
            File target = new File(categoryDirectory, letter.getId() + ".txt");
            // Pick the first unused "<id>_<n>.txt" name when the plain name is taken.
            for (int suffix = 1; target.exists(); suffix++) {
                target = new File(categoryDirectory, letter.getId() + "_" + suffix + ".txt");
            }
            FileUtils.writeStringToFile(target, sectionText);
        }
    }
}
