package de.uniwue.dw.segmentation;

import de.uniwue.dw.uima.types.Types;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;

/* loaded from: input_file:libs/padawan-ie-algorithm-0.5.1-jar-with-dependencies.jar:de/uniwue/dw/segmentation/SimpleSegmentation.class */
/**
 * Cuts the document text of a CAS into {@code Types.SEGMENT} annotations, using
 * {@code Types.SEGMENTSTOP} annotations as the separators between segments.
 *
 * <p>All methods are static; the class keeps no state of its own.
 */
public class SimpleSegmentation {

    /**
     * Matches a candidate segment that carries no content: empty or whitespace-only text,
     * or exactly one of the punctuation marks {@code ; : , . ! ?}. Compiled once because it
     * is evaluated for every candidate segment.
     */
    private static final Pattern CONTENT_FREE_SEGMENT = Pattern.compile("(\\s*|;|:|,|\\.|!|\\?)");

    /**
     * Runs the complete segmentation pipeline on {@code cas} and adds the resulting segment
     * annotations to its indexes.
     *
     * <p>The {@code SegmentationPhaseOne} steps are invoked in a fixed order before and after
     * the stop detection. NOTE(review): their exact effects are not visible from this class;
     * judging by their names they annotate abbreviations, dates, numbers and blocks, and
     * afterwards discard stops that fall inside such spans - confirm against
     * {@code SegmentationPhaseOne}.
     *
     * @param cas the CAS whose document text is segmented; it is modified in place
     * @param str regular expression; every match in the document text becomes a segment stop
     * @throws IOException declared for callers; presumably raised by one of the
     *         {@code SegmentationPhaseOne} steps
     */
    public static void segmentize(CAS cas, String str) throws IOException {
        SegmentationPhaseOne.findAbbreviations(cas);
        SegmentationPhaseOne.findDates(cas);
        SegmentationPhaseOne.findNumbers(cas);
        SegmentationPhaseOne.findBlocks(cas);
        findSegmentStops(cas, str);
        SegmentationPhaseOne.filterWrongStops(cas);
        for (AnnotationFS segment : createSegments(cas)) {
            cas.addFsToIndexes(segment);
        }
    }

    /**
     * Adds one {@code Types.SEGMENTSTOP} annotation to the CAS indexes for every match of
     * the regular expression {@code str} in the document text.
     *
     * @param cas the CAS to annotate
     * @param str regular expression describing a segment stop
     */
    private static void findSegmentStops(CAS cas, String str) {
        Type stopType = Types.getType(cas, Types.SEGMENTSTOP);
        Matcher stopMatcher = Pattern.compile(str).matcher(cas.getDocumentText());
        while (stopMatcher.find()) {
            AnnotationFS stop = cas.createAnnotation(stopType, stopMatcher.start(), stopMatcher.end());
            cas.addFsToIndexes(stop);
        }
    }

    /**
     * Builds the segments lying between the segment stops currently indexed in {@code cas},
     * plus the remainder after the last stop. The segments are created but NOT added to the
     * indexes; that is left to the caller.
     *
     * <p>Candidates that, after trimming, consist only of whitespace or a single punctuation
     * mark are dropped.
     *
     * @param cas the CAS holding the document text and the segment stops
     * @return the segments in the order in which the stop index was iterated
     */
    protected static List<AnnotationFS> createSegments(CAS cas) {
        List<AnnotationFS> segments = new LinkedList<>();
        Type segmentType = Types.getType(cas, Types.SEGMENT);
        Type stopType = Types.getType(cas, Types.SEGMENTSTOP);

        // Offset at which the next segment may start.
        int cursor = 0;
        FSIterator<AnnotationFS> stops = cas.getAnnotationIndex(stopType).iterator();
        while (stops.hasNext()) {
            AnnotationFS stop = stops.next();
            addSegmentIfMeaningful(cas, segmentType, cursor, stop.getBegin(), segments);
            // Always move past the stop, even when no text precedes it (stop at the cursor,
            // adjacent stops) so that the stop text never leaks into the following segment.
            // Math.max keeps the cursor from moving backwards when stops overlap.
            cursor = Math.max(cursor, stop.getEnd());
        }

        // Whatever follows the last stop forms the final segment.
        addSegmentIfMeaningful(cas, segmentType, cursor, cas.getDocumentText().length(), segments);
        return segments;
    }

    /**
     * Creates a trimmed segment over {@code [begin, end)} and appends it to {@code segments}
     * unless the span is empty or its covered text is content-free.
     */
    private static void addSegmentIfMeaningful(
            CAS cas, Type segmentType, int begin, int end, List<AnnotationFS> segments) {
        if (begin >= end) {
            return;
        }
        AnnotationFS candidate = cas.createAnnotation(segmentType, begin, end);
        SegmentationPhaseOne.trimSegment(candidate, segmentType);
        if (!CONTENT_FREE_SEGMENT.matcher(candidate.getCoveredText()).matches()) {
            segments.add(candidate);
        }
    }
}
