package de.uniwue.mk.kall.athen.projectExplorer.conversion;

import de.uniwue.mk.kall.athen.appDelegation.util.ApplicationUtil;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.util.FileUtils;

/* loaded from: input_file:de/uniwue/mk/kall/athen/projectExplorer/conversion/MedTxtToApplicationXMIFormatConverter.class */
/**
 * Converts a text file in the {@link EFormat#MEDTXT} format into an
 * {@link EFormat#APPLICATION_XMI} document.
 *
 * <p>Every input line that begins with one of the markers registered in the constructor
 * (for example {@code #d} or {@code #h}) yields one annotation of the type {@value #OCR_TYPE}
 * whose {@code RegionType} feature is set to the region name mapped to that marker. The
 * document text is stored unchanged, i.e. the markers remain part of the text and all
 * annotation offsets refer to the original file content.
 */
public class MedTxtToApplicationXMIFormatConverter extends AFormatConverter {
    private static final String OCR_TYPE = "de.uniwue.kallimachos.ocr.narragonien.type.Segmentation";

    /** Base name of the feature that receives the region name. */
    private static final String REGION_TYPE_FEATURE = "RegionType";

    /** Marker for an initial: only the single character following it is annotated. */
    private static final String INITIAL_MARKER = "#*";

    /** Maps a line marker to the region name written into the {@code RegionType} feature. */
    private final Map<String, String> ocrTypeMap;

    /** Creates the converter and registers the known line markers. */
    public MedTxtToApplicationXMIFormatConverter() {
        super(EFormat.MEDTXT, EFormat.APPLICATION_XMI);
        this.ocrTypeMap = new HashMap<String, String>();
        this.ocrTypeMap.put("#d", "Motto");
        this.ocrTypeMap.put("#i", "Holzschnitt");
        this.ocrTypeMap.put("#s", "HolzschnittBeischriften");
        // NOTE(review): the value below contains U+FFFD, presumably a mis-encoded
        // "Kapitelueberschrift" (with u-umlaut). It is kept as-is because other code may
        // match on the stored feature value - confirm before correcting it.
        this.ocrTypeMap.put("#h", "Kapitel�berschrift");
        this.ocrTypeMap.put("#m", "Marginalie");
        this.ocrTypeMap.put("#e", "Kolumnentitel");
        this.ocrTypeMap.put("#k", "Bogensignatur");
        this.ocrTypeMap.put(INITIAL_MARKER, "Initiale");
        this.ocrTypeMap.put("#u", "Umbruch");
        this.ocrTypeMap.put("#l", "<unknown>");
        this.ocrTypeMap.put("#r", "<unknown>");
    }

    /**
     * Reads {@code file} as UTF-8, creates the region annotations and stores the text in a
     * newly created CAS.
     *
     * @param file the text file to convert
     * @return an XMI document wrapping the CAS; if the file cannot be read the stack trace is
     *         printed and the returned document wraps a CAS without document text
     */
    @Override // de.uniwue.mk.kall.athen.projectExplorer.conversion.AFormatConverter
    public ADocument convertFormat(File file) {
        CAS createCAS = ApplicationUtil.createCAS();
        try {
            String text = FileUtils.file2String(file, "UTF-8");
            // Annotations are created first and the text is set afterwards, as before.
            createCAS.setDocumentText(createOCRAnnotations(createCAS, text));
        } catch (IOException e) {
            // Best effort: the caller still receives a (text-less) document.
            e.printStackTrace();
        }
        return new XMIDocument(createCAS, EFormat.APPLICATION_XMI);
    }

    /**
     * Adds one annotation per marked line of {@code str} to {@code cas}.
     *
     * <p>For the initial marker the annotation covers exactly the one character after the
     * marker. For every other marker it covers the text after the marker up to the last
     * non-whitespace character of the line; lines with nothing but whitespace after the
     * marker are skipped.
     *
     * @param cas the CAS that receives the annotations
     * @param str the complete document text
     * @return {@code str}, unchanged
     */
    private String createOCRAnnotations(CAS cas, String str) {
        Type segmentationType = cas.getTypeSystem().getType(OCR_TYPE);
        Feature regionTypeFeature = segmentationType.getFeatureByBaseName(REGION_TYPE_FEATURE);
        int lineStart = 0;
        for (String line : str.split("\n")) {
            String marker = findMarker(line);
            if (marker != null) {
                // The marker stays in the text: the former String.replaceFirst call that
                // looked like it blanked the marker discarded its result and had no effect.
                int begin = lineStart + marker.length();
                int end;
                if (INITIAL_MARKER.equals(marker)) {
                    // Only annotate if the line really has a character after the marker.
                    end = line.length() > marker.length() ? begin + 1 : begin;
                } else {
                    // Scan the line itself rather than the whole text, so a last line
                    // without a trailing line break cannot run past the end of the text.
                    end = lineStart + trimmedEnd(line, marker.length());
                }
                if (begin < end) {
                    AnnotationFS region = cas.createAnnotation(segmentationType, begin, end);
                    region.setFeatureValueFromString(regionTypeFeature, this.ocrTypeMap.get(marker));
                    cas.addFsToIndexes(region);
                }
            }
            // + 1 accounts for the '\n' removed by split.
            lineStart += line.length() + 1;
        }
        return str;
    }

    /** Returns the registered marker that {@code line} starts with, or {@code null} if none. */
    private String findMarker(String line) {
        if (!line.startsWith("#")) {
            return null;
        }
        for (String marker : this.ocrTypeMap.keySet()) {
            if (line.startsWith(marker)) {
                return marker;
            }
        }
        return null;
    }

    /**
     * Returns the index just past the last non-whitespace character of {@code line}, never
     * going below {@code from}.
     */
    private static int trimmedEnd(String line, int from) {
        int end = line.length();
        while (end > from && Character.isWhitespace(line.charAt(end - 1))) {
            end--;
        }
        return end;
    }
}
