package de.uniwue.mk.kall.formatconversion.teireader.reader;

import de.uniwue.mk.kall.formatconversion.teireader.struct.XMLAttribute;
import de.uniwue.mk.kall.formatconversion.teireader.struct.XMLDocument;
import de.uniwue.mk.kall.formatconversion.teireader.struct.XMLElement;
import de.uniwue.mk.kall.formatconversion.xmlFormat.detection.XMLFormatDetector;
import de.uniwue.mk.kall.formatconversion.xmlFormat.detection.XMLPostprocessingFactory;
import de.uniwue.mk.kallimachos.tcf.util.TypesystemConstants;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.lang.reflect.MalformedParametersException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import org.apache.commons.io.IOUtils;
import org.apache.uima.cas.ArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.TypeDescription;
import org.apache.uima.resource.metadata.TypePriorities;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
import org.apache.uima.util.CasCreationUtils;
import org.springframework.util.AntPathMatcher;
import org.springframework.util.Log4jConfigurer;
import org.xml.sax.SAXException;

/* loaded from: input_file:libs/TEIReader-0.0.1-SNAPSHOT-jar-with-dependencies.jar:de/uniwue/mk/kall/formatconversion/teireader/reader/TEIReader.class */
/**
 * Converts XML (TEI) documents into UIMA CASes.
 *
 * <p>The markup is stripped from the input with a small hand-written scanner; the remaining
 * character data becomes the CAS document text. Every element is stored as a generic XML
 * annotation (tag name, attribute string, parent and children links) and, when the CAS type
 * system contains a type named {@code TEiReaderConstants.TEI_TYPES_PREFIX + elementName}, as an
 * additional element-specific annotation carrying one feature per attribute.
 *
 * <p>Instances are stateful: the type system used for newly created CASes grows whenever a
 * document is read with type inference enabled. No synchronization is performed.
 */
public class TEIReader {
    /** Message used whenever the tag structure of the input cannot be matched up. */
    private static final String MALFORMED_MESSAGE = "XML document probably malformed, can not parse annotations!";

    /** Character encoding used to decode every input stream. */
    private final String encoding = "UTF-8";

    /** Type system used for newly created CASes; may be {@code null} (see {@link #TEIReader(boolean)}). */
    private TypeSystemDescription currentTS;

    /** Creates a reader that starts out with the standard type system of {@code TEIReaderUtil}. */
    public TEIReader() {
        this.currentTS = TEIReaderUtil.createStandardTypesystem();
    }

    /**
     * Creates a reader.
     *
     * @param z {@code true} to start with the standard type system of {@code TEIReaderUtil};
     *          {@code false} to start without any type system
     */
    public TEIReader(boolean z) {
        this.currentTS = z ? TEIReaderUtil.createStandardTypesystem() : null;
    }

    /**
     * Converts every file of a directory whose name ends with the XML file extension into an
     * XMI file and finally writes the accumulated type system to
     * {@code generatedTypesystem.xml} in the output directory.
     *
     * @param file  directory containing the XML documents
     * @param file2 existing directory receiving {@code <name>.xmi} files and the type system
     * @param z     whether the type system is extended with types inferred from each document
     * @throws IllegalArgumentException if one of the arguments is not a directory
     * @throws FileNotFoundException if an XMI output file cannot be opened
     * @throws UncheckedIOException if the input directory cannot be listed, a document cannot
     *         be read or an XMI output stream cannot be closed
     * @throws SAXException if serialization fails
     * @throws ResourceInitializationException if a CAS cannot be created
     */
    public void batchConvertDocuments(File file, File file2, boolean z) throws ResourceInitializationException, FileNotFoundException, SAXException {
        if (!file.isDirectory() || !file2.isDirectory()) {
            throw new IllegalArgumentException("Please provide 2 folder!!");
        }
        File[] candidates = file.listFiles();
        if (candidates == null) {
            // listFiles() yields null (not an empty array) when the directory cannot be read.
            throw new UncheckedIOException(new IOException("Unable to list the contents of " + file));
        }
        for (File candidate : candidates) {
            if (!candidate.getName().endsWith(Log4jConfigurer.XML_FILE_EXTENSION)) {
                continue;
            }
            System.out.println(candidate);
            Pair<CAS, XMLDocument> converted = readDocument(candidate, z);
            CAS cas = converted.getFirst();
            postprocess(cas, converted.getSecond());
            File target = new File(file2, candidate.getName() + ".xmi");
            try (FileOutputStream out = new FileOutputStream(target)) {
                XmiCasSerializer.serialize(cas, out);
            } catch (FileNotFoundException e) {
                // Part of the declared contract; pass it on unchanged.
                throw e;
            } catch (IOException e) {
                throw new UncheckedIOException("Could not finish writing " + target, e);
            }
        }
        writeGeneratedTypesystem(file2);
    }

    /**
     * Writes the current type system into the given directory. I/O problems are reported on
     * standard error only so that an otherwise successful batch run is not aborted.
     */
    private void writeGeneratedTypesystem(File outputDirectory) throws SAXException {
        if (this.currentTS == null) {
            return;
        }
        File target = new File(outputDirectory.getAbsolutePath() + "/generatedTypesystem.xml");
        try (FileOutputStream out = new FileOutputStream(target)) {
            this.currentTS.toXML(out);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Runs the postprocessor that the format detector selects for the given CAS. */
    private void postprocess(CAS cas, XMLDocument xMLDocument) {
        XMLPostprocessingFactory.createPostprocessor(XMLFormatDetector.detectFormat(cas)).postprocessDocument(cas, xMLDocument);
    }

    /**
     * Reads a document from a file into a newly created CAS.
     *
     * @param file the XML file; also recorded as origin of the returned document
     * @param z    whether the type system is extended with types inferred from the document
     * @return the populated CAS together with the parsed document
     * @throws UncheckedIOException if the file cannot be opened, read or closed
     * @throws ResourceInitializationException if the CAS cannot be created
     */
    public Pair<CAS, XMLDocument> readDocument(File file, boolean z) throws ResourceInitializationException {
        try (FileInputStream in = new FileInputStream(file)) {
            return readDocument(in, z, file);
        } catch (IOException e) {
            throw new UncheckedIOException("Could not read " + file, e);
        }
    }

    /**
     * Reads a document from a file into the supplied CAS.
     *
     * @param file the XML file; also recorded as origin of the returned document
     * @param cas  the CAS to populate
     * @return the populated CAS together with the parsed document
     * @throws UncheckedIOException if the file cannot be opened, read or closed
     * @throws ResourceInitializationException if a CAS has to be created and that fails
     */
    public Pair<CAS, XMLDocument> readDocument(File file, CAS cas) throws ResourceInitializationException {
        try (FileInputStream in = new FileInputStream(file)) {
            return readDocument(in, cas, file);
        } catch (IOException e) {
            throw new UncheckedIOException("Could not read " + file, e);
        }
    }

    /**
     * Reads a document from a stream into the supplied CAS. The stream is not closed.
     *
     * @param inputStream source of the XML text
     * @param cas         the CAS to populate
     * @param file        origin to record on the document, or {@code null}
     * @return the populated CAS together with the parsed document
     * @throws UncheckedIOException if the stream cannot be read
     * @throws ResourceInitializationException if a CAS has to be created and that fails
     */
    public Pair<CAS, XMLDocument> readDocument(InputStream inputStream, CAS cas, File file) throws ResourceInitializationException {
        XMLDocument document = parseDocument(inputStream, file);
        return new Pair<>(createCASFromXML(document, null, cas), document);
    }

    /**
     * Reads a document from a stream into a newly created CAS. The stream is not closed.
     *
     * @param inputStream source of the XML text
     * @param z           whether the reader's type system is first merged with types inferred
     *                    from the document
     * @param file        origin to record on the document, or {@code null}
     * @return the populated CAS together with the parsed document
     * @throws UncheckedIOException if the stream cannot be read
     * @throws ResourceInitializationException if merging the type systems or creating the CAS fails
     */
    public Pair<CAS, XMLDocument> readDocument(InputStream inputStream, boolean z, File file) throws ResourceInitializationException {
        XMLDocument document = parseDocument(inputStream, file);
        if (z) {
            this.currentTS = CasCreationUtils.mergeTypeSystems(Arrays.asList(inferTypesystem(document), this.currentTS));
        }
        return new Pair<>(createCASFromXML(document, this.currentTS, null), document);
    }

    /**
     * Parses the stream and records the origin. A read failure is rethrown unchecked instead of
     * being printed and followed by work on a {@code null} document.
     */
    private XMLDocument parseDocument(InputStream inputStream, File origin) {
        try {
            XMLDocument document = readTextAndAnnotations(inputStream);
            if (origin != null) {
                document.setOrigin(origin);
            }
            return document;
        } catch (IOException e) {
            throw new UncheckedIOException("Could not read the XML input" + (origin != null ? " " + origin : ""), e);
        }
    }

    /**
     * Derives a type system from a document: one annotation type per element name with one
     * feature per distinct attribute seen on elements of that name.
     */
    private TypeSystemDescription inferTypesystem(XMLDocument xMLDocument) {
        TypeSystemDescription_impl inferred = new TypeSystemDescription_impl();
        Map<String, Set<XMLAttribute>> attributesByElement = new HashMap<>();
        for (XMLElement element : xMLDocument.getElements()) {
            Set<XMLAttribute> known = attributesByElement.get(element.getName());
            if (known == null) {
                attributesByElement.put(element.getName(), new HashSet<>(element.getAttributes()));
            } else {
                known.addAll(element.getAttributes());
            }
        }
        for (Map.Entry<String, Set<XMLAttribute>> entry : attributesByElement.entrySet()) {
            TypeDescription type = inferred.addType(TEiReaderConstants.TEI_TYPES_PREFIX + entry.getKey(), "", CAS.TYPE_NAME_ANNOTATION);
            for (XMLAttribute attribute : entry.getValue()) {
                if (attribute.getName() != null) {
                    type.addFeature(validateFeatureName(attribute.getName()), "", attribute.getType().getUimaCorrespondingType());
                }
            }
        }
        return inferred;
    }

    /**
     * Turns an attribute name into a feature base name by replacing every character that is
     * neither a letter, a digit nor an underscore with an underscore. This covers the
     * {@code ':'} and {@code '-'} of qualified and hyphenated names as well as other characters
     * XML permits in names (for example {@code '.'}).
     */
    private String validateFeatureName(String str) {
        StringBuilder sanitized = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            sanitized.append(Character.isLetterOrDigit(c) || c == '_' ? c : '_');
        }
        return sanitized.toString();
    }

    /**
     * Fills a CAS with the text and annotations of a parsed document.
     *
     * @param xMLDocument           the parsed document
     * @param typeSystemDescription type system for a new CAS; only used when {@code cas} is {@code null}
     * @param cas                   CAS to populate, or {@code null} to create one
     * @return the populated CAS
     */
    private CAS createCASFromXML(XMLDocument xMLDocument, TypeSystemDescription typeSystemDescription, CAS cas) throws ResourceInitializationException {
        CAS target = cas != null ? cas : CasCreationUtils.createCas(typeSystemDescription, (TypePriorities) null, (FsIndexDescription[]) null);
        target.setDocumentText(xMLDocument.getDocText());
        Type genericType = target.getTypeSystem().getType(TEiReaderConstants.DEFAULT_TYPESYSTEM_XML_TYPE);

        Map<XMLElement, AnnotationFS> annotationByElement = new HashMap<>();
        for (XMLElement element : xMLDocument.getElements()) {
            AnnotationFS generic = target.createAnnotation(genericType, element.getBegin(), element.getEnd());
            setDefaultFeatures(generic, element);
            annotationByElement.put(element, generic);
            target.addFsToIndexes(generic);
            addSpecificAnnotation(target, element);
        }
        if (annotationByElement.isEmpty()) {
            return target;
        }

        // NOTE(review): STANFORD_PARSE_PARENT is presumably the decompiler's resolution of a
        // same-valued string constant of the generic XML type - confirm against the type system.
        Feature parentFeature = genericType.getFeatureByBaseName(TypesystemConstants.STANFORD_PARSE_PARENT);
        Feature childrenFeature = genericType.getFeatureByBaseName("Children");

        // Children are gathered per parent first so that each children array is allocated
        // exactly once instead of being reallocated and copied for every single child.
        Map<XMLElement, List<AnnotationFS>> childrenByParent = new HashMap<>();
        for (XMLElement element : xMLDocument.getElements()) {
            XMLElement parent = element.getParent();
            if (parent == null) {
                continue;
            }
            AnnotationFS child = annotationByElement.get(element);
            // Null when the parent never made it into the element list (e.g. it was never closed).
            AnnotationFS parentAnnotation = annotationByElement.get(parent);
            child.setFeatureValue(parentFeature, parentAnnotation);
            if (parentAnnotation != null) {
                List<AnnotationFS> siblings = childrenByParent.get(parent);
                if (siblings == null) {
                    siblings = new ArrayList<>();
                    childrenByParent.put(parent, siblings);
                }
                siblings.add(child);
            }
        }
        for (Map.Entry<XMLElement, List<AnnotationFS>> entry : childrenByParent.entrySet()) {
            List<AnnotationFS> children = entry.getValue();
            ArrayFS childArray = target.createArrayFS(children.size());
            for (int i = 0; i < children.size(); i++) {
                childArray.set(i, children.get(i));
            }
            annotationByElement.get(entry.getKey()).setFeatureValue(childrenFeature, childArray);
        }
        return target;
    }

    /**
     * Adds the element-specific annotation if the CAS type system has a type for the element.
     * Attributes without a name, or without a corresponding feature on that type, are skipped.
     */
    private void addSpecificAnnotation(CAS cas, XMLElement element) {
        Type specificType = cas.getTypeSystem().getType(TEiReaderConstants.TEI_TYPES_PREFIX + element.getName());
        if (specificType == null) {
            return;
        }
        AnnotationFS specific = cas.createAnnotation(specificType, element.getBegin(), element.getEnd());
        for (XMLAttribute attribute : element.getAttributes()) {
            if (attribute.getName() == null) {
                continue;
            }
            Feature feature = specificType.getFeatureByBaseName(validateFeatureName(attribute.getName()));
            if (feature == null) {
                // The type was defined without this attribute (e.g. inference was switched off).
                continue;
            }
            specific.setFeatureValueFromString(feature, attribute.getValue());
        }
        cas.addFsToIndexes(specific);
    }

    /**
     * Stores the tag name and the attributes of an element on its generic annotation. The
     * attributes are flattened to {@code name=value} pairs, each followed by
     * {@code ExternalResourceFactory.PREFIX_SEPARATOR}.
     */
    private void setDefaultFeatures(AnnotationFS annotationFS, XMLElement xMLElement) {
        Type type = annotationFS.getType();
        Feature tagNameFeature = type.getFeatureByBaseName(TEiReaderConstants.DEFAULT_TYPESYSTEM_XML_TAGNAME_FEATURE);
        Feature attributesFeature = type.getFeatureByBaseName(TEiReaderConstants.DEFAULT_TYPESYSTEM_XML_ATTRIBUTES_FEATURE);
        annotationFS.setFeatureValueFromString(tagNameFeature, xMLElement.getName());
        StringBuilder flattened = new StringBuilder();
        for (XMLAttribute attribute : xMLElement.getAttributes()) {
            flattened.append(attribute.getName()).append("=").append(attribute.getValue()).append(ExternalResourceFactory.PREFIX_SEPARATOR);
        }
        annotationFS.setFeatureValueFromString(attributesFeature, flattened.toString());
    }

    /**
     * Splits the input into plain text and elements with offsets into that text.
     *
     * <p>This is a minimal scanner, not an XML parser: everything between {@code '<'} and the
     * next {@code '>'} is a tag, entity references are copied verbatim, and comments, processing
     * instructions and declarations are treated like opening tags. Elements that are still open
     * at the end of the input are not part of the result.
     *
     * @throws MalformedParametersException if a closing tag does not match the innermost open element
     * @throws IOException if the stream cannot be read
     */
    private XMLDocument readTextAndAnnotations(InputStream inputStream) throws IOException {
        String raw = IOUtils.toString(inputStream, this.encoding);
        StringBuilder plainText = new StringBuilder();
        StringBuilder tag = new StringBuilder();
        Deque<XMLElement> openElements = new ArrayDeque<>();
        ArrayList<XMLElement> finished = new ArrayList<>();
        boolean insideTag = false;
        boolean openingTag = false;

        for (int pos = 0; pos < raw.length(); pos++) {
            char c = raw.charAt(pos);
            if (c == '<') {
                insideTag = true;
                openingTag = pos + 1 < raw.length() && raw.charAt(pos + 1) != '/';
            } else if (c == '>' && insideTag) {
                insideTag = false;
                // Offsets refer to the markup-free text collected so far.
                int offset = plainText.length();
                String content = tag.toString();
                if (!openingTag) {
                    // Closing tag: content is "/name" and must match the innermost open element.
                    XMLElement element = openElements.poll();
                    if (element == null || content.isEmpty() || !element.getName().equals(content.substring(1))) {
                        throw new MalformedParametersException(MALFORMED_MESSAGE);
                    }
                    if (!openElements.isEmpty()) {
                        element.setParent(openElements.peek());
                    }
                    element.setEnd(offset);
                    finished.add(element);
                } else if (content.endsWith(AntPathMatcher.DEFAULT_PATH_SEPARATOR)) {
                    // Self-closing tag: an empty element at the current offset.
                    XMLElement element = new XMLElement(content.substring(0, content.length() - 1), offset, offset);
                    if (!openElements.isEmpty()) {
                        element.setParent(openElements.peek());
                    }
                    finished.add(element);
                } else {
                    openElements.push(new XMLElement(content, offset, offset));
                }
                tag.setLength(0);
            } else if (insideTag) {
                tag.append(c);
            } else {
                // Includes a '>' outside of any tag, which is ordinary character data.
                plainText.append(c);
            }
        }
        return new XMLDocument(finished, plainText.toString());
    }
}
