package de.sfb833.a4.RFTagger;

import de.sfb833.a4.RFTagger.lemmatizer.Lemmatizer;
import de.sfb833.a4.RFTagger.lemmatizer.LemmatizerFactory;
import de.sfb833.a4.RFTagger.tagcorrector.NoSuchTagCorrectorException;
import de.sfb833.a4.RFTagger.tagcorrector.TagCorrector;
import de.sfb833.a4.RFTagger.tagcorrector.TagCorrectorFactory;
import de.sfb833.a4.RFTagger.tagsetconv.ConverterFactory;
import de.sfb833.a4.RFTagger.tagsetconv.NoSuchTagsetException;
import de.sfb833.a4.RFTagger.tagsetconv.TagsetConverter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/* loaded from: input_file:de/sfb833/a4/RFTagger/TaggerCliApp.class */
/**
 * Command-line front end ("rftj") for the RFTagger Java interface.
 *
 * <p>Input is read line by line from a file or standard input. Every non-blank line contributes
 * one token to the current sentence; a blank line (or end of input) ends the sentence, which is
 * then tagged and written to a file or standard output as tab-separated columns.
 */
public class TaggerCliApp {
    /** Version string shown in the help text. */
    public static final String VERSION = "0.2.1";

    /**
     * Prints usage, option help, version and license information to standard error and
     * terminates the JVM with exit status 1. This method does not return.
     *
     * @param options the command-line options to describe
     */
    public static void helpAndQuit(Options options) {
        HelpFormatter helpFormatter = new HelpFormatter();

        // Render the generated usage line into a buffer first so that the positional
        // arguments (which HelpFormatter knows nothing about) can be appended to it.
        StringWriter usageBuffer = new StringWriter();
        PrintWriter usageWriter = new PrintWriter(usageBuffer);
        helpFormatter.printUsage(usageWriter, 50, "rftj", options);
        usageWriter.flush();
        String usage = usageBuffer.toString().trim().replaceFirst(".+rftj", "rftj") + " parfile [infile] [[outfile]]";

        PrintWriter helpWriter = new PrintWriter(System.err);
        helpFormatter.printHelp(helpWriter, 80, usage, "", options, 0, 1, "");
        helpWriter.flush();

        System.err.println();
        System.err.println("rftj always uses UTF8 parfiles. Do convert lexicon files to UTF8, too!");
        System.err.println("The encoding used for other IO depends on your LOCALE setting.");
        System.err.println();
        System.err.println("rftj version " + VERSION);
        System.err.println();
        System.err.println("Native library: " + LibraryBinaryFile.getLibraryName() + LibraryBinaryFile.getLibraryFileSuffix());
        System.err.println();
        System.err.println("RFTagger Copyright: Helmut Schmid and Florian Laws,");
        System.err.println("RFTagger is freely available for education, research and other");
        System.err.println("non-commercial purposes.");
        System.err.println();
        System.err.println("Java Interface Copyright: Ramon Ziai and Niels Ott,");
        System.err.println("License CC BY-NC-SA");
        System.err.println();
        System.exit(1);
    }

    /**
     * Prints a message to standard error and terminates the JVM with exit status 1.
     *
     * @param str the message to print
     */
    public static void errx(String str) {
        System.err.println(str);
        System.exit(1);
    }

    /**
     * Prints the simple class name and message of a throwable to standard error and terminates
     * the JVM with exit status 1.
     *
     * @param th the throwable to report
     */
    public static void errx(Throwable th) {
        errx(th.getClass().getSimpleName() + ": " + th.getMessage());
    }

    /**
     * Tags one sentence and writes one output line per input line, followed by an empty line.
     *
     * <p>Only the part of each input line before the first whitespace character is handed to the
     * tagger. Each output line consists of the complete input line, a tab and the (optionally
     * corrected) tag, optionally followed by a tab-separated converted tag and a tab-separated
     * lemma. The writer is flushed before returning. An empty sentence produces no output.
     *
     * @param rFTagger the tagger to use
     * @param list the lines of the sentence, one token per line
     * @param bufferedWriter the destination for the tagged output
     * @param tagCorrector applied to every tag if not {@code null}
     * @param tagsetConverter if not {@code null}, the converted tag is added as an extra column
     * @param lemmatizer if not {@code null}, a lemma column is added
     * @param z if {@code true}, the input line is printed as lemma when the lemmatizer returns
     *     {@code null}; otherwise {@code <unknown>} is printed in that case
     * @throws IOException if writing to {@code bufferedWriter} fails
     * @throws RuntimeException if the tagger returns a number of tags different from the number
     *     of tokens
     */
    public static void tagAndPrint(RFTagger rFTagger, List<String> list, BufferedWriter bufferedWriter, TagCorrector tagCorrector, TagsetConverter tagsetConverter, Lemmatizer lemmatizer, boolean z) throws IOException {
        if (list.isEmpty()) {
            bufferedWriter.flush();
            return;
        }

        // The tagger only sees the first whitespace-delimited field of each line.
        List<String> tokens = new LinkedList<String>();
        for (String line : list) {
            tokens.add(line.replaceAll("\\s.*", ""));
        }

        List<String> tags = new LinkedList<String>(rFTagger.getTags(tokens));
        if (tags.size() != list.size()) {
            throw new RuntimeException("Internal error: tagger gave " + tags.size() + " tags for " + list.size() + " words");
        }

        Iterator<String> tagIterator = tags.iterator();
        for (String line : list) {
            String tag = tagIterator.next();
            if (tagCorrector != null) {
                tag = tagCorrector.correctTag(tag);
            }
            bufferedWriter.write(line + "\t" + tag);
            if (tagsetConverter != null) {
                bufferedWriter.write("\t" + tagsetConverter.rftag2tag(tag));
            }
            if (lemmatizer != null) {
                String lemma = lemmatizer.getLemma(line, tag);
                if (lemma != null) {
                    bufferedWriter.write("\t" + lemma);
                } else if (z) {
                    bufferedWriter.write("\t" + line);
                } else {
                    bufferedWriter.write("\t<unknown>");
                }
            }
            bufferedWriter.newLine();
        }
        // An empty line separates sentences in the output.
        bufferedWriter.newLine();
        bufferedWriter.flush();
    }

    /**
     * Prints a message to standard error if, and only if, the given flag is set.
     *
     * @param z whether the message should be printed
     * @param str the message
     */
    public static void tell(boolean z, String str) {
        if (z) {
            System.err.println(str);
        }
    }

    /** Builds the set of command-line options understood by {@link #main(String[])}. */
    private static Options buildOptions() {
        Options options = new Options();
        options.addOption("u", false, "no normalization of probabilities");
        options.addOption("s", false, "consider the lower-case version of sentence-initial words");
        options.addOption("t", true, "beam threshold");
        options.addOption("b", true, "maximal beam size");
        options.addOption("v", false, "verbose output");
        options.addOption("c", true, "convert rftags to this tagset (e.g., \"stts\") and add tags to output");
        options.addOption("n", false, "print the token as lemma if lemmatizer is active but no lemma can be determined. Otherwise <unknown> will be the lemma in that case.");
        options.addOption("h", false, "this message");
        options.addOption("l", true, "lemmatize for this language (e.g., \"german\"), requires -x");
        options.addOption("o", true, "correct tags for this language (e.g., \"german\"), done by Perl wrappers in original RFTagger. Automatically activated by option -l");
        options.addOption("x", true, "specify lexicon file for lemmatization (encoding must be UTF8)");
        return options;
    }

    /**
     * Program entry point: parses the command line, loads the model and the optional tag
     * corrector, tagset converter and lemmatizer, then tags the input sentence by sentence.
     *
     * <p>Positional arguments are {@code parfile [infile [outfile]]}; standard input and standard
     * output are used when the respective file is not given. Any configuration or I/O error is
     * reported on standard error and ends the JVM with exit status 1.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        Options options = buildOptions();
        CommandLine commandLine = null;
        try {
            commandLine = new GnuParser().parse(options, strArr);
        } catch (ParseException e) {
            System.err.println("Invalid command line arguments.");
            helpAndQuit(options);
        }
        if (commandLine.hasOption("h")) {
            helpAndQuit(options);
        }

        boolean normalize = !commandLine.hasOption("u");
        boolean lowerCaseSentenceInitial = commandLine.hasOption("s");
        boolean verbose = commandLine.hasOption("v");
        boolean tokenAsFallbackLemma = commandLine.hasOption("n");

        double beamThreshold = RFTagger.BEAM_THRESHOLD_DEFAULT.doubleValue();
        if (commandLine.hasOption("t")) {
            try {
                beamThreshold = Double.parseDouble(commandLine.getOptionValue("t"));
            } catch (NumberFormatException e) {
                errx("Invalid value for option -t, must be a number.");
            }
        }
        int maxBeamSize = RFTagger.MAX_BEAM_SIZE_DEFAULT.intValue();
        if (commandLine.hasOption("b")) {
            try {
                maxBeamSize = Integer.parseInt(commandLine.getOptionValue("b"));
            } catch (NumberFormatException e) {
                errx("Invalid value for option -b, must be an integer.");
            }
        }

        TagsetConverter tagsetConverter = null;
        if (commandLine.hasOption("c")) {
            try {
                tagsetConverter = ConverterFactory.getConverter(commandLine.getOptionValue("c"));
            } catch (NoSuchTagsetException e) {
                errx(e);
            }
        }

        // Positional arguments are read by index; the parser's list is left untouched.
        List<?> positional = commandLine.getArgList();
        if (positional.isEmpty()) {
            System.err.println("No parameter file specified.");
            helpAndQuit(options);
        }
        String parFileName = (String) positional.get(0);
        String inFileName = positional.size() > 1 ? (String) positional.get(1) : null;
        String outFileName = positional.size() > 2 ? (String) positional.get(2) : null;

        Model model = null;
        InputStream inputStream = null;
        OutputStream outputStream = null;
        try {
            model = new Model(new File(parFileName));
            inputStream = inFileName == null ? System.in : new FileInputStream(new File(inFileName));
            outputStream = outFileName == null ? System.out : new FileOutputStream(new File(outFileName));
        } catch (IOException e) {
            errx(e);
        }
        tell(verbose, "Successfully loaded model from " + new File(parFileName).getAbsolutePath());

        TagCorrector tagCorrector = null;
        if (commandLine.hasOption("o")) {
            String correctorLanguage = commandLine.getOptionValue("o");
            try {
                tagCorrector = TagCorrectorFactory.getTagCorrector(correctorLanguage);
                tell(verbose, "Tag corrector for language " + correctorLanguage + " is active.");
            } catch (NoSuchTagCorrectorException e) {
                errx(e);
            }
        }

        Lemmatizer lemmatizer = null;
        if (commandLine.hasOption("l")) {
            if (!commandLine.hasOption("x")) {
                errx("Option -l needs option -x, too.");
            }
            try {
                tell(verbose, "Initializing lemmatizer, this may take a while...");
                File lexiconFile = new File(commandLine.getOptionValue("x"));
                String lemmatizerLanguage = commandLine.getOptionValue("l");
                lemmatizer = LemmatizerFactory.getLemmatizer(lemmatizerLanguage, lexiconFile);
                tell(verbose, "Initialized lemmatizer for " + lemmatizerLanguage + " with lexicon from " + lexiconFile.getAbsolutePath() + " using UTF8 encoding.");
                // The lemmatizer's language takes over the tag corrector, replacing any -o choice.
                tagCorrector = TagCorrectorFactory.getTagCorrector(lemmatizerLanguage);
                tell(verbose, "Using the lemmatizer automatically activates the tag corrector for " + lemmatizerLanguage);
            } catch (NoSuchTagCorrectorException e) {
                // Always report this, independent of -v: it indicates an inconsistency between
                // the lemmatizer and tag corrector factories.
                System.err.println("Cannot load tag corrector that fits the lemmatizer. This is a bug. Please report.");
                errx(e);
            } catch (Exception e) {
                errx(e);
            }
        }

        // Reader and writer deliberately use the platform default charset (see help text).
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outputStream));
        RFTagger tagger = new RFTagger(model, normalize, beamThreshold, lowerCaseSentenceInitial, maxBeamSize);
        tell(verbose, "Starting to tag...");

        int tokenCount = 0;
        int sentenceCount = 0;
        try {
            List<String> sentence = new LinkedList<String>();
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.length() > 0) {
                    sentence.add(trimmed);
                    tokenCount++;
                } else if (!sentence.isEmpty()) {
                    // A blank line ends the sentence; repeated blank lines are not counted.
                    sentenceCount++;
                    tell(verbose, "SentNr " + sentenceCount);
                    tagAndPrint(tagger, sentence, writer, tagCorrector, tagsetConverter, lemmatizer, tokenAsFallbackLemma);
                    sentence.clear();
                }
            }
            // The input may end without a trailing blank line.
            if (!sentence.isEmpty()) {
                sentenceCount++;
                tell(verbose, "SentNr " + sentenceCount);
                tagAndPrint(tagger, sentence, writer, tagCorrector, tagsetConverter, lemmatizer, tokenAsFallbackLemma);
            }
            // Closing here (rather than silently at JVM exit) lets a failing final flush be
            // reported through the IOException handler below.
            writer.close();
            reader.close();
        } catch (IOException e) {
            errx(e);
        }
        tell(verbose, "Tagged " + sentenceCount + " sentences, " + tokenCount + " tokens.");
    }
}
