package org.languagetool.chunking;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.tools.Tools;

/* loaded from: input_file:org/languagetool/chunking/EnglishChunker.class */
/**
 * {@link Chunker} implementation for English that runs the OpenNLP tokenizer,
 * POS tagger and chunker over a sentence and copies the resulting chunk tags
 * onto the matching {@link AnalyzedTokenReadings}.
 *
 * <p>The three OpenNLP models are held in static fields and loaded by the first
 * constructor call that finds them unset. The loading is not synchronized, so
 * constructors racing on first use may each load a model; the last write wins.
 */
public class EnglishChunker implements Chunker {
    private static final String TOKENIZER_MODEL = "/en-token.bin";
    private static final String POS_TAGGER_MODEL = "/en-pos-maxent.bin";
    private static final String CHUNKER_MODEL = "/en-chunker.bin";

    /** U+FEFF (zero width no-break space); tokens are split at this character. */
    private static final String ZERO_WIDTH_NO_BREAK_SPACE = "\ufeff";
    /** U+2019 (right single quotation mark), replaced by {@code '} before tokenizing. */
    private static final char TYPOGRAPHIC_APOSTROPHE = '\u2019';

    private static volatile TokenizerModel tokenModel;
    private static volatile POSModel posModel;
    private static volatile ChunkerModel chunkerModel;
    private final EnglishChunkFilter chunkFilter;

    /**
     * Creates a chunker, loading any OpenNLP model that has not been loaded yet.
     *
     * @throws RuntimeException wrapping the {@link IOException} if a model cannot be read
     */
    public EnglishChunker() {
        try {
            // Each stream is closed as soon as its model has been built from it.
            if (tokenModel == null) {
                try (InputStream in = Tools.getStream(TOKENIZER_MODEL)) {
                    tokenModel = new TokenizerModel(in);
                }
            }
            if (posModel == null) {
                try (InputStream in = Tools.getStream(POS_TAGGER_MODEL)) {
                    posModel = new POSModel(in);
                }
            }
            if (chunkerModel == null) {
                try (InputStream in = Tools.getStream(CHUNKER_MODEL)) {
                    chunkerModel = new ChunkerModel(in);
                }
            }
            this.chunkFilter = new EnglishChunkFilter();
        } catch (IOException e) {
            throw new RuntimeException("Could not initialize English chunker", e);
        }
    }

    /**
     * Computes chunk tags for the sentence formed by the given readings, passes them
     * through the chunk filter and stores them on the readings they belong to.
     *
     * @param list the readings of one sentence, in order; modified in place
     */
    @Override // org.languagetool.chunking.Chunker
    public void addChunkTags(List<AnalyzedTokenReadings> list) {
        List<ChunkTaggedToken> taggedTokens = getChunkTagsForReadings(list);
        assignChunksToReadings(this.chunkFilter.filter(taggedTokens));
    }

    /**
     * Runs tokenizer, POS tagger and chunker on the sentence text and pairs every
     * resulting token with its chunk tag and, where one lines up, its reading.
     *
     * @throws RuntimeException if the three OpenNLP result arrays differ in length
     */
    private List<ChunkTaggedToken> getChunkTagsForReadings(List<AnalyzedTokenReadings> list) {
        String sentence = getSentence(list);
        String[] tokens = cleanZeroWidthWhitespaces(tokenize(sentence)).toArray(new String[0]);
        String[] posTags = posTag(tokens);
        String[] chunkTags = chunk(tokens, posTags);
        if (tokens.length != posTags.length || tokens.length != chunkTags.length) {
            throw new RuntimeException("Length of results must be the same: " + tokens.length + ", " + posTags.length + ", " + chunkTags.length);
        }
        return getTokensWithTokenReadings(list, tokens, chunkTags);
    }

    /**
     * Splits every token at U+FEFF so that the zero-width character no longer
     * appears inside a token.
     *
     * <p>Each non-empty part becomes its own token. An empty part (as produced by a
     * leading U+FEFF) is kept as an empty token; a token consisting only of U+FEFF
     * yields no parts and is therefore dropped.
     *
     * @param strArr tokens as returned by the tokenizer
     * @return the tokens with U+FEFF removed, in original order
     */
    @NotNull
    private List<String> cleanZeroWidthWhitespaces(String[] strArr) {
        List<String> cleaned = new ArrayList<>();
        for (String token : strArr) {
            for (String part : token.split(ZERO_WIDTH_NO_BREAK_SPACE)) {
                // Add the part, not the whole token: re-adding the unsplit token would
                // duplicate it and shift all following character offsets.
                cleaned.add(part.isEmpty() ? "" : part);
            }
        }
        return cleaned;
    }

    /**
     * Tokenizes the text with the OpenNLP tokenizer after replacing U+2019 with a
     * plain apostrophe. A new {@link TokenizerME} is created for every call.
     */
    String[] tokenize(String str) {
        String normalized = str.replace(TYPOGRAPHIC_APOSTROPHE, '\'');
        return new TokenizerME(tokenModel).tokenize(normalized);
    }

    /** Returns one POS tag per token, using a new {@link POSTaggerME} for every call. */
    private String[] posTag(String[] strArr) {
        return new POSTaggerME(posModel).tag(strArr);
    }

    /** Returns one chunk tag per token, using a new {@link ChunkerME} for every call. */
    private String[] chunk(String[] strArr, String[] strArr2) {
        return new ChunkerME(chunkerModel).chunk(strArr, strArr2);
    }

    /**
     * Builds one {@link ChunkTaggedToken} per chunk tag. Tokens are located by their
     * character offsets, where offsets accumulate the token lengths only.
     *
     * @param list    the sentence's readings
     * @param strArr  the tokens
     * @param strArr2 the chunk tags, parallel to {@code strArr}
     */
    private List<ChunkTaggedToken> getTokensWithTokenReadings(List<AnalyzedTokenReadings> list, String[] strArr, String[] strArr2) {
        List<ChunkTaggedToken> result = new ArrayList<>(strArr2.length);
        int startPos = 0;
        for (int idx = 0; idx < strArr2.length; idx++) {
            String token = strArr[idx];
            int endPos = startPos + token.length();
            AnalyzedTokenReadings readings = getAnalyzedTokenReadingsFor(startPos, endPos, list);
            result.add(new ChunkTaggedToken(token, Collections.singletonList(new ChunkTag(strArr2[idx])), readings));
            startPos = endPos;
        }
        return result;
    }

    /** Copies the chunk tags of every token that has readings onto those readings. */
    private void assignChunksToReadings(List<ChunkTaggedToken> list) {
        for (ChunkTaggedToken taggedToken : list) {
            AnalyzedTokenReadings readings = taggedToken.getReadings();
            if (readings != null) {
                readings.setChunkTags(taggedToken.getChunkTags());
            }
        }
    }

    /** Concatenates the token text of all readings, with nothing inserted between them. */
    private String getSentence(List<AnalyzedTokenReadings> list) {
        StringBuilder sb = new StringBuilder();
        for (AnalyzedTokenReadings readings : list) {
            sb.append(readings.getToken());
        }
        return sb.toString();
    }

    /**
     * Finds the reading whose text spans exactly {@code [i, i2)}, with offsets
     * counted over non-whitespace readings only.
     *
     * @param i    start offset (inclusive)
     * @param i2   end offset (exclusive)
     * @param list the sentence's readings
     * @return the matching reading, or {@code null} if no reading has exactly these bounds
     */
    @Nullable
    private AnalyzedTokenReadings getAnalyzedTokenReadingsFor(int i, int i2, List<AnalyzedTokenReadings> list) {
        int pos = 0;
        for (AnalyzedTokenReadings readings : list) {
            String token = readings.getToken();
            // Whitespace readings do not advance the offset. The isSpaceChar check also
            // catches single space characters that trim() leaves in place.
            boolean singleSpaceChar = token.length() == 1 && Character.isSpaceChar(token.charAt(0));
            if (token.trim().isEmpty() || singleSpaceChar) {
                continue;
            }
            int end = pos + token.length();
            if (pos == i && end == i2) {
                return readings;
            }
            pos = end;
        }
        return null;
    }
}
