package ai.grazie.nlp.tokenizer.spacy;

import ai.grazie.nlp.tokenizer.Tokenizer;
import ai.grazie.nlp.tokenizer.spacy.en.SpacyEnglish;
import ai.grazie.nlp.utils.RangesKt;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.ranges.IntRange;
import kotlin.sequences.Sequence;
import kotlin.text.CharsKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

/* compiled from: SpacyTokenizer.kt */
@Metadata(mv = {1, 4, 3}, bv = {1, 0, 3}, k = 1, d1 = {"��F\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010\u000e\n��\n\u0002\u0010\b\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\b\u0005\u0018�� \u001e2\u00020\u0001:\u0002\u001e\u001fB%\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0003\u0012\u0006\u0010\u0005\u001a\u00020\u0003\u0012\u0006\u0010\u0006\u001a\u00020\u0007¢\u0006\u0002\u0010\bJ\u0010\u0010\r\u001a\u00020\u000e2\u0006\u0010\u000f\u001a\u00020\u000eH\u0002J\u0016\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0010\u0010\u0015\u001a\u00020\u00162\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0010\u0010\u0017\u001a\u00020\u00162\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0010\u0010\u0018\u001a\u00020\u000e2\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0016\u0010\u0019\u001a\b\u0012\u0004\u0012\u00020\u001b0\u001a2\u0006\u0010\u001c\u001a\u00020\u0014H\u0016J\u0016\u0010\u001d\u001a\b\u0012\u0004\u0012\u00020\u001b0\u001a2\u0006\u0010\u0013\u001a\u00020\u0014H\u0002R\u000e\u0010\u0004\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u0011\u0010\u0002\u001a\u00020\u0003¢\u0006\b\n��\u001a\u0004\b\t\u0010\nR\u0011\u0010\u0006\u001a\u00020\u0007¢\u0006\b\n��\u001a\u0004\b\u000b\u0010\fR\u000e\u0010\u0005\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��¨\u0006 "}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer;", "Lai/grazie/nlp/tokenizer/Tokenizer;", "prefix", "Lkotlin/text/Regex;", "infix", "suffix", "specialCases", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;", "(Lkotlin/text/Regex;Lkotlin/text/Regex;Lkotlin/text/Regex;Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;)V", "getPrefix", "()Lkotlin/text/Regex;", "getSpecialCases", 
"()Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;", "attachTokens", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$TokenSplits;", "splits", "findInfix", "Lkotlin/sequences/Sequence;", "Lkotlin/text/MatchResult;", "tok", "", "findPrefix", "", "findSuffix", "splitAffixes", "tokenize", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "text", "tokenizeToken", "Companion", "TokenSplits", "nlp-tokenizer"})
/* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyTokenizer.class */
public final class SpacyTokenizer implements Tokenizer {

    // Regex used by findPrefix to measure a leading affix; exposed through getPrefix().
    @NotNull
    private final Regex prefix;
    // Regex used by findInfix to locate split points inside a word.
    private final Regex infix;
    // Regex used by findSuffix to measure a trailing affix.
    private final Regex suffix;

    // Lookup consulted by splitAffixes (get) and attachTokens (urlMatch); exposed through getSpecialCases().
    @NotNull
    private final SpacyTokenizerSpecialCases specialCases;

    // Singleton holder for the factory method loadEnglish().
    @NotNull
    public static final Companion Companion = new Companion(null);

    /* compiled from: SpacyTokenizer.kt */
    @Metadata(mv = {1, 4, 3}, bv = {1, 0, 3}, k = 1, d1 = {"��\u0012\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002J\u0006\u0010\u0003\u001a\u00020\u0004¨\u0006\u0005"}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$Companion;", "", "()V", "loadEnglish", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer;", "nlp-tokenizer"})
    /* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$Companion.class */
    public static final class Companion {
        private Companion() {
        }

        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }

        /**
         * Creates a tokenizer wired with the {@code SpacyEnglish} prefix, infix and suffix
         * regexes and the shared {@code SpacyTokenizerSpecialCases} instance.
         *
         * @return a new tokenizer instance
         */
        @NotNull
        public final SpacyTokenizer loadEnglish() {
            Regex prefixRegex = SpacyEnglish.Prefix.INSTANCE.getRegex();
            Regex infixRegex = SpacyEnglish.Infix.INSTANCE.getRegex();
            Regex suffixRegex = SpacyEnglish.Suffix.INSTANCE.getRegex();
            return new SpacyTokenizer(prefixRegex, infixRegex, suffixRegex, SpacyTokenizerSpecialCases.INSTANCE);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* compiled from: SpacyTokenizer.kt */
    @Metadata(mv = {1, 4, 3}, bv = {1, 0, 3}, k = 1, d1 = {"��.\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0002\b\f\n\u0002\u0010 \n\u0002\u0018\u0002\n��\b\u0002\u0018��2\u00020\u0001B\u0005¢\u0006\u0002\u0010\u0002J\f\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00190\u0018R\u001a\u0010\u0003\u001a\u00020\u0004X\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u0003\u0010\u0005\"\u0004\b\u0006\u0010\u0007R!\u0010\b\u001a\u0012\u0012\u0004\u0012\u00020\n0\tj\b\u0012\u0004\u0012\u00020\n`\u000b¢\u0006\b\n��\u001a\u0004\b\f\u0010\rR!\u0010\u000e\u001a\u0012\u0012\u0004\u0012\u00020\n0\tj\b\u0012\u0004\u0012\u00020\n`\u000b¢\u0006\b\n��\u001a\u0004\b\u000f\u0010\rR\u001c\u0010\u0010\u001a\u0004\u0018\u00010\nX\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u0011\u0010\u0012\"\u0004\b\u0013\u0010\u0014R!\u0010\u0015\u001a\u0012\u0012\u0004\u0012\u00020\n0\tj\b\u0012\u0004\u0012\u00020\n`\u000b¢\u0006\b\n��\u001a\u0004\b\u0016\u0010\r¨\u0006\u001a"}, d2 = {"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$TokenSplits;", "", "()V", "isSpecial", "", "()Z", "setSpecial", "(Z)V", "prefixes", "Ljava/util/ArrayList;", "", "Lkotlin/collections/ArrayList;", "getPrefixes", "()Ljava/util/ArrayList;", "suffixes", "getSuffixes", "word", "getWord", "()Ljava/lang/String;", "setWord", "(Ljava/lang/String;)V", "wordTokens", "getWordTokens", "toList", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "nlp-tokenizer"})
    /* loaded from: input_file:ai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$TokenSplits.class */
    public static final class TokenSplits {

        /** Affixes stripped from the front of the chunk, in the order they were removed. */
        @NotNull
        private final ArrayList<String> prefixes = new ArrayList<>();

        /** Pieces the remaining word was split into. */
        @NotNull
        private final ArrayList<String> wordTokens = new ArrayList<>();

        /** Affixes stripped from the back of the chunk, in the order they were removed (outermost first). */
        @NotNull
        private final ArrayList<String> suffixes = new ArrayList<>();

        /** What is left of the chunk once affix stripping stopped. */
        @Nullable
        private String word;

        /** Set when {@link #wordTokens} were filled from a special-case entry. */
        private boolean isSpecial;

        @NotNull
        public final ArrayList<String> getPrefixes() {
            return prefixes;
        }

        @NotNull
        public final ArrayList<String> getSuffixes() {
            return suffixes;
        }

        @Nullable
        public final String getWord() {
            return word;
        }

        public final void setWord(@Nullable String str) {
            word = str;
        }

        @NotNull
        public final ArrayList<String> getWordTokens() {
            return wordTokens;
        }

        public final boolean isSpecial() {
            return isSpecial;
        }

        public final void setSpecial(boolean z) {
            isSpecial = z;
        }

        /**
         * Lays out prefixes, word tokens and suffixes (suffixes in reverse removal order)
         * back to back and assigns each piece an inclusive character range starting at 0.
         *
         * @return the pieces as tokens with ranges relative to the start of the chunk
         */
        @NotNull
        public final List<Tokenizer.Token> toList() {
            List<String> ordered = new ArrayList<>(prefixes.size() + wordTokens.size() + suffixes.size());
            ordered.addAll(prefixes);
            ordered.addAll(wordTokens);
            // Suffixes were collected outside-in, so walk them backwards to restore text order.
            for (int k = suffixes.size() - 1; k >= 0; k--) {
                ordered.add(suffixes.get(k));
            }

            List<Tokenizer.Token> tokens = new ArrayList<>(ordered.size());
            int offset = 0;
            for (String piece : ordered) {
                int next = offset + piece.length();
                tokens.add(new Tokenizer.Token(piece, new IntRange(offset, next - 1)));
                offset = next;
            }
            return tokens;
        }
    }

    /**
     * Tokenizes {@code str} by cutting it into alternating whitespace / non-whitespace runs
     * and passing every run through {@link #tokenizeToken(String)}.
     *
     * <p>When a run ends at a plain space character, that one space is skipped and does not
     * become part of the following run. Token ranges are shifted so that they are relative
     * to {@code str}.
     *
     * @param str text to tokenize
     * @return tokens in text order; an empty list for empty input
     */
    @Override // ai.grazie.nlp.tokenizer.Tokenizer
    @NotNull
    public List<Tokenizer.Token> tokenize(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "text");
        if (str.isEmpty()) {
            return CollectionsKt.emptyList();
        }
        final int end = str.length();
        final ArrayList<Tokenizer.Token> result = new ArrayList<>();
        boolean inWhitespace = CharsKt.isWhitespace(str.charAt(0));
        int segmentStart = 0;
        for (int pos = 0; pos < end; pos++) {
            char current = str.charAt(pos);
            if (CharsKt.isWhitespace(current) == inWhitespace) {
                continue;
            }
            // The character class flipped: flush the run that just ended.
            if (segmentStart < pos) {
                appendShifted(result, str.substring(segmentStart, pos), segmentStart);
            }
            segmentStart = (current == ' ') ? pos + 1 : pos;
            inWhitespace = !inWhitespace;
        }
        if (segmentStart < end) {
            appendShifted(result, str.substring(segmentStart), segmentStart);
        }
        return result;
    }

    /** Tokenizes one run and appends its tokens to {@code sink} with ranges moved by {@code offset}. */
    private void appendShifted(List<Tokenizer.Token> sink, String segment, int offset) {
        for (Tokenizer.Token piece : tokenizeToken(segment)) {
            sink.add(new Tokenizer.Token(piece.getToken(), RangesKt.withOffset(piece.getRange(), offset)));
        }
    }

    /**
     * Tokenizes a single run: strips affixes, then splits the remaining word unless the
     * affix pass already resolved it as a special case.
     *
     * @param str one run produced by {@code tokenize}
     * @return tokens with ranges relative to the start of {@code str}
     */
    private final List<Tokenizer.Token> tokenizeToken(String str) {
        TokenSplits stripped = splitAffixes(str);
        TokenSplits finished = stripped.isSpecial() ? stripped : attachTokens(stripped);
        return finished.toList();
    }

    /**
     * Repeatedly peels prefix and suffix matches off {@code str} until nothing more can be
     * removed or the remainder is a known special case.
     *
     * <p>If the current remainder is itself a special case, its predefined pieces are copied
     * into the word tokens, the result is flagged as special and the loop stops. Without
     * stopping there the loop would never terminate, because neither the remainder nor the
     * recorded size changes in that branch.
     *
     * <p>NOTE(review): when a special case is only discovered after removing an affix, the
     * loop stops with the special flag unset, so the caller still runs {@code attachTokens}
     * on that remainder instead of expanding it. Confirm this is intended.
     *
     * @param str one whitespace-delimited run
     * @return collected prefixes and suffixes plus the remaining word
     */
    private final TokenSplits splitAffixes(String str) {
        TokenSplits splits = new TokenSplits();
        String remaining = str;
        int lastSize = 0;
        // Stop when the text is used up or a full pass removed nothing.
        while (!remaining.isEmpty() && remaining.length() != lastSize) {
            List<SpacyTokenInfo> special = this.specialCases.get(remaining);
            if (special != null) {
                for (SpacyTokenInfo info : special) {
                    splits.getWordTokens().add(info.getOrth());
                }
                splits.setSpecial(true);
                break;
            }
            lastSize = remaining.length();

            int prefixLen = findPrefix(remaining);
            String prefix = null;
            String withoutPrefix = null;
            if (prefixLen != 0) {
                prefix = remaining.substring(0, prefixLen);
                withoutPrefix = remaining.substring(prefixLen);
                if (!withoutPrefix.isEmpty() && this.specialCases.get(withoutPrefix) != null) {
                    remaining = withoutPrefix;
                    splits.getPrefixes().add(prefix);
                    break;
                }
            }

            int suffixLen = findSuffix(remaining);
            String suffix = null;
            String withoutSuffix = null;
            if (suffixLen != 0) {
                int cut = remaining.length() - suffixLen;
                suffix = remaining.substring(cut);
                withoutSuffix = remaining.substring(0, cut);
                if (!withoutSuffix.isEmpty() && this.specialCases.get(withoutSuffix) != null) {
                    remaining = withoutSuffix;
                    splits.getSuffixes().add(suffix);
                    break;
                }
            }

            if (prefixLen != 0 && suffixLen != 0 && prefixLen + suffixLen <= remaining.length()) {
                // Both affixes fit without overlapping: drop them in one step.
                remaining = remaining.substring(prefixLen, remaining.length() - suffixLen);
                splits.getPrefixes().add(prefix);
                splits.getSuffixes().add(suffix);
            } else if (prefixLen != 0) {
                remaining = withoutPrefix;
                splits.getPrefixes().add(prefix);
            } else if (suffixLen != 0) {
                remaining = withoutSuffix;
                splits.getSuffixes().add(suffix);
            }

            if (!remaining.isEmpty() && this.specialCases.get(remaining) != null) {
                break;
            }
        }
        splits.setWord(remaining);
        return splits;
    }

    /**
     * Fills the word tokens of {@code tokenSplits} from its remaining word.
     *
     * <p>An empty word adds nothing. A word accepted by {@code specialCases.urlMatch} is kept
     * whole. Otherwise the word is cut around every infix match that does not start at
     * index 0, and any trailing text after the last cut is added as the final piece.
     *
     * @param tokenSplits result of the affix pass; its word must be non-null
     * @return the same {@code tokenSplits} instance
     */
    private final TokenSplits attachTokens(TokenSplits tokenSplits) {
        final String word = tokenSplits.getWord();
        Intrinsics.checkNotNull(word);
        if (word.length() <= 0) {
            return tokenSplits;
        }
        final ArrayList<String> pieces = tokenSplits.getWordTokens();
        if (this.specialCases.urlMatch(word)) {
            pieces.add(word);
            return tokenSplits;
        }

        int cursor = 0;
        Iterator<MatchResult> matches = findInfix(word).iterator();
        while (matches.hasNext()) {
            IntRange span = matches.next().getRange();
            int infixStart = span.getFirst();
            int infixEnd = span.getLast() + 1;
            if (infixStart == 0) {
                // Matches at the very start of the word are ignored.
                continue;
            }
            if (infixStart != cursor) {
                pieces.add(word.substring(cursor, infixStart));
            }
            if (infixStart != infixEnd) {
                pieces.add(word.substring(infixStart, infixEnd));
            }
            cursor = infixEnd;
        }

        String tail = word.substring(cursor, word.length());
        if (!tail.isEmpty()) {
            pieces.add(tail);
        }
        return tokenSplits;
    }

    /**
     * Measures the first match of the prefix regex in {@code str}.
     *
     * <p>The length is taken from the range bounds directly. Iterating the range to find its
     * first and last element costs time proportional to the match length and throws on a
     * zero-width match; reading the bounds yields 0 in that case.
     *
     * @param str text to search
     * @return length of the first match, or 0 when there is no match or the match is empty
     */
    private final int findPrefix(String str) {
        MatchResult match = Regex.find$default(this.prefix, str, 0, 2, (Object) null);
        if (match == null) {
            return 0;
        }
        IntRange span = match.getRange();
        return span.getLast() + 1 - span.getFirst();
    }

    /**
     * Measures the first match of the suffix regex in {@code str}.
     *
     * <p>The length is taken from the range bounds directly. Iterating the range to find its
     * first and last element costs time proportional to the match length and throws on a
     * zero-width match; reading the bounds yields 0 in that case.
     *
     * @param str text to search
     * @return length of the first match, or 0 when there is no match or the match is empty
     */
    private final int findSuffix(String str) {
        MatchResult match = Regex.find$default(this.suffix, str, 0, 2, (Object) null);
        if (match == null) {
            return 0;
        }
        IntRange span = match.getRange();
        return span.getLast() + 1 - span.getFirst();
    }

    /**
     * Finds every match of the infix regex in {@code str}, searching from index 0.
     *
     * @param str text to search
     * @return the matches as a sequence
     */
    private final Sequence<MatchResult> findInfix(String str) {
        Sequence<MatchResult> matches = Regex.findAll$default(this.infix, str, 0, 2, (Object) null);
        return matches;
    }

    /** Returns the prefix regex this tokenizer was constructed with. */
    @NotNull
    public final Regex getPrefix() {
        return prefix;
    }

    /** Returns the special-case lookup this tokenizer was constructed with. */
    @NotNull
    public final SpacyTokenizerSpecialCases getSpecialCases() {
        return specialCases;
    }

    /**
     * Creates a tokenizer from its three affix regexes and a special-case lookup.
     * Every argument is checked for null before any field is assigned.
     *
     * @param regex prefix regex
     * @param regex2 infix regex
     * @param regex3 suffix regex
     * @param spacyTokenizerSpecialCases special-case lookup
     */
    public SpacyTokenizer(@NotNull Regex regex, @NotNull Regex regex2, @NotNull Regex regex3, @NotNull SpacyTokenizerSpecialCases spacyTokenizerSpecialCases) {
        Intrinsics.checkNotNullParameter(regex, "prefix");
        Intrinsics.checkNotNullParameter(regex2, "infix");
        Intrinsics.checkNotNullParameter(regex3, "suffix");
        Intrinsics.checkNotNullParameter(spacyTokenizerSpecialCases, "specialCases");

        this.specialCases = spacyTokenizerSpecialCases;
        this.suffix = regex3;
        this.infix = regex2;
        this.prefix = regex;
    }
}
