package com.ibm.dltj;

import com.ibm.dltj.data.BreakIteratorManager;
import com.ibm.dltj.fst.MatchBuffer;
import com.ibm.dltj.gloss.TokenClassGloss;
import java.text.CharacterIterator;
import java.util.ArrayList;

/* loaded from: input_file:dlt.jar:com/ibm/dltj/SimpleWordTokenizer.class */
/**
 * Cuts the text behind a {@link CharacterIterator} into word tokens by
 * repeatedly running {@code UniMorphoSyntax.evaluateBreakRules} against a
 * break-iterator dictionary obtained from {@code BreakIteratorManager}.
 *
 * <p>NOTE(review): one {@code MatchBuffer} instance is reused for every rule
 * evaluation made through this object; whether a single tokenizer may be
 * shared between threads depends on {@code MatchBuffer} and
 * {@code Dictionary} -- confirm before sharing an instance.
 */
public class SimpleWordTokenizer {
    /** Dictionary handed to every break-rule evaluation. */
    private final Dictionary rbbiDict;

    /** Scratch buffer handed to every break-rule evaluation. */
    private final MatchBuffer break_matches = new MatchBuffer();

    /**
     * A span of the input, recorded as the iterator index before and after
     * one break-rule evaluation.
     */
    public static class Token {
        private final int begin;
        private final int end;

        /**
         * @param begin iterator index at which the span starts
         * @param end   iterator index reached once the span was recognised
         */
        public Token(int begin, int end) {
            this.begin = begin;
            this.end = end;
        }

        /** @return the index at which this span starts */
        public int getBegin() {
            return begin;
        }

        /** @return the index the iterator had reached after this span */
        public int getEnd() {
            return end;
        }
    }

    /** A {@link Token} that also carries the gloss returned by the break rules. */
    public static class TokenWithType extends Token {
        private final TokenClassGloss typeGloss;

        /**
         * @param begin     iterator index at which the span starts
         * @param end       iterator index reached once the span was recognised
         * @param typeGloss gloss the break rules produced for the span
         */
        public TokenWithType(int begin, int end, TokenClassGloss typeGloss) {
            super(begin, end);
            this.typeGloss = typeGloss;
        }

        /** @return the gloss the break rules produced for this span */
        public TokenClassGloss getTypeGloss() {
            return typeGloss;
        }
    }

    /** @return the IBM copyright notice embedded in this class */
    static String getCopyright() {
        return "\n\n(C) Copyright IBM Corp. 2003, 2010.\n\n";
    }

    /**
     * Creates a tokenizer whose dictionary is built by passing {@code null}
     * to {@code BreakIteratorManager.constructBreakIterator}.
     *
     * @throws DLTException propagated from the dictionary construction
     */
    public SimpleWordTokenizer() throws DLTException {
        rbbiDict = BreakIteratorManager.constructBreakIterator(null);
    }

    /**
     * Creates a tokenizer whose dictionary is built by passing {@code str}
     * to {@code BreakIteratorManager.constructBreakIterator}.
     *
     * @param str argument forwarded unchanged to the dictionary construction
     * @throws DLTException propagated from the dictionary construction
     */
    public SimpleWordTokenizer(String str) throws DLTException {
        rbbiDict = BreakIteratorManager.constructBreakIterator(str);
    }

    /**
     * Collects the spans that the break rules flag with
     * {@code TokenClassGloss.TAG_TOKEN}, reading from the iterator's current
     * index until its end index is reached. Spans without that tag are
     * skipped.
     *
     * <p>NOTE(review): the loop relies on each rule evaluation moving the
     * iterator forward; if an evaluation ever left the index unchanged the
     * loop would not terminate -- confirm against {@code UniMorphoSyntax}.
     *
     * @param characterIterator text source; its index is changed by this call
     * @return the tagged spans in the order they were encountered
     * @throws DLTException propagated from the break-rule evaluation
     */
    public ArrayList<Token> tokenize(CharacterIterator characterIterator) throws DLTException {
        ArrayList<Token> tokens = new ArrayList<>();
        while (hasRemainingInput(characterIterator)) {
            int spanStart = characterIterator.getIndex();
            TokenClassGloss gloss = applyBreakRules(characterIterator);
            if (!gloss.is(TokenClassGloss.TAG_TOKEN)) {
                continue;
            }
            int spanEnd = characterIterator.getIndex();
            tokens.add(new Token(spanStart, spanEnd));
        }
        return tokens;
    }

    /**
     * Same traversal as {@link #tokenize(CharacterIterator)}, but every
     * collected span keeps the gloss returned by the break rules.
     *
     * <p>NOTE(review): the loop relies on each rule evaluation moving the
     * iterator forward; if an evaluation ever left the index unchanged the
     * loop would not terminate -- confirm against {@code UniMorphoSyntax}.
     *
     * @param characterIterator text source; its index is changed by this call
     * @return the tagged spans, each with its gloss, in encounter order
     * @throws DLTException propagated from the break-rule evaluation
     */
    public ArrayList<TokenWithType> tokenizeWithType(CharacterIterator characterIterator) throws DLTException {
        ArrayList<TokenWithType> tokens = new ArrayList<>();
        while (hasRemainingInput(characterIterator)) {
            int spanStart = characterIterator.getIndex();
            TokenClassGloss gloss = applyBreakRules(characterIterator);
            if (!gloss.is(TokenClassGloss.TAG_TOKEN)) {
                continue;
            }
            int spanEnd = characterIterator.getIndex();
            tokens.add(new TokenWithType(spanStart, spanEnd, gloss));
        }
        return tokens;
    }

    /** Reports whether the iterator's index is still below its end index. */
    private static boolean hasRemainingInput(CharacterIterator text) {
        return text.getIndex() < text.getEndIndex();
    }

    /** Runs one break-rule evaluation with this tokenizer's buffer and dictionary. */
    private TokenClassGloss applyBreakRules(CharacterIterator text) throws DLTException {
        return UniMorphoSyntax.evaluateBreakRules(text, this.break_matches, this.rbbiDict);
    }
}
