package weka.core.tokenizers;

import java.util.Enumeration;
import java.util.LinkedList;
import java.util.Vector;
import weka.core.Option;
import weka.core.Utils;

/* loaded from: input_file:weka/core/tokenizers/NGramTokenizer.class */
/**
 * Tokenizer that splits a string on the configured delimiter characters and
 * then enumerates word n-grams built from consecutive, non-empty tokens.
 *
 * <p>N-grams are produced from the largest size down to the smallest: first
 * every n-gram of the maximum size (left to right), then every n-gram one
 * token shorter, and so on until the minimum size has been emitted. The words
 * of an n-gram are joined by a single space.
 *
 * <p>Options handled in addition to those of the superclass:
 * <ul>
 *   <li>{@code -max <int>}: the max size of the n-gram (default = 3)</li>
 *   <li>{@code -min <int>}: the min size of the n-gram (default = 1)</li>
 * </ul>
 *
 * <p>If the minimum size is larger than the maximum size, or larger than the
 * number of tokens in the input, no n-grams are produced.
 */
public class NGramTokenizer extends CharacterDelimitedTokenizer {
    private static final long serialVersionUID = -2181896254171647219L;

    /** Value used for the maximum n-gram size when {@code -max} is not given. */
    private static final int DEFAULT_MAX_SIZE = 3;

    /** Value used for the minimum n-gram size when {@code -min} is not given. */
    private static final int DEFAULT_MIN_SIZE = 1;

    /** Largest n-gram size to generate. */
    protected int m_NMax = DEFAULT_MAX_SIZE;

    /** Smallest n-gram size to generate. */
    protected int m_NMin = DEFAULT_MIN_SIZE;

    /** Size of the n-grams currently being enumerated. */
    protected int m_N;

    /** Number of tokens in {@link #m_SplitString}. */
    protected int m_MaxPosition;

    /** Index of the first token of the next n-gram to return. */
    protected int m_CurrentPosition;

    /** The non-empty tokens of the string passed to {@link #tokenize(String)}. */
    protected String[] m_SplitString;

    /**
     * Returns a short description of this tokenizer.
     *
     * @return the description text
     */
    @Override // weka.core.tokenizers.Tokenizer
    public String globalInfo() {
        return "Splits a string into an n-gram with min and max grams.";
    }

    /**
     * Lists the options of the superclass followed by {@code -max} and
     * {@code -min}.
     *
     * @return an enumeration of all available options
     */
    @Override // weka.core.tokenizers.CharacterDelimitedTokenizer, weka.core.tokenizers.Tokenizer, weka.core.OptionHandler
    public Enumeration listOptions() {
        Vector<Object> options = new Vector<Object>();
        Enumeration inherited = super.listOptions();
        while (inherited.hasMoreElements()) {
            options.addElement(inherited.nextElement());
        }
        options.addElement(new Option("\tThe max size of the Ngram (default = 3).", "max", 1, "-max <int>"));
        options.addElement(new Option("\tThe min size of the Ngram (default = 1).", "min", 1, "-min <int>"));
        return options.elements();
    }

    /**
     * Returns the current settings: the superclass options followed by
     * {@code -max <size>} and {@code -min <size>}.
     *
     * @return the option array
     */
    @Override // weka.core.tokenizers.CharacterDelimitedTokenizer, weka.core.tokenizers.Tokenizer, weka.core.OptionHandler
    public String[] getOptions() {
        Vector<String> options = new Vector<String>();
        for (String inherited : super.getOptions()) {
            options.add(inherited);
        }
        options.add("-max");
        options.add("" + getNGramMaxSize());
        options.add("-min");
        options.add("" + getNGramMinSize());
        return options.toArray(new String[options.size()]);
    }

    /**
     * Parses the given options. The superclass options are processed first;
     * a missing {@code -max} or {@code -min} resets the respective size to its
     * default (3 and 1).
     *
     * @param strArr the options to parse
     * @throws Exception if the superclass rejects an option or a size is not a
     *         valid integer
     */
    @Override // weka.core.tokenizers.CharacterDelimitedTokenizer, weka.core.tokenizers.Tokenizer, weka.core.OptionHandler
    public void setOptions(String[] strArr) throws Exception {
        super.setOptions(strArr);

        String maxValue = Utils.getOption("max", strArr);
        if (maxValue.length() != 0) {
            setNGramMaxSize(Integer.parseInt(maxValue));
        } else {
            setNGramMaxSize(DEFAULT_MAX_SIZE);
        }

        String minValue = Utils.getOption("min", strArr);
        if (minValue.length() != 0) {
            setNGramMinSize(Integer.parseInt(minValue));
        } else {
            setNGramMinSize(DEFAULT_MIN_SIZE);
        }
    }

    /**
     * Returns the maximum n-gram size.
     *
     * @return the maximum size
     */
    public int getNGramMaxSize() {
        return this.m_NMax;
    }

    /**
     * Sets the maximum n-gram size. Values below 1 are replaced by 1.
     *
     * @param i the new maximum size
     */
    public void setNGramMaxSize(int i) {
        this.m_NMax = Math.max(1, i);
    }

    /**
     * Returns the tip text for the maximum size property.
     *
     * @return the tip text
     */
    public String NGramMaxSizeTipText() {
        return "The max N of the NGram.";
    }

    /**
     * Sets the minimum n-gram size. Values below 1 are replaced by 1.
     *
     * @param i the new minimum size
     */
    public void setNGramMinSize(int i) {
        this.m_NMin = Math.max(1, i);
    }

    /**
     * Returns the minimum n-gram size.
     *
     * @return the minimum size
     */
    public int getNGramMinSize() {
        return this.m_NMin;
    }

    /**
     * Returns the tip text for the minimum size property.
     *
     * @return the tip text
     */
    public String NGramMinSizeTipText() {
        return "The min N of the NGram.";
    }

    /**
     * Tests whether another n-gram is available: the current size must still
     * be at least the minimum size and an n-gram of that size must fit between
     * the current position and the end of the token array.
     *
     * @return true if {@link #nextElement()} can return another n-gram
     */
    @Override // weka.core.tokenizers.Tokenizer, java.util.Enumeration
    public boolean hasMoreElements() {
        return this.m_CurrentPosition < this.m_MaxPosition
                && (this.m_N - 1) + this.m_CurrentPosition < this.m_MaxPosition
                && this.m_N >= this.m_NMin;
    }

    /**
     * Returns the n-gram of the current size that starts at the current
     * position and advances the position. Once the last n-gram of the current
     * size has been returned, the position is reset to the first token and
     * the size is reduced by one.
     *
     * <p>Callers should check {@link #hasMoreElements()} first.
     *
     * @return the next n-gram as a {@code String}, words separated by a space
     */
    @Override // weka.core.tokenizers.Tokenizer, java.util.Enumeration
    public Object nextElement() {
        StringBuilder gram = new StringBuilder();
        for (int i = 0; i < this.m_N && i + this.m_CurrentPosition < this.m_MaxPosition; i++) {
            if (i > 0) {
                gram.append(' ');
            }
            gram.append(this.m_SplitString[this.m_CurrentPosition + i]);
        }

        this.m_CurrentPosition++;
        // No further n-gram of this size fits: restart from the left with a
        // size one smaller.
        if ((this.m_CurrentPosition + this.m_N) - 1 == this.m_MaxPosition) {
            this.m_CurrentPosition = 0;
            this.m_N--;
        }
        return gram.toString().trim();
    }

    /**
     * Removes all empty strings from {@link #m_SplitString}, preserving the
     * order of the remaining tokens.
     */
    protected void filterOutEmptyStrings() {
        LinkedList<String> kept = new LinkedList<String>();
        for (String piece : this.m_SplitString) {
            if (piece.length() != 0) {
                kept.add(piece);
            }
        }
        this.m_SplitString = kept.toArray(new String[kept.size()]);
    }

    /**
     * Splits the given string on the configured delimiters, discards empty
     * tokens and resets the enumeration to the first n-gram of the largest
     * size that can be produced.
     *
     * @param str the string to tokenize
     */
    @Override // weka.core.tokenizers.Tokenizer
    public void tokenize(String str) {
        this.m_N = this.m_NMax;
        this.m_SplitString = str.split("[" + getDelimiters() + "]");
        filterOutEmptyStrings();
        this.m_CurrentPosition = 0;
        this.m_MaxPosition = this.m_SplitString.length;
        // An input with fewer tokens than the maximum size cannot form an
        // n-gram of that size. Starting from the maximum would make
        // hasMoreElements() fail immediately and suppress the shorter n-grams
        // that do exist, so start from the largest size that fits instead.
        if (this.m_N > this.m_MaxPosition) {
            this.m_N = this.m_MaxPosition;
        }
    }

    /**
     * Runs the tokenizer with the given command-line options.
     *
     * @param strArr the command-line options
     */
    public static void main(String[] strArr) {
        runTokenizer(new NGramTokenizer(), strArr);
    }
}
