/*
 * Decompiled with CFR 0.152.
 */
package org.bitbucket.eunjeon.mecab_ko_lucene_analyzer;

import java.io.IOException;
import java.util.Locale;
import java.util.Queue;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.bitbucket.eunjeon.mecab_ko_lucene_analyzer.Pos;
import org.bitbucket.eunjeon.mecab_ko_lucene_analyzer.PosAppender;
import org.bitbucket.eunjeon.mecab_ko_lucene_analyzer.TokenGenerator;
import org.bitbucket.eunjeon.mecab_ko_lucene_analyzer.TokenizerOption;
import org.bitbucket.eunjeon.mecab_ko_lucene_analyzer.tokenattributes.PartOfSpeechAttribute;
import org.bitbucket.eunjeon.mecab_ko_lucene_analyzer.tokenattributes.SemanticClassAttribute;
import org.bitbucket.eunjeon.mecab_ko_mecab_loader.MeCabLoader;
import org.chasen.mecab.Lattice;
import org.chasen.mecab.Model;
import org.chasen.mecab.Tagger;

/**
 * Lucene {@link Tokenizer} that feeds its whole input through MeCab and emits
 * the tokens produced by a {@link TokenGenerator}.
 *
 * <p>On the first {@link #incrementToken()} call after a {@link #reset()}, the
 * entire input is read into memory, lower-cased, and parsed into the MeCab
 * lattice. Subsequent calls drain the token queues handed out by the
 * generator, one {@link Pos} per call.
 */
public final class MeCabKoTokenizer
extends Tokenizer {
    /** Size, in chars, of the scratch buffer used to drain the input reader. */
    private static final int READ_BUFFER_SIZE = 1024;

    private final CharTermAttribute charTermAtt = this.addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = this.addAttribute(PositionIncrementAttribute.class);
    private final PositionLengthAttribute posLenAtt = this.addAttribute(PositionLengthAttribute.class);
    private final OffsetAttribute offsetAtt = this.addAttribute(OffsetAttribute.class);
    private final TypeAttribute typeAtt = this.addAttribute(TypeAttribute.class);
    private final PartOfSpeechAttribute posAtt = this.addAttribute(PartOfSpeechAttribute.class);
    private final SemanticClassAttribute semanticClassAtt = this.addAttribute(SemanticClassAttribute.class);

    private final TokenizerOption option;
    private final PosAppender posAppender;
    private final Model model;
    private final Lattice lattice;
    private final Tagger tagger;

    /** Lower-cased text of the current input; {@code null} until read and after {@link #end()}. */
    private String document;
    /** Generator for the current input; {@code null} means the input has not been parsed yet. */
    private TokenGenerator generator;
    /** Tokens still to be emitted from the most recently fetched batch. */
    private Queue<Pos> tokensQueue;

    /**
     * Creates a tokenizer that uses {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.
     *
     * @param option   tokenizer settings (MeCab arguments, compound noun minimum length)
     * @param appender appender handed to the {@link TokenGenerator}
     */
    public MeCabKoTokenizer(TokenizerOption option, PosAppender appender) {
        this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, option, appender);
    }

    /**
     * Creates a tokenizer with an explicit attribute factory and sets up the
     * MeCab model, lattice and tagger from {@code option.mecabArgs}.
     *
     * @param factory  attribute factory passed to the {@link Tokenizer} superclass
     * @param option   tokenizer settings (MeCab arguments, compound noun minimum length)
     * @param appender appender handed to the {@link TokenGenerator}
     */
    public MeCabKoTokenizer(AttributeFactory factory, TokenizerOption option, PosAppender appender) {
        super(factory);
        this.posAppender = appender;
        this.option = option;
        this.model = MeCabLoader.getModel((String)option.mecabArgs);
        this.lattice = this.model.createLattice();
        this.tagger = this.model.createTagger();
    }

    /**
     * Advances to the next token, parsing the input first if that has not
     * happened since the last {@link #reset()}.
     *
     * @return {@code true} if a token was written to the attributes,
     *         {@code false} once the generator has no more token batches
     * @throws IOException if reading the input fails
     */
    public boolean incrementToken() throws IOException {
        this.clearAttributes();
        if (this.isBegin()) {
            this.document = this.getDocument();
            this.createTokenGenerator();
        }
        if (this.tokensQueue == null || this.tokensQueue.isEmpty()) {
            this.tokensQueue = this.generator.getNextEojeolTokens();
            if (this.tokensQueue == null) {
                return false;
            }
        }
        // NOTE(review): assumes a non-null queue returned by the generator is
        // never empty; otherwise poll() yields null here - confirm against
        // TokenGenerator.
        Pos token = this.tokensQueue.poll();
        this.setAttributes(token);
        return true;
    }

    /** Returns whether the current input still has to be read and parsed. */
    private boolean isBegin() {
        return this.generator == null;
    }

    /** Parses {@link #document} into the lattice and builds a generator over the result. */
    private void createTokenGenerator() {
        this.lattice.set_sentence(this.document);
        this.tagger.parse(this.lattice);
        this.generator = new TokenGenerator(this.posAppender, this.option.compoundNounMinLength, this.lattice.bos_node());
    }

    /** Copies the fields of {@code token} into this stream's attributes. */
    private void setAttributes(Pos token) {
        this.posIncrAtt.setPositionIncrement(token.getPositionIncr());
        this.posLenAtt.setPositionLength(token.getPositionLength());
        this.offsetAtt.setOffset(this.correctOffset(token.getStartOffset()), this.correctOffset(token.getEndOffset()));
        // Append the String directly instead of copying it into a temporary char[].
        this.charTermAtt.setEmpty().append(token.getTokenString());
        this.typeAtt.setType(token.getPosId().toString());
        this.posAtt.setPartOfSpeech(token.getMophemes());
        this.semanticClassAtt.setSemanticClass(token.getSemanticClass());
    }

    /**
     * Sets the final offset to the (corrected) length of the parsed document,
     * then releases the document and clears the lattice.
     *
     * <p>If no document has been read (for example {@code end()} is called
     * without a preceding {@link #incrementToken()}), the document length is
     * treated as 0 instead of failing with a {@link NullPointerException}.
     *
     * @throws IOException if the superclass implementation fails
     */
    public final void end() throws IOException {
        super.end();
        int documentLength = this.document == null ? 0 : this.document.length();
        int finalOffset = this.correctOffset(documentLength);
        this.offsetAtt.setOffset(finalOffset, finalOffset);
        this.document = null;
        this.lattice.clear();
    }

    /**
     * Discards the generator and any pending tokens so that the next
     * {@link #incrementToken()} call reads and parses the input again.
     *
     * @throws IOException if the superclass implementation fails
     */
    public final void reset() throws IOException {
        super.reset();
        this.generator = null;
        this.tokensQueue = null;
    }

    /**
     * Reads the whole input and returns it lower-cased.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend
     * on the JVM's default locale (e.g. Turkish dotless-i rules).
     *
     * @return the lower-cased input text
     * @throws IOException if reading the input fails
     */
    private String getDocument() throws IOException {
        StringBuilder text = new StringBuilder();
        char[] chunk = new char[READ_BUFFER_SIZE];
        int read;
        while ((read = this.input.read(chunk)) != -1) {
            text.append(chunk, 0, read);
        }
        return text.toString().toLowerCase(Locale.ROOT);
    }
}

