// NOTE: To make all custom-word tokenization take effect, use fine-grained (non-smart) segmentation.
package com.thinkgem.jeesite.common.utils;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import org.wltea.analyzer.dic.Dictionary;
/**
 * Static utility wrapper around the IK Analyzer (wltea) Chinese word segmenter
 * for Lucene. Provides dictionary initialization, runtime add/disable of
 * custom words, and fine-grained tokenization of a query string.
 *
 * <p>Call {@link #init()} once before using {@link #addCustomWords(Set)} or
 * {@link #disableCustomWords(Set)}.
 */
public class LuceneHelper {
	// Shared analyzer instance built in smart mode.
	// NOTE(review): init() below configures the dictionary with useSmart=false,
	// which differs from this analyzer's smart=true — confirm the mismatch is
	// intentional (fine-grained dictionary vs. smart analysis).
	static Analyzer analyzer = new IKAnalyzer5x(true);

	// IK dictionary singleton; remains null until init() is called.
	static Dictionary dictionary = null;

	// Utility class — not meant to be instantiated.
	private LuceneHelper() {
	}

	/**
	 * Initializes the IK dictionary with fine-grained (non-smart) segmentation
	 * so that custom words added later take full effect.
	 *
	 * <p>Synchronized because it mutates shared static state; safe to call
	 * more than once (Dictionary.initial is a one-time singleton setup).
	 */
	public static synchronized void init() {
		Configuration cfg = DefaultConfig.getInstance();
		// Fine-grained mode: without this, some custom words may not be emitted.
		cfg.setUseSmart(false);
		Dictionary.initial(cfg);
		dictionary = Dictionary.getSingleton();
	}

	/**
	 * Enables the given custom words in the in-memory dictionary.
	 *
	 * @param words words to add; must not be null
	 * @throws IllegalStateException if {@link #init()} has not been called yet
	 */
	public static void addCustomWords(Set<String> words) {
		requireInitialized().addWords(words);
	}

	/**
	 * Disables the given custom words in the in-memory dictionary.
	 *
	 * @param words words to disable; must not be null
	 * @throws IllegalStateException if {@link #init()} has not been called yet
	 */
	public static void disableCustomWords(Set<String> words) {
		requireInitialized().disableWords(words);
	}

	/**
	 * Returns the dictionary singleton, failing fast with a descriptive error
	 * instead of the bare NullPointerException the unguarded field access
	 * would have produced.
	 */
	private static Dictionary requireInitialized() {
		Dictionary dict = dictionary;
		if (dict == null) {
			throw new IllegalStateException("LuceneHelper.init() must be called before using the dictionary");
		}
		return dict;
	}

	/**
	 * Segments the given query string into words using fine-grained
	 * (non-smart) IK segmentation.
	 *
	 * @param query text to segment; leading/trailing whitespace is stripped.
	 *              A null query yields an empty list instead of an NPE.
	 * @return the lexeme texts in segmentation order; empty on error
	 */
	public static List<String> queryWords(String query) {
		List<String> list = new ArrayList<String>();
		if (query == null) {
			return list;
		}
		// try-with-resources: the original never closed the reader.
		try (StringReader input = new StringReader(query.trim())) {
			IKSegmenter ikSeg = new IKSegmenter(input, false);
			for (Lexeme lexeme = ikSeg.next(); lexeme != null; lexeme = ikSeg.next()) {
				list.add(lexeme.getLexemeText());
			}
		} catch (Exception e) {
			// Best-effort behavior preserved from the original: log and return
			// whatever was segmented so far. No logger is available in this file.
			e.printStackTrace();
		}
		return list;
	}

	/** Demo: segment a word, register it as a custom word, then disable it. */
	public static void main(String[] args) {
		init();
		System.out.println(queryWords("养老证"));
		Set<String> words = new HashSet<String>();
		words.add("养老证");
		addCustomWords(words);
		disableCustomWords(words);
		System.out.println(queryWords("养老证"));
	}
}