本文整理汇总了 Java 中 com.hankcs.hanlp.corpus.io.IOUtil.readLineList 方法的典型用法代码示例。如果您正苦于以下问题:Java IOUtil.readLineList 方法的具体用法?Java IOUtil.readLineList 怎么用?Java IOUtil.readLineList 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 com.hankcs.hanlp.corpus.io.IOUtil 的用法示例。
在下文中一共展示了IOUtil.readLineList方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testMakeCell
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Builds the nationwide place-name custom dictionary from raw word lists.
 * <p>
 * Every entry returned by {@code IOUtil.readLineList} for each listed file has all
 * whitespace removed. An entry is kept only if {@code TextUtility.isAllChinese} accepts it
 * and neither {@code CoreDictionary} nor {@code CustomDictionary} already contains it.
 * The surviving entries are collected in a {@link TreeSet} (sorted, de-duplicated) and
 * written out with {@code IOUtil.saveCollectionToTxt}.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testMakeCell() throws Exception
{
    final String baseDir = "D:\\JavaProjects\\SougouDownload\\data\\";
    final String[] fileNames = {"最详细的全国地名大全.txt"};
    Set<String> collected = new TreeSet<String>();
    for (String fileName : fileNames)
    {
        for (String rawLine : IOUtil.readLineList(baseDir + fileName))
        {
            String candidate = rawLine.replaceAll("\\s", "");
            // Same check order as before: script filter first, then the two dictionary lookups.
            boolean isNewChineseWord = TextUtility.isAllChinese(candidate)
                    && !CoreDictionary.contains(candidate)
                    && !CustomDictionary.contains(candidate);
            if (isNewChineseWord)
            {
                collected.add(candidate);
            }
        }
    }
    IOUtil.saveCollectionToTxt(collected, "data/dictionary/custom/全国地名大全.txt");
}
示例2: testMakeShanghaiCell
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Builds the Shanghai place-name custom dictionary from several raw word lists.
 * <p>
 * Each entry returned by {@code IOUtil.readLineList} has all whitespace removed and is kept
 * unless {@code CoreDictionary} or {@code CustomDictionary} already contains it. The result
 * is a sorted, de-duplicated {@link TreeSet} saved with {@code IOUtil.saveCollectionToTxt}.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testMakeShanghaiCell() throws Exception
{
    final String baseDir = "D:\\JavaProjects\\SougouDownload\\data\\";
    final String[] fileNames = {"上海地名街道名.txt", "上海公交线路名", "上海公交站点.txt", "上海市道路名.txt", "上海市地铁站名.txt"};
    Set<String> collected = new TreeSet<String>();
    for (String fileName : fileNames)
    {
        for (String rawLine : IOUtil.readLineList(baseDir + fileName))
        {
            String candidate = rawLine.replaceAll("\\s", "");
            boolean alreadyKnown = CoreDictionary.contains(candidate) || CustomDictionary.contains(candidate);
            if (!alreadyKnown)
            {
                collected.add(candidate);
            }
        }
    }
    IOUtil.saveCollectionToTxt(collected, "data/dictionary/custom/上海地名.txt");
}
示例3: testCombineOuterDictionary
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Merges several foreign-name word lists into a single dictionary file.
 * <p>
 * Lowercase ASCII letters ({@code [a-z]}) are stripped from each entry returned by
 * {@code IOUtil.readLineList}; entries already present in {@code CoreDictionary} or
 * {@code CustomDictionary} are dropped. The remainder is stored sorted and de-duplicated
 * in {@code data/dictionary/person/nrf.txt}.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testCombineOuterDictionary() throws Exception
{
    final String baseDir = "D:\\JavaProjects\\SougouDownload\\data\\";
    final String[] fileNames = {"常用外国人名.txt", "外国人名", "外国姓名大全.txt", "外国诗人名.txt", "英语姓名词典.txt", "俄罗斯人名.txt"};
    Set<String> collected = new TreeSet<String>();
    for (String fileName : fileNames)
    {
        for (String rawLine : IOUtil.readLineList(baseDir + fileName))
        {
            String candidate = rawLine.replaceAll("[a-z]", "");
            boolean alreadyKnown = CoreDictionary.contains(candidate) || CustomDictionary.contains(candidate);
            if (!alreadyKnown)
            {
                collected.add(candidate);
            }
        }
    }
    IOUtil.saveCollectionToTxt(collected, "data/dictionary/person/nrf.txt");
}
示例4: testSpiltToChar
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Produces the set of individual characters used in transliterated names.
 * <p>
 * The entries of {@code data/dictionary/person/nrf.txt} plus a hard-coded string of common
 * transliteration characters are stripped of whitespace and split into single UTF-16 chars.
 * The distinct chars are saved, sorted, to {@code data/dictionary/person/音译用字.txt}.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testSpiltToChar() throws Exception
{
    // Hard-coded seed characters; duplicates across the segments are harmless because the
    // result is collected into a set.
    String seedChars = "·-—阿埃艾爱安昂敖奥澳笆芭巴白拜班邦保堡鲍北贝本比毕彼别波玻博勃伯泊卜布才采仓查差柴彻川茨慈次达大戴代丹旦但当道德得登迪狄蒂帝丁东杜敦多额俄厄鄂恩尔伐法范菲芬费佛夫福弗甫噶盖干冈哥戈革葛格各根古瓜哈海罕翰汗汉豪合河赫亨侯呼胡华霍基吉及加贾坚简杰金京久居君喀卡凯坎康考柯科可克肯库奎拉喇莱来兰郎朗劳勒雷累楞黎理李里莉丽历利立力连廉良列烈林隆卢虏鲁路伦仑罗洛玛马买麦迈曼茅茂梅门蒙盟米蜜密敏明摩莫墨默姆木穆那娜纳乃奈南内尼年涅宁纽努诺欧帕潘畔庞培佩彭皮平泼普其契恰强乔切钦沁泉让热荣肉儒瑞若萨塞赛桑瑟森莎沙山善绍舍圣施诗石什史士守斯司丝苏素索塔泰坦汤唐陶特提汀图土吐托陀瓦万王旺威韦维魏温文翁沃乌吾武伍西锡希喜夏相香歇谢辛新牙雅亚彦尧叶依伊衣宜义因音英雍尤于约宰泽增詹珍治中仲朱诸卓孜祖佐伽娅尕腓滕济嘉津赖莲琳律略慕妮聂裴浦奇齐琴茹珊卫欣逊札哲智兹芙汶迦珀琪梵斐胥黛" +
"·-阿安奥巴比彼波布察茨大德得丁杜尔法夫伏甫盖格哈基加坚捷金卡科可克库拉莱兰勒雷里历利连列卢鲁罗洛马梅蒙米姆娜涅宁诺帕泼普奇齐乔切日萨色山申什斯索塔坦特托娃维文乌西希谢亚耶叶依伊以扎佐柴达登蒂戈果海赫华霍吉季津柯理琳玛曼穆纳尼契钦丘桑沙舍泰图瓦万雅卓兹" +
"-·—丁万丘东丝中丹丽乃久义乌乔买于亚亨京什仑仓代以仲伊伍伏伐伦伯伽但佐佛佩依侯俄保儒克兰其兹内冈凯切列利别力加努劳勃勒北华卓南博卜卡卢卫厄历及古可史叶司各合吉吐君吾呼哈哥哲唐喀善喇喜嘉噶因图土圣坎坚坦埃培基堡塔塞增墨士夏多大夫奇奈奎契奥妮姆威娃娅娜孜季宁守安宜宰密察尔尕尤尧尼居山川差巴布希帕帝干平年库庞康廉弗强当彦彭彻彼律得德恩恰慈慕戈戴才扎托拉拜捷提摩敏敖敦文斐斯新施日旦旺昂明普智曼朗木本札朱李杜来杰林果查柯柴根格桑梅梵森楞次欣欧歇武比毕汀汉汗汤汶沁沃沙河治泉泊法波泰泼泽洛津济浦海涅温滕潘澳烈热爱牙特狄王玛玻珀珊珍班理琪琳琴瑞瑟瓜瓦甫申畔略登白皮盖盟相石祖福科穆立笆简米素索累约纳纽绍维罕罗翁翰考耶聂肉肯胡胥腓舍良色艾芙芬芭苏若英茂范茅茨茹荣莉莎莫莱莲菲萨葛蒂蒙虏蜜衣裴西詹让诗诸诺谢豪贝费贾赖赛赫路辛达迈连迦迪逊道那邦郎鄂采里金钦锡门阿陀陶隆雅雍雷霍革韦音额香马魏鲁鲍麦黎默黛齐" +
"·—阿埃艾爱安昂敖奥澳笆芭巴白拜班邦保堡鲍北贝本比毕彼别波玻博勃伯泊卜布才采仓查差柴彻川茨慈次达大戴代丹旦但当道德得的登迪狄蒂帝丁东杜敦多额俄厄鄂恩尔伐法范菲芬费佛夫福弗甫噶盖干冈哥戈革葛格各根古瓜哈海罕翰汗汉豪合河赫亨侯呼胡华霍基吉及加贾坚简杰金京久居君喀卡凯坎康考柯科可克肯库奎拉喇莱来兰郎朗劳勒雷累楞黎理李里莉丽历利立力连廉良列烈林隆卢虏鲁路伦仑罗洛玛马买麦迈曼茅茂梅门蒙盟米蜜密敏明摩莫墨默姆木穆那娜纳乃奈南内尼年涅宁纽努诺欧帕潘畔庞培佩彭皮平泼普其契恰强乔切钦沁泉让热荣肉儒瑞若萨塞赛桑瑟森莎沙山善绍舍圣施诗石什史士守斯司丝苏素索塔泰坦汤唐陶特提汀图土吐托陀瓦万王旺威韦维魏温文翁沃乌吾武伍西锡希喜夏相香歇谢辛新牙雅亚彦尧叶依伊衣宜义因音英雍尤于约宰泽增詹珍治中仲朱诸卓孜祖佐伽娅尕腓滕济嘉津赖莲琳律略慕妮聂裴浦奇齐琴茹珊卫欣逊札哲智兹芙汶迦珀琪梵斐胥黛" +
"·阿安奥巴比彼波布察茨大德得丁杜尔法夫伏甫盖格哈基加坚捷金卡科可克库拉莱兰勒雷里历利连列卢鲁罗洛马梅蒙米姆娜涅宁诺帕泼普奇齐乔切日萨色山申什斯索塔坦特托娃维文乌西希谢亚耶叶依伊以扎佐柴达登蒂戈果海赫华霍吉季津柯理琳玛曼穆纳尼契钦丘桑沙舍泰图瓦万雅卓兹";
    LinkedList<String> sources = IOUtil.readLineList("data/dictionary/person/nrf.txt");
    sources.add(seedChars);
    Set<String> charSet = new TreeSet<String>();
    for (String entry : sources)
    {
        String compact = entry.replaceAll("\\s", "");
        // One set element per UTF-16 char of the whitespace-free entry.
        for (int i = 0; i < compact.length(); ++i)
        {
            charSet.add(String.valueOf(compact.charAt(i)));
        }
    }
    IOUtil.saveCollectionToTxt(charSet, "data/dictionary/person/音译用字.txt");
}
示例5: testMakeDictionary
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Extracts Chinese names from a raw English-name dictionary file.
 * <p>
 * Each line is matched against a pattern of the form: leading ASCII letters, an optional
 * bracketed section, a run of CJK characters (capture group 2), then a closing punctuation
 * mark. Group 2 of every matching line is collected into a sorted set and saved; the number
 * of matching lines (not the number of distinct names) is printed.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testMakeDictionary() throws Exception
{
    Pattern entryPattern = Pattern.compile("^[a-zA-Z]+ *(\\[.*?])? *([\\u4E00-\\u9FA5]+) ?[::。]");
    Set<String> names = new TreeSet<String>();
    int matchedLines = 0;
    for (String line : IOUtil.readLineList("D:\\Doc\\语料库\\英语姓名词典.txt"))
    {
        Matcher entry = entryPattern.matcher(line);
        if (!entry.find())
        {
            continue;
        }
        names.add(entry.group(2));
        matchedLines++;
    }
    System.out.println("一共找到" + matchedLines + "条");
    IOUtil.saveCollectionToTxt(names, "data/dictionary/person/英语姓名词典.txt");
}
示例6: testConvertUnicodeTable
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Converts a "hex code point -> pinyin readings" table into a character-keyed dictionary.
 * <p>
 * Lines starting with {@code #} are skipped. Every other line is split on single whitespace
 * characters: the first field is parsed as a hexadecimal code point and the remaining fields
 * are joined with commas as the value. The result is saved through {@code StringDictionary}.
 * <p>
 * Lines that do not yield at least two fields are ignored. This includes whitespace-only
 * lines, for which {@link String#split(String)} returns an empty array. Code points above
 * U+FFFF are encoded as a surrogate pair instead of being truncated to a single
 * {@code char}.
 *
 * @throws NumberFormatException    if the first field of a data line is not a hexadecimal number
 * @throws IllegalArgumentException if the parsed number is not a valid Unicode code point
 * @throws Exception                propagated from the underlying I/O helpers
 */
public void testConvertUnicodeTable() throws Exception
{
    StringDictionary dictionary = new StringDictionary("=");
    for (String line : IOUtil.readLineList("D:\\Doc\\语料库\\Uni2Pinyin.txt"))
    {
        if (line.startsWith("#")) continue;
        String[] argArray = line.split("\\s");
        // split() drops trailing empty strings, so a whitespace-only line gives length 0;
        // checking "== 1" alone would let it through to argArray[1].
        if (argArray.length < 2) continue;
        StringBuilder py = new StringBuilder(argArray[1]);
        for (int i = 2; i < argArray.length; ++i)
        {
            py.append(',').append(argArray[i]);
        }
        int codePoint = Integer.parseInt(argArray[0], 16);
        // Character.toChars keeps supplementary code points intact; a (char) cast would
        // silently keep only the low 16 bits.
        String character = new String(Character.toChars(codePoint));
        dictionary.add(character, py.toString());
    }
    dictionary.save("D:\\Doc\\语料库\\Hanzi2Pinyin.txt");
}
示例7: testSegment
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Tests building and matching. The first whitespace-separated field of every line of
 * {@code data/dictionary/CoreNatureDictionary.txt} is used as a keyword, the text file
 * 《我的团长我的团》.txt is used as test data, and every reported hit is checked against
 * the substring of the sentence it claims to cover. Build time and total matching time
 * are printed.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testSegment() throws Exception
{
    // Each keyword maps to itself so a hit's value can be compared with the matched text.
    TreeMap<String, String> keywords = new TreeMap<String, String>();
    IOUtil.LineIterator dictLines = new IOUtil.LineIterator("data/dictionary/CoreNatureDictionary.txt");
    while (dictLines.hasNext())
    {
        String keyword = dictLines.next().split("\\s")[0];
        keywords.put(keyword, keyword);
    }

    Trie trie = new Trie();
    trie.addAllKeyword(keywords.keySet());

    AhoCorasickDoubleArrayTrie<String> automaton = new AhoCorasickDoubleArrayTrie<String>();
    long startedAt = System.currentTimeMillis();
    automaton.build(keywords);
    System.out.println("构建耗时:" + (System.currentTimeMillis() - startedAt) + " ms");

    LinkedList<String> sentences = IOUtil.readLineList("D:\\Doc\\语料库\\《我的团长我的团》.txt");
    startedAt = System.currentTimeMillis();
    for (String sentence : sentences)
    {
        List<AhoCorasickDoubleArrayTrie<String>.Hit<String>> hits = automaton.parseText(sentence);
        for (AhoCorasickDoubleArrayTrie<String>.Hit<String> hit : hits)
        {
            // The text between begin and end must equal the value stored for the keyword.
            String matchedText = sentence.substring(hit.begin, hit.end);
            assertEquals(matchedText, hit.value);
        }
    }
    System.out.printf("%d ms\n", System.currentTimeMillis() - startedAt);
}
示例8: testMakeKaiFangDictionary
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Merges pinyin readings from an external word list into the local pinyin dictionary.
 * <p>
 * Each input line is split on single whitespace characters. The second field is the word
 * and fields three onward are tone-numbered pinyin syllables, e.g.
 * {@code 愛面子 爱面子 ai4 mian4 zi5}. A line is used only when:
 * <ul>
 *   <li>it has at least three fields (blank or truncated lines are skipped; sizing the
 *       list with {@code args.length - 2} would otherwise throw on a negative capacity);</li>
 *   <li>every syllable converts to a non-null {@code Pinyin} and the syllable count equals
 *       the word length;</li>
 *   <li>the reading differs from what {@code PinyinDictionary.convertToPinyin} already
 *       yields for the word.</li>
 * </ul>
 * Accepted entries are printed, combined into the dictionary loaded from
 * {@code HanLP.Config.PinyinDictionaryPath}, and saved back to that same path.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testMakeKaiFangDictionary() throws Exception
{
    // data/dictionary/tc/
    LinkedList<String> lineList = IOUtil.readLineList("D:\\Doc\\语料库\\cidian_zhzh-kfcd-2013122.txt");
    StringDictionary dictionaryKFTC = new StringDictionary();
    for (String line : lineList)
    {
        String[] args = line.split("\\s");
        // Need the word (args[1]) plus at least one syllable. Two-field lines were already
        // rejected by the size check below, so skipping them here changes nothing for them.
        if (args.length < 3)
        {
            continue;
        }
        // Example line: 愛面子 爱面子 ai4 mian4 zi5
        List<Pinyin> pinyinList = new ArrayList<Pinyin>(args.length - 2);
        for (int i = 2; i < args.length; ++i)
        {
            pinyinList.add(TonePinyinString2PinyinConverter.convertFromToneNumber(args[i]));
        }
        if (hasNull(pinyinList) || pinyinList.size() != args[1].length())
        {
            // Unconvertible syllable or syllable/character count mismatch: ignore the line.
            continue;
        }
        // Only entries whose reading differs from the local dictionary are worth adding.
        List<Pinyin> localPinyinList = PinyinDictionary.convertToPinyin(args[1]);
        if (!isEqual(pinyinList, localPinyinList))
        {
            System.out.println("接受 " + args[1] + "=" + pinyinList + "!=" + localPinyinList);
            dictionaryKFTC.add(args[1], convertList2String(pinyinList));
        }
    }
    StringDictionary dictionaryLocal = new StringDictionary();
    dictionaryLocal.load(HanLP.Config.PinyinDictionaryPath);
    dictionaryLocal.combine(dictionaryKFTC);
    dictionaryLocal.save(HanLP.Config.PinyinDictionaryPath);
}
示例9: testFixDiMing
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Cleans the nationwide place-name dictionary in place.
 * <p>
 * All entries are read first, those rejected by {@code TextUtility.isAllChinese} are
 * discarded, and the remainder is written back, sorted and de-duplicated, to the same file.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testFixDiMing() throws Exception
{
    final String dictionaryPath = "data/dictionary/custom/全国地名大全.txt";
    Set<String> kept = new TreeSet<String>();
    for (String entry : IOUtil.readLineList(dictionaryPath))
    {
        if (TextUtility.isAllChinese(entry))
        {
            kept.add(entry);
        }
    }
    IOUtil.saveCollectionToTxt(kept, dictionaryPath);
}
示例10: testCombine
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Builds the Japanese personal-name dictionary from several raw word lists.
 * <p>
 * Step 1: lowercase ASCII letters, CR and LF are stripped from every entry; entries already
 * in {@code CoreDictionary} or {@code CustomDictionary} are dropped.
 * <p>
 * Step 2: a name survives only if it is 3 to 6 chars long and its first one, two or three
 * chars form an entry of {@code data/dictionary/person/日本姓氏.txt}.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testCombine() throws Exception
{
    final String baseDir = "D:\\JavaProjects\\SougouDownload\\data\\";
    final String[] fileNames = {"日本名人大合集.txt", "日剧电影动漫和日本明星.txt", "日本女优.txt", "日本AV女优(A片)EXTEND版.txt", "日本女优大全.txt"};
    Set<String> candidates = new TreeSet<String>();
    for (String fileName : fileNames)
    {
        for (String rawLine : IOUtil.readLineList(baseDir + fileName))
        {
            String candidate = rawLine.replaceAll("[a-z\r\n]", "");
            boolean alreadyKnown = CoreDictionary.contains(candidate) || CustomDictionary.contains(candidate);
            if (!alreadyKnown)
            {
                candidates.add(candidate);
            }
        }
    }

    TreeSet<String> surnames = new TreeSet<String>();
    surnames.addAll(IOUtil.readLineList("data/dictionary/person/日本姓氏.txt"));

    for (Iterator<String> it = candidates.iterator(); it.hasNext(); )
    {
        String name = it.next();
        int length = name.length();
        // The length test runs first, so substring(0, 3) is only reached when length >= 3.
        boolean plausible = length <= 6 && length >= 3
                && (surnames.contains(name.substring(0, 1))
                    || surnames.contains(name.substring(0, 2))
                    || surnames.contains(name.substring(0, 3)));
        if (!plausible)
        {
            it.remove();
        }
    }
    IOUtil.saveCollectionToTxt(candidates, "data/dictionary/person/日本人名.txt");
}
示例11: testImport
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Imports a Japanese surname list, converting it to simplified Chinese.
 * <p>
 * Each entry is expanded with {@link Arrays#toString(char[])} (chars separated by
 * {@code ", "} inside brackets) before being passed to
 * {@code HanLP.convertToSimplifiedChinese}; the brackets, commas and spaces are stripped
 * afterwards. NOTE(review): the char-by-char expansion presumably forces a per-character
 * conversion rather than a phrase-level one; confirm against the converter. Entries that
 * fail {@code TextUtility.isAllChinese} are dropped; the rest are saved sorted.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testImport() throws Exception
{
    TreeSet<String> surnames = new TreeSet<String>();
    for (String rawName : IOUtil.readLineList("D:\\Doc\\语料库\\corpus-master\\日本姓氏.txt"))
    {
        String separatedChars = Arrays.toString(rawName.toCharArray());
        String simplified = HanLP.convertToSimplifiedChinese(separatedChars)
                .replaceAll("[\\[\\], ]", "");
        if (TextUtility.isAllChinese(simplified))
        {
            surnames.add(simplified);
        }
    }
    IOUtil.saveCollectionToTxt(surnames, "data/dictionary/person/日本姓氏.txt");
}
示例12: testHeadNRF
import com.hankcs.hanlp.corpus.io.IOUtil; //导入方法依赖的package包/类
/**
 * Reports entries of {@code data/dictionary/person/nrf.txt} whose first segmented term is
 * not tagged {@code Nature.nrf}.
 * <p>
 * Segmentation uses a {@code DijkstraSegment} with translated-name recognition switched
 * off. Each offending entry is printed together with its full term list.
 *
 * @throws Exception propagated from the underlying I/O helpers
 */
public void testHeadNRF() throws Exception
{
    DijkstraSegment segmenter = new DijkstraSegment();
    segmenter.enableTranslatedNameRecognize(false);
    for (String name : IOUtil.readLineList("data/dictionary/person/nrf.txt"))
    {
        List<Term> terms = segmenter.seg(name);
        boolean headIsNrf = terms.get(0).nature == Nature.nrf;
        if (!headIsNrf)
        {
            System.out.println(name + " : " + terms);
        }
    }
}