本文整理汇总了Python中stanfordcorenlp.StanfordCoreNLP方法的典型用法代码示例。如果您正苦于以下问题:Python stanfordcorenlp.StanfordCoreNLP方法的具体用法?Python stanfordcorenlp.StanfordCoreNLP怎么用?Python stanfordcorenlp.StanfordCoreNLP使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类stanfordcorenlp
的用法示例。
在下文中一共展示了stanfordcorenlp.StanfordCoreNLP方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load_simpq
# 需要导入模块: import stanfordcorenlp [as 别名]
# 或者: from stanfordcorenlp import StanfordCoreNLP [as 别名]
def load_simpq(data_dir):
    """Load the SimpleQuestions train/valid/test splits, tokenize and
    dependency-parse every question with Stanford CoreNLP, and pickle the
    combined list to ``<data_dir>/simpQ.data.pkl``.

    Args:
        data_dir: Directory containing ``annotated_fb_data_{train,valid,test}.txt``.

    Returns:
        List of dicts with keys ``utterance``, ``targetValue`` (the (s, p, o)
        Freebase triple — different from other datasets), ``tokens`` and
        ``parse``.
    """
    LogInfo.logs('SimpQ initializing ... ')
    qa_list = []
    corenlp = StanfordCoreNLP(CORENLP_PATH)
    try:
        for Tvt in ('train', 'valid', 'test'):
            fp = '%s/annotated_fb_data_%s.txt' % (data_dir, Tvt)
            with codecs.open(fp, 'r', 'utf-8') as br:
                # Iterate the handle directly instead of readlines():
                # avoids materializing the whole split in memory.
                for line in br:
                    qa = {}
                    s, p, o, q = line.strip().split('\t')
                    s = _remove_simpq_header(s)
                    p = _remove_simpq_header(p)
                    o = _remove_simpq_header(o)
                    qa['utterance'] = q
                    qa['targetValue'] = (s, p, o)  # different from other datasets
                    qa['tokens'] = corenlp.word_tokenize(qa['utterance'])
                    qa['parse'] = corenlp.dependency_parse(qa['utterance'])
                    qa_list.append(qa)
                    if len(qa_list) % 1000 == 0:
                        LogInfo.logs('%d scanned.', len(qa_list))
    finally:
        # Shut down the spawned CoreNLP Java server; otherwise it leaks
        # past this function (original never closed it).
        corenlp.close()
    pickle_fp = '%s/simpQ.data.pkl' % data_dir
    with open(pickle_fp, 'wb') as bw:
        pickle.dump(qa_list, bw)
    LogInfo.logs('%d SimpleQuestions loaded.' % len(qa_list))
    return qa_list
示例2: load_reddit
# 需要导入模块: import stanfordcorenlp [as 别名]
# 或者: from stanfordcorenlp import StanfordCoreNLP [as 别名]
def load_reddit(data_dir, mode='train'):
    """Load a Reddit dialogue split, dependency-parse each post with Stanford
    CoreNLP, and pickle the result to ``<data_dir>/Reddit.<mode>.pkl``.

    Args:
        data_dir: Directory containing ``<mode>_v3.txt`` (one JSON object per line).
        mode: Split name, e.g. ``'train'`` (default).

    Returns:
        List of dialogue dicts with keys ``utterance``, ``tokens``, ``parse``,
        ``response``, ``corr_responses``, ``all_triples`` and ``all_entities``.
    """
    LogInfo.logs('Reddit initializing ... ')
    dg_list = []
    corenlp = StanfordCoreNLP(CORENLP_PATH)
    try:
        fp = '%s/%s_v3.txt' % (data_dir, mode)
        with open(fp, 'r') as br:
            for line in br:
                dg_line = json.loads(line)
                dialog = {'utterance': dg_line['post'].strip(),
                          'tokens': dg_line['post'].split(),
                          'parse': corenlp.dependency_parse(dg_line['post']),
                          'response': dg_line['response'].strip(),
                          'corr_responses': dg_line['corr_responses'],
                          'all_triples': dg_line['all_triples'],
                          'all_entities': dg_line['all_entities']
                          }
                dg_list.append(dialog)
                if len(dg_list) % 10000 == 0:
                    LogInfo.logs('%d scanned.', len(dg_list))
    finally:
        # Shut down the spawned CoreNLP Java server; the original leaked it.
        corenlp.close()
    pickle_fp = '%s/Reddit.%s.pkl' % (data_dir, mode)
    with open(pickle_fp, 'wb') as bw:
        pickle.dump(dg_list, bw)
    LogInfo.logs('%d Reddit saved in [%s].' % (len(dg_list), pickle_fp))
    return dg_list
示例3: load_stanford_core_nlp
# 需要导入模块: import stanfordcorenlp [as 别名]
# 或者: from stanfordcorenlp import StanfordCoreNLP [as 别名]
def load_stanford_core_nlp(path):
    """Load Stanford CoreNLP toolkit objects for Chinese and English.

    Args:
        path: String path to the Stanford CoreNLP installation directory.

    Returns:
        Tuple ``(zh_nlp, en_nlp)`` of StanfordCoreNLP instances.
    """
    # NOTE: in the original, this import preceded the triple-quoted string,
    # which silently demoted the string from a docstring to a no-op statement
    # (a docstring must be the first statement of the body). Import kept
    # lazy so the module loads without the stanfordcorenlp dependency.
    from stanfordcorenlp import StanfordCoreNLP
    zh_nlp = StanfordCoreNLP(path, lang='zh')
    en_nlp = StanfordCoreNLP(path, lang='en')
    return zh_nlp, en_nlp
示例4: test_args
# 需要导入模块: import stanfordcorenlp [as 别名]
# 或者: from stanfordcorenlp import StanfordCoreNLP [as 别名]
def test_args(self):
    """Constructor argument validation: a nonexistent path raises IOError;
    an unknown language code or a too-small memory spec raises ValueError.
    """
    corenlp_home = r'G:/JavaLibraries/stanford-corenlp-full-2016-10-31/'
    self.assertRaises(IOError, StanfordCoreNLP, '/abc')
    for bad_kwargs in ({'lang': 'abc'}, {'memory': '4m'}):
        self.assertRaises(ValueError, StanfordCoreNLP, corenlp_home, **bad_kwargs)
示例5: __init__
# 需要导入模块: import stanfordcorenlp [as 别名]
# 或者: from stanfordcorenlp import StanfordCoreNLP [as 别名]
def __init__(self, url_or_path, port = 9000):
    """Initialize a Stanford CoreNLP tokenizer.

    Args:
        url_or_path: URL of an already-running Stanford CoreNLP server, or
            the path to a local CoreNLP library directory, e.g.
            ``JavaLibraries/stanford-corenlp-full-2017-06-09/``. When a
            path is supplied, a CoreNLP server is started independently of
            this Python process.
        port: Port the CoreNLP server listens on (default 9000).
    """
    self.tokenizer = StanfordCoreNLP(url_or_path, port=port)
示例6: __init__
# 需要导入模块: import stanfordcorenlp [as 别名]
# 或者: from stanfordcorenlp import StanfordCoreNLP [as 别名]
def __init__(self, params):
    """A simple NLP helper backed by a Stanford CoreNLP server.

    Args:
        params (dict): Configuration parameters; must contain
            ``'corenlp_path'``, the path to the CoreNLP installation.
    """
    self.params = params
    self.corenlp = StanfordCoreNLP(self.params['corenlp_path'], quiet=False)
    # Warm up: annotating an empty string forces CoreNLP to pre-load the
    # coreference models so the first real request is not slow.
    warmup_props = {'annotators': 'coref', 'pipelineLanguage': 'en', 'ner.useSUTime': False}
    self.corenlp.annotate('', properties=warmup_props)