本文整理汇总了Python中nltk.Tree.pos方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.pos方法的具体用法?Python Tree.pos怎么用?Python Tree.pos使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.Tree的用法示例。
在下文中一共展示了Tree.pos方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import pos [as 别名]
def extract(rows, target_postags, target_structures, target_word=None, mongodb=True, VERBOSE=True):
    """Print the dependency relations attached to anchor words of parsed sentences.

    For every row the dependency parse is read with ``dependency.read`` and the
    constituency tree is built with ``Tree``.  Anchor words are collected by
    ``extract_anchors`` and, for each anchor, the relations selected by
    ``_filter_deps_by_rel`` are printed to stdout.

    Args:
        rows: iterable of sentence records.  When ``mongodb`` is true each
            record is indexed by the keys 'id', 'sent', 'pos', 'tree' and
            'dep'; otherwise it is unpacked as a 5-item sequence in that order.
        target_postags: sequence of POS-tag strings; printed in the header and
            forwarded to ``extract_anchors`` as ``targets``.
        target_structures: sequence of pairs; each is printed as
            "<first>:<second>" and the whole sequence is forwarded to
            ``_filter_deps_by_rel`` as ``targets``.
        target_word: optional word; when given, anchors that differ from it
            (case-insensitively) are skipped.
        mongodb: selects how each row is unpacked (see ``rows``).
        VERBOSE: when true, one status line is printed per anchor.

    Returns:
        None.  All output goes to stdout.
    """
    # Each print below emits a single pre-formatted string so the statement
    # parses the same way under Python 2 and Python 3.
    print('%s %s' % ('anchor pos tags:', color.render(', '.join(target_postags), 'lc')))
    structure_labels = [x[0] + ':' + str(x[1]) for x in target_structures]
    print('%s %s' % ('structures:', color.render(', '.join(structure_labels), 'lc')))
    print('=' * 60)

    # Initialised up front: the in-loop `+=` would otherwise raise
    # UnboundLocalError on the first row that has a dependency parse.
    total_word_cnt, anchor_word_cnt = 0, 0

    for entry in rows:
        ## extract rows
        if mongodb:
            fields = (entry['id'], entry['sent'], entry['pos'], entry['tree'], entry['dep'])
        else:
            fields = entry
        # Only the tree and dependency fields are used below; the row-level POS
        # field gets its own name so the anchor loop variable does not shadow it.
        sid, sent, row_pos, raw_tree, raw_dep = fields

        # read dependency and tree objs
        deps = dependency.read(raw_dep, return_type=dict)
        if not deps:
            continue
        tree = Tree(raw_tree)

        # collect certain dependency relations according to pre-specified pos tags
        ## cdeps: [(u'is', u'VBZ', 8), (u"'ve", u'VBP', 5), (u'do', u'VBP', 7), ...]
        cdeps = extract_anchors(deps, tree, targets=target_postags)

        # Running tallies; they are accumulated here but not reported by this function.
        total_word_cnt += len(tree.pos())
        anchor_word_cnt += len(cdeps)

        ## ('is', 'VBZ', 8) in [(u'is', u'VBZ', 8), (u"'ve", u'VBP', 5), ...]
        for (word, pos, idx) in cdeps:
            ## check if this is the target word if a target specified
            if target_word and word.lower() != target_word.lower():
                if VERBOSE:
                    print(color.render('(ancher[x]) ' + word + '-' + str(idx) + ' #' + pos, 'b'))
                continue

            ## extract dependency relations which match the target structures
            rdeps = _filter_deps_by_rel(deps, anchor=(word, idx), targets=target_structures)
            if not rdeps:
                continue

            ## got deps match the target structures
            if VERBOSE:
                print(color.render('(anchor[v]) ' + word + '-' + str(idx) + ' #' + pos, 'g'))
            # NOTE(review): the source indentation was lost; the relation listing is
            # assumed to be printed regardless of VERBOSE -- confirm.
            for (rel, (l, li), (r, ri)) in [_transform_to_tuple(dep) for dep in rdeps]:
                pair = '( ' + l + '-' + str(li) + ', ' + r + '-' + str(ri) + ' )'
                print('%s %s %s' % (' ', color.render(rel, 'r'), color.render(pair, 'y')))

        # NOTE(review): assumed to be a per-sentence separator -- confirm placement.
        print('=' * 60)
示例2: extract_and_save
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import pos [as 别名]
def extract_and_save(rows, target_postags, target_structures, det_db_cfg, target_word=None, mongodb=True):
    """Extract anchor-word dependency structures, store them in MongoDB and log statistics.

    For every row the dependency parse is read with ``dependency.read`` and the
    constituency tree is built with ``Tree``.  Each anchor returned by
    ``extract_anchors`` whose relations pass ``_filter_deps_by_rel`` is printed,
    lemmatized and inserted as one document into the destination collection.
    After all rows are processed, four counters are written to ``stat.log`` in
    the current working directory.

    Args:
        rows: iterable of sentence records.  When ``mongodb`` is true each
            record is indexed by the keys 'id', 'sent', 'pos', 'tree' and
            'dep'; otherwise it is unpacked as a 5-item sequence in that order.
        target_postags: sequence of POS-tag strings; printed in the header and
            forwarded to ``extract_anchors`` as ``targets``.
        target_structures: sequence of pairs; each is printed as
            "<first>:<second>" and the whole sequence is forwarded to
            ``_filter_deps_by_rel`` as ``targets``.
        det_db_cfg: mapping with the keys 'server_addr', 'db' and 'collection'
            that identify the destination collection.
        target_word: optional word; when given, anchors that differ from it
            (case-insensitively) are skipped.
        mongodb: selects how each row is unpacked (see ``rows``).

    Returns:
        None.
    """
    lmtzr = WordNetLemmatizer()

    # Each print below emits a single pre-formatted string so the statement
    # parses the same way under Python 2 and Python 3.
    print('%s %s' % ('anchor pos tags:', color.render(', '.join(target_postags), 'lc')))
    structure_labels = [x[0] + ':' + str(x[1]) for x in target_structures]
    print('%s %s' % ('structures:', color.render(', '.join(structure_labels), 'lc')))
    print('=' * 60)

    sent_cnt, total_word_cnt, anchor_word_cnt, anchor_word_structure_cnt = 0, 0, 0, 0

    mc = pymongo.Connection(det_db_cfg['server_addr'])
    try:
        db = mc[det_db_cfg['db']]
        co = db[det_db_cfg['collection']]

        for entry in rows:
            ## extract rows
            if mongodb:
                fields = (entry['id'], entry['sent'], entry['pos'], entry['tree'], entry['dep'])
            else:
                fields = entry
            # The row-level POS field gets its own name so the anchor loop
            # variable `pos` below does not shadow it.
            sid, sent, row_pos, raw_tree, raw_dep = fields

            # read dependency and tree objs
            deps = dependency.read(raw_dep, return_type=dict)
            if not deps:
                continue
            tree = Tree(raw_tree)

            # collect certain dependency relations according to pre-specified pos tags
            ## cdeps: [(u'is', u'VBZ', 8), (u"'ve", u'VBP', 5), (u'do', u'VBP', 7), ...]
            cdeps = extract_anchors(deps, tree, targets=target_postags)

            ## for stat
            sent_cnt += 1
            total_word_cnt += len(tree.pos())
            anchor_word_cnt += len(cdeps)

            ## ('is', 'VBZ', 8) in [(u'is', u'VBZ', 8), (u"'ve", u'VBP', 5), ...]
            for (word, pos, idx) in cdeps:
                ## check if this is the target word if a target specified
                if target_word and word.lower() != target_word.lower():
                    continue

                ## extract dependency relations which match the target structures
                rdeps = _filter_deps_by_rel(deps, anchor=(word, idx), targets=target_structures)
                if not rdeps:
                    continue

                ## got deps match the target structures
                print(color.render('(anchor[v]) ' + word + '-' + str(idx) + ' #' + pos, 'g'))
                for (rel, (l, li), (r, ri)) in [_transform_to_tuple(dep) for dep in rdeps]:
                    pair = '( ' + l + '-' + str(li) + ', ' + r + '-' + str(ri) + ' )'
                    print('%s %s %s' % (' ', color.render(rel, 'r'), color.render(pair, 'y')))

                lemma = lmtzr.lemmatize(word, _getWordNetPOS(pos))

                # generate mongo obj
                mongo_obj = {
                    'sid': sid,      # sentence id
                    'word': word,    # anchor word
                    'pos': pos,      # pos tag of word
                    'idx': idx,      # word index
                    'deps': rdeps,   # related deps
                    'lemma': lemma,  # word lemma
                }
                co.insert(mongo_obj)
                anchor_word_structure_cnt += 1
    finally:
        # Release the connection even when reading, parsing or inserting raises.
        mc.close()

    print('=' * 60)
    print('write statistic log')
    stats = (
        ('total sent', sent_cnt),
        ('total word', total_word_cnt),
        ('anchor word', anchor_word_cnt),
        ('anchor word with structures', anchor_word_structure_cnt),
    )
    with open('stat.log', 'w') as fw:
        for label, value in stats:
            fw.write(label + '\t' + str(value) + '\n')