当前位置: 首页>>代码示例>>Python>>正文


Python Tree.pos方法代码示例

本文整理汇总了Python中nltk.Tree.pos方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.pos方法的具体用法?Python Tree.pos怎么用?Python Tree.pos使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.Tree的用法示例。


在下文中一共展示了Tree.pos方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: extract

# 需要导入模块: from nltk import Tree [as 别名]
# 注意: pos 是 Tree 的实例方法, 无法通过 import 单独导入; 请在 Tree 对象上调用, 例如 tree.pos()
def extract(rows, target_postags, target_structures, target_word=None, mongodb=True, VERBOSE=True):
	"""Print the dependency relations attached to anchor words of parsed sentences.

	For every row, the dependency data and the parse tree are loaded, anchor
	words are collected with ``extract_anchors`` and, for each anchor, the
	relations selected by ``_filter_deps_by_rel`` are printed.

	Args:
		rows: iterable of sentence records. When ``mongodb`` is true each
			record is indexed by the keys ``'id'``, ``'sent'``, ``'pos'``,
			``'tree'`` and ``'dep'``; otherwise each record is unpacked as
			the 5-sequence ``(sid, sent, pos, raw_tree, raw_dep)``.
		target_postags: iterable of POS-tag strings, forwarded to
			``extract_anchors`` as ``targets``.
		target_structures: sequence of pairs; element 0 is a string and
			element 1 is rendered with ``str``. Forwarded to
			``_filter_deps_by_rel`` as ``targets``.
		target_word: optional word; when given, only anchors equal to it
			(case-insensitively) are processed.
		mongodb: selects how a row is decoded (see ``rows``).
		VERBOSE: when true, also print one line per accepted/rejected anchor.

	Returns:
		None. All output goes to stdout.
	"""
	# Single-argument print(...) emits the same text as the former
	# multi-argument print statements and parses on both Python 2 and 3.
	print('%s %s' % ('anchor pos tags:', color.render(', '.join(target_postags), 'lc')))
	structures = ', '.join([x[0] + ':' + str(x[1]) for x in target_structures])
	print('%s %s' % ('structures:', color.render(structures, 'lc')))
	print('=' * 60)

	# These counters were previously incremented without being initialised,
	# which raised UnboundLocalError on the first row that had dependencies.
	total_word_cnt, anchor_word_cnt = 0, 0

	# Loop-invariant: lower-case the requested word once.
	wanted = target_word.lower() if target_word else None

	for entry in rows:

		## extract rows (only the tree and the dependencies are needed here)
		if mongodb:
			raw_tree, raw_dep = entry['tree'], entry['dep']
		else:
			_sid, _sent, _pos, raw_tree, raw_dep = entry

		# read dependency and tree objs; rows without dependencies are skipped
		deps = dependency.read(raw_dep, return_type=dict)
		if not deps:
			continue
		tree = Tree(raw_tree)

		# collect anchor words according to the pre-specified pos tags
		## cdeps looks like: [(u'is', u'VBZ', 8), (u"'ve", u'VBP', 5), ...]
		cdeps = extract_anchors(deps, tree, targets=target_postags)

		total_word_cnt += len(tree.pos())
		anchor_word_cnt += len(cdeps)

		for (word, tag, idx) in cdeps:
			label = word + '-' + str(idx) + ' #' + tag

			## skip anchors that are not the target word, if one was specified
			if wanted and word.lower() != wanted:
				if VERBOSE:
					print(color.render('(ancher[x]) ' + label, 'b'))
				continue

			## extract dependency relations which match the target structures
			rdeps = _filter_deps_by_rel(deps, anchor=(word, idx), targets=target_structures)
			if not rdeps:
				continue

			if VERBOSE:
				print(color.render('(anchor[v]) ' + label, 'g'))

			# relation lines are printed regardless of VERBOSE
			for (rel, (left, li), (right, ri)) in [_transform_to_tuple(dep) for dep in rdeps]:
				pair = '( ' + left + '-' + str(li) + ', ' + right + '-' + str(ri) + ' )'
				print('%s %s %s' % ('  ', color.render(rel, 'r'), color.render(pair, 'y')))

	print('=' * 60)
开发者ID:maxis1718,项目名称:glance-modules,代码行数:49,代码来源:usage.extract.py

示例2: extract_and_save

# 需要导入模块: from nltk import Tree [as 别名]
# 注意: pos 是 Tree 的实例方法, 无法通过 import 单独导入; 请在 Tree 对象上调用, 例如 tree.pos()
def extract_and_save(rows, target_postags, target_structures, det_db_cfg, target_word=None, mongodb=True):
	"""Extract anchor words with matching dependency structures and store them in MongoDB.

	For every anchor whose relations match ``target_structures`` one document
	with the keys ``sid``, ``word``, ``pos``, ``idx``, ``deps`` and ``lemma``
	is inserted into the destination collection. After all rows are processed,
	summary counts are written to ``stat.log`` in the working directory.

	Args:
		rows: iterable of sentence records. When ``mongodb`` is true each
			record is indexed by the keys ``'id'``, ``'sent'``, ``'pos'``,
			``'tree'`` and ``'dep'``; otherwise each record is unpacked as
			the 5-sequence ``(sid, sent, pos, raw_tree, raw_dep)``.
		target_postags: iterable of POS-tag strings, forwarded to
			``extract_anchors`` as ``targets``.
		target_structures: sequence of pairs; element 0 is a string and
			element 1 is rendered with ``str``. Forwarded to
			``_filter_deps_by_rel`` as ``targets``.
		det_db_cfg: mapping with the keys ``'server_addr'``, ``'db'`` and
			``'collection'`` describing the destination collection.
		target_word: optional word; when given, only anchors equal to it
			(case-insensitively) are processed.
		mongodb: selects how a row is decoded (see ``rows``).

	Returns:
		None.
	"""
	lmtzr = WordNetLemmatizer()

	# Single-argument print(...) emits the same text as the former
	# multi-argument print statements and parses on both Python 2 and 3.
	print('%s %s' % ('anchor pos tags:', color.render(', '.join(target_postags), 'lc')))
	structures = ', '.join([x[0] + ':' + str(x[1]) for x in target_structures])
	print('%s %s' % ('structures:', color.render(structures, 'lc')))
	print('=' * 60)

	sent_cnt, total_word_cnt, anchor_word_cnt, anchor_word_structure_cnt = 0, 0, 0, 0

	# Loop-invariant: lower-case the requested word once.
	wanted = target_word.lower() if target_word else None

	# NOTE(review): pymongo.Connection and Collection.insert are legacy APIs
	# that newer PyMongo releases removed -- confirm the installed version
	# before upgrading the driver.
	mc = pymongo.Connection(det_db_cfg['server_addr'])
	try:
		co = mc[det_db_cfg['db']][det_db_cfg['collection']]

		for entry in rows:

			## extract rows
			if mongodb:
				sid, raw_tree, raw_dep = entry['id'], entry['tree'], entry['dep']
			else:
				sid, _sent, _pos, raw_tree, raw_dep = entry

			# read dependency and tree objs; rows without dependencies are skipped
			deps = dependency.read(raw_dep, return_type=dict)
			if not deps:
				continue
			tree = Tree(raw_tree)

			# collect anchor words according to the pre-specified pos tags
			## cdeps looks like: [(u'is', u'VBZ', 8), (u"'ve", u'VBP', 5), ...]
			cdeps = extract_anchors(deps, tree, targets=target_postags)

			## for stat
			sent_cnt += 1
			total_word_cnt += len(tree.pos())
			anchor_word_cnt += len(cdeps)

			for (word, tag, idx) in cdeps:

				## skip anchors that are not the target word, if one was specified
				if wanted and word.lower() != wanted:
					continue

				## extract dependency relations which match the target structures
				rdeps = _filter_deps_by_rel(deps, anchor=(word, idx), targets=target_structures)
				if not rdeps:
					continue

				print(color.render('(anchor[v]) ' + word + '-' + str(idx) + ' #' + tag, 'g'))

				for (rel, (left, li), (right, ri)) in [_transform_to_tuple(dep) for dep in rdeps]:
					pair = '( ' + left + '-' + str(li) + ', ' + right + '-' + str(ri) + ' )'
					print('%s %s %s' % ('  ', color.render(rel, 'r'), color.render(pair, 'y')))

				lemma = lmtzr.lemmatize(word, _getWordNetPOS(tag))

				# generate mongo obj
				co.insert({
					'sid': sid,      # sentence id
					'word': word,    # anchor word
					'pos': tag,      # pos tag of word
					'idx': idx,      # word index
					'deps': rdeps,   # related deps
					'lemma': lemma,  # word lemma
				})

				anchor_word_structure_cnt += 1
	finally:
		# Release the connection even when parsing or insertion fails;
		# previously it was closed on the success path only.
		mc.close()

	print('=' * 60)
	print('write statistic log')
	stats = [
		('total sent', sent_cnt),
		('total word', total_word_cnt),
		('anchor word', anchor_word_cnt),
		('anchor word with structures', anchor_word_structure_cnt),
	]
	with open('stat.log', 'w') as fw:
		fw.write(''.join([name + '\t' + str(count) + '\n' for (name, count) in stats]))
开发者ID:maxis1718,项目名称:glance-modules,代码行数:81,代码来源:usage.extract.py


注:本文中的nltk.Tree.pos方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。