当前位置: 首页>>代码示例>>Python>>正文


Python POSTagger.tag_sents方法代码示例

本文整理汇总了Python中nltk.tag.stanford.POSTagger.tag_sents方法的典型用法代码示例。如果您正苦于以下问题:Python POSTagger.tag_sents方法的具体用法?Python POSTagger.tag_sents怎么用?Python POSTagger.tag_sents使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.tag.stanford.POSTagger的用法示例。


在下文中一共展示了POSTagger.tag_sents方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: pos_tag

# 需要导入模块: from nltk.tag.stanford import POSTagger [as 别名]
# 或者: from nltk.tag.stanford.POSTagger import tag_sents [as 别名]
def pos_tag(texts):
    """Tag sentences with the Stanford POS tagger for the configured language.

    The tagger model is selected from the module-level ``language`` value,
    and both the jar and the model path are built by string concatenation
    onto ``config.mainpath``. Neither name is defined in this function; both
    are expected to exist at module level.

    Args:
        texts: Sentences to tag; passed unchanged to ``POSTagger.tag_sents``.

    Returns:
        The result of ``POSTagger.tag_sents(texts)``.

    Raises:
        ValueError: If ``language`` is neither ``"german"`` nor ``"english"``.
            (Previously this surfaced as an UnboundLocalError on ``model``.)
    """
    from nltk.tag.stanford import POSTagger

    model_files = {
        "german": "german-fast.tagger",
        "english": "english-bidirectional-distsim.tagger",
    }
    model_file = model_files.get(language)
    if model_file is None:
        # Fail with a clear message instead of leaving ``model`` unbound.
        raise ValueError(
            "unsupported language for POS tagging: %r" % (language,)
        )

    jar = config.mainpath + "analyze/SPOS/stanford-postagger.jar"
    model = config.mainpath + "analyze/SPOS/models/" + model_file
    tagger = POSTagger(model, path_to_jar=jar, encoding="UTF-8")

    return tagger.tag_sents(texts)
开发者ID:chreman,项目名称:output_BA,代码行数:14,代码来源:parallel_preprocessing.py

示例2: add_POS

# 需要导入模块: from nltk.tag.stanford import POSTagger [as 别名]
# 或者: from nltk.tag.stanford.POSTagger import tag_sents [as 别名]
 def add_POS(self, row_file, target):
     """Tag the tokens of ``target`` and write them to ``pos_<target>``.

     Token rows come from ``self.get_token(target)``. Before tagging, every
     word longer than one character that starts with an upper-case ASCII
     letter is lower-cased; the file is still written with the original,
     unmodified tokens.

     Output format: one ``<token> <tag>`` line per token, with an empty line
     after each row.

     :param row_file: unused by the live code (only the previously disabled
         parsing code referred to it); kept for interface compatibility.
     :param target: passed to ``self.get_token`` and used to build the
         output file name ``'pos_%s' % target``.
     :return: None
     """
     all_token = self.get_token(target)
     stanford_tagger = POSTagger(
         '../stanford-postagger-full-2015-01-30/models/english-bidirectional-distsim.tagger',
         '../stanford-postagger-full-2015-01-30/stanford-postagger.jar')

     # Build the tagger input: a normalised copy of every token row.
     tag_list = []
     for row in all_token:
         tag_list.append([
             word.lower() if len(word) > 1 and re.match(r'^[A-Z]+', word) else word
             for word in row
         ])
     tagged_result = stanford_tagger.tag_sents(tag_list)

     # NOTE(review): binary mode combined with str writes only works on
     # Python 2 -- confirm the target interpreter before changing the mode.
     # The context manager guarantees the file is flushed and closed.
     with open('pos_%s' % target, 'wb') as w:
         for num1, row in enumerate(tagged_result):
             for num2, item in enumerate(row):
                 # Emit the original token, not the lower-cased one.
                 w.write(all_token[num1][num2] + ' ' + item[1] + '\n')
             w.write('\n')
     return
开发者ID:victormm88,项目名称:SemEval,代码行数:48,代码来源:Feature_Tool.py

示例3: generate_pos_set

# 需要导入模块: from nltk.tag.stanford import POSTagger [as 别名]
# 或者: from nltk.tag.stanford.POSTagger import tag_sents [as 别名]
 def generate_pos_set(self):
     """Build ``self.pos_set`` from label counts in the training data.

     Positive counts (``pos_dict``):
       * every entry of ``self.train_label`` without a space is counted
         as-is;
       * entries containing a space are tokenized with
         ``self.tk.word_tokenize``, tagged, and only tokens whose tag starts
         with ``'NN'`` are counted.

     Negative counts (``neg_dict``): tokens of ``self.tagged_train_data``
     whose tag starts with ``'NN'`` and which are not in the matching
     ``self.train_word_label`` row.

     A key is kept when its positive count is greater than 1 and its
     negative count is less than twice its positive count (keys with no
     negative count always pass).

     Uses only constructs that behave the same on Python 2 and Python 3
     (single-argument ``print(...)``, ``dict.get`` instead of ``has_key``).

     :return: None; the result is stored in ``self.pos_set``.
     """
     print('正在构建正性集词典....')
     pos_dict = {}
     sentences = []
     for row in self.train_label:
         for key in row:
             if ' ' in key:
                 sentences.append(self.tk.word_tokenize(key))
             else:
                 pos_dict[key] = pos_dict.get(key, 0) + 1

     st = POSTagger(
         '../stanford-postagger-full-2015-01-30/models/english-bidirectional-distsim.tagger',
         '../stanford-postagger-full-2015-01-30/stanford-postagger.jar')
     result = st.tag_sents(sentences)
     for row in result:
         for item in row:
             if item[1].startswith('NN'):
                 pos_dict[item[0]] = pos_dict.get(item[0], 0) + 1

     neg_dict = {}
     for num, row in enumerate(self.tagged_train_data):
         for item in row:
             if item[1].startswith('NN') and item[0] not in self.train_word_label[num]:
                 neg_dict[item[0]] = neg_dict.get(item[0], 0) + 1

     pos_set = set()
     for key, pos_count in pos_dict.items():
         # ``neg < 2 * pos`` equals the original ``neg / pos < 2`` for
         # positive counts under both floor and true division.
         if pos_count > 1 and neg_dict.get(key, 0) < 2 * pos_count:
             pos_set.add(key)
     self.pos_set = pos_set
     print('完成!')
     return
开发者ID:victormm88,项目名称:SemEval,代码行数:39,代码来源:SemEval.py

示例4: __init__

# 需要导入模块: from nltk.tag.stanford import POSTagger [as 别名]
# 或者: from nltk.tag.stanford.POSTagger import tag_sents [as 别名]
class POSTagSelector:

	def __init__(self, pos_model, stanford_tagger, java_path):
		"""
		Builds a POSTagSelector backed by the Stanford POS Tagger.

		The given Java path is exported through the JAVAHOME environment variable
		before the tagger object is created.

		@param pos_model: Path to a POS tagging model for the Stanford POS Tagger.
		Models are available at: http://nlp.stanford.edu/software/tagger.shtml
		@param stanford_tagger: Path to the "stanford-postagger.jar" file.
		The tagger is available at: http://nlp.stanford.edu/software/tagger.shtml
		@param java_path: Path to the system's "java" executable.
		Commonly "/usr/bin/java" on Unix/Linux systems, or "C:/Program Files/Java/jdk_version/java.exe" on Windows systems.
		"""
		java_settings = {'JAVAHOME': java_path}
		os.environ.update(java_settings)
		stanford = POSTagger(pos_model, stanford_tagger)
		self.tagger = stanford

	def selectCandidates(self, substitutions, victor_corpus):
		"""
		Selects which candidates can replace the target complex words in each instance of a VICTOR corpus.

		Each corpus line is read as tab-separated fields: field 0 is the sentence
		(split on single spaces) and field 2 is the integer index of the target
		word in that sentence. A candidate is passed on to getCandidatesWithSamePOS
		together with the POS tag of the word at that index.

		@param substitutions: Candidate substitutions to be filtered.
		It can be in two formats:
		A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions.
		Example: substitutions['perched'] = {'sat', 'roosted'}
		A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector.
		Example: [['sat', 'roosted'], ['easy', 'uncomplicated']]
		@param victor_corpus: Path to a corpus in the VICTOR format.
		For more information about the file's format, refer to the LEXenstein Manual.
		@return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus.
		If "substitutions" is neither a list nor a dictionary, an error is printed and an empty list is returned.
		"""
		selected_substitutions = []

		if isinstance(substitutions, list):
			substitution_candidates = substitutions
		elif isinstance(substitutions, dict):
			void = VoidSelector()
			substitution_candidates = void.selectCandidates(substitutions, victor_corpus)
		else:
			print('ERROR: Substitutions are neither a dictionary or a list!')
			return selected_substitutions

		#Read VICTOR corpus (the context manager closes the file even if a line fails to parse):
		sents = []
		heads = []
		words = set()
		with open(victor_corpus) as lexf:
			for c, line in enumerate(lexf):
				data = line.strip().split('\t')
				sents.append(data[0].strip().split(' '))
				heads.append(int(data[2].strip()))
				words.update(substitution_candidates[c])

		#Tag sentences:
		tagged_sents = self.tagger.tag_sents(sents)

		#Tag words (each candidate is tagged in isolation, as a one-word sentence):
		words = list(words)
		tagged_words = self.tagger.tag_sents([[w] for w in words])
		word_to_tag = {}
		for i, word in enumerate(words):
			word_to_tag[word] = tagged_words[i][0][1]

		#Filter the candidates of each instance by the POS tag of its target word:
		for i, head in enumerate(heads):
			target_pos = str(tagged_sents[i][head][1])
			candidates = set(substitution_candidates[i])
			selected_substitutions.append(self.getCandidatesWithSamePOS(candidates, word_to_tag, target_pos))
		return selected_substitutions
	
	def getTargetPOS(self, sent, target, head):
		"""
		Returns the POS tag of the target word of a sentence, using nltk.pos_tag.

		The whole sentence is tagged first and the tag at position "head" is returned.
		If that raises a UnicodeDecodeError, the target word is tagged on its own instead.

		@param sent: Sentence to tag, passed to nltk.pos_tag as-is.
		@param target: Target word, used only by the fallback.
		@param head: Index of the target word in "sent".
		@return: The POS tag, or the string 'None' (not the None object) if both attempts raise a UnicodeDecodeError.
		"""
		try:
			return nltk.pos_tag(sent)[head][1]
		except UnicodeDecodeError:
			pass

		try:
			# pos_tag expects a sequence of tokens: wrap the word in a list,
			# otherwise the string is tagged character by character.
			return nltk.pos_tag([target])[0][1]
		except UnicodeDecodeError:
			return 'None'
			
		
	def getCandidatesWithSamePOS(self, candidates, word_to_tag, target_pos):
		result = set([])
#.........这里部分代码省略.........
开发者ID:pombredanne,项目名称:phd-backup,代码行数:103,代码来源:selectors.py


注:本文中的nltk.tag.stanford.POSTagger.tag_sents方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。