当前位置: 首页>>代码示例>>Python>>正文


Python tag.map_tag函数代码示例

本文整理汇总了Python中nltk.tag.map_tag函数的典型用法代码示例。如果您正苦于以下问题:Python map_tag函数的具体用法?Python map_tag怎么用?Python map_tag使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了map_tag函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_sentiment_count_data

def _emotion_features(product_id):
	"""Compute the emotion/formality feature dict for one product id.

	Reads the module-level ``products`` mapping.  Returns a dict with keys
	'Fscore', 'eStrength', 'eRatio' and 'posToAllRatio'.

	NOTE(review): like the original code this raises ZeroDivisionError when a
	product has no scored opinions (emotion_words == 0, pos+neg == 0) or no
	text — confirm upstream guarantees at least one scored opinion.
	"""
	dist = nltk.FreqDist(products[product_id]['all_pos'].split())
	new_dist = Counter()
	# .items() works on both Python 2 and 3; the original .iteritems() is
	# Python-2-only and crashes on Python 3.
	for tag, count in dist.items():
		new_dist[map_tag('en-ptb', 'universal', tag)] += count
	# Formality F-score: "formal" categories minus "deictic" categories,
	# rescaled.  'UH' (interjection) is deliberately looked up in the raw
	# PTB distribution because the universal tagset has no interjection tag.
	Fscore = 0.5 * ((new_dist['NOUN'] + new_dist['ADJ'] + new_dist['ADP'] + new_dist['DET'])
	                - (dist['UH'] + new_dist['VERB'] + new_dist['ADV'] + new_dist['PRON'])
	                + 100)
	neg_count = 0
	pos_count = 0
	score_sum = 0
	emotion_words = 0
	for review in products[product_id]['reviews']:
		for feature, adjective, score in review['opinions']:
			if score is not None:
				if score < 0:
					neg_count += 1
				else:
					pos_count += 1
				score_sum += score
				emotion_words += 1
	nwords = len(products[product_id]['all_text'].split())
	return {
		'Fscore': Fscore,
		'eStrength': score_sum * 1.0 / emotion_words,
		'eRatio': emotion_words * 1.0 / nwords,
		'posToAllRatio': pos_count * 1.0 / (pos_count + neg_count),
	}


def get_sentiment_count_data(train, test):
	"""Build sentiment-count feature dicts and scaled feature matrices.

	:param train: iterable of product ids used to fit the vectorizer/scaler
	:param test: iterable of product ids transformed with the fitted models
	:return: (sent_count_train, sent_count_test, X_train, X_test) where the
	    first two are lists of feature dicts and the last two are the
	    standard-scaled dense matrices produced by DictVectorizer.
	"""
	v = DictVectorizer(sparse=False)
	# The original duplicated ~25 lines for each split; both splits now go
	# through the same helper.
	sent_count_test = [_emotion_features(pid) for pid in test]
	sent_count_train = [_emotion_features(pid) for pid in train]

	# Fit on train only, then transform both splits with the same models.
	X_sent_train = v.fit_transform(sent_count_train)
	X_sent_test = v.transform(sent_count_test)
	scaler = preprocessing.StandardScaler().fit(X_sent_train)
	X_train = scaler.transform(X_sent_train)
	X_test = scaler.transform(X_sent_test)

	return sent_count_train, sent_count_test, X_train, X_test
开发者ID:sergiooramas,项目名称:music-genre-classification,代码行数:60,代码来源:genre_classification.py

示例2: tag

    def tag(self, tokens):
        """Tag *tokens* with the wrapped model, optionally remapping tags.

        When ``self.tagmap`` is set, each tag produced by the model is
        converted to the universal tagset via ``map_tag``; otherwise the
        model output is returned unchanged.
        """
        result = self.model.tag(tokens)
        if self.tagmap:
            mapped = []
            for word, tag in result:
                mapped.append((word, map_tag(self.tagmap, "universal", tag)))
            return mapped
        return result
开发者ID:lrei,项目名称:twitter_annotator,代码行数:7,代码来源:seq.py

示例3: _get_parsed_sent

    def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
        """Assemble a parse tree from one CoNLL grid.

        Combines the word, POS and parse-bracket columns into a single
        bracketed string and parses it with ``self._tree_class``.  If
        *tagset* differs from the corpus tagset, POS tags are converted
        first.  When *pos_in_tree* is false, length-1 preterminal subtrees
        are flattened into ``(word, tag)`` tuples.
        """
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        parse_tags = self._get_column(grid, self._colmap['tree'])

        pieces = []
        for word, pos_tag, parse_tag in zip(words, pos_tags, parse_tags):
            # Penn-Treebank escaping for literal parentheses.
            if word == '(':
                word = '-LRB-'
            elif word == ')':
                word = '-RRB-'
            if pos_tag == '(':
                pos_tag = '-LRB-'
            elif pos_tag == ')':
                pos_tag = '-RRB-'
            left, right = parse_tag.split('*')
            closing = right.count(')') * ')'  # only keep ')'.
            pieces.append('%s (%s %s) %s' % (left, pos_tag, word, closing))
        treestr = ''.join(pieces)
        try:
            tree = self._tree_class.fromstring(treestr)
        except (ValueError, IndexError):
            # Fragment lacked a single root: wrap it in an explicit one.
            tree = self._tree_class.fromstring('(%s %s)' %
                                          (self._root_label, treestr))

        if not pos_in_tree:
            for subtree in tree.subtrees():
                for i, child in enumerate(subtree):
                    if (isinstance(child, Tree) and len(child) == 1
                            and isinstance(child[0], string_types)):
                        subtree[i] = (child[0], child.label())

        return tree
开发者ID:Weiming-Hu,项目名称:text-based-six-degree,代码行数:30,代码来源:conll.py

示例4: compute_pos_tag

    def compute_pos_tag(tokens):
        """One-hot encode the universal POS tag of every token.

        Each token is POS-tagged with NLTK's default tagger, the tag is
        mapped from 'en-ptb' to the universal tagset, and an 11-dimensional
        one-hot list is emitted per token.  Tags absent from the lookup
        table (e.g. punctuation '.') produce an all-zero vector.
        """
        tag_index = {
            'VERB': 0, 'NOUN': 1, 'PRON': 2, 'ADJ': 3,
            'ADV': 4, 'ADP': 5, 'CONJ': 6, 'DET': 7,
            'NUM': 8, 'PRT': 9, 'X': 10,
        }

        tagged = nltk.pos_tag(tokens)
        vectors = []
        for word, tag in tagged:
            universal = map_tag('en-ptb', 'universal', tag)
            one_hot = numpy.zeros(11)
            if universal in tag_index:
                one_hot[tag_index[universal]] = 1
            vectors.append(one_hot.tolist())
        return vectors
开发者ID:ProjetPP,项目名称:PPP-QuestionParsing-ML-Standalone,代码行数:26,代码来源:preprocessing.py

示例5: _get_parsed_sent

    def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
        """Assemble a parse tree from one CoNLL grid.

        Combines the word, POS and parse-bracket columns into a bracketed
        string and parses it with ``self._tree_class``; if *tagset* differs
        from the corpus tagset the POS tags are converted first.  When
        *pos_in_tree* is false, length-1 preterminal subtrees are flattened
        into ``(word, tag)`` tuples.
        """
        words = self._get_column(grid, self._colmap["words"])
        pos_tags = self._get_column(grid, self._colmap["pos"])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        parse_tags = self._get_column(grid, self._colmap["tree"])

        treestr = ""
        for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags):
            # Penn-Treebank escaping for literal parentheses.
            if word == "(":
                word = "-LRB-"
            if word == ")":
                word = "-RRB-"
            if pos_tag == "(":
                pos_tag = "-LRB-"
            if pos_tag == ")":
                pos_tag = "-RRB-"
            # parse_tag has the shape "<opening-brackets>*<closing-brackets>".
            (left, right) = parse_tag.split("*")
            right = right.count(")") * ")"  # only keep ')'.
            treestr += "%s (%s %s) %s" % (left, pos_tag, word, right)
        # NOTE(review): Tree.parse was renamed to Tree.fromstring in newer
        # NLTK — confirm self._tree_class still exposes .parse.
        try:
            tree = self._tree_class.parse(treestr)
        except (ValueError, IndexError):
            # Fragment lacked a single root: wrap it in an explicit one.
            tree = self._tree_class.parse("(%s %s)" % (self._root_label, treestr))

        if not pos_in_tree:
            for subtree in tree.subtrees():
                for i, child in enumerate(subtree):
                    if isinstance(child, Tree) and len(child) == 1 and isinstance(child[0], compat.string_types):
                        subtree[i] = (child[0], child.label())

        return tree
开发者ID:haadkhan,项目名称:cerebri,代码行数:32,代码来源:conll.py

示例6: tagged_paras

 def tagged_paras(self, fileids=None, tagset=None):
     """
     :return: the given file(s) as a list of
         paragraphs, each encoded as a list of sentences, which are
         in turn encoded as lists of ``(word,tag)`` tuples.
     :rtype: list(list(list(tuple(str,str))))
     """
     if tagset and tagset != self._tagset:
         # Convert each tag from the corpus tagset to the requested one.
         def tag_mapping_function(t):
             return map_tag(self._tagset, tagset, t)
     else:
         tag_mapping_function = None
     views = []
     for fileid, enc in self.abspaths(fileids, True):
         views.append(
             TaggedCorpusView(
                 fileid,
                 enc,
                 True,
                 True,
                 True,
                 self._sep,
                 self._word_tokenizer,
                 self._sent_tokenizer,
                 self._para_block_reader,
                 tag_mapping_function,
             )
         )
     return concat(views)
开发者ID:prz3m,项目名称:kind2anki,代码行数:28,代码来源:tagged.py

示例7: _get_chunked_words

    def _get_chunked_words(self, grid, chunk_types, tagset=None):
        """Convert one CoNLL grid into a shallow chunk tree.

        Reads the word, POS and IOB-chunk columns and builds a depth-2
        ``Tree`` rooted at ``self._root_label``: chunked tokens are grouped
        under a node labelled with their chunk type, while unchunked ('O')
        tokens hang directly off the root as ``(word, pos_tag)`` tuples.
        Chunk types not listed in *chunk_types* (when given) are treated as
        'O', and POS tags are mapped to *tagset* when it differs from the
        corpus tagset.
        """
        # n.b.: this method is very similar to conllstr2tree.
        words = self._get_column(grid, self._colmap['words'])
        pos_tags = self._get_column(grid, self._colmap['pos'])
        if tagset and tagset != self._tagset:
            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
        chunk_tags = self._get_column(grid, self._colmap['chunk'])

        # stack[-1] is the node currently receiving tokens: the root, or
        # the open chunk when one exists (stack depth is at most 2).
        stack = [Tree(self._root_label, [])]

        for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags):
            if chunk_tag == 'O':
                state, chunk_type = 'O', ''
            else:
                (state, chunk_type) = chunk_tag.split('-')
            # If it's a chunk we don't care about, treat it as O.
            if chunk_types is not None and chunk_type not in chunk_types:
                state = 'O'
            # Treat a mismatching I like a B.
            if state == 'I' and chunk_type != stack[-1].label():
                state = 'B'
            # For B or I: close any open chunks
            if state in 'BO' and len(stack) == 2:
                stack.pop()
            # For B: start a new chunk.
            if state == 'B':
                new_chunk = Tree(chunk_type, [])
                stack[-1].append(new_chunk)
                stack.append(new_chunk)
            # Add the word token.
            stack[-1].append((word, pos_tag))

        return stack[0]
开发者ID:Weiming-Hu,项目名称:text-based-six-degree,代码行数:33,代码来源:conll.py

示例8: _get_iob_words

 def _get_iob_words(self, grid, tagset=None):
     """Return the grid as ``(word, pos_tag, chunk_tag)`` triples,
     converting POS tags to *tagset* when it differs from the corpus one.
     """
     pos_tags = self._get_column(grid, self._colmap["pos"])
     if tagset and tagset != self._tagset:
         pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
     words = self._get_column(grid, self._colmap["words"])
     chunk_tags = self._get_column(grid, self._colmap["chunk"])
     return [triple for triple in zip(words, pos_tags, chunk_tags)]
开发者ID:haadkhan,项目名称:cerebri,代码行数:7,代码来源:conll.py

示例9: _tag

 def _tag(self, t, tagset=None):
     """Extract ``(word, tag)`` pairs from a normalized bracketed string,
     converting tags to *tagset* when it differs from the corpus tagset.
     """
     pairs = []
     # TAGWORD yields (tag, word); flip into (word, tag) order.
     for pos, word in TAGWORD.findall(self._normalize(t)):
         pairs.append((word, pos))
     if tagset and tagset != self._tagset:
         pairs = [(word, map_tag(self._tagset, tagset, pos)) for (word, pos) in pairs]
     return pairs
开发者ID:prz3m,项目名称:kind2anki,代码行数:7,代码来源:bracket_parse.py

示例10: tagged_words

 def tagged_words(self, fileids=None, tagset=None):
     """
     :return: the given file(s) as a list of tagged
         words and punctuation symbols, encoded as tuples
         ``(word,tag)``.
     :rtype: list(tuple(str,str))
     """
     if tagset and tagset != self._tagset:
         # Convert each tag from the corpus tagset to the requested one.
         def tag_mapping_function(t):
             return map_tag(self._tagset, tagset, t)
     else:
         tag_mapping_function = None
     views = []
     for fileid, enc in self.abspaths(fileids, True):
         views.append(
             TaggedCorpusView(
                 fileid,
                 enc,
                 True,
                 False,
                 False,
                 self._sep,
                 self._word_tokenizer,
                 self._sent_tokenizer,
                 self._para_block_reader,
                 tag_mapping_function,
             )
         )
     return concat(views)
开发者ID:prz3m,项目名称:kind2anki,代码行数:28,代码来源:tagged.py

示例11: _tag

 def _tag(self, sent, tagset=None):
     """Return ``(word, tag)`` pairs for *sent*, mapping tags to *tagset*
     when it differs from the corpus tagset.
     """
     convert = tagset and tagset != self._tagset
     result = []
     # TAGWORD yields (tag, word); flip into (word, tag) order.
     for t, w in TAGWORD.findall(sent):
         if convert:
             t = map_tag(self._tagset, tagset, t)
         result.append((w, t))
     return result
开发者ID:prz3m,项目名称:kind2anki,代码行数:7,代码来源:sinica_treebank.py

示例12: count_ADJ

def count_ADJ(text):
    """Return the fraction of tokens in *text* tagged as adjectives.

    Tokens are POS-tagged with NLTK's default tagger and the tags mapped
    from 'en-ptb' to the universal tagset; the 'ADJ' count is divided by
    the total token count.  Returns 0.0 for input that tokenizes to
    nothing (the original raised ZeroDivisionError).
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:  # guard: empty/whitespace text has no tokens
        return 0.0
    tag_word = nltk.pos_tag(word_list)
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag) for (word, tag) in tag_word)
    # FreqDist is a Counter subclass, so a missing key already counts as 0;
    # the original .get(...) / None dance is unnecessary.  The 1.0 factor
    # forces true division (the original adj/len was integer division on
    # Python 2, always yielding 0).
    return tag_fd['ADJ'] * 1.0 / len(word_list)
开发者ID:yunitata,项目名称:PAN15,代码行数:8,代码来源:feature_extractor.py

示例13: count_DET

def count_DET(text):
    """Return the fraction of tokens in *text* tagged as determiners.

    Tokens are POS-tagged with NLTK's default tagger and the tags mapped
    from 'en-ptb' to the universal tagset; the 'DET' count is divided by
    the total token count.  Returns 0.0 for input that tokenizes to
    nothing (the original raised ZeroDivisionError).
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:  # guard: empty/whitespace text has no tokens
        return 0.0
    tag_word = nltk.pos_tag(word_list)
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag) for (word, tag) in tag_word)
    # FreqDist is a Counter subclass, so a missing key already counts as 0;
    # the original .get(...) / None dance is unnecessary.  The 1.0 factor
    # forces true division (the original det/len was integer division on
    # Python 2, always yielding 0).
    return tag_fd['DET'] * 1.0 / len(word_list)
开发者ID:yunitata,项目名称:PAN15,代码行数:8,代码来源:feature_extractor.py

示例14: get_last_word_types

def get_last_word_types(text):
    """Return the universal POS tag of the last token of *text*.

    Returns None when *text* tokenizes to nothing (the original raised
    IndexError on ``posTagged[-1]`` for empty input).
    """
    tokens = nltk.word_tokenize(text)  # don't shadow the *text* parameter
    if not tokens:
        return None
    pos_tagged = pos_tag(tokens)
    lastword_tag = map_tag("en-ptb", "universal", pos_tagged[-1][1])

    # known types
    # ['NOUN','VERB','CONJ','PRON','ADP', 'PRT', 'DET']
    return lastword_tag
开发者ID:helderm,项目名称:shalk,代码行数:8,代码来源:dbload.py

示例15: _tag

 def _tag(self, t, tagset=None):
     """Return ``(word, tag)`` pairs for *t* in surface order.

     Each match carries an explicit ordinal, so the triples are sorted by
     it before the ordinal is dropped; tags are mapped to *tagset* when it
     differs from the corpus tagset.
     """
     triples = sorted(
         (int(order), word, pos)
         for (order, pos, word) in SORTTAGWRD.findall(self._normalize(t, ordered=True))
     )
     convert = tagset and tagset != self._tagset
     result = []
     for _order, word, pos in triples:
         if convert:
             pos = map_tag(self._tagset, tagset, pos)
         result.append((word, pos))
     return result
开发者ID:Copper-Head,项目名称:nltk,代码行数:8,代码来源:bracket_parse.py


注:本文中的nltk.tag.map_tag函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。