本文整理汇总了Python中nltk.tag.map_tag函数的典型用法代码示例。如果您正苦于以下问题:Python map_tag函数的具体用法?Python map_tag怎么用?Python map_tag使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了map_tag函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_sentiment_count_data
def get_sentiment_count_data(train, test):
    """Build sentiment-count feature vectors for the given product ids.

    Reads the module-level ``products`` mapping.  The vectorizer and the
    scaler are fit on the training products only, then applied to both sets.

    :param train: iterable of product ids used to fit vectorizer/scaler
    :param test: iterable of product ids transformed with the fitted models
    :return: (sent_count_train, sent_count_test, X_train, X_test) where the
        first two are lists of raw feature dicts and the last two are the
        standardized dense matrices.
    """

    def _emotion_features(product_id):
        # One feature dict per product; extracted because the original
        # train and test loops were verbatim duplicates of each other.
        dist = nltk.FreqDist(products[product_id]['all_pos'].split())
        new_dist = Counter()
        for tag, count in dist.items():  # .iteritems() was Python 2 only
            new_dist[map_tag('en-ptb', 'universal', tag)] += count
        # Formality F-score.  NOTE(review): 'UH' is looked up in the raw
        # PTB distribution, presumably because the universal tagset has no
        # interjection tag — kept as-is; confirm against the original paper.
        Fscore = 0.5 * (
            (new_dist['NOUN'] + new_dist['ADJ'] + new_dist['ADP'] + new_dist['DET'])
            - (dist['UH'] + new_dist['VERB'] + new_dist['ADV'] + new_dist['PRON'])
            + 100
        )
        neg_count = 0
        pos_count = 0
        suma = 0
        emotion_words = 0
        for review in products[product_id]['reviews']:
            for feature, adjective, score in review['opinions']:
                if score is not None:
                    if score < 0:
                        neg_count += 1
                    else:
                        pos_count += 1
                    suma += score
                    emotion_words += 1
        nwords = len(products[product_id]['all_text'].split())
        eRatio = emotion_words * 1.0 / nwords
        posToAllRatio = pos_count * 1.0 / (pos_count + neg_count)
        return {
            'Fscore': Fscore,
            'eStrength': suma * 1.0 / emotion_words,
            'eRatio': eRatio,
            'posToAllRatio': posToAllRatio,
        }

    # Same order as the original: test features first, then train.
    sent_count_test = [_emotion_features(id) for id in test]
    sent_count_train = [_emotion_features(id) for id in train]

    v = DictVectorizer(sparse=False)
    X_sent_train = v.fit_transform(sent_count_train)
    X_sent_test = v.transform(sent_count_test)
    scaler = preprocessing.StandardScaler().fit(X_sent_train)
    X_train = scaler.transform(X_sent_train)
    X_test = scaler.transform(X_sent_test)
    return sent_count_train, sent_count_test, X_train, X_test
示例2: tag
def tag(self, tokens):
    """Tag *tokens* with the wrapped model.

    When ``self.tagmap`` is set, every tag is converted to the universal
    tagset via ``map_tag``; otherwise the model's output is returned as-is.
    """
    result = self.model.tag(tokens)
    if self.tagmap:
        return [(token, map_tag(self.tagmap, "universal", t))
                for token, t in result]
    return result
示例3: _get_parsed_sent
def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
    """Reconstruct the parse tree for one sentence grid.

    Combines the word, POS and tree columns into a bracketed string and
    parses it with ``self._tree_class``.  POS tags are converted to
    *tagset* when one is requested, and when *pos_in_tree* is false the
    one-child preterminals are collapsed into ``(word, tag)`` tuples.
    """
    words = self._get_column(grid, self._colmap['words'])
    pos_tags = self._get_column(grid, self._colmap['pos'])
    if tagset and tagset != self._tagset:
        pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
    parse_tags = self._get_column(grid, self._colmap['tree'])

    pieces = []
    for word, pos_tag, parse_tag in zip(words, pos_tags, parse_tags):
        # Escape literal parentheses so they don't break the bracketing.
        if word == '(':
            word = '-LRB-'
        elif word == ')':
            word = '-RRB-'
        if pos_tag == '(':
            pos_tag = '-LRB-'
        elif pos_tag == ')':
            pos_tag = '-RRB-'
        # The tree column encodes this token's brackets as "left*right".
        left, right = parse_tag.split('*')
        closers = right.count(')') * ')'  # only keep ')'.
        pieces.append('%s (%s %s) %s' % (left, pos_tag, word, closers))
    treestr = ''.join(pieces)

    try:
        tree = self._tree_class.fromstring(treestr)
    except (ValueError, IndexError):
        # Fall back to wrapping everything under an explicit root node.
        tree = self._tree_class.fromstring('(%s %s)' %
                                           (self._root_label, treestr))

    if not pos_in_tree:
        for subtree in tree.subtrees():
            for i, child in enumerate(subtree):
                if (isinstance(child, Tree) and len(child) == 1 and
                        isinstance(child[0], string_types)):
                    subtree[i] = (child[0], child.label())
    return tree
示例4: compute_pos_tag
def compute_pos_tag(tokens):
    """One-hot encode the universal POS tag of each token.

    Each token becomes an 11-element list with a 1 at the index of its
    universal tag; tags outside the table (e.g. punctuation) produce an
    all-zero vector.
    """
    TAG_INDEX = {
        'VERB': 0, 'NOUN': 1, 'PRON': 2, 'ADJ': 3, 'ADV': 4, 'ADP': 5,
        'CONJ': 6, 'DET': 7, 'NUM': 8, 'PRT': 9, 'X': 10,
    }
    tagged = nltk.pos_tag(tokens)
    universal = [map_tag('en-ptb', 'universal', t) for _, t in tagged]
    vectors = []
    for u_tag in universal:
        one_hot = numpy.zeros(11)
        index = TAG_INDEX.get(u_tag)
        if index is not None:
            one_hot[index] = 1
        vectors.append(one_hot.tolist())
    return vectors
示例5: _get_parsed_sent
def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
    """Reconstruct the parse tree encoded in one CoNLL sentence grid.

    :param grid: column-oriented rows for one sentence
    :param pos_in_tree: if false, one-child preterminal subtrees are
        collapsed into ``(word, tag)`` tuples
    :param tagset: optional target tagset; when given and different from
        the corpus tagset, POS tags are converted with ``map_tag``
    :return: a ``self._tree_class`` instance
    """
    words = self._get_column(grid, self._colmap["words"])
    pos_tags = self._get_column(grid, self._colmap["pos"])
    if tagset and tagset != self._tagset:
        pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
    parse_tags = self._get_column(grid, self._colmap["tree"])
    treestr = ""
    for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags):
        # Escape literal parentheses so they don't break the bracketed format.
        if word == "(":
            word = "-LRB-"
        if word == ")":
            word = "-RRB-"
        if pos_tag == "(":
            pos_tag = "-LRB-"
        if pos_tag == ")":
            pos_tag = "-RRB-"
        # The tree column stores this token's brackets as "left*right".
        (left, right) = parse_tag.split("*")
        right = right.count(")") * ")"  # only keep ')'.
        treestr += "%s (%s %s) %s" % (left, pos_tag, word, right)
    try:
        # NOTE(review): Tree.parse is the pre-NLTK-3 spelling of
        # Tree.fromstring — this example targets an older NLTK.
        tree = self._tree_class.parse(treestr)
    except (ValueError, IndexError):
        # Fall back to wrapping everything under an explicit root node.
        tree = self._tree_class.parse("(%s %s)" % (self._root_label, treestr))
    if not pos_in_tree:
        # Collapse one-child preterminals into (word, tag) leaf tuples.
        for subtree in tree.subtrees():
            for i, child in enumerate(subtree):
                if isinstance(child, Tree) and len(child) == 1 and isinstance(child[0], compat.string_types):
                    subtree[i] = (child[0], child.label())
    return tree
示例6: tagged_paras
def tagged_paras(self, fileids=None, tagset=None):
    """
    :return: the given file(s) as a list of
        paragraphs, each encoded as a list of sentences, which are
        in turn encoded as lists of ``(word,tag)`` tuples.
    :rtype: list(list(list(tuple(str,str))))
    """
    if tagset and tagset != self._tagset:
        def tag_mapping_function(t):
            return map_tag(self._tagset, tagset, t)
    else:
        tag_mapping_function = None
    views = [
        TaggedCorpusView(
            fileid,
            enc,
            True,
            True,
            True,
            self._sep,
            self._word_tokenizer,
            self._sent_tokenizer,
            self._para_block_reader,
            tag_mapping_function,
        )
        for fileid, enc in self.abspaths(fileids, True)
    ]
    return concat(views)
示例7: _get_chunked_words
def _get_chunked_words(self, grid, chunk_types, tagset=None):
    """Convert one sentence grid into a chunk tree.

    n.b.: very similar to conllstr2tree.  Words outside any requested
    chunk type become bare ``(word, tag)`` leaves directly under the
    root; chunked words are grouped under a node labelled with their
    chunk type.
    """
    words = self._get_column(grid, self._colmap['words'])
    pos_tags = self._get_column(grid, self._colmap['pos'])
    if tagset and tagset != self._tagset:
        pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
    chunk_tags = self._get_column(grid, self._colmap['chunk'])

    tree_stack = [Tree(self._root_label, [])]
    for word, pos_tag, chunk_tag in zip(words, pos_tags, chunk_tags):
        if chunk_tag == 'O':
            prefix, label = 'O', ''
        else:
            prefix, label = chunk_tag.split('-')
        # Chunk types the caller did not ask for are treated as outside.
        if chunk_types is not None and label not in chunk_types:
            prefix = 'O'
        # An I whose type differs from the open chunk behaves like a B.
        if prefix == 'I' and label != tree_stack[-1].label():
            prefix = 'B'
        # B and O both terminate any chunk currently open.
        if prefix in ('B', 'O') and len(tree_stack) == 2:
            tree_stack.pop()
        # B opens a fresh chunk node.
        if prefix == 'B':
            node = Tree(label, [])
            tree_stack[-1].append(node)
            tree_stack.append(node)
        # Attach the word token to whatever is currently open.
        tree_stack[-1].append((word, pos_tag))
    return tree_stack[0]
示例8: _get_iob_words
def _get_iob_words(self, grid, tagset=None):
    """Return the sentence grid as ``(word, pos, chunk)`` triples,
    converting POS tags to *tagset* when one is requested."""
    tags = self._get_column(grid, self._colmap["pos"])
    if tagset and tagset != self._tagset:
        tags = [map_tag(self._tagset, tagset, t) for t in tags]
    words = self._get_column(grid, self._colmap["words"])
    chunks = self._get_column(grid, self._colmap["chunk"])
    return list(zip(words, tags, chunks))
示例9: _tag
def _tag(self, t, tagset=None):
    """Extract ``(word, tag)`` pairs from *t*, mapping the tags to
    *tagset* when one is requested and differs from the corpus tagset."""
    pairs = [(word, pos)
             for (pos, word) in TAGWORD.findall(self._normalize(t))]
    if tagset and tagset != self._tagset:
        return [(word, map_tag(self._tagset, tagset, pos))
                for (word, pos) in pairs]
    return pairs
示例10: tagged_words
def tagged_words(self, fileids=None, tagset=None):
    """
    :return: the given file(s) as a list of tagged
        words and punctuation symbols, encoded as tuples
        ``(word,tag)``.
    :rtype: list(tuple(str,str))
    """
    if tagset and tagset != self._tagset:
        def tag_mapping_function(t):
            return map_tag(self._tagset, tagset, t)
    else:
        tag_mapping_function = None
    views = [
        TaggedCorpusView(
            fileid,
            enc,
            True,
            False,
            False,
            self._sep,
            self._word_tokenizer,
            self._sent_tokenizer,
            self._para_block_reader,
            tag_mapping_function,
        )
        for fileid, enc in self.abspaths(fileids, True)
    ]
    return concat(views)
示例11: _tag
def _tag(self, sent, tagset=None):
    """Return the ``(word, tag)`` pairs found in *sent*, mapped to
    *tagset* when one is requested and differs from the corpus tagset."""
    pairs = [(word, tag) for (tag, word) in TAGWORD.findall(sent)]
    if tagset and tagset != self._tagset:
        return [(word, map_tag(self._tagset, tagset, tag))
                for (word, tag) in pairs]
    return pairs
示例12: count_ADJ
def count_ADJ(text):
    """Return the fraction of tokens in *text* whose universal POS tag
    is 'ADJ'.

    :param text: raw text to tokenize and tag
    :return: adjective count divided by token count; 0 for empty input
        (the original raised ZeroDivisionError on empty/whitespace text)
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:
        return 0
    tag_word = nltk.pos_tag(word_list)
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag)
                           for (word, tag) in tag_word)
    # .get with a default replaces the original explicit None check.
    adj = tag_fd.get('ADJ', 0)
    return adj / len(word_list)
示例13: count_DET
def count_DET(text):
    """Return the fraction of tokens in *text* whose universal POS tag
    is 'DET'.

    :param text: raw text to tokenize and tag
    :return: determiner count divided by token count; 0 for empty input
        (the original raised ZeroDivisionError on empty/whitespace text)
    """
    word_list = nltk.word_tokenize(text)
    if not word_list:
        return 0
    tag_word = nltk.pos_tag(word_list)
    tag_fd = nltk.FreqDist(map_tag('en-ptb', 'universal', tag)
                           for (word, tag) in tag_word)
    # .get with a default replaces the original explicit None check.
    det = tag_fd.get('DET', 0)
    return det / len(word_list)
示例14: get_last_word_types
def get_last_word_types(text):
    """Return the universal POS tag of the final token of *text*.

    Known outputs include:
    ['NOUN','VERB','CONJ','PRON','ADP', 'PRT', 'DET']
    """
    tokens = nltk.word_tokenize(text)
    tagged = pos_tag(tokens)
    _, last_tag = tagged[-1]
    return map_tag("en-ptb", "universal", last_tag)
示例15: _tag
def _tag(self, t, tagset=None):
    """Return the ``(word, tag)`` pairs of *t* in corpus order.

    Matches are sorted by their recorded position before the ordering
    field is dropped; tags are mapped to *tagset* when one is requested
    and differs from the corpus tagset.
    """
    matches = SORTTAGWRD.findall(self._normalize(t, ordered=True))
    ordered = sorted((int(order), word, pos) for (order, pos, word) in matches)
    if tagset and tagset != self._tagset:
        return [(word, map_tag(self._tagset, tagset, pos))
                for (order, word, pos) in ordered]
    return [(word, pos) for (order, word, pos) in ordered]