This page collects typical usage examples of the Python method nltk.tag.stanford.NERTagger.tag. If you are wondering what exactly NERTagger.tag does, how to use it, or want working examples, the curated snippets below should help. You can also explore the containing class, nltk.tag.stanford.NERTagger, for further usage examples.
Below are 15 code examples of NERTagger.tag, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python examples.
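Before diving in, here is a minimal sketch of the pattern shared by all the examples: construct NERTagger from a trained classifier model plus the Stanford NER jar, then pass a list of tokens to tag(). The model and jar paths below are assumptions; point them at your own Stanford NER download. Note that newer NLTK releases renamed this class to StanfordNERTagger; the examples on this page use the older name.

from nltk.tag.stanford import NERTagger

# Assumed paths; adjust to wherever Stanford NER is installed.
st = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
               'stanford-ner/stanford-ner.jar')
print(st.tag('Rami Eid is studying at Stony Brook University in NY'.split()))
# e.g. [('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ...]
# (some NLTK versions wrap the output in one list per sentence)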
Example 1: main
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def main():
    # os.environ['JAVAHOME'] = "C:\Program Files\Java\jdk1.8.0_45/bin"
    path = "ner"
    classifier = path + "/classifiers/" + "english.muc.7class.distsim.crf.ser.gz"
    jar = path + "/stanford-ner-3.4.jar"
    tagger = NERTagger(classifier, jar)
    tokens = tokenize('ada_lovelace.txt')
    taggedText = tagger.tag(tokens)
    countList = []
    nounList = []
    for word, tag in taggedText:
        countList.append(tag)
        if tag != 'O':
            nounList.append(word)
    print("Answer to 2.1: \n{} \nThey certainly aren't all correct.".format(Counter(countList)))
    print()
    print("Answer to 2.2: The other classifiers seem to achieve similar results,\nbut because of the multiple categories it is more interesting to read.")
    lemmas = lemmatize(nounList)
    taggedLemmas = tagger.tag(lemmas)
    print("Answer to 2.3:\n", taggedLemmas)
Example 2: ngramTagger
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def ngramTagger(l):
    """
    This function takes a list of ngrams, creates bigrams, and entity-tags them.
    :param l: input must be a list of bigrams, formed as tuples
    :return: a list of tagged words. (For example, "El Salvador" becomes
             [("El", "LOCATION"), ("Salvador", "LOCATION")])
    """
    bigrams_ner = []
    bigrams_wn = []
    bigrams = []
    tb = []
    for i in l:
        ngram_ner = i[0] + " " + i[1]
        ngram_wn = i[0] + "_" + i[1]
        bigrams_ner.append(ngram_ner)
        bigrams_wn.append(ngram_wn)
        bigrams.append((ngram_ner, ngram_wn))
    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')
    tagged_bigrams = class3.tag(bigrams_ner)
    for sent in tagged_bigrams:
        for t in sent:
            if len(t[1]) > 3 and t[1] != "LOCATION":
                tb.append(t)
    for bg in bigrams:
        tag_bg = bgWordNetTagger(bg[0], bg[1])
        if tag_bg in ("COUNTRY", "STATE", "CITY", "TOWN"):
            words = bg[0].split()
            tb.extend([(words[0], tag_bg), (words[1], tag_bg)])
    print(tb)
Example 3: NERParser
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
class NERParser(object):
    def __init__(self):
        self.st = NERTagger("/Users/trentniemeyer/nltk_data/stanford-ner-2014-06-16/classifiers/english.muc.7class.distsim.crf.ser.gz",
                            "/Users/trentniemeyer/nltk_data/stanford-ner-2014-06-16/stanford-ner.jar")
        self.locations = []
        self.organizations = []

    def parse(self, text):
        ne = self.st.tag(nltk.word_tokenize(text))
        for sentence in ne:
            lastwordwasentity = False
            lastentity = ''
            lasttype = ''
            for (word, entitytype) in sentence:
                if entitytype == 'ORGANIZATION' or entitytype == 'LOCATION':
                    # Consecutive entity tokens are glued into one multi-word entity.
                    if lastwordwasentity:
                        lastentity += ' ' + word
                    else:
                        lastentity = word
                    lastwordwasentity = True
                    lasttype = entitytype
                else:
                    if lastwordwasentity:
                        if lasttype == 'LOCATION':
                            self.locations.append(lastentity)
                        else:
                            self.organizations.append(lastentity)
                    lastentity = ''
                    lastwordwasentity = False
            # Note: an entity that ends the sentence is never flushed here.

    def locationFrequencies(self):
        print(collections.Counter(self.locations))

    def organizationFrequencies(self):
        print(collections.Counter(self.organizations))
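A short usage sketch for this class (the sample text and output are made up):

parser = NERParser()
parser.parse("Google opened a new office in Amsterdam while Microsoft expanded in Dublin.")
parser.parse("Amsterdam is a popular location.")
parser.locationFrequencies()      # e.g. Counter({'Amsterdam': 2, 'Dublin': 1})
parser.organizationFrequencies()  # e.g. Counter({'Google': 1, 'Microsoft': 1})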
Example 4: entityTagger
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def entityTagger():
    """
    Tags nouns in the given file and writes them to an output file.
    """
    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')
    output = open("entity.tagged", "w")
    with open("pos.tagged", "r") as inp_file:
        for l in inp_file:
            line = l.split()
            # If the word is a noun, tag it!
            print(line)
            if line[5] in ("NN", "NNP"):
                ner_tagged = class3.tag([line[4]])
                for t in ner_tagged[0]:
                    # No NER tag? Fall back to WordNet tagging.
                    if len(t[1]) < 3:
                        tag = wordNetTagger(t[0])
                        data = ("{:8}{:8}{:8}{:8}{:60}{:6}{:13}".format(line[0], line[1], line[2], line[3], line[4],
                                                                        line[5], tag))
                        output.write(data + "\n")
                    else:
                        data = ("{:8}{:8}{:8}{:8}{:60}{:6}{:13}".format(line[0], line[1], line[2], line[3], line[4],
                                                                        line[5], t[1]))
                        output.write(data + "\n")
            else:
                data = ("{:8}{:8}{:8}{:8}{:60}{:6}{:13}".format(line[0], line[1], line[2], line[3], line[4], line[5],
                                                                "-"))
                output.write(data + "\n")
    output.close()
Example 5: sdfprocess
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def sdfprocess(rawexpr):
    parser = NERTagger(
        path_to_model='/home/cosmo/Dropbox/Purdue/nlp/stanford-corenlp-full-2014-08-27/english.all.3class.distsim.crf.ser.gz',
        path_to_jar='/home/cosmo/Dropbox/Purdue/nlp/stanford-corenlp-full-2014-08-27/stanford-corenlp-3.4.1.jar',
        java_options='-mx2000m')
    expr = preprocess(rawexpr)
    named_expr = rechunk(parser.tag(word_tokenize(expr)))
    for t in named_expr:
        if t[1] == 'PERSON':
            return t[0]
    return expr
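Both preprocess and rechunk are external to this snippet. rechunk is commonly written to merge consecutive tokens that share a tag, so multi-word names survive as a single unit; a plausible sketch, assuming tag() yields a flat list of (word, tag) pairs here (the original implementation may differ):

def rechunk(tagged):
    # Merge consecutive tokens carrying the same non-'O' tag:
    # [('Ada', 'PERSON'), ('Lovelace', 'PERSON')] -> [('Ada Lovelace', 'PERSON')]
    chunks = []
    for word, tag in tagged:
        if chunks and tag != 'O' and chunks[-1][1] == tag:
            chunks[-1] = (chunks[-1][0] + ' ' + word, tag)
        else:
            chunks.append((word, tag))
    return chunks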
Example 6: Parser
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
class Parser(object):
    def __init__(self):
        self.st = NERTagger(os.path.join(STANFORD_PATH, 'classifiers/english.all.3class.distsim.crf.ser.gz'),
                            os.path.join(STANFORD_PATH, 'stanford-ner-3.4.jar'))

    def NER(self, s):
        s = s.replace('.', ' ')
        s = s.encode('utf-8')
        return self.st.tag(s.split())
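Usage is a one-liner; replacing periods with spaces presumably keeps dotted abbreviations from producing stray tokens, at the cost of losing sentence boundaries. The sample sentence and output below are made up:

p = Parser()
print(p.NER(u"Steve Jobs founded Apple in Cupertino"))
# e.g. [[('Steve', 'PERSON'), ('Jobs', 'PERSON'), ('founded', 'O'),
#        ('Apple', 'ORGANIZATION'), ('in', 'O'), ('Cupertino', 'LOCATION')]]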
Example 7: ngramTagger
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def ngramTagger(l):
    """
    This function creates bigrams, tags them via Stanford NER or WordNet, and searches for links to wiki pages.
    :param l: input must be a list of bigrams, formed as tuples
    :return: a list of words that are tagged and linked to Wikipedia.
    """
    print("checking ngrams")
    nerts = []
    # First, create strings suitable as input for the NERTagger.
    for i in l:
        ngram_ner = i[0] + " " + i[1]
        nerts.append(ngram_ner)
    # Feed the list of suitable bigrams to the NERTagger, and reshape the
    # output into the wanted format with nerToBG().
    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')
    ner_result = class3.tag(nerts)
    bigramsAndTags = nerToBG(ner_result)
    for t in bigramsAndTags:
        # If tagged as a location, resolve the location with the same technique
        # as locationTagger(), but for bigrams, using getRidOfLocation().
        if t[1] == "LOCATION" or t[2] == "LOCATION":
            wn_bg = t[0].split()[0] + "_" + t[0].split()[1]
            wAndTag = getRidOfLocation(wn_bg)
            t[1] = wAndTag[1]
            t[2] = wAndTag[1]
    final_list = []
    for a in range(len(bigramsAndTags)):
        # If both words of the bigram carry the same tag, append the bigram to final_list.
        if bigramsAndTags[a][1] == bigramsAndTags[a][2]:
            final_list.extend([(bigramsAndTags[a][0], bigramsAndTags[a][1])])
        # If word 1 is untagged and word 2 is tagged, look word 1 up in the
        # development set; if that tag matches word 2's tag, append the bigram.
        elif checkBGTag(bigramsAndTags[a][0].split()[0]) == bigramsAndTags[a][2]:
            final_list.extend([(bigramsAndTags[a][0], bigramsAndTags[a][2])])
        # If word 2 is untagged and word 1 is tagged, look word 2 up in the
        # single-word tagged development set; if that tag matches word 1's tag,
        # append the bigram.
        elif checkBGTag(bigramsAndTags[a][0].split()[1]) == bigramsAndTags[a][1]:
            final_list.extend([(bigramsAndTags[a][0], bigramsAndTags[a][1])])
    taglink_bigrams = []
    for bgs in final_list[:]:
        # If a bigram is still untagged, remove it from the list.
        if len(bgs[1]) < 4:
            final_list.remove(bgs)
        else:
            # Otherwise look up its Wikipedia link.
            links = wiki_lookup(bgs[0], bgs[1])
            words = bgs[0].split(" ")
            taglink_bigrams.extend([(words[0], bgs[1], links), (words[1], bgs[1], links)])
    return taglink_bigrams
Example 8: tagger
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def tagger(data):
    try:
        st = NERTagger('./nltk-data/StanfordNER/english.all.3class.distsim.crf.ser.gz',
                       './nltk-data/StanfordNER/stanford-ner.jar')
    except:
        return ret_failure(705)
    # try:
    tag = st.tag(data.split())
    # except:
    #     return ret_failure(702)
    return ret_success(tag)
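ret_failure and ret_success are not shown; from the numeric error code and the structure, they appear to wrap results for a web API. A hypothetical sketch (names, response shape, and the meaning of code 705 are all assumptions):

def ret_failure(code):
    # Hypothetical helper: wrap an error code in an API-style response.
    return {"status": "failure", "error": code}

def ret_success(payload):
    # Hypothetical helper: wrap a successful result in an API-style response.
    return {"status": "success", "result": payload}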
Example 9: queryForEntity2
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def queryForEntity2(expectedEntity, passage):
    st = NERTagger('/Users/srinisha/Downloads/stanford-ner-2014-06-16/classifiers/english.all.3class.distsim.crf.ser.gz',
                   '/Users/srinisha/Downloads/stanford-ner-2014-06-16/stanford-ner.jar')
    answer = st.tag(passage.split())
    print(answer)
    answers = []
    for currentExpectedEntity in expectedEntity:
        for i, pair in enumerate(answer):
            if pair[1] == currentExpectedEntity:
                answers.append(answer[i])
    return answers
Example 10: compute_NER
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def compute_NER(corpus):
    NER = []
    # fi = open("NER_features_train.txt", "w")
    st = NERTagger(read_property('StanfordNerClassifier'), read_property('StanfordNerJarPath'))
    for sentence in corpus:
        ner = st.tag(sentence.split())
        ner_tag = ""
        for n in ner:
            ner_tag = ner_tag + n[1] + " "
        NER.append(ner_tag)
    return NER
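compute_NER resolves the classifier and jar locations through a read_property helper that is not included. A plausible sketch, assuming the properties live in an INI-style configuration file (the file name, section, and example value are assumptions):

import configparser

_config = configparser.ConfigParser()
_config.read("config.ini")  # hypothetical configuration file

def read_property(key):
    # e.g. StanfordNerClassifier = /opt/stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz
    return _config.get("paths", key)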
Example 11: german_ner
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def german_ner(text):
    """Run the list of words through the German NER tagger."""
    text = text.encode('utf8')
    st = NERTagger('/Users/Lena/src/context/stanford-ner/classifiers/german/dewac_175m_600.crf.ser.gz',
                   '/Users/Lena/src/context/stanford-ner/stanford-ner.jar', 'utf8')
    tagged = st.tag(text.split())
    return tagged
Example 12: main
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def main():
    words = ["Barack Obama", "Holland", "Government", "Tennis", "happiness"]
    noun_lemmas = []
    nouns = []
    final_ner_tagged = []
    not_ner_tagged = []
    pos_tags = nltk.pos_tag(words)
    lemmatizer = WordNetLemmatizer()
    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')
    # STANFORD NER TAGGING HAPPENS HERE
    for tag in pos_tags:
        if tag[1] in ('NNP', 'NN'):
            nouns.append(tag[0])
    ner_tagged = class3.tag(nouns)
    for t in ner_tagged[0]:
        if t[1] == u'O':
            not_ner_tagged.append(t[0])
        else:
            final_ner_tagged.append(t)
    print("NERTagged:")
    print(final_ner_tagged)
    entities = {
        "COUNTRY": wordnet.synsets("country", pos='n'),
        "STATE": wordnet.synsets("state", pos='n'),
        "CITY": wordnet.synsets("city", pos='n'),
        "TOWN": wordnet.synsets("town", pos='n'),
        "NAT": wordnet.synsets("natural places", pos='n'),
        "PER": wordnet.synsets("person", pos='n'),
        "ORG": wordnet.synsets("organisation", pos='n'),
        "ANI": wordnet.synsets("animal", pos='n'),
        "SPO": wordnet.synsets("sport", pos='n'),
        "ENT": wordnet.synsets("entertainment", pos='n'),
    }
    tagged_top_entities = defaultdict(list)
    for word in pos_tags:
        if word[1] == "NN" or word[1] == "NNP":
            noun_lemmas.append(lemmatizer.lemmatize(word[0], wordnet.NOUN))
            word_synset = wordnet.synsets(word[0], pos="n")
            for e in list(entities.keys()):
                if len(word_synset) != 0 and len(entities[e]) != 0:
                    if hypernymOf(word_synset[0], entities[e][0]):
                        tagged_top_entities[word[0]].append(e)
    print("WordNet tagged:")
    for w in tagged_top_entities:
        print("{:15}{}".format(w, tagged_top_entities[w]))
Example 13: findWord
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def findWord(self):
    """
    Return the first word in the question whose NER tag matches
    self.queryType, or -1 if no such word is found.
    """
    st = NERTagger('stanford-ner-2014-01-04/classifiers/english.muc.7class.distsim.crf.ser.gz',
                   'stanford-ner-2014-01-04/stanford-ner.jar')
    tagged = st.tag(self.question.split())
    for item in tagged:
        if item[1] == self.queryType:
            # print(item[0])
            return item[0]
    return -1
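findWord is a method, but its host class is not shown. From the attributes it touches, a minimal stand-in needs only the question text and the expected entity type (the class name and constructor are assumptions):

class QuestionWordFinder:
    # Hypothetical host class for findWord().
    def __init__(self, question, queryType):
        self.question = question    # e.g. "Who invented the telephone?"
        self.queryType = queryType  # e.g. "PERSON", one of the MUC 7-class tags

    findWord = findWord  # reuse the function from Example 13 as a method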
Example 14: spanish_ner
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def spanish_ner(text):
    """Run the list of words through the Spanish NER tagger."""
    text = text.encode('utf8')
    st = NERTagger('/Users/Lena/src/context/stanford-ner/edu/stanford/nlp/models/ner/spanish.ancora.distsim.s512.crf.ser.gz',
                   '/Users/Lena/src/context/stanford-ner/stanford-ner.jar', 'utf8')
    tagged = st.tag(text.split())
    return tagged
Example 15: EventDetectiveNer
# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
class EventDetectiveNer(EventDetective):
    def loadClassifier(self):
        classifier = "ner/classifiers/" + "tweets.ser.gz"
        jar = "ner/stanford-ner-3.4.jar"
        self.tagger = NERTagger(classifier, jar)

    def tagText(self, candidate):
        result = defaultdict(list)
        text = " ".join([tweet['text'] for tweet in candidate])  # make one long text
        for line in self.tagger.tag(nltk.word_tokenize(text)):
            for word, tag in line:
                result[tag].append(word)
        return result

    def generateMarkers(self):
        print("Creating Google Maps markers & add WIKI links...")
        js = open('vis/map/js/markers.js', 'w')
        js.write('var locations = [')
        for tweets, label in self.events:
            writableCluster = ''
            gh = []
            i = 0
            avgLon = 0
            avgLat = 0
            # tweets = sorted(tweets, key=itemgetter('unixTime'))
            for tweet in tweets:
                i = i + 1
                gh.append(tweet['geoHash'])
                avgLon += float(tweet["lon"])
                avgLat += float(tweet["lat"])
                # Backslashes are for multiline strings in JavaScript.
                writableCluster += "{} {} {} {}<br/><br/>".format(tweet['localTime'], tweet['geoHash'], tweet['user'], tweet['text']).replace("'", "\\'")
            # Take the Cartesian (ordinary) mean of the coordinates; the error
            # (caused by the curvature of the earth) should be small, since the
            # tweets cover only a small patch of the planet.
            # In other words, we pretend the earth is flat ;-)
            avgLon /= i
            avgLat /= i
            nertags = self.tagText(tweets)
            for key in nertags:
                if key != 'O':
                    writableCluster += "</br> {} {}".format(key, " ,".join(list(set(nertags[key]))).replace("'", "\\'"))
            js.write("['{}', {}, {}, '{}'],".format(writableCluster, avgLat, avgLon, label))
        js.write('];')
        js.close()