

Python NERTagger.tag Method Code Examples

This article collects typical usage examples of the Python method nltk.tag.stanford.NERTagger.tag. If you are wondering what NERTagger.tag does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of nltk.tag.stanford.NERTagger, the class this method belongs to.


The following presents 15 code examples of NERTagger.tag, ordered by popularity.
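All of the examples share the same basic call pattern: construct a NERTagger from a classifier model and the Stanford NER jar, then pass it a list of tokens. A minimal sketch follows; the paths are placeholders for a local Stanford NER installation, and note that later NLTK releases replaced this class with StanfordNERTagger.

from nltk.tag.stanford import NERTagger

# Placeholder paths: point these at your local Stanford NER download.
st = NERTagger('classifiers/english.all.3class.distsim.crf.ser.gz',
               'stanford-ner.jar')
print(st.tag('Barack Obama visited The Hague .'.split()))
# Depending on the NLTK version, the result is either a flat list of
# (word, tag) tuples or a list of such lists, one per sentence.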

Example 1: main

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def main():
	#os.environ['JAVAHOME'] = r"C:\Program Files\Java\jdk1.8.0_45\bin"
	path="ner"
	classifier = path + "/classifiers/" + "english.muc.7class.distsim.crf.ser.gz"
	jar = path + "/stanford-ner-3.4.jar"
	tagger = NERTagger(classifier, jar)

	tokens = tokenize('ada_lovelace.txt')
	

	taggedText = tagger.tag(tokens)
	

	countList=[]
	nounList = []
	for word, tag in taggedText:
		countList.append(tag)
		if tag != 'O':
			nounList.append(word)
			

	
	print("Answer to 2.1: \n{} \nThey certainly aren't all correct.".format(Counter(countList)))
	print()
	print("Answer to 2.2: The other classifiers seem to achieve similar results,\nbut because of the multiple categories it is more interesting to read.")

	lemmas = lemmatize(nounList)
	taggedLemmas = tagger.tag(lemmas)
	print("Answer to 2.3:\n", taggedLemmas)
Developer: Martbov, Project: pta-group1, Lines: 31, Source: assignment2.py
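Example 1 calls two helpers, tokenize() and lemmatize(), that live elsewhere in the source project. A minimal sketch of what they might look like (hypothetical reconstructions, assuming plain NLTK):

import nltk
from nltk.stem import WordNetLemmatizer

def tokenize(filename):
    # Read a text file and return a flat list of word tokens.
    with open(filename) as f:
        return nltk.word_tokenize(f.read())

def lemmatize(words):
    # Lemmatize each word as a noun.
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(w) for w in words]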

Example 2: ngramTagger

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def ngramTagger(l):
    """
    This function takes a list of bigrams (as tuples), joins each pair into a
    single string, and entity-tags the result.
    :param l: input must be a list of bigrams, formed as tuples
    :return: returns a list of tagged words. (For example, "El Salvador" would
    become [("El", "LOCATION"), ("Salvador", "LOCATION")])
    """
    bigrams_ner = []
    bigrams_wn = []
    bigrams = []
    tb = []
    for i in l:
        ngram_ner = i[0] + " " + i[1]
        ngram_wn = i[0] + "_" + i[1]
        bigrams_ner.append(ngram_ner)
        bigrams_wn.append(ngram_wn)
        bigrams.append((ngram_ner, ngram_wn))

    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')
    tagged_bigrams = class3.tag(bigrams_ner)
    for sent in tagged_bigrams:
        for t in sent:
            if len(t[1]) > 3:
                if t[1] != "LOCATION":
                    tb.append(t)
    for bg in bigrams:
        tag_bg = bgWordNetTagger(bg[0], bg[1])
        if tag_bg == "COUNTRY" or tag_bg == "STATE" or tag_bg == "CITY" or tag_bg == "TOWN":
            words = bg[0].split()
            tb.extend([(words[0], tag_bg), (words[1], tag_bg)])
    print(tb)
Developer: MatthijsBonnema, Project: PTA, Lines: 34, Source: xyz.py
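bgWordNetTagger() is defined elsewhere in the project. Judging by how its result is used above, a plausible (hypothetical) reconstruction looks the bigram up in WordNet via its underscore form and maps its hypernyms to a coarse place tag:

from nltk.corpus import wordnet

def bgWordNetTagger(ngram_ner, ngram_wn):
    # Hypothetical reconstruction: tag a bigram via its WordNet hypernyms.
    synsets = wordnet.synsets(ngram_wn, pos='n')
    if not synsets:
        return "-"
    hypernyms = set(h for path in synsets[0].hypernym_paths() for h in path)
    for tag, lemma in [("COUNTRY", "country"), ("STATE", "state"),
                       ("CITY", "city"), ("TOWN", "town")]:
        target = wordnet.synsets(lemma, pos='n')
        if target and target[0] in hypernyms:
            return tag
    return "-"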

Example 3: NERParser

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
class NERParser (object):
    def __init__(self):
        self.st = NERTagger("/Users/trentniemeyer/nltk_data/stanford-ner-2014-06-16/classifiers/english.muc.7class.distsim.crf.ser.gz",
            "/Users/trentniemeyer/nltk_data/stanford-ner-2014-06-16/stanford-ner.jar")
        self.locations = []
        self.organizations = []

    def parse (self, text):
        ne = self.st.tag(nltk.word_tokenize(text))
        for sentence in ne:
            lastwordwasentity = False
            lastentity = ''
            lasttype = ''
            for (word, entitytype) in sentence:
                if entitytype == 'ORGANIZATION' or entitytype == 'LOCATION':
                    if lastwordwasentity:
                        lastentity += ' ' + word
                    else:
                        lastentity = word
                    lastwordwasentity = True
                    lasttype = entitytype
                else:
                    if lastwordwasentity:
                        if lasttype == 'LOCATION':
                            self.locations.append(lastentity)
                        else:
                            self.organizations.append(lastentity)
                    lastentity = ''
                    lastwordwasentity = False

    def locationFrequencies (self):
        print(collections.Counter(self.locations))

    def organizationFrequencies (self):
        print(collections.Counter(self.organizations))
Developer: trentniemeyer, Project: BlogParse, Lines: 37, Source: Util.py
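A hedged usage sketch for the class above (it assumes import nltk and import collections at module level, which the snippet's header note omits):

parser = NERParser()
parser.parse("Google opened an office in Amsterdam. Amsterdam welcomed Google.")
parser.locationFrequencies()      # e.g. Counter({'Amsterdam': 2})
parser.organizationFrequencies()  # e.g. Counter({'Google': 2})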

Example 4: entityTagger

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def entityTagger():
    """
    Tags nouns in the given file and writes them to an output file.
    """
    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')
    output = open("entity.tagged", "w")
    with open("pos.tagged", "r") as inp_file:
        for l in inp_file:
            line = l.split()
            # If the word is a noun, tag it
            print(line)
            if line[5] == "NN" or line[5] == "NNP":
                ner_tagged = class3.tag([line[4]])
                for t in ner_tagged[0]:
                    # No nertag? Check wordnet tagging
                    if len(t[1]) < 3:
                        tag = wordNetTagger(t[0])
                        data = ("{:8}{:8}{:8}{:8}{:60}{:6}{:13}".format(line[0], line[1], line[2], line[3], line[4],
                                                                        line[5], tag))
                        output.write(data+"\n")
                    else:
                        data = ("{:8}{:8}{:8}{:8}{:60}{:6}{:13}".format(line[0], line[1], line[2], line[3], line[4],
                                                                        line[5], t[1]))
                        output.write(data+"\n")
            else:
                data = ("{:8}{:8}{:8}{:8}{:60}{:6}{:13}".format(line[0], line[1], line[2], line[3], line[4], line[5],
                                                                "-"))
                output.write(data+"\n")
    output.close()
Developer: MatthijsBonnema, Project: PTA, Lines: 33, Source: wiki.py
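wordNetTagger() is again project-specific; a hypothetical single-word counterpart of the bgWordNetTagger() sketch under Example 2:

from nltk.corpus import wordnet

def wordNetTagger(word):
    # Hypothetical reconstruction: tag a single noun via WordNet hypernyms.
    synsets = wordnet.synsets(word, pos='n')
    if not synsets:
        return "-"
    hypernyms = set(h for path in synsets[0].hypernym_paths() for h in path)
    for tag, lemma in [("PER", "person"), ("ORG", "organisation"),
                       ("COUNTRY", "country"), ("CITY", "city")]:
        target = wordnet.synsets(lemma, pos='n')
        if target and target[0] in hypernyms:
            return tag
    return "-"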

Example 5: sdfprocess

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def sdfprocess(rawexpr):
    parser = NERTagger(
        path_to_model='/home/cosmo/Dropbox/Purdue/nlp/stanford-corenlp-full-2014-08-27/english.all.3class.distsim.crf.ser.gz',
        path_to_jar='/home/cosmo/Dropbox/Purdue/nlp/stanford-corenlp-full-2014-08-27/stanford-corenlp-3.4.1.jar',
        java_options='-mx2000m')
    expr = preprocess(rawexpr)
    named_expr = rechunk(parser.tag(word_tokenize(expr)))
    for t in named_expr:
        if t[1] == 'PERSON':
            return t[0]
    return expr
Developer: cosmozhang, Project: satire, Lines: 10, Source: freebq.py
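preprocess() and rechunk() come from the surrounding project. A plausible rechunk() merges consecutive tokens that share a non-'O' tag, so that ('Ada', 'PERSON'), ('Lovelace', 'PERSON') becomes ('Ada Lovelace', 'PERSON') — a hypothetical sketch, assuming the flat output format:

def rechunk(tagged):
    # Merge adjacent tokens with the same non-'O' tag into one chunk.
    chunks = []
    for word, tag in tagged:
        if chunks and chunks[-1][1] == tag and tag != 'O':
            chunks[-1] = (chunks[-1][0] + ' ' + word, tag)
        else:
            chunks.append((word, tag))
    return chunks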

Example 6: Parser

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
class Parser(object):
    def __init__(self):
        self.st = NERTagger(os.path.join(STANFORD_PATH,'classifiers/english.all.3class.distsim.crf.ser.gz'), os.path.join(STANFORD_PATH,'stanford-ner-3.4.jar'))

    def NER(self, s):
        s = s.replace('.',' ')
        s = s.encode('utf-8')
        return self.st.tag(s.split())
Developer: Miyayx, Project: BigSci-EntityLinking, Lines: 10, Source: stanford_parser.py
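A hedged usage sketch (STANFORD_PATH must point at a local Stanford NER distribution, and os must be imported at module level; the snippet itself is Python 2 flavored):

p = Parser()
print(p.NER(u'Stanford University is in California'))
# e.g. [('Stanford', 'ORGANIZATION'), ('University', 'ORGANIZATION'),
#       ('is', 'O'), ('in', 'O'), ('California', 'LOCATION')]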

Example 7: ngramTagger

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def ngramTagger(l):
    """
    This function joins bigrams into strings, tags them via Stanford NER or
    WordNet, and looks up links to Wikipedia pages.
    :param l: input must be a list of bigrams, formed as tuples
    :return: returns a list of tagged words linked to Wikipedia.
    """
    print("checking ngrams")
    nerts = []

    # First, create words which are suited as input for NERTagger.
    for i in l:
        ngram_ner = i[0] + " " + i[1]
        nerts.append(ngram_ner)

    # Input the list of suitable bigrams in the NERTagger, and form the output to a wanted format with nerToBG()
    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')
    ner_result = class3.tag(nerts)
    bigramsAndTags = nerToBG(ner_result)

    for t in bigramsAndTags:
        # If tagged as location, replace the LOCATION tag via the same technique
        # as locationTagger(), but for bigrams, using getRidOfLocation()
        if t[1] == "LOCATION" or t[2] == "LOCATION":
            wn_bg = t[0].split()[0] + "_" + t[0].split()[1]
            wAndTag = getRidOfLocation(wn_bg)
            t[1] = wAndTag[1]
            t[2] = wAndTag[1]

    final_list = []
    for j in range(len(bigramsAndTags)):
        # If the two words of the bigram are tagged the same, append them to final_list.
        if bigramsAndTags[j][1] == bigramsAndTags[j][2]:
            final_list.extend([(bigramsAndTags[j][0], bigramsAndTags[j][1])])
        # If word 1 isn't tagged and word 2 is, check if word 1 is tagged in the development set.
        # If this tag is the same as the tag of word 2, append to final_list.
        elif checkBGTag(bigramsAndTags[j][0].split()[0]) == bigramsAndTags[j][2]:
            final_list.extend([(bigramsAndTags[j][0], bigramsAndTags[j][2])])
        # If word 2 isn't tagged and word 1 is, check if word 2 is tagged in the single word tagged development set.
        # If this tag is the same as the tag of word 1, append to final_list.
        elif checkBGTag(bigramsAndTags[j][0].split()[1]) == bigramsAndTags[j][1]:
            final_list.extend([(bigramsAndTags[j][0], bigramsAndTags[j][1])])

    taglink_bigrams = []
    for bgs in final_list[:]:
        # If bigrams are still not tagged, remove them from the list.
        if len(bgs[1]) < 4:
            final_list.remove(bgs)
        else:
            # If they are tagged, look up wikipedia links.
            links = wiki_lookup(bgs[0], bgs[1])
            words = bgs[0].split(" ")
            taglink_bigrams.extend([(words[0], bgs[1], links), (words[1], bgs[1], links)])

    return taglink_bigrams
Developer: MatthijsBonnema, Project: PTA, Lines: 59, Source: wiki_fast.py
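nerToBG(), checkBGTag(), getRidOfLocation() and wiki_lookup() are helper functions from the same project and are not shown here. As a rough idea, a hypothetical nerToBG() pairs each bigram string with the NER tags of its two words as a mutable [bigram, tag1, tag2] list (the exact shape of ner_result depends on the NLTK version):

def nerToBG(ner_result):
    # Hypothetical: flatten the tagged output and regroup it per bigram.
    words = [pair for sent in ner_result for pair in sent]
    return [[words[i][0] + ' ' + words[i + 1][0], words[i][1], words[i + 1][1]]
            for i in range(0, len(words) - 1, 2)]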

Example 8: tagger

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def tagger(data):
	try:
		st=NERTagger('./nltk-data/StanfordNER/english.all.3class.distsim.crf.ser.gz','./nltk-data/StanfordNER/stanford-ner.jar')
	except:
		return ret_failure(705)
	#try:
	tag = st.tag(data.split())
	#except:
	#	return ret_failure(702)
	return ret_success(tag)
Developer: nishfreak, Project: nltk-server, Lines: 12, Source: stanfordner.py
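ret_success() and ret_failure() are response helpers from the nltk-server project; hypothetical JSON-style stand-ins might look like:

def ret_success(result):
    # Hypothetical stand-in for the project's success response helper.
    return {'status': 'success', 'result': result}

def ret_failure(code):
    # Hypothetical stand-in; 705 above is the code used when the tagger
    # fails to initialise.
    return {'status': 'failure', 'error': code}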

Example 9: queryForEntity2

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def queryForEntity2(expectedEntity,passage):
    st = NERTagger('/Users/srinisha/Downloads/stanford-ner-2014-06-16/classifiers/english.all.3class.distsim.crf.ser.gz','/Users/srinisha/Downloads/stanford-ner-2014-06-16/stanford-ner.jar') 
    answer=st.tag(passage.split()) 
    print(answer)
    answers=[]
    for j,currentExpectedEntity in enumerate(expectedEntity):
        for i,pair in enumerate(answer):
            if(pair[1]==currentExpectedEntity):
                answers.append(answer[i])   
    return answers
Developer: shubhangikumar, Project: NLP_QA_Project2, Lines: 12, Source: NER_PA2.py
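A hedged usage sketch for queryForEntity2() (the loop above assumes a flat list of (word, tag) pairs, which depends on the NLTK version):

passage = 'Barack Obama was born in Hawaii'
print(queryForEntity2(['PERSON', 'LOCATION'], passage))
# e.g. [('Barack', 'PERSON'), ('Obama', 'PERSON'), ('Hawaii', 'LOCATION')]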

Example 10: compute_NER

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def compute_NER(corpus):
    NER = []
    #fi = open("NER_features_train.txt", "w")
    st = NERTagger(read_property('StanfordNerClassifier'), read_property('StanfordNerJarPath'))
    for sentence in corpus:
        ner = st.tag(sentence.split())
        ner_tag = ""
        for n in ner:
            ner_tag = ner_tag + n[1] + " "
        NER.append(ner_tag)
    return NER
Developer: StevenLOL, Project: QuestionClassification, Lines: 13, Source: Training_Coarse_Classification1.py
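read_property() reads paths from a project configuration; a hypothetical stand-in backed by a plain key=value properties file:

def read_property(key, path='config.properties'):
    # Hypothetical helper: look up a key in a simple properties file.
    with open(path) as f:
        for line in f:
            if line.strip() and not line.lstrip().startswith('#'):
                k, _, v = line.partition('=')
                if k.strip() == key:
                    return v.strip()
    raise KeyError(key)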

Example 11: german_ner

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def german_ner(text):
	""" Moves the list of words through the NER tagger"""

	text = text.encode('utf8')  

	st = NERTagger('/Users/Lena/src/context/stanford-ner/classifiers/german/dewac_175m_600.crf.ser.gz',
                '/Users/Lena/src/context/stanford-ner/stanford-ner.jar', 'utf8') 

	tagged = st.tag(text.split())

	return tagged  
Developer: lenazun, Project: context, Lines: 13, Source: german_processing.py
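A hedged usage sketch (the classifier path above is machine-specific; the exact tag labels depend on the German model used):

print(german_ner(u'Angela Merkel besuchte Berlin'))
# e.g. [('Angela', 'I-PER'), ('Merkel', 'I-PER'),
#       ('besuchte', 'O'), ('Berlin', 'I-LOC')]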

Example 12: main

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def main():
    words = ["Barack Obama", "Holland", "Government", "Tennis", "happiness"]

    noun_lemmas = []
    nouns = []
    final_ner_tagged = []
    not_ner_tagged = []
    pos_tags = nltk.pos_tag(words)
    lemmatizer = WordNetLemmatizer()

    class3 = NERTagger('stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                       'stanford-ner/stanford-ner.jar')

    # STANFORD NERTAGGING HAPPENS HERE
    for tag in pos_tags:
        if tag[1] == 'NNP':
            nouns.append(tag[0])
        elif tag[1] == 'NN':
            nouns.append(tag[0])

    ner_tagged = class3.tag(nouns)
    for t in ner_tagged[0]:
        if t[1] == u'O':
            not_ner_tagged.append(t[0])
        else:
            final_ner_tagged.append(t)
    print("NERTagged:")
    print(final_ner_tagged)

    entities = {
        "COUNTRY": wordnet.synsets("country", pos='n'),
        "STATE": wordnet.synsets("state", pos='n'),
        "CITY": wordnet.synsets("city", pos='n'),
        "TOWN": wordnet.synsets("town", pos='n'),
        "NAT": wordnet.synsets("natural places", pos='n'),
        "PER": wordnet.synsets("person", pos='n'),
        "ORG": wordnet.synsets("organisation", pos='n'),
        "ANI": wordnet.synsets("animal", pos='n'),
        "SPO": wordnet.synsets("sport", pos='n'),
        "ENT": wordnet.synsets("entertainment", pos='n'),
    }

    tagged_top_entities = defaultdict(list)
    for word in pos_tags:
        if word[1] == "NN" or word[1] == "NNP":
            noun_lemmas.append(lemmatizer.lemmatize(word[0], wordnet.NOUN))
            word_synset = wordnet.synsets(word[0], pos="n")
            for e in list(entities.keys()):
                if len(word_synset) != 0 and len(entities[e]) != 0:
                    if hypernymOf(word_synset[0], entities[e][0]):
                        tagged_top_entities[word[0]].append(e)
    print("WordNet tagged:")
    for w in tagged_top_entities:
        print("{:15}{:15}".format(w, tagged_top_entities[w]))
Developer: MatthijsBonnema, Project: PTA, Lines: 56, Source: taggertest.py
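hypernymOf() is not shown in the snippet; a plausible definition checks whether candidate_synset lies on some hypernym path of synset:

def hypernymOf(synset, candidate_synset):
    # Hypothetical reconstruction: True if candidate_synset is synset itself
    # or one of its (transitive) hypernyms; hypernym_paths() includes both.
    return any(candidate_synset in path for path in synset.hypernym_paths())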

Example 13: findWord

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
	def findWord(self):
		"""

		"""
		st = NERTagger('stanford-ner-2014-01-04/classifiers/english.muc.7class.distsim.crf.ser.gz','stanford-ner-2014-01-04/stanford-ner.jar')
		tagged= st.tag(self.question.split())
		for item in tagged:
			if item[1]== self.queryType:
				#print item[0]
				return item[0]

		return -1
Developer: BhaviJagwani, Project: FactoidQASytem, Lines: 14, Source: AnswerModule.py

Example 14: spanish_ner

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
def spanish_ner(text):
	""" Moves the list of words through the NER tagger"""

	text = text.encode('utf8')


	st = NERTagger('/Users/Lena/src/context/stanford-ner/edu/stanford/nlp/models/ner/spanish.ancora.distsim.s512.crf.ser.gz',
                '/Users/Lena/src/context/stanford-ner/stanford-ner.jar', 'utf8') 

	tagged = st.tag(text.split())

	return tagged  
Developer: lenazun, Project: context, Lines: 14, Source: spanish_processing.py

Example 15: EventDetectiveNer

# Required import: from nltk.tag.stanford import NERTagger [as alias]
# Or: from nltk.tag.stanford.NERTagger import tag [as alias]
class EventDetectiveNer(EventDetective):
    
    def loadClassifier(self):
        classifier = "ner/classifiers/" + "tweets.ser.gz"
        jar = "ner/stanford-ner-3.4.jar"
        self.tagger = NERTagger(classifier, jar)

    def tagText(self, candidate):
        result = defaultdict(list)
        text = " ".join([tweet['text'] for tweet in candidate]) #make one long text     
        for line in self.tagger.tag(nltk.word_tokenize(text)):
            for word, tag in line:
                result[tag].append(word)
        return result
            
    def generateMarkers(self):
        print("Creating Google Maps markers & add WIKI links...")
        
        js = open('vis/map/js/markers.js','w')
        js.write('var locations = [')

        
        for tweets,label in self.events:
            writableCluster = ''
            gh = []
            i = 0
            avgLon = 0
            avgLat = 0
            #tweets = sorted(tweets, key=itemgetter('unixTime'));
                              
            for tweet in tweets:
                i = i + 1
                gh.append(tweet['geoHash'])
                avgLon += float(tweet["lon"])
                avgLat += float(tweet["lat"])
                # backslashes for multiline strings in JavaScript
                writableCluster += "{} {} {} {}<br/><br/>".format(tweet['localTime'], tweet['geoHash'], tweet['user'], tweet['text']).replace("'", "\\'")
            # Take the Cartesian (ordinary) mean of the coordinates; the error (due to
            # the shape of the earth) will probably be small, since this covers only a
            # small patch of the earth's surface... in other words, we pretend for a
            # moment that the earth is flat ;-)
            avgLon /= i
            avgLat /= i
            nertags = self.tagText(tweets)
            for key in nertags:
                if key != 'O':
                    writableCluster += "<br/> {} {}".format(key, ", ".join(list(set(nertags[key]))).replace("'", "\\'"))


           
            js.write("['{}', {}, {}, '{}'],".format(writableCluster,avgLat,avgLon,label))
        js.write('];')
        js.close()
Developer: daviddekleer, Project: EventDetective, Lines: 54, Source: EventDetectiveNer.py


Note: The nltk.tag.stanford.NERTagger.tag method examples on this page were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by various developers, and the copyright of the source code belongs to the original authors. Please refer to each project's license before distributing or using the code; do not reproduce without permission.