This article collects typical usage examples of the Python method nltk.tag.StanfordNERTagger.tag_sents. If you have been wondering what StanfordNERTagger.tag_sents does, how to call it, or what it looks like in practice, the curated code examples below may help. You can also read further about the class this method belongs to, nltk.tag.StanfordNERTagger.
Three code examples of StanfordNERTagger.tag_sents are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python examples.
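Before the examples, here is a minimal sketch of how tag_sents is usually called. The jar and model paths below are placeholders for a local Stanford NER installation; they are not taken from any example on this page.

from nltk.tag import StanfordNERTagger
from nltk.tokenize import word_tokenize

# Placeholder paths; point these at your local Stanford NER download.
st = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz', 'stanford-ner.jar')

sentences = ["Barack Obama visited Berlin.", "Google opened an office in Zurich."]
# tag_sents accepts a list of token lists and tags them in a single JVM call,
# which is far faster than calling tag() once per sentence.
tagged = st.tag_sents([word_tokenize(s) for s in sentences])
# tagged is one list of (token, label) pairs per sentence, with labels
# such as PERSON, LOCATION, ORGANIZATION, or O.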
Example 1: sent_tokenize
# Required import: from nltk.tag import StanfordNERTagger [as alias]
# Or: from nltk.tag.StanfordNERTagger import tag_sents [as alias]
if originalSize < 5000000 and originalSize > 1000:
    sentences = sent_tokenize(text)
    for sent in sentences:
        # keep sentences that contain a date such as "June 5, 2016"
        if re.search(r'([A-Z]\w+ [1-9]*(1[0-9])*(2[0-9])*(3[0,1])*, \d{4})', sent) and is502:
            sentences_with_date.append(sent)
        words = pos_tag(word_tokenize(sent))
        for word in words:
            if word[1] == "VB":
                sets = wn.synsets(word[0])
                for s in sets:
                    for w in wordList:
                        # path_similarity may return None, so guard the comparison
                        similarity = w.path_similarity(s)
                        if similarity is not None and similarity > 0.3:
                            relevant_sents.append(sent)
    # tag every sentence in one batch instead of one JVM call per sentence
    sents = st.tag_sents([word_tokenize(sent) for sent in sentences])
    for classedSent in sents:
        for word in classedSent:
            if 'PERSON' in word[1] and not previousPerson:
                i += 1
                names.append(word[0])
                previousPerson = True
            elif 'PERSON' in word[1]:
                names[i] = names[i] + " " + word[0]
                previousPerson = True
            else:
                previousPerson = False
elif originalSize > 1000:
    # fall back to scanning the text in 10,000-character chunks
    chunks = (text[0+i:10000+i] for i in range(0, len(text), 10000))
    for chunk in chunks:
        sentences = sent_tokenize(chunk)
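The hand-rolled previousPerson flag above stitches consecutive PERSON tokens into full names. A hedged sketch of the same idea using itertools.groupby (the function and variable names here are my own, not from the example):

from itertools import groupby

def merge_persons(tagged_sentence):
    # collapse runs of consecutive PERSON tokens into full names
    names = []
    for label, group in groupby(tagged_sentence, key=lambda pair: pair[1]):
        if label == 'PERSON':
            names.append(' '.join(token for token, _ in group))
    return names

# merge_persons([('Barack', 'PERSON'), ('Obama', 'PERSON'), ('visited', 'O')])
# -> ['Barack Obama']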
Example 2: loads
# Required import: from nltk.tag import StanfordNERTagger [as alias]
# Or: from nltk.tag.StanfordNERTagger import tag_sents [as alias]
# for each line from stdin
for line in stdin:
    try:
        # parse the JSON-encoded tweet
        tweet = loads(line)
        tweetText = tweet['text']
        # tokenize the tweet text
        listOfWords = word_tokenize(tweetText)
        listOfListOfWords.append(listOfWords)
    except (ValueError, KeyError):
        # skip lines that are not valid JSON or lack a 'text' field
        pass
# StanfordNER instance
nerClf = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz')
nerPair = nerClf.tag_sents(listOfListOfWords)
# keep words tagged LOCATION that are longer than 2 characters
locations = []
for ner in nerPair:
    for word, nerType in ner:
        if nerType == 'LOCATION' and len(word) > 2:
            locations.append(word.lower())
for location in locations:
    # frequency is defined elsewhere in the original script
    print((location, frequency))
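The final print refers to a frequency variable defined in code not shown here. Assuming the intent is to count how often each location is mentioned, a minimal sketch with collections.Counter would be:

from collections import Counter

# Count mentions per location and emit (location, frequency) pairs,
# mirroring the output format of the snippet above.
for location, frequency in Counter(locations).most_common():
    print((location, frequency))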
Example 3: list
# Required import: from nltk.tag import StanfordNERTagger [as alias]
# Or: from nltk.tag.StanfordNERTagger import tag_sents [as alias]
client = pymongo.MongoClient('mongodb://localhost:27017')
db = client['yahoofinance_news']
news = list(db['news'].find({}))

path = 'stanford-ner-2015-04-20/stanford-ner.jar'
os.environ['STANFORD_MODELS'] = 'stanford-ner-2015-04-20/classifiers'
st = StanfordNERTagger('english.all.3class.distsim.crf.ser.gz', path, java_options='-mx2g')

def find_orgs(token_tags):
    # group consecutive tokens that carry the same NER tag
    nes = groupby(token_tags, key=lambda d: d[1])
    nes2 = []
    for k, v in nes:
        if k == 'ORGANIZATION':
            # join a run of ORGANIZATION tokens into a single entity name
            nes2.append(' '.join([t[0] for t in v]))
    return nes2

doc_tokens = [wordpunct_tokenize(n['content']) for n in news]
nes = map(find_orgs, st.tag_sents(doc_tokens))
nes = map(np.unique, nes)

requests = []
for n, ne in zip(news, nes):
    # convert the numpy array to a plain list so pymongo can encode it
    requests.append(UpdateOne({'_id': n['_id']}, {"$set": {'nes': list(ne)}}))
db['news'].bulk_write(requests)
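To illustrate what find_orgs returns, here is a small hand-written tagged sequence (tokens and tags invented for illustration):

sample = [('Apple', 'ORGANIZATION'), ('Inc', 'ORGANIZATION'),
          ('hired', 'O'), ('engineers', 'O'), ('from', 'O'),
          ('Google', 'ORGANIZATION')]
print(find_orgs(sample))  # ['Apple Inc', 'Google']

Because itertools.groupby only groups consecutive items, 'Apple' and 'Inc' merge into one entity while 'Google' stays separate.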