本文整理汇总了Python中nltk.corpus.names.words函数的典型用法代码示例。如果您正苦于以下问题：Python words函数的具体用法？Python words怎么用？Python words使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了words函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
def main():
    """Train a Naive Bayes gender classifier on the NLTK names corpus and report on it.

    Every name in ``male.txt`` / ``female.txt`` is labeled, the labeled list is
    shuffled, and the classifier is trained on everything after the first 1500
    entries. The function then prints a handful of sample predictions, the
    accuracy on the dev-test slice, the most informative features and every
    misclassified dev-test name.

    Relies on ``random``, ``nltk`` and ``gender_features`` being available at
    module level.
    """
    from nltk.corpus import names

    # Keep the corpus reader (`names`) and the labeled data under separate
    # names instead of rebinding the imported reader to a list.
    labeled_names = ([(name, 'male') for name in names.words('male.txt')] +
                     [(name, 'female') for name in names.words('female.txt')])
    random.shuffle(labeled_names)

    # labeled_names[:500] is left untouched here (held-out test slice).
    train_names = labeled_names[1500:]
    devtest_names = labeled_names[500:1500]
    train_set = [(gender_features(n), g) for (n, g) in train_names]
    devtest_set = [(gender_features(n), g) for (n, g) in devtest_names]
    classifier = nltk.NaiveBayesClassifier.train(train_set)

    def predict(name):
        """Classify a single name with the freshly trained classifier."""
        return classifier.classify(gender_features(name))

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the original print statements.
    print(predict('Neo'))
    print(predict('Trinity'))
    print('attila: %s' % (predict('Attila'),))
    print(predict('Bori'))
    print(predict('Gabi'))
    print('andy: %s' % (predict('Andy'),))
    print('dom: %s' % (predict('Dom'),))
    print('monica: %s' % (predict('Monica'),))
    print('donnie: %s' % (predict('Donald'),))
    print('accuracy: %s' % (nltk.classify.accuracy(classifier, devtest_set),))

    # show_most_informative_features() prints its own report and returns
    # None, so wrapping it in print would emit a stray "None" line.
    classifier.show_most_informative_features(5)

    # Collect and list every dev-test name the classifier got wrong.
    errors = []
    for (name, tag) in devtest_names:
        guess = predict(name)
        if guess != tag:
            errors.append((tag, guess, name))
    for (tag, guess, name) in sorted(errors):
        print('correct=%-8s guess=%-8s name=%-30s' % (tag, guess, name))
示例2: initGenderClassifier
def initGenderClassifier():
    """Build and return a Naive Bayes gender classifier trained on the NLTK names corpus.

    Uses the module-level ``gender_features`` to turn each name into a
    feature dict and trains on all male and female names (male first).
    """
    from nltk.corpus import names as names_corpus

    labeled = [(n, 'male') for n in names_corpus.words('male.txt')]
    labeled.extend((n, 'female') for n in names_corpus.words('female.txt'))

    featuresets = []
    for n, label in labeled:
        featuresets.append((gender_features(n), label))
    return nltk.NaiveBayesClassifier.train(featuresets)
示例3: __calculateAgreement
def __calculateAgreement(self):
    """Derive gender, number and person agreement attributes from ``self.np``.

    Sets, depending on the content of ``self.np``:

    * ``self.gender`` -- ``'male'`` / ``'female'`` for a single-child phrase
      whose first leaf is in the NLTK names corpus, or for gendered pronouns;
    * ``self.number`` -- a set containing ``'singular'`` and/or ``'plural'``;
    * ``self.person`` -- ``'first'`` / ``'second'`` for the matching pronouns.

    Attributes that no rule matches are left untouched.

    NOTE(review): the block nesting was reconstructed from statement order;
    confirm the pronoun chain is meant to sit under the ``'PRP'`` check.
    """
    # Single-child phrase: look the leaf up in the name lists.
    if len(self.np) == 1:
        head = self.np[0, 0]
        if head in names.words('male.txt'):
            self.gender = 'male'
        elif head in names.words('female.txt'):
            self.gender = 'female'

    # Plural noun tags or a coordination (',' / 'and') mark the phrase plural.
    pos_tags = {tag for (_, tag) in self.np.pos()}
    if {'NNS', 'NNPS'}.intersection(pos_tags) or {',', 'and'}.intersection(self.np.leaves()):
        self.number = {'plural'}
    else:
        self.number = {'singular'}

    # Pronouns override the generic number guess and may add gender/person.
    if 'PRP' in self.np[0].label():
        pronoun = self.np[0, 0].lower()
        if pronoun in {'they', 'them', 'themselves', 'their'}:
            self.number = {'plural'}
        elif pronoun in {'him', 'he', 'himself'}:
            self.gender = 'male'
            self.number = {'singular'}
        elif pronoun in {'her', 'herself', 'she'}:
            self.number = {'singular'}
            self.gender = 'female'
        elif pronoun in {'it', 'itself'}:
            self.number = {'singular'}
        elif pronoun in {'us', 'we', 'our', 'ourselves'}:
            self.number = {'plural'}
            self.person = 'first'
        # The token is lower-cased above, so the set must hold 'i', not 'I';
        # with 'I' the first-person singular pronoun could never match.
        elif pronoun in {'i', 'me', 'my', 'myself'}:
            self.number = {'singular'}
            self.person = 'first'
        elif pronoun in {'yourself'}:
            self.number = {'singular'}
            self.person = 'second'
        elif pronoun in {'you', 'your'}:
            # "you"/"your" is ambiguous between singular and plural.
            self.number = {'singular', 'plural'}
            self.person = 'second'
        elif pronoun in {'yourselves'}:
            self.number = {'plural'}
            self.person = 'second'
示例4: gender
def gender(word):
    """Guess the gender of a word with a Naive Bayes classifier trained on name lists.

    Args:
        word (str):  Word. (usually a name)

    Returns:
        str:  The predicted label, ``'male'`` or ``'female'``.

    .. note::

        A new classifier is trained on every call, on a shuffled subset of the
        names corpus, so calls are expensive and the prediction for a given
        word may differ between calls. This method is very naive and not very
        useful, so it will be deprecated in the future.
    """
    labeled_names = ([(name, 'male') for name in names.words('male.txt')] +
                     [(name, 'female') for name in names.words('female.txt')])
    shuffle(labeled_names)

    # The loop variable is called `label`, not `gender`, so it does not
    # shadow this function's own name.
    featuresets = [(Classifier.gender_features(n), label)
                   for (n, label) in labeled_names]

    # The first 500 shuffled entries are left out of training.
    train_set = featuresets[500:]
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    return classifier.classify(Classifier.gender_features(word))
示例5: createName
def createName():
    """Return a randomly picked first name from the NLTK names corpus, with spaces removed.

    Male names are listed before female names, the combined list is shuffled
    in place, and the entry that ends up first is returned as a ``str``.
    """
    candidates = list(names.words('male.txt'))
    candidates.extend(names.words('female.txt'))
    random.shuffle(candidates)
    picked = candidates[0]
    return str(picked).replace(' ', '')
示例6: semanticClassify
def semanticClassify(self, s):
    """
    Attach a semantic label to every segment of a POS-tagged sequence.

    Segments tagged 'NP' are labeled 'male_name', 'female_name' or 'month'
    when the word is found in the corresponding list; segments tagged
    'VERB' or 'NOUN' get the name of their first WordNet synset. Everything
    else (and every lookup miss) gets the placeholder label ' '.
    The input must already be POS-tagged.

    Input: [('i', 'PRON'), ('love', 'VERB'), ('you', 'PRON')]
    Output: [('i', 'PRON', ' '), ('love', 'VERB', 'love.n.01'), ('you', 'PRON', ' ')]

    The resulting list is passed through ``self.encodeutf8`` before being
    returned.
    """
    # Build the lookup tables once, as sets, instead of re-reading the corpus
    # and rebuilding three lists for every single segment.
    male_names = {w.lower() for w in names.words('male.txt')}
    female_names = {w.lower() for w in names.words('female.txt')}
    months = {'january', 'february', 'march', 'april', 'may', 'june',
              'july', 'august', 'september', 'october', 'november', 'december'}

    classified_seg = []
    for seg in s:
        word, tag = seg[0], seg[1]
        if tag == 'NP':
            # Same precedence as before: male name, then female name, then month.
            if word in male_names:
                label = 'male_name'
            elif word in female_names:
                label = 'female_name'
            elif word in months:
                label = 'month'
            else:
                label = ' '
        elif tag == 'VERB' or tag == 'NOUN':
            synsets = wn.synsets(word)
            label = synsets[0].name() if len(synsets) > 0 else ' '
        else:
            label = ' '
        classified_seg.append((word, tag, label))
    return self.encodeutf8(classified_seg)
示例7: demo
def demo():
    """Run the SVM classifier demo on the NLTK names corpus and print a report.

    Trains ``SvmClassifier`` on last-letter / penultimate-letter features of
    the labeled names (all but the first 500 after a seeded shuffle), then
    prints the model dimensions, one worked example and the accuracy on the
    500 held-out names.

    Relies on ``SvmClassifier`` and ``map_instance_to_svm`` from the
    enclosing module.
    """
    def gender_features(word):
        """Feature dict built from the last two letters of ``word``."""
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    import random
    from nltk.classify import accuracy
    from nltk.corpus import names

    # Keep the corpus reader and the labeled data under separate names.
    labeled_names = ([(name, 'male') for name in names.words('male.txt')] +
                     [(name, 'female') for name in names.words('female.txt')])

    # Fixed seed so the train/test split is reproducible.
    random.seed(60221023)
    random.shuffle(labeled_names)
    featuresets = [(gender_features(n), g) for (n, g) in labeled_names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    # Single-argument print(...) calls give the same output on Python 2 and 3.
    print('--- nltk.classify.svm demo ---')
    print('Number of training examples: %s' % (len(train_set),))
    classifier = SvmClassifier.train(train_set)
    print('Total SVM dimensions: %s' % (len(classifier._svmfeatureindex),))
    print('Label mapping: %s' % (classifier._labelmapping,))

    print('--- Processing an example instance ---')
    print('Reference instance: %s' % (labeled_names[0],))
    print('NLTK-format features:\n ' + str(test_set[0]))
    print('SVMlight-format features:\n ' + str(map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex)))
    distr = classifier.prob_classify(test_set[0][0])
    print('Instance classification and confidence: %s %s' % (distr.max(), distr.prob(distr.max())))

    print('--- Measuring classifier performance ---')
    print('Overall accuracy: %s' % (accuracy(classifier, test_set),))
示例8: demo
def demo():
    """Demonstrate ``SvmClassifier`` on the NLTK names corpus.

    Labels every male/female name, shuffles them with a fixed seed, trains on
    everything after the first 500 entries and evaluates on those 500. Prints
    the training size, SVM dimensions, label mapping, one example instance in
    both NLTK and SVMlight form with its predicted label and probability, and
    the overall accuracy.

    ``SvmClassifier`` and ``map_instance_to_svm`` are expected to be defined
    in the enclosing module.
    """
    def gender_features(word):
        """Return the last and second-to-last letter of ``word`` as features."""
        return {"last_letter": word[-1], "penultimate_letter": word[-2]}

    import random
    from nltk.classify import accuracy
    from nltk.corpus import names

    # Use a distinct name for the labeled list so the corpus reader `names`
    # is not overwritten.
    labeled_names = [(name, "male") for name in names.words("male.txt")] + [
        (name, "female") for name in names.words("female.txt")
    ]

    random.seed(60221023)  # reproducible shuffle / split
    random.shuffle(labeled_names)
    featuresets = [(gender_features(n), g) for (n, g) in labeled_names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    # print(...) with one pre-formatted argument behaves identically on
    # Python 2 and Python 3; the original print statements are Python 2 only.
    print("--- nltk.classify.svm demo ---")
    print("Number of training examples: %s" % (len(train_set),))
    classifier = SvmClassifier.train(train_set)
    print("Total SVM dimensions: %s" % (len(classifier._svmfeatureindex),))
    print("Label mapping: %s" % (classifier._labelmapping,))

    print("--- Processing an example instance ---")
    print("Reference instance: %s" % (labeled_names[0],))
    print("NLTK-format features:\n " + str(test_set[0]))
    print("SVMlight-format features:\n " + str(
        map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex)
    ))
    distr = classifier.prob_classify(test_set[0][0])
    print("Instance classification and confidence: %s %s" % (distr.max(), distr.prob(distr.max())))

    print("--- Measuring classifier performance ---")
    print("Overall accuracy: %s" % (accuracy(classifier, test_set),))
示例9: new_naive_bayes_classifier
def new_naive_bayes_classifier():
    """Return the module-wide Naive Bayes classifier, training it if the cache is empty.

    The training data combines four labeled groups built with
    ``_new_training_set``: male names and pronouns, female names and
    pronouns, neutral pronouns, and an 'excess' group (abbreviations,
    prepositions, punctuation and a couple of filler words). The trained
    classifier is stored in ``CLASSIFIER_CACHE`` and reused while that
    global is truthy.
    """
    global CLASSIFIER_CACHE
    if not CLASSIFIER_CACHE:
        training_groups = (
            _new_training_set(
                'male', names.words('male.txt'), MALE_PRONOUN_SEQ),
            _new_training_set(
                'female', names.words('female.txt'), FEMALE_PRONOUN_SEQ),
            _new_training_set(
                'neutral', NEUTRAL_PRONOUN_SEQ),
            _new_training_set(
                'excess', ABBREVIATION_SEQ, PREPOSITION_SEQ, string.punctuation,
                ('looking', 'is'),
            ),
        )
        # Lazily pair each word's features with its label, group by group.
        labeled_features = (
            (_gender_features(token), label)
            for token, label in chain(*training_groups)
        )
        CLASSIFIER_CACHE = nltk.NaiveBayesClassifier.train(labeled_features)
    return CLASSIFIER_CACHE
示例10: make_classifier
def make_classifier():
    """Train and return a Naive Bayes classifier over the NLTK male/female name lists.

    Each name is converted with the module-level ``name_features`` and paired
    with its gender label; male names come first in the training data.
    """
    from nltk.corpus import names

    training_names = []
    for label, fileid in (('male', 'male.txt'), ('female', 'female.txt')):
        training_names.extend((name, label) for name in names.words(fileid))

    feature_sets = []
    for name, label in training_names:
        feature_sets.append((name_features(name), label))
    return nltk.NaiveBayesClassifier.train(feature_sets)
示例11: identify_gender3
def identify_gender3():
    """Evaluate ``gender_features3`` with a Naive Bayes classifier on shuffled names.

    Labels all male and female names from the NLTK names corpus, shuffles
    them, extracts features with ``gender_features3`` and hands the result to
    the module-level ``classify`` helper together with the classifier class
    and the number 500.
    """
    import random
    from nltk.corpus import names as names_corpus

    labeled = [(n, 'male') for n in names_corpus.words('male.txt')]
    labeled += [(n, 'female') for n in names_corpus.words('female.txt')]
    random.shuffle(labeled)

    featuresets = []
    for n, label in labeled:
        featuresets.append((gender_features3(n), label))
    return classify(nltk.NaiveBayesClassifier, featuresets, 500)
示例12: feature_nameList
def feature_nameList(word):
    """Return 1 if ``word`` is a known person name or place name, otherwise 0.

    A word counts as known when it appears in the NLTK male or female name
    list, or when ``GeoText`` extracts at least one city or country from it.
    """
    if word in names.words('male.txt') or word in names.words('female.txt'):
        return 1

    # NOTE(review): this assumes GeoText is the `geotext` package class, whose
    # instances are always truthy, so the original `elif GeoText(word):` made
    # the function return 1 for every input. Inspect the extracted place
    # lists instead; confirm cities/countries are the intended signals.
    places = GeoText(word)
    if places.cities or places.countries:
        return 1
    return 0
示例13: get_variations
def get_variations(s):
    """Return the parts of ``s`` that are known names in the NLTK names corpus.

    Args:
        s: A string, split on whitespace into candidate parts.

    Returns:
        A list with every whitespace-separated part of ``s`` that occurs in
        the names corpus (in order), followed by ``s`` itself when the whole
        string is also a corpus entry. A single-word ``s`` that is a known
        name therefore appears twice.
    """
    # Load the corpus once into a set; the original re-read and linearly
    # scanned the whole word list for every token.
    known_names = set(names.words())

    variations = [part for part in s.split() if part in known_names]
    if s in known_names:
        variations.append(s)
    return variations
示例14: create_featuresets
def create_featuresets(self):
    '''
    Build shuffled (features, gender) pairs from the names corpora.

    Every male and female name is labeled, the labeled list is shuffled in
    place, and each name is converted with self.gender_features.
    '''
    labeled = [(n, 'male') for n in names.words('male.txt')]
    labeled += [(n, 'female') for n in names.words('female.txt')]
    random.shuffle(labeled)

    featuresets = []
    for n, g in labeled:
        featuresets.append((self.gender_features(n), g))
    return featuresets
示例15: nltkTest
def nltkTest():
    """Train a Naive Bayes gender classifier and print its prediction for 'Neo'.

    Labels all names from the NLTK names corpus, shuffles them, trains on
    everything after the first 500 entries and prints the predicted label
    for the name 'Neo'.

    Relies on ``names``, ``nltk`` and ``gender_features`` from the enclosing
    module.
    """
    import random

    labeled_names = ([(name, 'male') for name in names.words('male.txt')] +
                     [(name, 'female') for name in names.words('female.txt')])
    random.shuffle(labeled_names)
    featuresets = [(gender_features(n), label) for (n, label) in labeled_names]

    # The first 500 shuffled entries are held out (unused here); train on the rest.
    train_set = featuresets[500:]
    classifier = nltk.NaiveBayesClassifier.train(train_set)

    # print(...) with a single argument works on both Python 2 and Python 3.
    print(classifier.classify(gender_features('Neo')))