当前位置: 首页>>代码示例>>Python>>正文


Python names.words函数代码示例

本文整理汇总了Python中nltk.corpus.names.words函数的典型用法代码示例。如果您正苦于以下问题：Python words函数的具体用法是什么？Python words怎么用？有哪些Python words的使用例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了words函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

def main():
    """Train a Naive Bayes gender classifier on the NLTK names corpus and report on it.

    The labelled names are shuffled and split by position: indices
    ``[1500:]`` are used for training, ``[500:1500]`` form the dev-test set,
    and ``[:500]`` are left untouched as a final test set.

    The function prints the predicted label for a handful of sample names,
    the accuracy on the dev-test set, the five most informative features,
    and finally every dev-test name that was classified incorrectly.

    Relies on ``nltk``, ``random`` and ``gender_features`` being available
    at module level.
    """
    # Import under a different name so the corpus reader is not shadowed by
    # the list of labelled names built from it.
    from nltk.corpus import names as name_corpus

    labeled_names = (
        [(name, 'male') for name in name_corpus.words('male.txt')] +
        [(name, 'female') for name in name_corpus.words('female.txt')])
    random.shuffle(labeled_names)
    # labeled_names[:500] is intentionally not used here: it stays reserved
    # as a final test set that error analysis never looks at.
    train_names = labeled_names[1500:]
    devtest_names = labeled_names[500:1500]

    train_set = [(gender_features(n), g) for (n, g) in train_names]
    devtest_set = [(gender_features(n), g) for (n, g) in devtest_names]

    classifier = nltk.NaiveBayesClassifier.train(train_set)

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the print statement.
    print(classifier.classify(gender_features('Neo')))
    print(classifier.classify(gender_features('Trinity')))
    print('attila: %s' % classifier.classify(gender_features('Attila')))
    print(classifier.classify(gender_features('Bori')))
    print(classifier.classify(gender_features('Gabi')))
    print('andy: %s' % classifier.classify(gender_features('Andy')))
    print('dom: %s' % classifier.classify(gender_features('Dom')))
    print('monica: %s' % classifier.classify(gender_features('Monica')))
    print('donnie: %s' % classifier.classify(gender_features('Donald')))

    print('accuracy: %s' % nltk.classify.accuracy(classifier, devtest_set))
    # show_most_informative_features() prints its table itself and returns
    # None, so wrapping it in print would emit an extra "None" line.
    classifier.show_most_informative_features(5)

    # Collect the dev-test names the classifier got wrong for error analysis.
    errors = []
    for (name, tag) in devtest_names:
        guess = classifier.classify(gender_features(name))
        if guess != tag:
            errors.append((tag, guess, name))
    for (tag, guess, name) in sorted(errors):
        print('correct=%-8s guess=%-8s name=%-30s' % (tag, guess, name))
开发者ID:attibalazs,项目名称:nltk-examples,代码行数:34,代码来源:6_Gender_Classification.py

示例2: initGenderClassifier

def initGenderClassifier():
    """Build and return a Naive Bayes gender classifier.

    The training data is every entry of the NLTK names corpus files
    ``male.txt`` and ``female.txt``, labelled ``'male'`` and ``'female'``
    respectively and converted to features with ``gender_features``.
    """
    from nltk.corpus import names as name_corpus

    labelled = [(entry, 'male') for entry in name_corpus.words('male.txt')]
    labelled.extend(
        (entry, 'female') for entry in name_corpus.words('female.txt'))

    training_data = []
    for entry, label in labelled:
        training_data.append((gender_features(entry), label))
    return nltk.NaiveBayesClassifier.train(training_data)
开发者ID:hjfu,项目名称:LinkedIngine,代码行数:7,代码来源:classifier.py

示例3: __calculateAgreement

    def __calculateAgreement(self):
        """Derive agreement features (gender, number, person) from ``self.np``.

        * ``self.gender`` is set to ``'male'`` / ``'female'`` when ``self.np``
          has exactly one child and ``self.np[0,0]`` appears in the
          corresponding names list, or when the phrase is a gendered pronoun.
        * ``self.number`` is always set: to ``{'plural'}`` when a POS tag is
          ``NNS``/``NNPS`` or a leaf is ``','`` or ``'and'``, otherwise to
          ``{'singular'}``. Pronouns listed below override it.
        * ``self.person`` is set only for first- and second-person pronouns.

        The pronoun rules apply when ``'PRP'`` occurs in the label of the
        first child; the token is lower-cased before lookup.
        """
        if len(self.np) == 1:
            head = self.np[0,0]
            if head in names.words('male.txt'): self.gender = 'male'
            elif head in names.words('female.txt'): self.gender = 'female'

        pos_tags = {tag for (_, tag) in self.np.pos()}
        if {'NNS', 'NNPS'}.intersection(pos_tags) or {',','and'}.intersection(self.np.leaves()):
            self.number = {'plural'}
        else:
            self.number = {'singular'}

        if 'PRP' in self.np[0].label():
            # Look the token up once instead of re-indexing and re-lowering
            # it for every branch.
            pronoun = self.np[0,0].lower()
            if pronoun in {'they', 'them', 'themselves', 'their'}:
                self.number = {'plural'}
            elif pronoun in {'him', 'he', 'himself'}:
                self.gender = 'male'
                self.number = {'singular'}
            elif pronoun in {'her', 'herself', 'she'}:
                self.number = {'singular'}
                self.gender = 'female'
            elif pronoun in {'it', 'itself'}:
                self.number = {'singular'}
            elif pronoun in {'us', 'we', 'our', 'ourselves'}:
                self.number = {'plural'}
                self.person = 'first'
            # The token is lower-cased, so it must be matched against 'i';
            # the capitalised 'I' could never be found.
            elif pronoun in {'i', 'me', 'my', 'myself'}:
                self.number = {'singular'}
                self.person = 'first'
            elif pronoun in {'yourself'}:
                self.number = {'singular'}
                self.person = 'second'
            elif pronoun in {'you', 'your'}:
                # "you"/"your" is ambiguous between singular and plural.
                self.number = {'singular', 'plural'}
                self.person = 'second'
            elif pronoun in {'yourselves'}:
                self.number = {'plural'}
                self.person = 'second'
开发者ID:5aurabhpathak,项目名称:all-I-ve-done,代码行数:33,代码来源:classes.py

示例4: gender

    def gender(word):
        """Guess a gender label for a word with a freshly trained classifier.

        Every call rebuilds the labelled list from the ``male.txt`` and
        ``female.txt`` name lists, shuffles it, and trains a Naive Bayes
        classifier on everything after the first 500 shuffled entries.

        Args:
            word (str):  Word. (usually a name)

        Returns:
            str:  The label predicted by the classifier; the training
            labels are ``'male'`` and ``'female'``.

        .. note::

            This method is very naive and not very useful, so it will be
            deprecated in the future.

        """

        samples = [(entry, 'male') for entry in names.words('male.txt')]
        samples.extend(
            (entry, 'female') for entry in names.words('female.txt'))
        shuffle(samples)

        featuresets = []
        for entry, label in samples:
            featuresets.append((Classifier.gender_features(entry), label))

        # The first 500 shuffled samples are left out of training.
        model = nltk.NaiveBayesClassifier.train(featuresets[500:])
        return model.classify(Classifier.gender_features(word))
开发者ID:ismlkrkmz,项目名称:Dragonfire,代码行数:28,代码来源:nlplib.py

示例5: createName

def createName():
	"""Return a randomly chosen first name from the NLTK names corpus.

	The male and female name lists are combined and shuffled; the name in
	the first position is returned as a ``str`` with any spaces removed.
	"""
	candidates = [(entry, 'male') for entry in names.words('male.txt')]
	candidates += [(entry, 'female') for entry in names.words('female.txt')]
	random.shuffle(candidates)

	# Only the name is needed; the gender label is discarded.
	chosen = candidates[0][0]
	return str(chosen).replace(' ','')
开发者ID:dattasaurabh82,项目名称:Decoy-Browsing,代码行数:7,代码来源:DecoyFacebookBrowsing.py

示例6: semanticClassify

    def semanticClassify(self, s):
        """
        Attach a semantic label to every segment of a POS-tagged sequence.

        Only verbs and nouns receive a WordNet label, so the input must be
        POS tagged first:

        * ``'NP'`` segments are labelled ``'male_name'``, ``'female_name'``
          or ``'month'`` when the word is found in the corresponding list.
        * ``'VERB'`` and ``'NOUN'`` segments are labelled with the name of
          the first synset returned by ``wn.synsets``.
        * Everything else, and every word that is not found, gets ``' '``.

        Input: [('i', 'PRON'), ('love', 'VERB'), ('you', 'PRON')]
        Output: [('i', 'PRON', ' '), ('love', 'VERB', 'love.n.01'), ('you', 'PRON', ' ')]

        The labelled list is passed through ``self.encodeutf8`` before it
        is returned.
        """
        months = frozenset((
            'january', 'february', 'march', 'april', 'may', 'june',
            'july', 'august', 'september', 'october', 'november', 'december'))

        # The lower-cased name sets are loop-invariant. They are built once,
        # on the first 'NP' segment, instead of once per segment, and kept
        # as sets so each membership test is a hash lookup.
        male_names = None
        female_names = None

        classified_seg = []
        for seg in s:
            word, tag = seg[0], seg[1]
            label = ' '
            if tag == 'NP':
                if male_names is None:
                    male_names = frozenset(
                        w.lower() for w in names.words('male.txt'))
                    female_names = frozenset(
                        w.lower() for w in names.words('female.txt'))
                if word in male_names:
                    label = 'male_name'
                elif word in female_names:
                    label = 'female_name'
                elif word in months:
                    label = 'month'
            elif tag == 'VERB' or tag == 'NOUN':
                synsets = wn.synsets(word)
                if len(synsets) > 0:
                    label = synsets[0].name()
            classified_seg.append((word, tag, label))
        return self.encodeutf8(classified_seg)
开发者ID:nichijouyc,项目名称:SemanticGuessGenerator,代码行数:34,代码来源:semanticclassify.py

示例7: demo

def demo():
    """Train ``SvmClassifier`` on the NLTK names corpus and print a short report.

    The labelled names are shuffled with a fixed seed so the run is
    repeatable; the first 500 feature sets are used for testing and the
    rest for training.  The report shows the training-set size, the
    classifier's feature index size and label mapping, one example
    instance in NLTK and SVMlight format with its predicted label and
    probability, and the overall accuracy on the test set.

    Relies on ``SvmClassifier`` and ``map_instance_to_svm`` being available
    at module level.
    """

    def gender_features(word):
        # Features are the last two letters of the name.
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    import random

    from nltk.classify import accuracy
    # Import under a different name so the corpus reader is not shadowed by
    # the list of labelled names built from it.
    from nltk.corpus import names as name_corpus

    labeled_names = (
        [(name, 'male') for name in name_corpus.words('male.txt')] +
        [(name, 'female') for name in name_corpus.words('female.txt')])
    random.seed(60221023)
    random.shuffle(labeled_names)

    featuresets = [(gender_features(n), g) for (n, g) in labeled_names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the print statement.  Values that may be tuples
    # or mappings are wrapped in a 1-tuple for the % operator.
    print('--- nltk.classify.svm demo ---')
    print('Number of training examples: %s' % len(train_set))
    classifier = SvmClassifier.train(train_set)
    print('Total SVM dimensions: %s' % len(classifier._svmfeatureindex))
    print('Label mapping: %s' % (classifier._labelmapping,))
    print('--- Processing an example instance ---')
    print('Reference instance: %s' % (labeled_names[0],))
    print('NLTK-format features:\n    ' + str(test_set[0]))
    print('SVMlight-format features:\n    ' + str(map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex)))
    distr = classifier.prob_classify(test_set[0][0])
    print('Instance classification and confidence: %s %s' % (distr.max(), distr.prob(distr.max())))
    print('--- Measuring classifier performance ---')
    print('Overall accuracy: %s' % accuracy(classifier, test_set))
开发者ID:approximatelylinear,项目名称:nltk,代码行数:32,代码来源:svm.py

示例8: demo

def demo():
    """Train ``SvmClassifier`` on the NLTK names corpus and print a short report.

    The labelled names are shuffled with a fixed seed so the run is
    repeatable; the first 500 feature sets are used for testing and the
    rest for training.  The report shows the training-set size, the
    classifier's feature index size and label mapping, one example
    instance in NLTK and SVMlight format with its predicted label and
    probability, and the overall accuracy on the test set.

    Relies on ``SvmClassifier`` and ``map_instance_to_svm`` being available
    at module level.
    """

    def gender_features(word):
        # Features are the last two letters of the name.
        return {"last_letter": word[-1], "penultimate_letter": word[-2]}

    import random

    from nltk.classify import accuracy

    # Import under a different name so the corpus reader is not shadowed by
    # the list of labelled names built from it.
    from nltk.corpus import names as name_corpus

    labeled_names = [(name, "male") for name in name_corpus.words("male.txt")] + [
        (name, "female") for name in name_corpus.words("female.txt")
    ]

    random.seed(60221023)
    random.shuffle(labeled_names)

    featuresets = [(gender_features(n), g) for (n, g) in labeled_names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the print statement.  Values that may be tuples
    # or mappings are wrapped in a 1-tuple for the % operator.
    print("--- nltk.classify.svm demo ---")
    print("Number of training examples: %s" % len(train_set))
    classifier = SvmClassifier.train(train_set)
    print("Total SVM dimensions: %s" % len(classifier._svmfeatureindex))
    print("Label mapping: %s" % (classifier._labelmapping,))
    print("--- Processing an example instance ---")
    print("Reference instance: %s" % (labeled_names[0],))
    print("NLTK-format features:\n    " + str(test_set[0]))
    print(
        "SVMlight-format features:\n    "
        + str(
            map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex)
        )
    )
    distr = classifier.prob_classify(test_set[0][0])
    print("Instance classification and confidence: %s %s" % (distr.max(), distr.prob(distr.max())))
    print("--- Measuring classifier performance ---")
    print("Overall accuracy: %s" % accuracy(classifier, test_set))
开发者ID:trunghlt,项目名称:nltk,代码行数:35,代码来源:svm.py

示例9: new_naive_bayes_classifier

def new_naive_bayes_classifier():
    """Return the shared Naive Bayes classifier, training it on first use.

    When ``CLASSIFIER_CACHE`` is already truthy it is returned as is.
    Otherwise four training sets are built with ``_new_training_set``
    (tagged ``'male'``, ``'female'``, ``'neutral'`` and ``'excess'``),
    chained together, converted to features with ``_gender_features`` and
    used to train the classifier, which is stored in ``CLASSIFIER_CACHE``.
    """
    global CLASSIFIER_CACHE
    if not CLASSIFIER_CACHE:
        # Male and female names, each extended with the matching pronouns;
        # then neutral pronouns; then tokens that carry no gender signal.
        training_sets = [
            _new_training_set(
                'male', names.words('male.txt'), MALE_PRONOUN_SEQ),
            _new_training_set(
                'female', names.words('female.txt'), FEMALE_PRONOUN_SEQ),
            _new_training_set(
                'neutral', NEUTRAL_PRONOUN_SEQ),
            _new_training_set(
                'excess', ABBREVIATION_SEQ, PREPOSITION_SEQ,
                string.punctuation, ('looking', 'is')),
        ]
        labelled_words = chain.from_iterable(training_sets)
        CLASSIFIER_CACHE = nltk.NaiveBayesClassifier.train(
            (_gender_features(word), gender)
            for word, gender in labelled_words)
    return CLASSIFIER_CACHE
开发者ID:emilisto,项目名称:pygenus,代码行数:28,代码来源:pygenus.py

示例10: make_classifier

def make_classifier():
    """Train and return a Naive Bayes gender classifier.

    Names from the NLTK corpus files ``male.txt`` and ``female.txt`` are
    labelled ``'male'`` and ``'female'``, converted to features with
    ``name_features`` and used as the training data.
    """
    from nltk.corpus import names

    labelled = []
    for gender, filename in (('male', 'male.txt'), ('female', 'female.txt')):
        labelled.extend((name, gender) for name in names.words(filename))

    return nltk.NaiveBayesClassifier.train(
        [(name_features(name), gender) for name, gender in labelled])
开发者ID:mitchpowell1,项目名称:Bechdel,代码行数:8,代码来源:Script_Tagger.py

示例11: identify_gender3

def identify_gender3():
    """Run ``classify`` with a Naive Bayes classifier on shuffled name features.

    Names from the NLTK corpus files ``male.txt`` and ``female.txt`` are
    labelled, shuffled and converted with ``gender_features3``; the result
    of ``classify(nltk.NaiveBayesClassifier, featuresets, 500)`` is returned.
    """
    import random
    from nltk.corpus import names as name_corpus

    labelled = [(entry, 'male') for entry in name_corpus.words('male.txt')]
    labelled += [(entry, 'female') for entry in name_corpus.words('female.txt')]
    random.shuffle(labelled)

    featuresets = []
    for entry, label in labelled:
        featuresets.append((gender_features3(entry), label))
    return classify(nltk.NaiveBayesClassifier, featuresets, 500)
开发者ID:fishmacs,项目名称:mycode,代码行数:9,代码来源:chap06.py

示例12: feature_nameList

def feature_nameList(word):
    """Return 1 when *word* is a known first name or place name, else 0.

    A word counts as a first name when it appears in the ``male.txt`` or
    ``female.txt`` list of the names corpus, and as a place name when
    ``GeoText`` extracts at least one city or country from it.
    """
    if word in names.words('male.txt') or word in names.words('female.txt'):
        return 1

    # A GeoText instance is an ordinary object and therefore always truthy;
    # testing it directly made this function return 1 for every word.
    # Inspect what was actually extracted instead.
    places = GeoText(word)
    if places.cities or places.countries:
        return 1
    return 0
开发者ID:DomWag,项目名称:TMP_Andre,代码行数:9,代码来源:Project2.py

示例13: get_variations

def get_variations(s):
    """Return the parts of *s* that occur in the NLTK names corpus.

    *s* is split on whitespace.  For each token, in order, the token is
    appended when it is a known name, and the whole string *s* is appended
    when it is itself a known name.  The whole string is therefore repeated
    once per token; this matches the existing output and is kept so callers
    see the same list.

    Returns an empty list when *s* contains no tokens.
    """
    tokens = s.split()
    if not tokens:
        # Nothing to look up, so the corpus does not need to be read.
        return []

    # Read the corpus once rather than twice per token, and use a set so
    # each membership test is a hash lookup.
    known_names = set(names.words())
    whole_is_name = s in known_names

    variations = []
    for token in tokens:
        if token in known_names:
            variations.append(token)
        if whole_is_name:
            variations.append(s)
    return variations
开发者ID:GregoryElliott,项目名称:TGMA_NLP_Project,代码行数:9,代码来源:gg_api.py

示例14: create_featuresets

	def create_featuresets(self):
		'''
		Build a shuffled list of (features, gender) pairs from the names corpus.

		Names from male.txt and female.txt are labelled 'male' and 'female',
		shuffled, and converted to features with self.gender_features.
		'''
		labelled = [(entry, 'male') for entry in names.words('male.txt')]
		labelled.extend((entry, 'female') for entry in names.words('female.txt'))
		random.shuffle(labelled)

		featuresets = []
		for entry, label in labelled:
			featuresets.append((self.gender_features(entry), label))
		return featuresets
开发者ID:neviim,项目名称:Georgetown-Capstone,代码行数:9,代码来源:Genders.py

示例15: nltkTest

def nltkTest():
    """Train a Naive Bayes gender classifier and print its guess for 'Neo'.

    Names from the NLTK corpus files ``male.txt`` and ``female.txt`` are
    labelled, shuffled and converted with ``gender_features``.  The first
    500 feature sets are left out and the classifier is trained on the
    remainder.
    """
    import random

    labeled_names = ([(name, 'male') for name in names.words('male.txt')] +
                     [(name, 'female') for name in names.words('female.txt')])
    random.shuffle(labeled_names)
    featuresets = [(gender_features(n), gender) for (n, gender) in labeled_names]
    # featuresets[:500] is held out; nothing in this function evaluates on
    # it, so only the training slice is materialised.
    train_set = featuresets[500:]
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    val = classifier.classify(gender_features('Neo'))
    # print(val) with a single argument behaves the same on Python 2 and 3.
    print(val)
开发者ID:saurabhc123,项目名称:SharedTask,代码行数:10,代码来源:nltkPoc.py


注:本文中的nltk.corpus.names.words函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。