当前位置: 首页>>代码示例>>Python>>正文


Python names.words方法代码示例

本文整理汇总了Python中nltk.corpus.names.words方法的典型用法代码示例。如果您正苦于以下问题：Python names.words方法的具体用法？Python names.words怎么用？Python names.words使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.corpus.names的用法示例。


在下文中一共展示了names.words方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def __init__(self):
        """Train the gender classifier stored on ``self._classifier``.

        Names from the ``male.txt`` and ``female.txt`` files of the NLTK
        names corpus are paired with their gender label, shuffled, turned
        into feature sets with ``self.feature`` and handed to
        ``nltk.NaiveBayesClassifier.train``.
        """
        labelled = [(name, 'male') for name in names.words('male.txt')]
        labelled.extend(
            (name, 'female') for name in names.words('female.txt'))
        # Shuffle in place so male and female examples are interleaved.
        random.shuffle(labelled)
        training = [(self.feature(name), gender) for name, gender in labelled]
        self._classifier = nltk.NaiveBayesClassifier.train(training)
开发者ID:PacktPublishing,项目名称:Natural-Language-Processing-with-Python-Cookbook,代码行数:9,代码来源:Anaphora.py

示例2: demo

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def demo():
    """Train an SVM gender classifier on the names corpus and print a report.

    The labelled names are shuffled with a fixed seed; the first 500
    feature sets form the test set and the remainder the training set.
    The report shows the classifier's feature index and label mapping, one
    worked example instance, and the overall accuracy on the test set.

    Output is produced with ``print(<single string>)`` so that the function
    parses and prints the same text on both Python 2 and Python 3.
    """
    import random

    from nltk.classify import accuracy
    from nltk.corpus import names

    def gender_features(word):
        """Describe a name by its last and second-to-last letters."""
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    # Kept under its own name so the imported ``names`` corpus reader is
    # not shadowed by the list built from it.
    labelled_names = (
        [(name, 'male') for name in names.words('male.txt')] +
        [(name, 'female') for name in names.words('female.txt')])
    random.seed(60221023)
    random.shuffle(labelled_names)

    featuresets = [(gender_features(n), g) for (n, g) in labelled_names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    print('--- nltk.classify.svm demo ---')
    print('Number of training examples: %s' % len(train_set))
    classifier = SvmClassifier.train(train_set)
    print('Total SVM dimensions: %s' % len(classifier._svmfeatureindex))
    # One-element tuples keep ``%`` from unpacking tuple/dict arguments.
    print('Label mapping: %s' % (classifier._labelmapping,))
    print('--- Processing an example instance ---')
    print('Reference instance: %s' % (labelled_names[0],))
    print('NLTK-format features:\n    ' + str(test_set[0]))
    print('SVMlight-format features:\n    ' + str(map_instance_to_svm(
        test_set[0], classifier._labelmapping, classifier._svmfeatureindex)))
    distr = classifier.prob_classify(test_set[0][0])
    print('Instance classification and confidence: %s %s'
          % (distr.max(), distr.prob(distr.max())))
    print('--- Measuring classifier performance ---')
    print('Overall accuracy: %s' % accuracy(classifier, test_set))
开发者ID:blackye,项目名称:luscan-devel,代码行数:34,代码来源:svm.py

示例3: maximum_overlap

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def maximum_overlap(w, i, text):
    """Return the candidate expansion with the highest overlap.

    Args:
        w: the original token; returned unchanged when no candidate wins.
        i: index of the token, passed through to ``tag_matches`` and
            ``overlap``.
        text: the token sequence, passed through to ``tag_matches`` and
            ``overlap``.

    Returns:
        The single tag match if it is a key of ``brown_common``; otherwise
        the best-overlapping candidate (ties broken by the largest
        ``brown_common`` value); otherwise ``w``.
    """
    t_matches = tag_matches(i, text)
    if not t_matches:
        return w
    if len(t_matches) == 1:
        if t_matches[0] in brown_common:
            return t_matches[0]
        return w

    # Pass 1: collect the candidates sharing the highest non-zero overlap.
    best = 0
    current = []
    for cand in t_matches:
        olap = overlap(i, cand, text)
        if olap > best and cand in words:
            best = olap
            current = [cand]
        elif olap == best and best != 0:
            current.append(cand)

    # Pass 2: break ties using the brown_common value of each candidate.
    # The previous version returned from inside this loop, so only the
    # first candidate was ever inspected, and compared ``freq < best``
    # against a starting value of 0, which never selected anything.
    # NOTE(review): assumes the largest brown_common value should win --
    # confirm against how brown_common is built.
    best = 0
    curr = ''
    for c in current:
        freq = brown_common[c] if c in brown_common else 0
        if freq > best:
            best = freq
            curr = c
        # NOTE(review): a single tag match already returned above, so this
        # branch looks unreachable unless tag_matches can vary per call.
        elif freq == best and len(tag_matches(i, text)) == 1:
            best = freq
            curr = c

    if curr == '':
        return w
    return curr
开发者ID:EFord36,项目名称:normalise,代码行数:38,代码来源:expand_EXPN.py

示例4: overlap

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def overlap(i, word, text):
    """Score how strongly the context of token ``i`` matches ``word``.

    Every context token (from ``gen_context``) that also appears in the
    signature of ``word`` (from ``gen_signature``) adds its ``brown_common``
    value to the score, or ``log(1161192 / 1)`` when it has no entry there.

    Returns:
        The accumulated score; 0 when no context token is in the signature.
    """
    signature = gen_signature(word)
    total = 0
    for token in gen_context(i, text):
        if token not in signature:
            continue
        if token in brown_common:
            total += brown_common[token]
        else:
            total += log(1161192 / 1)
    return total
开发者ID:EFord36,项目名称:normalise,代码行数:15,代码来源:expand_EXPN.py

示例5: gen_signature

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def gen_signature(word):
    """Generate a signature for a candidate expansion.

    The signature is the set of words that occur more than once in the
    contexts (``gen_context`` over ``brown``) of the positions returned by
    ``find_matches(word)``, minus English stop words and commas. When
    ``word`` is a WordNet lemma, the tokens of the definition and examples
    of its first synset are added as well.

    Results are memoised in ``gen_signature.dict``.

    Args:
        word: the candidate expansion.

    Returns:
        A set of signature words.
    """
    if word in gen_signature.dict:
        return gen_signature.dict[word]

    inds = find_matches(word)
    if len(inds) > 50:
        # Down-sample to 50 evenly spaced matches.
        f = len(inds) / 50
        inds = [inds[int(i * f)] for i in range(50)]

    signature = defaultdict(int)
    for i in inds:
        for w in gen_context(i, brown):
            signature[w] += 1

    # Load the stop-word list once; it used to be re-read for every
    # membership test inside the comprehensions below.
    stop = set(stopwords.words('english'))
    sig = {w for w, count in signature.items()
           if count > 1 and w not in stop and w != ','}

    if word in wn.words():
        synsets = wn.synsets(word)
        # The two-quote check is retained from the earlier eval-based
        # lookup so the same set of words receives WordNet data as before.
        if synsets and str(synsets[0]).count("'") == 2:
            first = synsets[0]
            # Direct method calls replace eval() on a formatted string.
            define = first.definition()
            examples = first.examples()
            if examples:
                for ex in examples:
                    sig.update(w for w in wt(ex) if w not in stop)
            if define:
                sig.update(w for w in wt(define) if w not in stop)

    gen_signature.dict[word] = sig
    return sig
开发者ID:EFord36,项目名称:normalise,代码行数:34,代码来源:expand_EXPN.py

示例6: gen_context

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def gen_context(i, text):
    """Return up to nine tokens of sentence context around position ``i``.

    The window is four tokens either side of ``i`` when the sentence allows
    it; otherwise it is shifted to stay inside the sentence, or shrunk to
    the whole sentence when that is shorter than nine tokens. Sentences are
    delimited by '.', '!' and '?'.

    Args:
        i: token index. A non-``int`` index is truncated with ``int`` and
            the token at that position is replaced, in a copy of ``text``,
            by the pieces produced by ``split``.
        text: the token sequence; it is copied, never modified.

    Returns:
        A new list holding the context tokens.
    """
    terminators = ['.', '!', '?']
    tokens = text[:]
    ind = i
    if not isinstance(i, int):
        ind = int(i)
        split_token = tokens[ind]
        del tokens[ind]
        parts = split({ind: (split_token, 'SPLT')}, verbose=False)
        # Insert in reverse key order so the pieces end up in key order.
        for key in sorted(parts, reverse=True):
            tokens.insert(ind, parts[key][0])

    # Walk left to the first token of the sentence.
    start = ind
    while start > 0 and tokens[start - 1] not in terminators:
        start -= 1

    # Walk right to the sentence terminator (or the end of the text).
    end = ind + 1
    while end <= len(tokens) - 1 and tokens[end] not in terminators:
        end += 1

    near_start = ind - start < 4
    near_end = end - ind < 5
    if not near_start and not near_end:
        lo, hi = ind - 4, ind + 5
    elif end - start < 9:
        lo, hi = start, end
    elif near_start:
        lo, hi = start, start + 9
    else:
        lo, hi = end - 9, end
    return list(tokens[lo:hi])
开发者ID:EFord36,项目名称:normalise,代码行数:42,代码来源:expand_EXPN.py

示例7: gen_candidates

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def gen_candidates(word):
    """Generate lists of candidate expansions for an abbreviation.

    Three patterns are built from the alphabetic characters of ``word``
    (lower-cased) and matched against every entry of ``words``:

    * the letters with ``[aeiou]*`` allowed after each one, anchored at the
      end of the word;
    * the letters preceding the last letter (as found by
      ``find_last_letter``), then anything, then that last letter;
    * the letters as a plain prefix.

    Each word is placed in the list of the first pattern it matches.

    Returns:
        A tuple ``(vowel_cands, start_and_end_cands, start_cands)``.
    """
    letters = [lt for lt in word.lower() if lt.isalpha()]
    regex_cons = re.compile(
        ''.join(lt + '[aeiou]*' for lt in letters) + '$')
    regex_start = re.compile(''.join(letters))

    last = find_last_letter(word)
    if last == 's':
        # Treat a trailing 's' together with the letter before it.
        last = find_last_letter(word[:word.rfind(last)]) + last
    head_letters = [lt for lt in word[:word.rfind(last)].lower()
                    if lt.isalpha()]
    regex_start_and_end = re.compile(
        ''.join(head_letters) + '.*{}$'.format(last))

    vowel_cands, start_and_end_cands, start_cands = [], [], []
    for w in words:
        if regex_cons.match(w):
            vowel_cands.append(w)
            continue
        if regex_start_and_end.match(w):
            start_and_end_cands.append(w)
            continue
        if regex_start.match(w):
            start_cands.append(w)
    return vowel_cands, start_and_end_cands, start_cands
开发者ID:EFord36,项目名称:normalise,代码行数:36,代码来源:expand_EXPN.py

示例8: gender_match

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def gender_match(tree, pos, pro):
    """Check whether a proposed antecedent and a pronoun agree in gender.

    Only mismatches are detected: between a recognised male or female name
    and a pronoun of another gender, and between a numeral and a
    non-neuter pronoun. Anything else counts as a match.

    Args:
        tree: the parse tree containing the antecedent.
        pos: position of the proposed antecedent node within ``tree``.
        pro: the pronoun being resolved.

    Returns:
        False if a gender mismatch is found, True otherwise.
    """
    # Sets, not generator expressions: a generator is consumed by the first
    # ``in`` test, so every later child in the loop below would have been
    # checked against an exhausted (or partly consumed) name list.
    male_names = {name.lower() for name in names.words('male.txt')}
    female_names = {name.lower() for name in names.words('female.txt')}
    male_pronouns = ["he", "him", "himself"]
    female_pronouns = ["she", "her", "herself"]
    neuter_pronouns = ["it", "itself"]

    for c in tree[pos]:
        if not (isinstance(c, nltk.Tree) and c.label() in nominal_labels):
            continue
        first_leaf = c.leaves()[0]
        if first_leaf.lower() in male_names:
            # A male name cannot be the antecedent of a female or neuter
            # pronoun.
            if pro in female_pronouns or pro in neuter_pronouns:
                return False
        elif first_leaf.lower() in female_names:
            # A female name cannot be the antecedent of a male or neuter
            # pronoun.
            if pro in male_pronouns or pro in neuter_pronouns:
                return False
        elif first_leaf.isdigit():
            # A numeral can only be the antecedent of a neuter pronoun.
            if pro in male_pronouns or pro in female_pronouns:
                return False

    return True
开发者ID:cmward,项目名称:hobbs,代码行数:40,代码来源:hobbs.py

示例9: splitter

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def splitter(data, num_words):
    """Split ``data`` into chunks of ``num_words`` space-separated words.

    ``data`` is split on single spaces and the pieces are re-joined in
    groups of ``num_words``. Whatever is left over after the last full
    group is always appended as a final chunk, so the result ends with an
    empty string when the word count is an exact multiple of ``num_words``.

    Returns:
        A list of chunk strings.
    """
    chunks = []
    pending = []
    for token in data.split(' '):
        pending.append(token)
        if len(pending) == num_words:
            chunks.append(' '.join(pending))
            pending = []
    # Flush the remainder (possibly empty).
    chunks.append(' '.join(pending))
    return chunks
开发者ID:richieBao,项目名称:python-urbanPlanning,代码行数:16,代码来源:testingNLP.py

示例10: names_demo

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    Args:
        trainer: callable given a list of ``(featureset, label)`` pairs;
            its return value is used as the classifier.
        features: callable mapping a name to a feature set.

    Returns:
        The classifier returned by ``trainer``.
    """
    import random
    from nltk.corpus import names

    # Label every name in the corpus with its gender.
    labelled = [(name, 'male') for name in names.words('male.txt')]
    labelled = labelled + [
        (name, 'female') for name in names.words('female.txt')]

    # Fixed seed: the train/test split is the same on every run.
    random.seed(123456)
    random.shuffle(labelled)
    train, test = labelled[:5000], labelled[5000:5500]

    print('Training classifier...')
    train_featuresets = [(features(n), g) for (n, g) in train]
    classifier = trainer(train_featuresets)

    print('Testing classifier...')
    gold_featuresets = [(features(n), g) for (n, g) in test]
    acc = accuracy(classifier, gold_featuresets)
    print('Accuracy: %6.4f' % acc)

    # Classifiers that cannot produce probabilities raise
    # NotImplementedError; for those the extra report is skipped.
    try:
        test_featuresets = [features(n) for (n, g) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = []
        for ((name, gold), pdist) in zip(test, pdists):
            ll.append(pdist.logprob(gold))
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        sample = list(zip(test, pdists))[:5]
        for ((name, gender), pdist) in sample:
            # The asterisk marks the column of the true gender.
            fmt = ('  %-15s *%6.4f   %6.4f' if gender == 'male'
                   else '  %-15s  %6.4f  *%6.4f')
            print(fmt % (name, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
开发者ID:rafasashi,项目名称:razzy-spinner,代码行数:46,代码来源:util.py

示例11: partial_names_demo

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate a classifier from positive and unlabeled names.

    Male names serve as the positive examples; a mix of male and female
    names serves as the unlabeled examples. The test set labels male names
    ``True`` and female names ``False``.

    Args:
        trainer: callable given ``(positive, unlabeled)``, two lists of
            feature sets; its return value is used as the classifier.
        features: callable mapping a name to a feature set.

    Returns:
        The classifier returned by ``trainer``.
    """
    from nltk.corpus import names
    import random

    # Copy into plain lists so they can be shuffled in place and sliced.
    male_names = list(names.words('male.txt'))
    female_names = list(names.words('female.txt'))

    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled examples for training. Built as
    # real lists: on Python 3 ``map`` yields a one-shot iterator.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # zip() is not subscriptable on Python 3; materialise it first.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
开发者ID:rafasashi,项目名称:razzy-spinner,代码行数:55,代码来源:util.py

示例12: names_demo

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def names_demo(trainer, features=names_demo_features):
    """Train and evaluate a gender classifier on the NLTK names corpus.

    All output goes through ``print(<single string>)``, which parses on
    both Python 2 and Python 3 and prints the same text as the Python 2
    ``print`` statements it replaces.

    Args:
        trainer: callable given a list of ``(featureset, label)`` pairs;
            its return value is used as the classifier.
        features: callable mapping a name to a feature set.

    Returns:
        The classifier returned by ``trainer``.
    """
    from nltk.corpus import names
    import random

    # Construct a list of classified names, using the names corpus.
    namelist = ([(name, 'male') for name in names.words('male.txt')] +
                [(name, 'female') for name in names.words('female.txt')])

    # Randomly split the names into a test & train set.
    random.seed(123456)
    random.shuffle(namelist)
    train = namelist[:5000]
    test = namelist[5000:5500]

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer([(features(n), g) for (n, g) in train])

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), g) for (n, g) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, g) in test]
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print('')
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # list(...) keeps the slice valid where zip() returns an iterator.
        for ((name, gender), pdist) in list(zip(test, pdists))[:5]:
            if gender == 'male':
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
开发者ID:blackye,项目名称:luscan-devel,代码行数:46,代码来源:util.py

示例13: partial_names_demo

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate a classifier from positive and unlabeled names.

    Male names serve as the positive examples; a mix of male and female
    names serves as the unlabeled examples. The test set labels male names
    ``True`` and female names ``False``.

    All output goes through ``print(<single string>)``, which parses on
    both Python 2 and Python 3 and prints the same text as the Python 2
    ``print`` statements it replaces.

    Args:
        trainer: callable given ``(positive, unlabeled)``, two lists of
            feature sets; its return value is used as the classifier.
        features: callable mapping a name to a feature set.

    Returns:
        The classifier returned by ``trainer``.
    """
    from nltk.corpus import names
    import random

    # Copy into plain lists so they can be shuffled in place and sliced.
    male_names = list(names.words('male.txt'))
    female_names = list(names.words('female.txt'))

    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled examples for training. List
    # comprehensions give real lists regardless of what ``map`` returns.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled examples.
    unlabeled = [features(name)
                 for name in male_names[2000:2500] + female_names[:500]]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] \
        + [(name, False) for name in female_names[500:750]]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.batch_prob_classify(test_featuresets)
        ll = [pdist.logprob(gold)
              for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll)/len(test)))
        print('')
        print('Unseen Names      P(Male)  P(Female)\n'+'-'*40)
        # list(...) keeps the slice valid where zip() returns an iterator.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
开发者ID:blackye,项目名称:luscan-devel,代码行数:55,代码来源:util.py

示例14: names_demo

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def names_demo(trainer, features=names_demo_features):
    """Run a train/test demonstration of a gender classifier on names.

    Args:
        trainer: callable given a list of ``(featureset, label)`` pairs;
            its return value is used as the classifier.
        features: callable mapping a name to a feature set.

    Returns:
        The classifier returned by ``trainer``.
    """
    import random
    from nltk.corpus import names

    # Pair each corpus name with its gender label.
    male = [(name, 'male') for name in names.words('male.txt')]
    female = [(name, 'female') for name in names.words('female.txt')]
    namelist = male + female

    # Seeded shuffle, then 5000 names for training and the next 500 for
    # testing.
    random.seed(123456)
    random.shuffle(namelist)
    train, test = namelist[:5000], namelist[5000:5500]

    print('Training classifier...')
    labelled_train = [(features(n), g) for (n, g) in train]
    classifier = trainer(labelled_train)

    print('Testing classifier...')
    labelled_test = [(features(n), g) for (n, g) in test]
    acc = accuracy(classifier, labelled_test)
    print('Accuracy: %6.4f' % acc)

    # Probability report; skipped when the classifier signals
    # NotImplementedError.
    try:
        test_featuresets = [features(n) for (n, g) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = []
        for ((name, gold), pdist) in zip(test, pdists):
            ll.append(pdist.logprob(gold))
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        first_five = list(zip(test, pdists))[:5]
        for ((name, gender), pdist) in first_five:
            # The asterisk marks the column of the true gender.
            if gender != 'male':
                fmt = '  %-15s  %6.4f  *%6.4f'
            else:
                fmt = '  %-15s *%6.4f   %6.4f'
            print(fmt % (name, pdist.prob('male'), pdist.prob('female')))
    except NotImplementedError:
        pass

    return classifier
开发者ID:V1EngineeringInc,项目名称:V1EngineeringInc-Docs,代码行数:46,代码来源:util.py

示例15: partial_names_demo

# 需要导入模块: from nltk.corpus import names [as 别名]
# 或者: from nltk.corpus.names import words [as 别名]
def partial_names_demo(trainer, features=names_demo_features):
    """Train and evaluate a classifier from positive and unlabeled names.

    Male names serve as the positive examples; a mix of male and female
    names serves as the unlabeled examples. The test set labels male names
    ``True`` and female names ``False``.

    Args:
        trainer: callable given ``(positive, unlabeled)``, two lists of
            feature sets; its return value is used as the classifier.
        features: callable mapping a name to a feature set.

    Returns:
        The classifier returned by ``trainer``.
    """
    from nltk.corpus import names
    import random

    # Copy into plain lists so they can be shuffled in place and sliced.
    male_names = list(names.words('male.txt'))
    female_names = list(names.words('female.txt'))

    random.seed(654321)
    random.shuffle(male_names)
    random.shuffle(female_names)

    # Male names used as positive-labeled examples for training. Built as
    # real lists: on Python 3 ``map`` yields a one-shot iterator.
    positive = [features(name) for name in male_names[:2000]]

    # Male and female names used as unlabeled examples.
    unlabeled = [
        features(name) for name in male_names[2000:2500] + female_names[:500]
    ]

    # Test set with correctly-labeled male and female names.
    test = [(name, True) for name in male_names[2500:2750]] + [
        (name, False) for name in female_names[500:750]
    ]

    random.shuffle(test)

    # Train up a classifier.
    print('Training classifier...')
    classifier = trainer(positive, unlabeled)

    # Run the classifier on the test data.
    print('Testing classifier...')
    acc = accuracy(classifier, [(features(n), m) for (n, m) in test])
    print('Accuracy: %6.4f' % acc)

    # For classifiers that can find probabilities, show the log
    # likelihood and some sample probability distributions.
    try:
        test_featuresets = [features(n) for (n, m) in test]
        pdists = classifier.prob_classify_many(test_featuresets)
        ll = [pdist.logprob(gold) for ((name, gold), pdist) in zip(test, pdists)]
        print('Avg. log likelihood: %6.4f' % (sum(ll) / len(test)))
        print()
        print('Unseen Names      P(Male)  P(Female)\n' + '-' * 40)
        # zip() is not subscriptable on Python 3; materialise it first.
        for ((name, is_male), pdist) in list(zip(test, pdists))[:5]:
            if is_male:
                fmt = '  %-15s *%6.4f   %6.4f'
            else:
                fmt = '  %-15s  %6.4f  *%6.4f'
            print(fmt % (name, pdist.prob(True), pdist.prob(False)))
    except NotImplementedError:
        pass

    # Return the classifier
    return classifier
开发者ID:V1EngineeringInc,项目名称:V1EngineeringInc-Docs,代码行数:55,代码来源:util.py


注:本文中的nltk.corpus.names.words方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。