

Python FreqDist.get method code examples

This article collects typical usage examples of the Python method nltk.FreqDist.get. If you are unsure what FreqDist.get does or how to call it, the selected code examples below may help. You can also explore further usage examples of nltk.FreqDist.


Four code examples of the FreqDist.get method are shown below, sorted by popularity by default.
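First, a minimal sketch of what FreqDist.get itself does: FreqDist behaves like a dictionary of counts, so get(key, default) returns the count of an observed item or the fallback value for an unseen one. The sample sentence below is purely illustrative.

from nltk import FreqDist

# FreqDist counts hashable items, here simple whitespace-split tokens.
tokens = "the cat sat on the mat".split()
fd = FreqDist(tokens)

print(fd.get('the'))      # 2  -- count of an observed token
print(fd.get('dog', 0))   # 0  -- the default avoids None for unseen tokens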

Example 1: find_abbreviations

# Required import: from nltk import FreqDist [as alias]
# Or: from nltk.FreqDist import get [as alias]
def find_abbreviations():
    import db
    from tokenizers import es
    from nltk import FreqDist

    corpus = db.connect()
    # text = '\n'.join([a['text'] for a in corpus.articles.find().limit(10)])
    text = '\n'.join([a['text'] for a in corpus.articles.find()])
    tokens = es.tokenize(text, ignore_abbreviations=True)

    # Overall token counts, plus separate counts for tokens that do and
    # do not appear immediately before a period (abbreviation candidates).
    fd = FreqDist()
    fd_abbr = FreqDist()
    fd_n_abbr = FreqDist()
    n_tokens = len(tokens)
    for i in range(n_tokens):
        fd[tokens[i]] += 1
        if i < (n_tokens - 1) and tokens[i + 1] == '.':
            fd_abbr[tokens[i]] += 1
        else:
            fd_n_abbr[tokens[i]] += 1

    # Score each abbreviation candidate: weight its period-suffixed count by
    # how far its relative frequency deviates from the average, then penalize
    # tokens that also occur often without a period and longer tokens.
    adjusted = {}
    f_avg = len(fd) / fd.N()
    for t, n in fd_abbr.items():
        f = fd.get(t, 0) / fd.N()
        deviation = 1 + (f - f_avg)
        adjusted[t] = n * deviation / fd_n_abbr.get(t, 1) / len(t)

    items = sorted(adjusted.items(), key=lambda item: item[1], reverse=True)
    for t, n in items[:100]:
        print('%s. %f (%d, %d)' % (t, n, fd_abbr[t], fd_n_abbr.get(t, 0)))
Developer: nosamanuel, Project: nlp, Source: punctuation.py
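In this example, FreqDist.get supplies the defaults that keep the scoring robust: fd.get(t, 0) is a defensive lookup of a token's overall frequency, and fd_n_abbr.get(t, 1) avoids dividing by zero for tokens that only ever appear before a period.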

Example 2: featureset

# Required import: from nltk import FreqDist [as alias]
# Or: from nltk.FreqDist import get [as alias]
def featureset(sample):
    comment, label = sample
    features = {}
    # Each statement in the comment is a list of (word, tag) pairs;
    # flatten the words into a single list.
    words = [w for statement in comment for (w, t) in statement]
    size_ = sum(len(word) for word in words)
    features['stmt_len'] = len(words) / float(len(comment))
    features['word_len'] = size_ / float(len(words))
    features['size'] = size_
    # Relative frequency of each English stop word in the comment.
    dist = FreqDist(word.lower() for word in words)
    for word in EN_STOPWORDS:
        features[word] = dist.get(word, 0) / float(len(words))
    features['alwayson'] = 1.0
    # N-gram similarity features against each candidate language.
    # EN_STOPWORDS, LANGUAGES, GRAMS, n and comment_similarity are
    # module-level definitions in the original project.
    for language in LANGUAGES:
        for i in range(1, n + 1):
            word_sim, tag_sim, char_sim, w_s_sim = comment_similarity(GRAMS[language], comment, i)
            features['w_sim_%d_%s' % (i, language)] = word_sim
            features['t_sim_%d_%s' % (i, language)] = tag_sim
            features['c_sim_%d_%s' % (i, language)] = char_sim
    return (features, label)
Developer: aboSamoor, Project: NLP, Source: rami_learning.py
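Here dist.get(word, 0) returns 0 for stop words that never occur in the comment, so every stop-word feature is defined (with value 0.0) instead of failing on a missing key.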

Example 3: transfer

# Required import: from nltk import FreqDist [as alias]
# Or: from nltk.FreqDist import get [as alias]
def transfer(fileDj, vocabulary):
    # Read and tokenize the document.
    with open(fileDj, "r") as fo:
        content = fo.read()
    tokens = nltk.word_tokenize(content)
    # st = [SBStemmer.stem(t) for t in tokens]
    st = tokens

    # Bag-of-words vector over the fixed vocabulary; fdist.get(key, 0)
    # returns 0 for vocabulary words that do not occur in the document.
    fdist = FreqDist(st)
    BOWDj = [fdist.get(key, 0) for key in vocabulary]
    return BOWDj
Developer: tonyzhang1231, Project: Machine-Learning-CS6316-Assignments, Source: loadData.py
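A minimal usage sketch for transfer; the file name and the vocabulary below are hypothetical stand-ins for whatever the original loadData.py builds.

import nltk
from nltk import FreqDist

# nltk.word_tokenize inside transfer() needs the NLTK punkt tokenizer data.
vocabulary = ['good', 'bad', 'movie']       # hypothetical vocabulary
bow = transfer('review.txt', vocabulary)    # hypothetical document path
# bow is a list of counts aligned with vocabulary, e.g. [2, 0, 1]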

Example 4: doc_features

# Required import: from nltk import FreqDist [as alias]
# Or: from nltk.FreqDist import get [as alias]
def doc_features(doc):
    # Count non-stop-word tokens; word_features and isStopWord are
    # module-level definitions in the original project.
    doc_words = FreqDist(w for w in doc if not isStopWord(w))
    features = {}
    for word in word_features:
        features['count (%s)' % word] = doc_words.get(word, 0)
    return features
Developer: denotepython, Project: pythonbook, Source: sentiment.py
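A minimal sketch of how doc_features might be wired up; word_features and isStopWord below are placeholder definitions (the original sentiment.py builds its own from its corpus).

from nltk import FreqDist
from nltk.corpus import stopwords   # requires the NLTK 'stopwords' corpus

# Placeholder definitions for illustration only.
word_features = ['movie', 'plot', 'acting']
stop_set = set(stopwords.words('english'))

def isStopWord(word):
    return word.lower() in stop_set

doc = ['The', 'plot', 'was', 'thin', 'but', 'the', 'acting', 'was', 'good']
print(doc_features(doc))
# {'count (movie)': 0, 'count (plot)': 1, 'count (acting)': 1}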


Note: The nltk.FreqDist.get examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and any use or redistribution should follow the corresponding project's license. Please do not republish without permission.