

Python brown.sents Method Code Examples

This article collects typical usage examples of the Python method nltk.corpus.brown.sents. If you are wondering what brown.sents does, how to call it, or what it looks like in real code, the hand-picked examples below should help. You can also explore the other usage examples for nltk.corpus.brown.


The following presents 6 code examples of the brown.sents method, sorted by popularity.
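To set the stage, here is a minimal sketch of what brown.sents returns. It assumes the Brown corpus data has already been fetched with nltk.download('brown'):

import nltk
from nltk.corpus import brown

nltk.download('brown')  # one-time download of the corpus data

# brown.sents() returns every sentence as a list of word tokens
print(brown.sents()[0])
# ['The', 'Fulton', 'County', 'Grand', 'Jury', 'said', ...]
print(len(brown.sents()))  # roughly 57,000 sentences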

Example 1: demo

# Required import: from nltk.corpus import brown
# Or: from nltk.corpus.brown import sents
def demo():
    from nltk.corpus import brown
    from nltk.tag.tnt import TnT   # TnT is defined in nltk.tag.tnt

    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])

    # tag some (untagged) data
    tagged_data = tagger.tagdata(test[100:120])

    # print results: predicted tag vs. gold tag, token by token
    for j in range(len(tagged_data)):
        s = tagged_data[j]       # tagger output for sentence j + 100
        t = sents[j + 100]       # gold-tagged version of the same sentence
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print() 
Author: rafasashi | Project: razzy-spinner | Lines of code: 21 | Source: tnt.py
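Beyond eyeballing the printed pairs, the trained tagger can be scored directly. The sketch below is not part of the original example; it assumes a recent NLTK where TaggerI exposes accuracy() (older releases call it evaluate()), and the slice boundaries are arbitrary:

from nltk.corpus import brown
from nltk.tag.tnt import TnT

sents = list(brown.tagged_sents())

tagger = TnT()
tagger.train(sents[200:1000])

# fraction of tokens tagged correctly on held-out gold sentences
print(tagger.accuracy(sents[0:100]))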

Example 2: info_content

# Required import: from nltk.corpus import brown
# Or: from nltk.corpus.brown import sents
import math
from nltk.corpus import brown

# module-level caches: total token count and per-word frequencies
N = 0
brown_freqs = dict()

def info_content(lookup_word):
    """
    Uses the Brown corpus available in NLTK to build an add-one
    (Laplace) smoothed frequency distribution of words, then uses
    it to compute the information content of lookup_word.
    """
    global N
    if N == 0:
        # poor man's lazy evaluation: build the table on first call
        for sent in brown.sents():
            for word in sent:
                word = word.lower()
                brown_freqs[word] = brown_freqs.get(word, 0) + 1
                N += 1
    lookup_word = lookup_word.lower()
    n = brown_freqs.get(lookup_word, 0)
    return 1.0 - (math.log(n + 1) / math.log(N + 1)) 
Author: rgtjf | Project: Semantic-Texual-Similarity-Toolkits | Lines of code: 21 | Source: short_sentence_similarity.py
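As a quick sanity check (assuming the snippet above has been run, so the N and brown_freqs caches are in scope): frequent words should score near 0, and words absent from Brown score exactly 1.

print(info_content('the'))        # near 0.0: 'the' is very frequent
print(info_content('xylophone'))  # 1.0 if the word never occurs in Brown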

Example 3: demo

# Required import: from nltk.corpus import brown
# Or: from nltk.corpus.brown import sents
def demo():
    from nltk.tag import tnt
    from nltk.corpus import brown

    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = tnt.TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results (the original project targets Python 2; its print
    # statements are rewritten here as print() calls)
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print() 
Author: blackye | Project: luscan-devel | Lines of code: 22 | Source: tnt.py

Example 4: demo

# Required import: from nltk.corpus import brown
# Or: from nltk.corpus.brown import sents
def demo():
    from nltk.corpus import brown
    from nltk.tag.tnt import TnT   # TnT is defined in nltk.tag.tnt

    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results
    for j in range(len(tagged_data)):
        s = tagged_data[j]
        t = sents[j + 100]
        for i in range(len(s)):
            print(s[i], '--', t[i])
        print() 
Author: V1EngineeringInc | Project: V1EngineeringInc-Docs | Lines of code: 22 | Source: tnt.py

Example 5: load_data

# Required import: from nltk.corpus import brown
# Or: from nltk.corpus.brown import sents
import re
import numpy as np

def load_data(mode="train"):
    # `load_vocab` and the hyperparameter module `hp` (providing
    # hp.minlen and hp.maxlen) are defined elsewhere in the
    # neural_tokenizer project and are assumed to be in scope here.
    word2idx, idx2word = load_vocab()

    from nltk.corpus import brown
    sents = [" ".join(words) for words in brown.sents()]

    xs, ys = [], []
    for sent in sents:
        sent = re.sub(r"[^ A-Za-z']", "", sent)
        if hp.minlen <= len(sent) <= hp.maxlen:
            x, y = [], []
            for word in sent.split():
                for char in word:
                    x.append(word2idx[char])
                    y.append(0) # 0: no space
                y[-1] = 1 # space for end of a word
            y[-1] = 0 # no space for end of sentence

            # pad both sequences with zeros to length hp.maxlen
            xs.append(x + [0] * (hp.maxlen - len(x)))
            ys.append(y + [0] * (hp.maxlen - len(y)))

    # Convert to ndarrays
    X = np.array(xs, np.int32)
    Y = np.array(ys, np.int32)

    # split 80% / 10% / 10% into train / val / test
    if mode == "train":
        X, Y = X[: int(len(X) * .8)], Y[: int(len(Y) * .8)]
    elif mode == "val":
        X, Y = X[int(len(X) * .8): -int(len(X) * .1)], Y[int(len(Y) * .8): -int(len(Y) * .1)]
    else:
        X, Y = X[-int(len(X) * .1):], Y[-int(len(Y) * .1):]

    return X, Y 
Author: Kyubyong | Project: neural_tokenizer | Lines of code: 37 | Source: data_load.py
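The labelling scheme in the inner loop is easy to miss: every character of a word gets label 0, the last character of each word is relabelled 1 (a space follows it), and the final character of the sentence is forced back to 0. A standalone sketch of just that logic, with the project-specific word2idx and hp omitted:

def space_labels(sentence):
    """Label each character: 1 if a space follows it, else 0."""
    chars, labels = [], []
    for word in sentence.split():
        for char in word:
            chars.append(char)
            labels.append(0)  # 0: no space after this character
        labels[-1] = 1        # space after the last character of each word
    labels[-1] = 0            # no space after the end of the sentence
    return chars, labels

print(space_labels("the cat"))
# (['t', 'h', 'e', 'c', 'a', 't'], [0, 0, 1, 0, 0, 0])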

Example 6: test_clause

# Required import: from nltk.corpus import brown
# Or: from nltk.corpus.brown import sents
def test_clause():

  """Benchmark clause recognition over the first `size` Brown sentences.

  `size`, `iteration_number`, `measure_time` and the two
  *_clause_in_the_whole_text functions are defined elsewhere
  in do_benchmark.py.
  """

  print('Measuring time performance on # {} sentences over # {} iterations for recognizing Clause'.format(size, iteration_number))

  from nltk.corpus import brown
  brown_sents = brown.sents()[:size]

  import nltk
  global brown_pos_tag_sents
  brown_pos_tag_sents = [nltk.pos_tag(sentence) for sentence in brown_sents]


  # ----------------------------------------------------
  # nltk_parser 
  # ----------------------------------------------------
  analyzer_name='nltk_parser'
  

  times, averagetime, mintime = measure_time(nltk_parse_clause_in_the_whole_text, iteration_number)
  grammar = "clause"
  print ('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime))


  # ----------------------------------------------------
  # pyrata 
  # ----------------------------------------------------
  analyzer_name='pyrata'

  global sentences_dict_list_list
  sentences_dict_list_list = []

  # convert each POS-tagged sentence into pyrata's input format:
  # a list of {'raw': word, 'pos': tag} dicts per sentence
  for s in brown_pos_tag_sents:
    sentences_dict_list_list.append([{'raw': w, 'pos': p} for (w, p) in s])

  times, averagetime, mintime = measure_time(pyrata_recognize_clause_in_the_whole_text, iteration_number)
  grammar = "clause"
  print ('{}\t{}\t{}\t{}'.format(analyzer_name, grammar, averagetime, mintime)) 
Author: nicolashernandez | Project: PyRATA | Lines of code: 48 | Source: do_benchmark.py
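The snippet depends on a measure_time helper defined elsewhere in do_benchmark.py and not shown on this page. Judging only from how it is called (a zero-argument function plus an iteration count in; per-run times, average and minimum out), a hypothetical reconstruction might look like this:

import time

def measure_time(func, iterations):
    """Hypothetical stand-in for PyRATA's benchmark helper: run func()
    `iterations` times and return (times, average, minimum)."""
    times = []
    for _ in range(iterations):
        start = time.time()
        func()
        times.append(time.time() - start)
    return times, sum(times) / len(times), min(times)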


Note: the nltk.corpus.brown.sents examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and redistribution and use must follow each project's License. Do not reproduce without permission.