當前位置: 首頁>>代碼示例>>Python>>正文


Python brown.sents方法代碼示例

本文整理匯總了Python中nltk.corpus.brown.sents方法的典型用法代碼示例。如果您正苦於以下問題:Python brown.sents方法的具體用法?Python brown.sents怎麼用?Python brown.sents使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在nltk.corpus.brown的用法示例。


在下文中一共展示了brown.sents方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: demo

# 需要導入模塊: from nltk.corpus import brown [as 別名]
# 或者: from nltk.corpus.brown import sents [as 別名]
def demo():
    """Train a TnT tagger on part of the Brown corpus and print a comparison.

    The tagger is trained on tagged Brown sentences 200-999, then asked to
    tag the untagged sentences 100-119. Each predicted item is printed next
    to the corresponding item of the corpus' own tagged sentence, with a
    blank line after every sentence.
    """
    from nltk.corpus import brown

    tagged_corpus = list(brown.tagged_sents())
    raw_corpus = list(brown.sents())

    # Build the tagger and fit it on a slice of the tagged corpus.
    tagger = TnT()
    tagger.train(tagged_corpus[200:1000])

    # Tag a slice of raw sentences that lies outside the training range.
    predictions = tagger.tagdata(raw_corpus[100:120])

    # The raw slice starts at sentence 100, so the matching reference
    # sentence for prediction k is tagged_corpus[k + 100].
    for corpus_index, predicted in enumerate(predictions, start=100):
        reference = tagged_corpus[corpus_index]
        for position, item in enumerate(predicted):
            print(item, '--', reference[position])
        print()
開發者ID:rafasashi,項目名稱:razzy-spinner,代碼行數:21,代碼來源:tnt.py

示例2: info_content

# 需要導入模塊: from nltk.corpus import brown [as 別名]
# 或者: from nltk.corpus.brown import sents [as 別名]
def info_content(lookup_word):
    """
    Return the information content of ``lookup_word`` based on Brown
    corpus word frequencies.

    On the first call (while the module-level token counter ``N`` is still
    0) every lower-cased token of ``brown.sents()`` is counted into the
    module-level ``brown_freqs`` mapping and ``N`` is set to the total
    number of tokens seen. Later calls reuse those cached counts.

    The result is ``1 - log(count + 1) / log(N + 1)``, where ``count`` is
    the (lower-cased) word's frequency, or 0 for an unseen word; the ``+ 1``
    terms are the Laplace smoothing.
    """
    global N
    if N == 0:
        # Lazy one-time initialisation of the frequency table.
        for sentence in brown.sents():
            for token in sentence:
                key = token.lower()
                brown_freqs[key] = brown_freqs.get(key, 0) + 1
                N += 1
    count = brown_freqs.get(lookup_word.lower(), 0)
    return 1.0 - (math.log(count + 1) / math.log(N + 1))
開發者ID:rgtjf,項目名稱:Semantic-Texual-Similarity-Toolkits,代碼行數:21,代碼來源:short_sentence_similarity.py

示例3: demo

# 需要導入模塊: from nltk.corpus import brown [as 別名]
# 或者: from nltk.corpus.brown import sents [as 別名]
def demo():
    """Train a TnT tagger on part of the Brown corpus and print a comparison.

    The tagger is trained on tagged Brown sentences 200-999, then asked to
    tag the untagged sentences 100-119. Each predicted item is printed next
    to the corresponding item of the corpus' own tagged sentence, with a
    blank line after every sentence.

    Output is produced with single-argument ``print(...)`` calls so the
    function parses and prints the same text on both Python 2 and Python 3
    (the previous ``print x, y`` statements are a syntax error on Python 3).
    """
    from nltk.tag import tnt
    from nltk.corpus import brown
    sents = list(brown.tagged_sents())
    test = list(brown.sents())

    # create and train the tagger
    tagger = tnt.TnT()
    tagger.train(sents[200:1000])

    # tag some data
    tagged_data = tagger.tagdata(test[100:120])

    # print results: the raw slice starts at sentence 100, so prediction j
    # lines up with sents[j + 100]
    for j, predicted in enumerate(tagged_data):
        reference = sents[j + 100]
        for i, item in enumerate(predicted):
            # One pre-formatted string keeps the "a -- b" layout identical
            # whether print is a statement or a function.
            print("%s -- %s" % (item, reference[i]))
        print("")
開發者ID:blackye,項目名稱:luscan-devel,代碼行數:22,代碼來源:tnt.py

示例4: demo

# 需要導入模塊: from nltk.corpus import brown [as 別名]
# 或者: from nltk.corpus.brown import sents [as 別名]
def demo():
    """Demonstrate the TnT tagger on a small slice of the Brown corpus.

    Trains on tagged sentences 200-999, tags raw sentences 100-119, and
    prints every predicted item beside the item at the same position in the
    corpus-tagged version of that sentence. A blank line separates
    sentences.
    """
    from nltk.corpus import brown

    gold_sents = list(brown.tagged_sents())
    plain_sents = list(brown.sents())

    # Fit the model on the training slice, then tag the evaluation slice.
    model = TnT()
    model.train(gold_sents[200:1000])
    output = model.tagdata(plain_sents[100:120])

    # Offset of the evaluation slice inside the full corpus.
    first = 100
    for k, guess in enumerate(output):
        expected = gold_sents[first + k]
        for idx, item in enumerate(guess):
            print(item, '--', expected[idx])
        print()
開發者ID:V1EngineeringInc,項目名稱:V1EngineeringInc-Docs,代碼行數:22,代碼來源:tnt.py

示例5: load_data

# 需要導入模塊: from nltk.corpus import brown [as 別名]
# 或者: from nltk.corpus.brown import sents [as 別名]
def load_data(mode="train"):
    """Build padded character-id / space-label arrays from the Brown corpus.

    Each Brown sentence is joined with spaces, stripped of every character
    other than ASCII letters, apostrophes and spaces, and kept only if its
    length lies in ``[hp.minlen, hp.maxlen]``. For a kept sentence, ``x``
    holds ``word2idx[char]`` for every non-space character and ``y`` holds
    1 at the last character of each word except the final one (a space
    follows) and 0 elsewhere. Both are right-padded with 0 to ``hp.maxlen``.

    Args:
        mode: ``"train"`` returns the first 80% of the samples, ``"val"``
            the samples between the training part and the final 10%, and
            any other value the final 10%.

    Returns:
        Tuple ``(X, Y)`` of ``np.int32`` arrays built from the selected
        samples.

    Raises:
        KeyError: if a kept character is missing from ``word2idx``.
    """
    # Only the char -> id mapping is needed here.
    word2idx, _ = load_vocab()

    from nltk.corpus import brown
    sents = [" ".join(words) for words in brown.sents()]

    xs, ys = [], []
    for sent in sents:
        sent = re.sub(r"[^ A-Za-z']", "", sent)
        if not hp.minlen <= len(sent) <= hp.maxlen:
            continue

        x, y = [], []
        for word in sent.split():
            for char in word:
                x.append(word2idx[char])
                y.append(0)  # 0: no space
            y[-1] = 1  # space for end of a word

        # A sentence made only of filtered-out tokens (e.g. "1 .") leaves
        # nothing but spaces; skip it instead of failing on y[-1] below.
        if not x:
            continue
        y[-1] = 0  # no space for end of sentence

        padding = [0] * (hp.maxlen - len(x))
        xs.append(x + padding)
        ys.append(y + padding)

    # Convert to ndarrays
    X = np.array(xs, np.int32)
    Y = np.array(ys, np.int32)

    # Explicit, non-negative split points. Slicing with -int(n * .1) breaks
    # when that value is 0 (fewer than 10 samples): X[a:-0] is empty and
    # X[-0:] is the whole array.
    total = len(X)
    train_end = int(total * .8)
    test_start = total - int(total * .1)

    if mode == "train":
        X, Y = X[:train_end], Y[:train_end]
    elif mode == "val":
        X, Y = X[train_end:test_start], Y[train_end:test_start]
    else:
        X, Y = X[test_start:], Y[test_start:]

    return X, Y
開發者ID:Kyubyong,項目名稱:neural_tokenizer,代碼行數:37,代碼來源:data_load.py

示例6: test_clause

# 需要導入模塊: from nltk.corpus import brown [as 別名]
# 或者: from nltk.corpus.brown import sents [as 別名]
def test_clause():
  """
  Benchmark clause recognition with two analyzers and print the timings.

  Prints a header line, POS-tags the first ``size`` Brown sentences with
  ``nltk.pos_tag`` and publishes them in the module-level
  ``brown_pos_tag_sents``. It then times
  ``nltk_parse_clause_in_the_whole_text`` via ``measure_time`` and prints a
  tab-separated row (analyzer, grammar, average time, minimum time).

  Next it publishes the same data as lists of ``{'raw': ..., 'pos': ...}``
  dicts in the module-level ``sentences_dict_list_list``, times
  ``pyrata_recognize_clause_in_the_whole_text`` and prints a second row.

  NOTE(review): the two timed callables presumably read these module-level
  variables; they are defined outside this block - confirm.
  """
  global brown_pos_tag_sents, sentences_dict_list_list

  print('Measuring time performance on # {} sentences over # {} iterations for recognizing Clause'.format(size, iteration_number))

  from nltk.corpus import brown
  import nltk

  row_template = '{}\t{}\t{}\t{}'
  grammar = "clause"

  # Shared input: POS-tagged sentences, one list of (word, tag) per sentence.
  brown_pos_tag_sents = [nltk.pos_tag(tokens) for tokens in brown.sents()[:size]]

  # ----------------------------------------------------
  # nltk_parser
  # ----------------------------------------------------
  analyzer_name = 'nltk_parser'
  _timings, mean_time, best_time = measure_time(nltk_parse_clause_in_the_whole_text, iteration_number)
  print(row_template.format(analyzer_name, grammar, mean_time, best_time))

  # ----------------------------------------------------
  # pyrata
  # ----------------------------------------------------
  analyzer_name = 'pyrata'

  # Same sentences, with each (word, tag) pair turned into a feature dict.
  sentences_dict_list_list = [
    [{'raw': word, 'pos': tag} for (word, tag) in tagged_sentence]
    for tagged_sentence in brown_pos_tag_sents
  ]

  _timings, mean_time, best_time = measure_time(pyrata_recognize_clause_in_the_whole_text, iteration_number)
  print(row_template.format(analyzer_name, grammar, mean_time, best_time))
開發者ID:nicolashernandez,項目名稱:PyRATA,代碼行數:48,代碼來源:do_benchmark.py


注:本文中的nltk.corpus.brown.sents方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。