

Python brown.categories Function Code Examples

This article collects typical usage examples of the Python function nltk.corpus.brown.categories. If you are unsure what categories does, how to call it, or just want working examples, the hand-picked code samples below should help.


Fifteen code examples of the categories function are shown below, sorted by popularity by default.
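Before the examples, a minimal usage sketch (my own illustration, not one of the collected snippets): called with no arguments, brown.categories() returns the list of all category names; passed a fileid (as Example 3 does), it returns the categories of that file. The corpus must have been downloaded once via nltk.download.

import nltk
from nltk.corpus import brown

# nltk.download('brown')  # one-time download if the corpus is missing
print(brown.categories())
# ['adventure', 'belles_lettres', 'editorial', 'fiction', ...]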

Example 1: print_brown

def print_brown():
    import nltk
    from nltk.corpus import brown
    print(brown.categories())
    print(brown.words(categories='news'))
    print(brown.words(fileids=['cg22']))
    print(brown.sents(categories=['news', 'reviews']))
    # count modal verbs in the 'news' category
    news_text = brown.words(categories='news')
    fdist = nltk.FreqDist(w.lower() for w in news_text)
    modals = ['can', 'could', 'may', 'might', 'must', 'will']
    for m in modals:
        print(m + ':', fdist[m])
Developer ID: Paul-Lin, Project: misc, Lines of code: 11, Source: toturial.py

Example 2: get_training_test_sentences

    def get_training_test_sentences(self):
        # tagged sentences from every Brown category
        # (assumes: from nltk.corpus import brown at module level)
        self.news_text = brown.words(categories=brown.categories())
        self.news_tagged_sentences = brown.tagged_sents(categories=brown.categories())

        # 90/10 train/test split
        size = int(len(self.news_tagged_sentences) * .9)
        brown_train = self.news_tagged_sentences[:size]
        brown_test = self.news_tagged_sentences[size:]

        self.train_sents = brown_train
        self.test_sents = brown_test
Developer ID: TheFourMonkeysProject, Project: Alfred, Lines of code: 11, Source: trainers.py
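As a follow-up sketch (my own, not part of the Alfred project), a split like the one above can be fed straight into one of NLTK's trainable taggers:

import nltk
from nltk.corpus import brown

tagged = brown.tagged_sents(categories=brown.categories())
size = int(len(tagged) * .9)
train_sents, test_sents = tagged[:size], tagged[size:]

tagger = nltk.UnigramTagger(train_sents)
print(tagger.accuracy(test_sents))  # tagger.evaluate(...) on older NLTK releases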

Example 3: build_all_brown

def build_all_brown(subset_size=None):
    import pickle
    from nltk.corpus import brown

    documents = []
    categories = []

    all_categories = set()

    try:
        fileids = brown.fileids()

        for fileid in fileids:
            if subset_size:
                if len(all_categories) > subset_size:
                    break
            category = brown.categories(fileid)[0]
            words = [x.lower() for x in brown.words(fileid)]

            documents.append(words)
            categories.append(category)

            all_categories.add(category)

        # only trim when we actually broke out early; the unguarded
        # `subset_size != len(...)` test would also drop the last document
        # when subset_size is None
        if subset_size is not None and subset_size != len(brown.categories()):
            # exclude the final item, since it's the sole member of the next group
            documents = documents[:-1]
            categories = categories[:-1]

        documents = [" ".join(d) for d in documents]

    except LookupError:
        # we don't have the Brown corpus via nltk on this machine
        try:
            with open("brown_docs_cats.pickle", "rb") as f:  # pickle needs binary mode
                documents, categories = pickle.load(f)
        except IOError:
            raise Exception("can't load Brown Corpus via NLTK or file")

    # let's NOT get tempted to hide away the encoding here: we'll probably
    # need to access, e.g., the vectorizer to do reverse transformations
    # once we want to interpret/evaluate the model
    #
    # doc_vectorizer = CountVectorizer()
    # doc_vec = doc_vectorizer.fit_transform(documents)

    return documents, categories
Developer ID: kinguistics, Project: naivebayes, Lines of code: 48, Source: brown_testing.py
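To pick up the vectorizer idea that the comment above deliberately leaves out of the function, a possible caller might look like this (the scikit-learn usage is my assumption; the original only hints at CountVectorizer):

from sklearn.feature_extraction.text import CountVectorizer

documents, categories = build_all_brown(subset_size=5)

vectorizer = CountVectorizer()
doc_vec = vectorizer.fit_transform(documents)  # sparse document-term matrix
print(doc_vec.shape, len(categories))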

Example 4: import_brown_pos

def import_brown_pos(ds, simplify_tags=False, silent=False, log=sys.stdout):
    """
    Import the Brown corpus into `ds`. E.g.

    >>> from nathan.core import Dataspace
    >>> ds = Dataspace()
    >>> %time import_brown_pos(ds, silent=True)
    CPU times: user 12min 28s, sys: 536 ms, total: 12min 29s
    Wall time: 12min 29s
    """
    # assumes module-level imports: sys, nltk, nltk.corpus.brown,
    # plus the project-local helper simplify_tag
    if not silent:
        total = len(brown.sents())
        counter = 0
    for category in brown.categories():
        cat_handle = ds.insert("#%s" % category)
        for sent in brown.tagged_sents(categories=category):
            # fall back to the raw tagged sentence; otherwise `norm` is
            # undefined when simplify_tags is False
            norm = (simplify_tag(t) for t in sent) if simplify_tags else sent
            norm = [nltk.tuple2str(t) for t in norm]
            sen_handle = ds.insert(norm)
            ds.link(cat_handle, sen_handle)
            if not silent:
                counter += 1
                if counter % 100 == 0:
                    print("importing %s of %s sentences..." % (counter, total),
                          file=log)
Developer ID: tdiggelm, Project: nltk-playground, Lines of code: 26, Source: train.py

Example 5: ex11

def ex11():
  import nltk
  from nltk.corpus import brown
  modals = set(["can", "could", "may", "might", "shall", "should", "will", "would", "must", "ought"])
  cfd = nltk.ConditionalFreqDist(
    (genre, modal)
    for genre in brown.categories()
    for modal in [w.lower() for w in brown.words(categories=genre) if w.lower() in modals])
  cfd.plot()
Developer ID: 447327642, Project: nltk-examples, Lines of code: 8, Source: ch02_ex.py

Example 6: brown_diversity

def brown_diversity():
    """Calculate and display a lexical diversity score (tokens/types) for each Brown corpus category."""
    cfd = nltk.ConditionalFreqDist((category, word)
        for category in brown.categories()
        for word in brown.words(categories=category))
    print("{0:15s} {1:10s}".format("CATEGORY", "DIVERSITY"))
    for category in cfd.conditions():
        # N() = total tokens in the category, B() = number of distinct word types
        print("{0:15s} {1:10f}".format(category, cfd[category].N() * 1.0 / cfd[category].B()))
Developer ID: jyzhang, Project: py-nlp, Lines of code: 8, Source: ch2.py
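For a single category the same score can be computed directly; this quick sanity check (my own sketch, not from the original project) shows what N() and B() measure:

from nltk.corpus import brown

words = brown.words(categories='news')
# tokens / types; matches cfd['news'].N() / cfd['news'].B() above
print(len(words) / len(set(words)))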

Example 7: fun08

def fun08():
    """Tabulate modal verb counts per genre with a conditional frequency distribution."""
    cfd = nltk.ConditionalFreqDist((genre, word)
        for genre in brown.categories()
        for word in brown.words(categories=genre))
    genres = ['news', 'religion', 'hobbies', 'science_fiction', 'romance', 'humor']
    modals = ['can', 'could', 'may', 'might', 'must', 'will']
    cfd.tabulate(conditions=genres, samples=modals)
Developer ID: gree2, Project: hobby, Lines of code: 8, Source: ch02.py

Example 8: makeWordSet

def makeWordSet(args=None):
    '''Use the Brown and Reuters corpora to see how many distinct words are used.'''
    from nltk.corpus import brown, reuters
    word_set = set()
    for cat in brown.categories():
        word_set |= set(brown.words(categories=cat))
    for cat in reuters.categories():
        word_set |= set(reuters.words(categories=cat))
    return word_set
Developer ID: divanshugarg, Project: Kaggle-Projects-Stuff, Lines of code: 8, Source: wordCheck.py

Example 9: exercise_brown2

def exercise_brown2():
    """Conditional frequency distribution across Brown genres."""
    cfd = nltk.ConditionalFreqDist(
        (genre, word) for genre in brown.categories() for word in brown.words(categories=genre)
    )

    genres = ["news", "religion", "hobbies", "science_fiction", "romance", "humor"]
    modals = ["can", "could", "may", "might", "must", "will"]
    cfd.tabulate(conditions=genres, samples=modals)
Developer ID: BurnellLiu, Project: LiuProject, Lines of code: 9, Source: chapter_02.py

Example 10: ex16

def ex16():
  import operator
  from nltk.corpus import brown
  lex_div = {}
  for category in brown.categories():
    words = brown.words(categories=category)
    ld = len(words) / len(set(words))
    print(category, ld)
    lex_div[category] = ld
  # sort categories by diversity score (Python 3: items() replaces iteritems())
  print(sorted(lex_div.items(), key=operator.itemgetter(1)))
Developer ID: 447327642, Project: nltk-examples, Lines of code: 9, Source: ch02_ex.py

Example 11: exercise_brown

def exercise_brown():
    # print the categories in the Brown corpus
    print(brown.categories())
    # print the words of the 'news' category
    print(brown.words(categories="news"))
    # print the text of file 'cg22'
    print(brown.words(fileids=["cg22"]))
    # print sentences
    print(brown.sents(categories=["news", "reviews"]))

    # compare the use of modal verbs across genres:
    # fetch the text
    news_text = brown.words(categories="news")
    # build a word frequency distribution
    fdist = nltk.FreqDist(w.lower() for w in news_text)
    # define the list of modal verbs
    modals = ["can", "could", "may", "might", "must", "will"]
    for m in modals:
        print(m + ":", fdist[m])
Developer ID: BurnellLiu, Project: LiuProject, Lines of code: 19, Source: chapter_02.py

Example 12: print_modals

def print_modals():
    import nltk
    from nltk.corpus import brown
    cfd = nltk.ConditionalFreqDist(
        (genre, word)
        for genre in brown.categories()
        for word in brown.words(categories=genre)
    )
    genres = ['news', 'religion', 'hobbies', 'science_fiction', 'romance', 'humor']
    modals = ['can', 'could', 'may', 'might', 'must', 'will']
    cfd.tabulate(conditions=genres, samples=modals)
Developer ID: Paul-Lin, Project: misc, Lines of code: 10, Source: toturial.py

Example 13: test_sentences

def test_sentences(categories=None):
    """Return a test sentence set: [[(word, tag), ..], [(word, tag), ..], ..]"""
    if not categories:
        categories = brown.categories()  # default: use all of the Brown categories
    sents = []
    for category in categories:
        total = len(brown.tagged_sents(categories=category))
        start = int(TEST_PROPORTION * total)  # use the last k sentences for testing
        # note: the -1 end index drops the final sentence of each category
        sents += brown.tagged_sents(categories=category, simplify_tags=True)[-start:-1]
    return sents
Developer ID: jyzhang, Project: py-nlp, Lines of code: 10, Source: pos.py

Example 14: training_sentences

def training_sentences(use=1.0, categories=None):
    """Return a training sentence set: [[(word, tag), ..], [(word, tag), ..], ..]"""
    if not categories:
        categories = brown.categories()  # default: use all of the Brown categories
    sents = []
    for category in categories:
        total = len(brown.tagged_sents(categories=category))
        # use the first n sentences for training (renamed from `max`,
        # which shadows the builtin)
        n_train = int((1 - TEST_PROPORTION) * use * total) - 1
        sents += brown.tagged_sents(categories=category, simplify_tags=True)[0:n_train]
    return sents
Developer ID: jyzhang, Project: py-nlp, Lines of code: 10, Source: pos.py
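Examples 13 and 14 target an older NLTK: the simplify_tags keyword was removed in NLTK 3, where the tagset parameter replaces it. A present-day equivalent of the split (my adaptation, with TEST_PROPORTION assumed from the original module) would be:

from nltk.corpus import brown

TEST_PROPORTION = 0.1  # assumed module-level constant from the original project

category = 'news'
tagged = brown.tagged_sents(categories=category, tagset='universal')
start = int(TEST_PROPORTION * len(tagged))
train_sents = tagged[:-start]  # first 90% for training
test_sents = tagged[-start:]   # last 10% for testing
print(len(train_sents), len(test_sents))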

Example 15: init_corpus

def init_corpus():
    print('init corpus.. ', end='')
    # assumes a module-level dict: category_sentences = {}
    global categories, category_sentences
    categories = brown.categories()
    # keep only the first half of the categories
    half_cat = int(len(categories) * 0.5)
    categories = categories[:half_cat]
    for category in categories:
        sents = brown.tagged_sents(categories=category)
        category_sentences[category] = sents
    print('done')
Developer ID: haje01, Project: enser, Lines of code: 10, Source: application.py


Note: The nltk.corpus.brown.categories examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. Consult the corresponding project's License before using or redistributing the code. Do not reproduce without permission.