当前位置: 首页>>代码示例>>Python>>正文


Python Article.sentence_stream方法代码示例

本文整理汇总了Python中article.Article.sentence_stream方法的典型用法代码示例。如果您正苦于以下问题:Python Article.sentence_stream方法的具体用法?Python Article.sentence_stream怎么用?Python Article.sentence_stream使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在article.Article的用法示例。


在下文中一共展示了Article.sentence_stream方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: article_test

# 需要导入模块: from article import Article [as 别名]
# 或者: from article.Article import sentence_stream [as 别名]
 def article_test(session):
     """Tag sentences streamed from parsed articles and print the results.

     Pulls up to TEST_SET sentences via Article.sentence_stream(), reassembles
     each sentence's text, tokenizes it within the given session, runs the
     tagger and prints both the sentence and its tagging.
     """
     # Stream a limited number of sentences from previously parsed articles
     for sent in Article.sentence_stream(limit=TEST_SET):
         # Reassemble raw text from the tokens that carry an "x" field
         text = " ".join(tok["x"] for tok in sent if "x" in tok)
         if not text:
             continue
         tokens = tokenize(text, enclosing_session=session)
         tagged = tagger.tag(tokens)
         print("Sentence: '{0}'".format(text))
         print("Tagging result:\n{0}".format("\n".join(str(d) for d in tagged)))
开发者ID:vthorsteinsson,项目名称:Reynir,代码行数:11,代码来源:tagtest.py

示例2: train_tagger

# 需要导入模块: from article import Article [as 别名]
# 或者: from article.Article import sentence_stream [as 别名]
def train_tagger():
    """Train the TnT tagger and store its model in a pickle file.

    Optionally trains on a stream of sentences from parsed articles
    (disabled while TRAINING_SET is 0), then trains on the IFD gold
    corpus (holding out every sample_ratio-th sentence for testing),
    and finally stores the trained model in _TNT_MODEL_FILE.
    """

    # Number of training and test sentences
    TRAINING_SET = 0  # 25000
    TEST_SET = 400
    # A higher beam size does not seem to yield improved results
    BEAM_SIZE = 250

    tnt_tagger = TnT(N=BEAM_SIZE, C=True)
    if TRAINING_SET:
        with timeit(f"Train TnT tagger on {TRAINING_SET} sentences from articles"):
            # Get a sentence stream from parsed articles, skipping the
            # first TEST_SET sentences, which are reserved for evaluation
            sentence_stream = Article.sentence_stream(limit=TRAINING_SET, skip=TEST_SET)
            word_tag_stream = IFD_Tagset.word_tag_stream(sentence_stream)
            tnt_tagger.train(word_tag_stream)
    # Note: no f-prefix needed here, the message has no placeholders
    with timeit("Train TnT tagger on IFD training set"):
        # Train on the IFD corpus, holding out every sample_ratio-th
        # sentence (those where n % sample_ratio == 0) for testing
        sample_ratio = 50
        word_tag_stream = IFD_Corpus().word_tag_stream(skip=lambda n: n % sample_ratio == 0)
        tnt_tagger.train(word_tag_stream)
    with timeit(f"Store TnT model trained on {tnt_tagger.count} sentences"):
        tnt_tagger.store(_TNT_MODEL_FILE)
开发者ID:vthorsteinsson,项目名称:Reynir,代码行数:26,代码来源:trainer.py

示例3: test_tagger

# 需要导入模块: from article import Article [as 别名]
# 或者: from article.Article import sentence_stream [as 别名]
def test_tagger():

    print("Initializing tagger")

    # Number of training and test sentences
    TRAINING_SET = 500
    IFD_TRAINING_SET = 21000 # There are only about 20.800 sentences in the IFD corpus
    TEST_SET = 400
    BEAM_SIZE = 250 # A higher number does not seem to yield improved results

    # noinspection PyUnreachableCode
    if False:
        tnt_tagger = TnT(N = BEAM_SIZE, C = True)
        tagger = NgramTagger(n = 3, verbose = False)
        # Create a new model and store it
        with timeit("Train NgramTagger"):
            # Get a sentence stream from parsed articles
            # Number of sentences, size of training set
            sentence_stream = Article.sentence_stream(limit = TRAINING_SET, skip = TEST_SET)
            tagger.train(sentence_stream)
        with timeit("Train TnT_Tagger on articles"):
            # Get a sentence stream from parsed articles
            # Number of sentences, size of training set
            sentence_stream = Article.sentence_stream(limit = TRAINING_SET, skip = TEST_SET)
            word_tag_stream = IFD_Tagset.word_tag_stream(sentence_stream)
            tnt_tagger.train(word_tag_stream)
        with timeit("Train TnT_Tagger on IFD"):
            # Get a sentence stream from parsed articles
            # Number of sentences, size of training set
            word_tag_stream = IFD_Corpus().word_tag_stream(limit = IFD_TRAINING_SET, skip = TEST_SET)
            tnt_tagger.train(word_tag_stream)
        with timeit("Store TnT model"):
            tnt_tagger.store(_TNT_MODEL_FILE)
    else:
        tagger = None
        # Load an existing model
        with timeit("load_model()"):
            tnt_tagger = TnT.load(_TNT_MODEL_FILE)
            if tnt_tagger is None:
                print(f"Unable to load TnT model from {_TNT_MODEL_FILE}, test aborted")
                return
    #tagger.show_model()
    #return

    total_tags = 0
    correct_tag = 0
    partial_tag = 0
    missing_tag = 0
    correct_tag_tnt = 0
    partial_tag_tnt = 0
    missing_tag_tnt = 0


    def simple_test(session):
        """Run the tagger on a single canned sentence and print the outcome."""
        sentence = "Þau segja að börn hafi gott af því."
        tokens = tokenize(sentence, enclosing_session=session)
        tagged = tagger.tag(tokens)
        print("Sentence: '{0}'".format(sentence))
        tag_lines = "\n".join(str(d) for d in tagged)
        print("Tagging result:\n{0}".format(tag_lines))


    def article_test(session):
        """Tag up to TEST_SET sentences drawn from parsed articles, printing
        each sentence and its tagging result."""
        stream = Article.sentence_stream(limit=TEST_SET)
        for sentence in stream:
            # Join the surface forms ("x" fields) back into plain text
            text = " ".join(t["x"] for t in sentence if "x" in t)
            if text:
                tokens = tokenize(text, enclosing_session=session)
                tagged = tagger.tag(tokens)
                print("Sentence: '{0}'".format(text))
                print("Tagging result:\n{0}".format("\n".join(str(d) for d in tagged)))


    def test_ifd_file(session):
        print("\n\n*** IFD TEST SET ***\n\n")
        gen = IFD_Corpus().raw_sentence_stream(limit = TEST_SET)
        dlist = None
        for sent in gen:
            orðalisti = [ triple[0] for triple in sent ]
            mörk_OTB = [ triple[1] for triple in sent ]
            lemmur_OTB = [ triple[2] for triple in sent ]
            txt = " ".join(orðalisti)
            if tagger is not None:
                toklist = tokenize(txt, enclosing_session = session)
                dlist = tagger.tag(toklist)
            tntlist = tnt_tagger.tag(orðalisti)
            ix = 0
            print("\n{0}\n".format(txt))
            for tag, lemma, word, tnt_wt in zip(mörk_OTB, lemmur_OTB, orðalisti, tntlist):
                tnt_tag = tnt_wt[1]
                j = ix
                if dlist is None:
                    gtag = "?"
                else:
                    while j < len(dlist) and dlist[j].get("x", "") != word:
                        j += 1
                    if j < len(dlist):
                        ix = j
                        gtag = dlist[ix].get("i", "?")
                        if gtag == "?" and dlist[ix].get("k") == TOK.PUNCTUATION:
                            gtag = word
#.........这里部分代码省略.........
开发者ID:vthorsteinsson,项目名称:Reynir,代码行数:103,代码来源:tagtest.py


注:本文中的article.Article.sentence_stream方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。