

Python data.Sentence Method Code Examples

This article collects typical usage examples of the Python flair.data.Sentence method. If you are wondering what exactly data.Sentence does, how to call it, or what real usage looks like, the curated code examples below should help. You can also explore further usage examples from the flair.data module it belongs to.


The following presents 15 code examples of the data.Sentence method, ordered by popularity.
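Before diving into the examples, here is a minimal sketch of what flair.data.Sentence does (the sample text is illustrative; it assumes only that the flair package is installed):

from flair.data import Sentence

# A Sentence wraps a piece of text and tokenizes it into Token objects.
sentence = Sentence("The grass is green .")
print(sentence)           # shows the text and its token count
for token in sentence:    # a Sentence iterates over its Token objects
    print(token.text)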

Example 1: create_ngram

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def create_ngram(num_context, card, card_tag):
    # NOTE: card_tag is accepted for interface consistency but unused here.
    card_words_org = card.split()
    ngram_list = []

    for i in range(len(card_words_org)):
        # Clamp the sliding window of num_context words to the list bounds.
        # (The original clamped the upper bound to len - 1, which silently
        # dropped the last word because slicing is exclusive.)
        lower_bound = max(i - num_context, 0)
        upper_bound = min(i + num_context, len(card_words_org))
        window = card_words_org[lower_bound:upper_bound]

        # Join the window; flair's Sentence cannot be built from an empty
        # string, so fall back to a single space.
        new_string = " ".join(window)
        if new_string == "":
            new_string = " "
        ngram_list.append(Sentence(new_string))

    return ngram_list, card_words_org
Author: Hellisotherpeople, Project: CX_DB8, Source file: cx_db8_flair.py
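A quick usage sketch for the function above (the sample text is made up; as noted, card_tag is accepted but never used inside create_ngram):

windows, words = create_ngram(2, "the quick brown fox jumps over the dog", None)
# windows is a list of flair Sentence objects, one context window per word
# words is the original token list produced by str.split()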

Example 2: predict

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# numpy (np), tqdm and typing.List are imported at module level in explainer.py.
def predict(self, texts: List[str]) -> np.ndarray:
    """Generate an array of predicted class probability scores using the flair NLP library."""
    from flair.data import Sentence
    labels, probs = [], []
    for text in tqdm(texts):
        # Predict class probabilities for each text in the list.
        doc = Sentence(text)
        self.classifier.predict(doc, multi_class_prob=True)
        labels.append([x.value for x in doc.labels])
        probs.append([x.score for x in doc.labels])
    probs = np.array(probs)  # Convert probabilities to a NumPy array

    # Sort each row of probability scores into the same label order for all texts.
    result = []
    for label, prob in zip(labels, probs):
        order = np.argsort(np.array(label))
        result.append(prob[order])
    return np.array(result)
Author: prrao87, Project: fine-grained-sentiment, Source file: explainer.py
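Two details worth noting: multi_class_prob=True is the keyword of the flair release this repository targets (recent flair versions expose the same behavior under return_probabilities_for_all_classes), and the final np.argsort over the label names re-orders every row of probabilities into one consistent label order, so that columns are comparable across all texts.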

Example 3: benchmark_flair_mdl

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# time, Token, classification_report and the sentences_*/num_* globals are
# defined at module level in ner_benchmarks.py.
def benchmark_flair_mdl():
    tagger = load_flair_ner_model()

    start = time.time()

    # Wrap the pre-tokenized sentences in flair Sentence objects so that
    # flair does not re-tokenize them.
    flair_sentences = []
    for sentence in sentences_tokens:
        flair_sentence = Sentence()
        for token_txt in sentence:
            flair_sentence.add_token(Token(token_txt))
        flair_sentences.append(flair_sentence)

    tagger.predict(flair_sentences, verbose=True)
    # tok.tags['ner'] is the tag-access API of older flair releases.
    predictions = [[tok.tags['ner'].value for tok in fs] for fs in flair_sentences]

    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens, time.time() - start))

    assert len(predictions) == num_sentences

    print(classification_report(sentences_entities, remove_miscs(predictions), digits=4))
Author: alexandrainst, Project: danlp, Source file: ner_benchmarks.py
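Building an empty Sentence() and appending Token objects by hand keeps the benchmark's pre-tokenized input intact instead of letting flair re-tokenize the raw text. Also note that tok.tags['ner'].value is the tag-access style of older flair releases; recent versions expose the same information through token.get_label('ner').value.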

Example 4: test_flair_tagger

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def test_flair_tagger(self):
    # Download the model beforehand
    download_model('flair.pos', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
    print("Downloaded the flair model")

    # Load the POS tagger using the DaNLP wrapper
    flair_model = load_flair_pos_model()

    # Tag a Danish sentence with the flair POS tagger
    sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
    flair_model.predict(sentence)

    expected_string = "jeg <PRON> hopper <VERB> på <ADP> en <DET> bil <NOUN> som <ADP> er " \
                      "<AUX> rød <ADJ> sammen <ADV> med <ADP> Jens-Peter <PROPN> E. <PROPN> Hansen <PROPN>"

    self.assertEqual(sentence.to_tagged_string(), expected_string)
Author: alexandrainst, Project: danlp, Source file: test_flair_models.py

Example 5: read_group_file

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def read_group_file(path_to_file, entities):
    sentences: List[Sentence] = []
    with open(path_to_file) as f:
        for line in f:
            sentence: Sentence = Sentence()
            # Each line holds a label specification and the raw text, tab-separated.
            labels_data, text = line.rstrip().split('\t')
            labels, tokens = data_to_bio(labels_data, text, entities)
            for label, token in zip(labels, tokens):
                token = Token(token)
                token.add_tag('ner', label)
                sentence.add_token(token)
            sentences.append(sentence)
    return sentences
Author: applicaai, Project: poleval-2018, Source file: corpora.py
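The expected input format is one example per line, with a label specification and the raw text separated by a tab. How labels_data encodes entity spans is defined by the project's data_to_bio helper, which converts it into per-token BIO tags before they are attached with token.add_tag('ner', label).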

Example 6: tag_file

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def tag_file(input_name='data/test.tsv',
             output_name='data/out.tsv',
             models_pattern='data/models/*/best-model.pt'):
    # Load every trained model matching the glob pattern into an ensemble.
    taggers = []
    for file in glob.glob(models_pattern):
        taggers.append(SequenceTagger.load_from_file(file))
    with open(input_name) as infile, open(output_name, 'w') as outfile:
        for line in infile:
            s = Sentence(line.rstrip())
            res = []
            for tagger in taggers:
                tagger.predict(s)
                res += pop_results(s)
            outfile.write(' '.join(res) + '\n')
Author: applicaai, Project: poleval-2018, Source file: tag.py
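This is a simple ensemble: every model matching models_pattern tags the same sentence in turn, and the project-specific pop_results helper extracts (and presumably clears) each tagger's predictions so they can be concatenated into one output line.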

Example 7: _add_embeddings_internal

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]:
    for sentence in sentences:
        for token in sentence.tokens:
            # Fall back from the exact surface form to the lowercased form,
            # and finally to the '<unk>' vector.
            if token.text in self.known_words:
                word_embedding = self.precomputed_word_embeddings[token.text]
            elif token.text.lower() in self.known_words:
                word_embedding = self.precomputed_word_embeddings[token.text.lower()]
            else:
                word_embedding = self.precomputed_word_embeddings['<unk>']
            word_embedding = torch.FloatTensor(word_embedding)
            token.set_embedding(self.name, word_embedding)
    return sentences
Author: applicaai, Project: poleval-2018, Source file: embeddings.py

Example 8: embed_sent

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def embed_sent(embedder, sent):
    # Join the token list into a single string and let flair re-tokenize it.
    sent = Sentence(' '.join(sent))
    embedder.embed(sent)
    return sent
Author: allanj, Project: ner_with_dependency, Source file: preflair.py
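A usage sketch for the helper above, assuming flair's bundled GloVe embeddings (any flair embeddings object exposing .embed() would work the same way):

from flair.embeddings import WordEmbeddings

glove = WordEmbeddings('glove')                  # downloaded on first use
sent = embed_sent(glove, ['I', 'love', 'Paris'])
for token in sent:
    print(token.text, token.embedding.shape)     # GloVe vectors are 100-dimensional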

Example 9: predict

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def predict(self, sentences):
    mentions = []
    for sent_idx, sent in enumerate(sentences):
        # use_tokenizer=True lets flair tokenize the raw string.
        sent = Sentence(sent, use_tokenizer=True)
        self.model.predict(sent)
        sent_mentions = sent.to_dict(tag_type="ner")["entities"]
        for mention in sent_mentions:
            mention["sent_idx"] = sent_idx
        mentions.extend(sent_mentions)
    return {"sentences": sentences, "mentions": mentions}
Author: facebookresearch, Project: BLINK, Source file: ner.py

Example 10: main

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def main(data_folder: str, model_folder: str, top_n: int) -> None:
    print(f"keep only top {top_n} examples per file")
    nlp: Language = spacy.blank('fr')
    nlp.tokenizer = get_tokenizer(nlp)
    tokenizer = build_spacy_tokenizer(nlp)
    filenames = [filename for filename in os.listdir(data_folder) if filename.endswith(".xml")]
    sentences: List[Sentence] = []
    with tqdm(total=len(filenames), unit=" XML", desc="Parsing XML") as progress_bar:
        for filename in filenames:
            paragraphs: List[Paragraph] = get_paragraph_from_file(path=os.path.join(data_folder, filename),
                                                                  keep_paragraph_without_annotation=True)
            if len(paragraphs) > top_n:
                for paragraph in paragraphs[:top_n]:
                    if len(paragraph.text) > 0:
                        # Build a flair Sentence using the custom spaCy tokenizer.
                        s = Sentence(text=paragraph.text, tokenizer=tokenizer)
                        sentences.append(s)
            progress_bar.update()
    if len(sentences) == 0:
        raise Exception("No example loaded: either there are no cases in the provided path, or the sample size is too high")

    tagger: SequenceTagger = SequenceTagger.load(os.path.join(model_folder, 'best-model.pt'))
    _ = tagger.predict(sentences=sentences,
                       mini_batch_size=32,
                       verbose=True)

    print("prepare html")
    page_html = render_ner_html(sentences, colors=colors)
    print("write html")
    with open("sentence.html", "w") as writer:
        writer.write(page_html)
Author: ELS-RD, Project: anonymisation, Source file: flair_generate_html_from_xml.py
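render_ner_html comes from flair's visualization helpers (flair.visual.ner_html in the releases this script targets) and produces a standalone HTML page with the predicted entity spans highlighted; colors appears to be a module-level mapping from entity type to highlight color defined elsewhere in the script.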

Example 11: set_card

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def set_card(card_path=None):
    card = set_card_text(card_path)
    card_tag = input("Input the card_tag, or -1 to summarize the card in terms of itself: ")
    card = str(card)
    if str(card_tag) == "-1":
        # Use the whole card as its own query. This will not work with
        # large documents when BERT embeddings are enabled.
        card_tag = Sentence(str(card))
        tag_str = ""
    else:
        tag_str = str(card_tag)
        card_tag = Sentence(str(card_tag))
    return card, card_tag, tag_str
Author: Hellisotherpeople, Project: CX_DB8, Source file: cx_db8_flair.py

Example 12: embed

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# stacked_embeddings, granularity_level, cos, graph and doc_embeddings are
# module-level objects configured elsewhere in cx_db8_flair.py.
def embed(card_tag, card_as_sentence, card_words, card_words_org):
    stacked_embeddings.embed(card_tag)
    word_list = []
    card_tag_emb = card_tag.get_embedding()
    if granularity_level == "Word":
        for count, word in enumerate(card_words_org):
            # Embed the n-gram window centered on this word.
            n_gram_word = card_words[count]
            stacked_embeddings.embed(n_gram_word)
            n_gram_emb = n_gram_word.get_embedding()
            if graph:
                doc_embeddings.append(n_gram_emb.cpu().detach().numpy())
            # Cosine similarity between the query and the window embedding.
            word_sim = cos(card_tag_emb.reshape(1, -1), n_gram_emb.reshape(1, -1))
            word_list.append((word, word_sim))
        if graph:
            doc_embeddings.append(card_tag_emb.cpu().detach().numpy())
    else:
        # Sentence/paragraph granularity: card_as_sentence is a list of strings.
        for sentence in card_as_sentence:
            set_obj = Sentence(sentence)
            stacked_embeddings.embed(set_obj)
            sentence_emb = set_obj.get_embedding()
            word_sim = cos(card_tag_emb.reshape(1, -1), sentence_emb.reshape(1, -1))
            word_list.append((sentence, word_sim))
    return word_list
Author: Hellisotherpeople, Project: CX_DB8, Source file: cx_db8_flair.py
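The ranking logic is the same at every granularity level: embed the query (card_tag) once, embed each candidate span (word window, sentence, or paragraph), and score the candidate by cosine similarity against the query embedding; the scores are then used to rank spans for the extractive summary.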

Example 13: run_loop

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# `segmenter` is a module-level text segmenter (its analyze()/token.spacing/
# token.value API matches the syntok library).
def run_loop(context, card, card_tag):
    list_of_sentences = []
    list_of_paragraphs = []
    if granularity_level == "Sent":
        # Sentence-level summarization: rebuild each sentence string.
        for paragraph in segmenter.analyze(card):
            for sentence in paragraph:
                set_str = ""
                for token in sentence:
                    set_str += token.spacing
                    set_str += token.value
                list_of_sentences.append(set_str)
        word_list = embed(card_tag, list_of_sentences, 0, 0)
    elif granularity_level == "Paragraph":
        # Paragraph-level summarization: concatenate each paragraph's sentences.
        for paragraph in segmenter.analyze(card):
            set_str = ""
            for sentence in paragraph:
                for token in sentence:
                    set_str += token.spacing
                    set_str += token.value
            list_of_paragraphs.append(set_str)
        word_list = embed(card_tag, list_of_paragraphs, 0, 0)
    elif granularity_level == "Word":
        card_as_sentence = Sentence(card)
        card_words, card_words_org = create_ngram(context, card, card_tag)
        word_list = embed(card_tag, card_as_sentence, card_words, card_words_org)
    return word_list
Author: Hellisotherpeople, Project: CX_DB8, Source file: cx_db8_flair.py

Example 14: score

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def score(self, text: str) -> int:
    from flair.data import Sentence
    doc = Sentence(text)
    self.model.predict(doc)
    # The predicted class name is itself the numeric sentiment score.
    pred = int(doc.labels[0].value)
    return pred
Author: prrao87, Project: fine-grained-sentiment, Source file: classifiers.py
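The label value can be cast to int because this repository's classifiers predict fine-grained sentiment classes whose names are the numeric scores themselves (a 1-5 scale), so the class name doubles as the prediction.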

Example 15: forward

# Required import: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Line is sciwing's own line abstraction; flair embeddings are copied onto
# its tokens at the end of this method.
def forward(self, lines: List[Line]):
    # Wrap each line's text in a flair Sentence.
    sentences = []
    for line in lines:
        sentence = Sentence(line.text)
        sentences.append(sentence)

    len_tokens = [len(line.tokens[self.word_tokens_namespace]) for line in lines]
    max_len = max(len_tokens)

    # Both embedders write into the same tokens; get_embedding() below
    # therefore returns the concatenated forward+backward embedding.
    _ = self.embedder_forward.embed(sentences)
    _ = self.embedder_backward.embed(sentences)

    batch_embeddings = []
    for sentence in sentences:
        sentence_embeddings = []
        padding_length = max_len - len(sentence)
        for token in sentence:
            embedding = token.get_embedding()
            embedding = embedding.to(self.device)
            sentence_embeddings.append(embedding)
        # Pad shorter sentences up to max_len with random vectors.
        for _ in range(padding_length):
            embedding = torch.randn(
                self.get_embedding_dimension(),
                dtype=torch.float,
                device=self.device,
            )
            sentence_embeddings.append(embedding)

        sentence_embeddings = torch.stack(sentence_embeddings)
        batch_embeddings.append(sentence_embeddings)

    # batch_size, num_tokens, embedding_dim
    batch_embeddings = torch.stack(batch_embeddings)
    batch_embeddings = batch_embeddings.to(self.device)

    # Copy each token's embedding back onto the project's own Token objects.
    for idx, line in enumerate(lines):
        line_embeddings = batch_embeddings[idx]
        for token, emb in zip(
            line.tokens[self.word_tokens_namespace], line_embeddings
        ):
            token.set_embedding(name=self.embedder_name, value=emb)

    return batch_embeddings
Author: abhinavkashyap, Project: sciwing, Source file: flair_embedder.py
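Two design details stand out here: because both the forward and the backward embedder write into the same tokens, token.get_embedding() returns their concatenation; and padding positions are filled with random torch.randn vectors rather than the more common zero vectors, so downstream code still needs to mask padded positions explicitly.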


Note: The flair.data.Sentence method examples in this article were compiled by 纯净天空 from open-source projects hosted on GitHub and similar platforms. The code snippets were selected from projects contributed by their original authors, who retain copyright in the source code; please consult each project's License before redistributing or reusing the code.