This article collects typical usage examples of Python's flair.data.Sentence. If you have been wondering what flair.data.Sentence is for, or how to use it, the hand-picked code examples below should help; you can also explore further usage examples from the enclosing flair.data module.
Fifteen code examples of flair.data.Sentence are presented below, ordered by popularity by default.
Example 1: create_ngram
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def create_ngram(num_context, card, card_tag):
    # card = set_card()
    card_words_org = card.split()
    ngram_list = []
    # print(len(card_words_org))
    for i in range(0, len(card_words_org)):
        # Get the sliding window.
        lower_bound = i - num_context if i - num_context > 0 else 0
        upper_bound = i + num_context if i + num_context < len(card_words_org) else len(card_words_org) - 1
        new_word = card_words_org[lower_bound:upper_bound]
        print(new_word)
        # Join the window.
        new_string = " ".join(new_word)
        if new_string == "":
            new_string = " "
        ngram_list.append(Sentence(new_string))
    return ngram_list, card_words_org
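A quick usage sketch (the window size and sample text are invented for illustration; note that create_ngram never reads its card_tag argument):

from flair.data import Sentence

windows, words = create_ngram(2, "the quick brown fox jumps over", card_tag=None)
print(len(windows), len(words))  # one flair Sentence window per original word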
Example 2: predict
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Also needs: numpy as np, tqdm, typing.List
def predict(self, texts: List[str]) -> np.ndarray:
    "Generate an array of predicted scores using the Flair NLP library"
    from flair.data import Sentence
    labels, probs = [], []
    for text in tqdm(texts):
        # Iterate through the text list and make predictions
        doc = Sentence(text)
        self.classifier.predict(doc, multi_class_prob=True)
        labels.append([x.value for x in doc.labels])
        probs.append([x.score for x in doc.labels])
    probs = np.array(probs)  # Convert probabilities to a NumPy array
    # For each prediction, sort the probability scores into the same label order for all texts
    result = []
    for label, prob in zip(labels, probs):
        order = np.argsort(np.array(label))
        result.append(prob[order])
    return np.array(result)
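The method expects a trained flair TextClassifier on self.classifier. A minimal, assumed wiring (the class name and model path below are hypothetical, not from the source):

from flair.models import TextClassifier

class FlairPredictor:  # hypothetical host class
    def __init__(self, model_path: str):
        self.classifier = TextClassifier.load(model_path)

FlairPredictor.predict = predict  # attach the method defined above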
Example 3: benchmark_flair_mdl
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Also needs: time, Token from flair.data, and the DaNLP benchmark helpers
def benchmark_flair_mdl():
    tagger = load_flair_ner_model()
    start = time.time()
    flair_sentences = []
    for i, sentence in enumerate(sentences_tokens):
        flair_sentence = Sentence()
        for token_txt in sentence:
            flair_sentence.add_token(Token(token_txt))
        flair_sentences.append(flair_sentence)
    tagger.predict(flair_sentences, verbose=True)
    predictions = [[tok.tags['ner'].value for tok in fs] for fs in flair_sentences]
    print("Made predictions on {} sentences and {} tokens in {}s".format(
        num_sentences, num_tokens, time.time() - start))
    assert len(predictions) == num_sentences
    print(classification_report(sentences_entities, remove_miscs(predictions), digits=4))
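The function leans on globals defined elsewhere in the DaNLP benchmark script; roughly like this (the exact shapes and contents are assumptions):

sentences_tokens = [["Jens", "bor", "i", "København", "."]]   # pre-tokenised sentences
sentences_entities = [["B-PER", "O", "O", "B-LOC", "O"]]      # gold BIO tags
num_sentences = len(sentences_tokens)
num_tokens = sum(len(s) for s in sentences_tokens)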
Example 4: test_flair_tagger
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def test_flair_tagger(self):
    # Download the model beforehand
    download_model('flair.pos', DEFAULT_CACHE_DIR, process_func=_unzip_process_func, verbose=True)
    print("Downloaded the flair model")
    # Load the POS tagger using the DaNLP wrapper
    flair_model = load_flair_pos_model()
    # Use the flair POS tagger
    sentence = Sentence('jeg hopper på en bil som er rød sammen med Jens-Peter E. Hansen')
    flair_model.predict(sentence)
    expected_string = "jeg <PRON> hopper <VERB> på <ADP> en <DET> bil <NOUN> som <ADP> er " \
                      "<AUX> rød <ADJ> sammen <ADV> med <ADP> Jens-Peter <PROPN> E. <PROPN> Hansen <PROPN>"
    self.assertEqual(sentence.to_tagged_string(), expected_string)
Example 5: read_group_file
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Also needs: Token from flair.data, typing.List
def read_group_file(path_to_file, entities):
    sentences: List[Sentence] = []
    for line in open(path_to_file):
        sentence: Sentence = Sentence()
        labels_data, text = line.rstrip().split('\t')
        labels, tokens = data_to_bio(labels_data, text, entities)
        for label, token in zip(labels, tokens):
            token = Token(token)
            token.add_tag('ner', label)
            sentence.add_token(token)
        sentences.append(sentence)
    return sentences
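The expected input is one tab-separated pair of label data and raw text per line; data_to_bio (defined elsewhere in the project) turns the label spans into BIO tags. A hypothetical call (file name, label format, and entity set are assumptions):

train_sentences = read_group_file("train.tsv", entities=["PER", "LOC"])
print(train_sentences[0].to_tagged_string())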
Example 6: tag_file
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Also needs: glob, SequenceTagger from flair.models
def tag_file(input_name='data/test.tsv',
             output_name='data/out.tsv',
             models_pattern='data/models/*/best-model.pt'):
    taggers = list()
    for file in glob.glob(models_pattern):
        taggers.append(SequenceTagger.load_from_file(file))
    with open(input_name) as input, open(output_name, 'w') as output:
        for line in input:
            s = Sentence(line.rstrip())
            res = list()
            for tagger in taggers:
                tagger.predict(s)
                res += pop_results(s)
            output.write(' '.join(res) + '\n')
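Called with its defaults, the function tags every line of data/test.tsv with each trained model matched by the glob pattern:

tag_file(input_name='data/test.tsv',
         output_name='data/out.tsv',
         models_pattern='data/models/*/best-model.pt')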
Example 7: _add_embeddings_internal
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Also needs: torch, Token from flair.data, typing.List
def _add_embeddings_internal(self, sentences: List[Sentence]) -> List[Sentence]:
    for i, sentence in enumerate(sentences):
        for token, token_idx in zip(sentence.tokens, range(len(sentence.tokens))):
            token: Token = token
            if token.text in self.known_words:
                word_embedding = self.precomputed_word_embeddings[token.text]
            elif token.text.lower() in self.known_words:
                word_embedding = self.precomputed_word_embeddings[token.text.lower()]
            else:
                word_embedding = self.precomputed_word_embeddings['<unk>']
            word_embedding = torch.FloatTensor(word_embedding)
            token.set_embedding(self.name, word_embedding)
    return sentences
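This is the hook flair calls on custom TokenEmbeddings subclasses. A sketch of the surrounding class, assuming a word-to-vector dict containing an '<unk>' entry was loaded beforehand (the class name and constructor are guesses, not from the source):

from flair.embeddings import TokenEmbeddings

class PrecomputedWordEmbeddings(TokenEmbeddings):  # hypothetical name
    def __init__(self, precomputed_word_embeddings, name="precomputed"):
        self.precomputed_word_embeddings = precomputed_word_embeddings  # dict: word -> vector
        self.known_words = set(precomputed_word_embeddings)
        self.name = name
        self.__embedding_length = len(precomputed_word_embeddings['<unk>'])
        super().__init__()

    @property
    def embedding_length(self) -> int:
        return self.__embedding_length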
Example 8: embed_sent
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def embed_sent(embeder, sent):
    sent = Sentence(' '.join(sent))
    embeder.embed(sent)
    return sent
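For instance, with a flair WordEmbeddings instance (the model name and token list are just examples):

from flair.embeddings import WordEmbeddings

sent = embed_sent(WordEmbeddings('glove'), ['a', 'small', 'example'])
print(sent.tokens[0].embedding.shape)  # embedding of the first token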
Example 9: predict
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def predict(self, sentences):
    mentions = []
    for sent_idx, sent in enumerate(sentences):
        sent = Sentence(sent, use_tokenizer=True)
        self.model.predict(sent)
        sent_mentions = sent.to_dict(tag_type="ner")["entities"]
        for mention in sent_mentions:
            mention["sent_idx"] = sent_idx
        mentions.extend(sent_mentions)
    return {"sentences": sentences, "mentions": mentions}
Example 10: main
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Also needs: os, spacy, tqdm, SequenceTagger and render_ner_html from flair
def main(data_folder: str, model_folder: str, top_n: int) -> None:
    print(f"keep only top {top_n} examples per file")
    nlp: Language = spacy.blank('fr')
    nlp.tokenizer = get_tokenizer(nlp)
    tokenizer = build_spacy_tokenizer(nlp)
    filenames = [filename for filename in os.listdir(data_folder) if filename.endswith(".xml")]
    sentences: List[Sentence] = list()
    with tqdm(total=len(filenames), unit=" XML", desc="Parsing XML") as progress_bar:
        for filename in filenames:
            paragraphs: List[Paragraph] = get_paragraph_from_file(
                path=os.path.join(data_folder, filename),
                keep_paragraph_without_annotation=True)
            if len(paragraphs) > top_n:
                for paragraph in paragraphs[:top_n]:
                    if len(paragraph.text) > 0:
                        s = Sentence(text=paragraph.text, tokenizer=tokenizer)
                        sentences.append(s)
            progress_bar.update()
    if len(sentences) == 0:
        raise Exception("No example loaded; either there are no cases in the provided path or the sample size is too high")
    tagger: SequenceTagger = SequenceTagger.load(os.path.join(model_folder, 'best-model.pt'))
    _ = tagger.predict(sentences=sentences,
                       mini_batch_size=32,
                       verbose=True)
    print("prepare html")
    page_html = render_ner_html(sentences, colors=colors)
    print("write html")
    with open("sentence.html", "w") as writer:
        writer.write(page_html)
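A hypothetical invocation (the folder paths and sample size are placeholders, not from the source):

main(data_folder="resources/xml", model_folder="resources/model", top_n=5)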
Example 11: set_card
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def set_card(card_path=None):
    card = set_card_text(card_path)
    card_tag = input("Input the card_tag, or -1 to summarize in terms of the card itself: ")
    card = str(card)
    if str(card_tag) == "-1":  # This will not work with large documents when BERT is enabled
        card_tag = Sentence(str(card))
        tag_str = ""
    else:
        tag_str = str(card_tag)
        card_tag = Sentence(str(card_tag))
    return card, card_tag, tag_str
Example 12: embed
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def embed(card_tag, card_as_sentence, card_words, card_words_org):
    stacked_embeddings.embed(card_tag)
    # stacked_embeddings.embed(card_as_sentence)
    # print(card_as_sentence.get_embedding().reshape(1, -1))
    word_list = []
    token_removed_ct = 0
    card_tag_emb = card_tag.get_embedding()
    if granularity_level == "Word":
        for word, count in zip(card_words_org, range(0, len(card_words_org))):
            n_gram_word = card_words[count]
            stacked_embeddings.embed(n_gram_word)
            n_gram_emb = n_gram_word.get_embedding()
            if graph:
                doc_embeddings.append(n_gram_emb.cpu().detach().numpy())
            word_sim = cos(card_tag_emb.reshape(1, -1), n_gram_emb.reshape(1, -1))
            word_tup = (card_words_org[count], word_sim)
            word_list.append(word_tup)
        if graph:
            doc_embeddings.append(card_tag_emb.cpu().detach().numpy())
        print(len(word_list))
        print(len(card_words))
        print(len(card_words_org))
    else:
        for sentence in card_as_sentence:
            set_obj = Sentence(sentence)
            stacked_embeddings.embed(set_obj)
            sentence_emb = set_obj.get_embedding()
            word_sim = cos(card_tag_emb.reshape(1, -1), sentence_emb.reshape(1, -1))
            sentence_tup = (sentence, word_sim)
            word_list.append(sentence_tup)
    return word_list
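The function relies on several module-level names; a plausible setup, inferred entirely from the call sites (every name and value below is an assumption):

import torch
from flair.embeddings import WordEmbeddings, StackedEmbeddings

stacked_embeddings = StackedEmbeddings([WordEmbeddings('glove')])
cos = torch.nn.CosineSimilarity(dim=1)  # called as cos(a.reshape(1, -1), b.reshape(1, -1))
graph = False               # when True, embeddings are collected in doc_embeddings
doc_embeddings = []
granularity_level = "Word"  # or "Sent" / "Paragraph"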
Example 13: run_loop
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def run_loop(context, card, card_tag):
    list_of_sentences = []
    list_of_paragraphs = []
    if granularity_level == "Sent":
        for paragraph in segmenter.analyze(card):
            for sentence in paragraph:  ## sentence-level summarization
                set_str = ""
                for token in sentence:
                    set_str += token.spacing
                    set_str += token.value
                list_of_sentences.append(set_str)
        word_list = embed(card_tag, list_of_sentences, 0, 0)
    elif granularity_level == "Paragraph":
        for paragraph in segmenter.analyze(card):
            set_str = ""
            for sentence in paragraph:  ## paragraph-level summarization
                # set_str = ""
                for token in sentence:
                    set_str += token.spacing
                    set_str += token.value
            list_of_paragraphs.append(set_str)
        word_list = embed(card_tag, list_of_paragraphs, 0, 0)
    elif granularity_level == "Word":
        card_as_sentence = Sentence(card)
        card_words, card_words_org = create_ngram(context, card, card_tag)
        word_list = embed(card_tag, card_as_sentence, card_words, card_words_org)
    # print(word_list)
    return word_list
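The token.spacing / token.value access pattern matches the syntok segmenter, though the source does not confirm the library. An assumed context (the same stacked_embeddings / cos globals as in Example 12 must also exist):

from syntok import segmenter  # provides segmenter.analyze(text); an inference

granularity_level = "Sent"
word_list = run_loop(context=2,
                     card="Some long document text. It has several sentences.",
                     card_tag=Sentence("topic sentence"))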
Example 14: score
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
def score(self, text: str) -> int:
    from flair.data import Sentence
    doc = Sentence(text)
    self.model.predict(doc)
    pred = int(doc.labels[0].value)
    return pred
Example 15: forward
# Module needed: from flair import data [as alias]
# Or: from flair.data import Sentence [as alias]
# Also needs: torch, typing.List
def forward(self, lines: List[Line]):
    sentences = []
    for line in lines:
        sentence = Sentence(line.text)
        sentences.append(sentence)
    len_tokens = [len(line.tokens[self.word_tokens_namespace]) for line in lines]
    max_len = max(len_tokens)
    _ = self.embedder_forward.embed(sentences)
    _ = self.embedder_backward.embed(sentences)
    batch_embeddings = []
    for sentence in sentences:
        sentence_embeddings = []
        padding_length = max_len - len(sentence)
        for token in sentence:
            embedding = token.get_embedding()
            embedding = embedding.to(self.device)
            sentence_embeddings.append(embedding)
        for i in range(padding_length):
            embedding = torch.randn(
                self.get_embedding_dimension(),
                dtype=torch.float,
                device=self.device,
            )
            sentence_embeddings.append(embedding)
        sentence_embeddings = torch.stack(sentence_embeddings)
        batch_embeddings.append(sentence_embeddings)
    # batch_size, num_tokens, embedding_dim
    batch_embeddings = torch.stack(batch_embeddings)
    batch_embeddings = batch_embeddings.to(self.device)
    for idx, line in enumerate(lines):
        line_embeddings = batch_embeddings[idx]
        for token, emb in zip(
            line.tokens[self.word_tokens_namespace], line_embeddings
        ):
            token.set_embedding(name=self.embedder_name, value=emb)
    return batch_embeddings
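The two embedders on self are presumably flair's forward and backward character language models; an assumed construction (the model names are typical flair defaults, not confirmed by the source):

from flair.embeddings import FlairEmbeddings

embedder_forward = FlairEmbeddings('news-forward')
embedder_backward = FlairEmbeddings('news-backward')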