This article collects typical usage examples of spacy.language.Language in Python. If you have been wondering what language.Language is for, how to use it, or what real-world examples look like, the curated code samples below may help. You can also explore further usage examples from the containing module, spacy.language.
The sections below present 15 code examples of language.Language, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
Example 1: load_from_path
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def load_from_path(
    lang: str,
    path: str,
    meta: Optional[Dict] = {"description": "custom model"},
    **kwargs
) -> UDPipeLanguage:
    """Convenience function for initializing the Language class and loading
    a custom UDPipe model via the path argument.
    lang: ISO 639-1 language code or shorthand UDPipe model name.
    path: Path to the UDPipe model.
    meta: Optional meta-information about the UDPipe model.
    kwargs: Optional config parameters.
    RETURNS: The UDPipeLanguage object.
    """
    model = UDPipeModel(lang=lang, path=path, meta=meta)
    nlp = UDPipeLanguage(udpipe_model=model, meta=model._meta, **kwargs)
    return nlp
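A minimal usage sketch, assuming a UDPipe model file has been downloaded locally (the file name below is hypothetical):
# Hypothetical usage: "english-ewt.udpipe" stands in for a locally
# downloaded UDPipe model file.
nlp = load_from_path(lang="en", path="./english-ewt.udpipe")
doc = nlp("This is a test sentence.")
print([(token.text, token.pos_) for token in doc])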
Example 2: load_nlp
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def load_nlp(vectors_loc, lang=None):
    if lang is None:
        nlp = Language()
    else:
        # create empty language class – this is required if you're planning to
        # save the model to disk and load it back later (models always need a
        # "lang" setting). Use 'xx' for blank multi-language class.
        nlp = spacy.blank(lang)
    with open(vectors_loc, 'rb') as file_:
        header = file_.readline()
        nr_row, nr_dim = header.split()
        nlp.vocab.reset_vectors(width=int(nr_dim))
        for line in file_:
            line = line.rstrip().decode('utf8')
            pieces = line.rsplit(' ', int(nr_dim))
            word = pieces[0]
            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
            nlp.vocab.set_vector(word, vector)  # add the vectors to the vocab
    return nlp
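A hedged usage sketch; "vectors.txt" is a hypothetical word2vec-style text file whose first line holds the row and dimension counts:
# "vectors.txt" is assumed to contain "<nr_row> <nr_dim>" on its first line,
# then one "word v1 v2 ... vN" entry per line.
nlp = load_nlp("vectors.txt", lang="en")
print(nlp.vocab.vectors.shape)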
Example 3: ensure_proper_language_model
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def ensure_proper_language_model(nlp: Optional['Language']) -> None:
    """Checks if the spacy language model is properly loaded.
    Raises an exception if the model is invalid."""
    if nlp is None:
        raise Exception("Failed to load spacy language model. "
                        "Loading the model returned 'None'.")
    if nlp.path is None:
        # Spacy sets the path to `None` if
        # it did not load the model from disk.
        # In this case `nlp` is an unusable stub.
        raise Exception("Failed to load spacy language model for "
                        "lang '{}'. Make sure you have downloaded the "
                        "correct model (https://spacy.io/docs/usage/)."
                        "".format(nlp.lang))
Example 4: get_spacy_model
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def get_spacy_model(spacy_model_name: str, pos_tags: bool, parse: bool, ner: bool) -> SpacyModelType:
    """
    In order to avoid loading spacy models a whole bunch of times, we'll save references to them,
    keyed by the options we used to create the spacy model, so any particular configuration only
    gets loaded once.
    """
    options = (spacy_model_name, pos_tags, parse, ner)
    if options not in LOADED_SPACY_MODELS:
        disable = ['vectors', 'textcat']
        if not pos_tags:
            disable.append('tagger')
        if not parse:
            disable.append('parser')
        if not ner:
            disable.append('ner')
        try:
            spacy_model = spacy.load(spacy_model_name, disable=disable)
        except OSError:
            logger.warning(f"Spacy model '{spacy_model_name}' not found. Downloading and installing.")
            spacy_download(spacy_model_name)
            spacy_model = spacy.load(spacy_model_name, disable=disable)
        LOADED_SPACY_MODELS[options] = spacy_model
    return LOADED_SPACY_MODELS[options]
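Because the cache is keyed on the full option tuple, repeated calls with identical options return the same object. A small sketch:
nlp_a = get_spacy_model("en_core_web_sm", pos_tags=True, parse=False, ner=False)
nlp_b = get_spacy_model("en_core_web_sm", pos_tags=True, parse=False, ner=False)
assert nlp_a is nlp_b  # one shared instance per option tuple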
Example 5: count_frequencies
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def count_frequencies(language_class: Language, input_path: Path):
    """
    Given a file containing single documents per line
    (in this case, sentences for the ICLR case law corpus), split the text
    using a science specific tokenizer and compute word and
    document frequencies for all words.
    """
    print(f"Processing {input_path}.")
    nlp = English()
    # tokenizer = combined_rule_tokenizer(language_class())
    tokenizer = Tokenizer(nlp.vocab)
    counts = Counter()
    doc_counts = Counter()
    for line in tqdm.tqdm(open(input_path, "r")):
        words = [t.text for t in tokenizer(line)]
        counts.update(words)
        doc_counts.update(set(words))
    return counts, doc_counts
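A possible invocation, assuming "sentences.txt" (hypothetical) holds one document per line:
from pathlib import Path
from spacy.lang.en import English

counts, doc_counts = count_frequencies(English, Path("sentences.txt"))
print(counts.most_common(10))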
Example 6: ensure_proper_language_model
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def ensure_proper_language_model(nlp: Optional["Language"]) -> None:
    """Checks if the spacy language model is properly loaded.
    Raises an exception if the model is invalid."""
    if nlp is None:
        raise Exception(
            "Failed to load spacy language model. "
            "Loading the model returned 'None'."
        )
    if nlp.path is None:
        # Spacy sets the path to `None` if
        # it did not load the model from disk.
        # In this case `nlp` is an unusable stub.
        raise Exception(
            "Failed to load spacy language model for "
            "lang '{}'. Make sure you have downloaded the "
            "correct model (https://spacy.io/docs/usage/)."
            "".format(nlp.lang)
        )
Example 7: convert_to_flair_format
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def convert_to_flair_format(spacy_model: Language, data: List[Tuple[str, List[Offset]]]) -> List[str]:
    result: List[str] = list()
    for text, offsets in data:
        doc: Doc = spacy_model(text)
        # remove duplicated offsets
        offsets = normalize_offsets(offsets=offsets)
        offset_tuples = list(set([offset.to_tuple() for offset in offsets]))
        gold_annotations = GoldParse(doc, entities=offset_tuples)
        annotations: List[str] = gold_annotations.ner
        assert len(annotations) == len(doc)
        # Flair uses BIOES while spaCy uses BILUO
        # BILUO: Begin, Inside, Last, Unit, Out
        # BIOES: Begin, Inside, Outside, End, Single
        annotations = [a.replace('L-', 'E-') for a in annotations]
        annotations = [a.replace('U-', 'S-') for a in annotations]
        annotations = ["O" if a == "-" else a for a in annotations]  # replace unknown
        result += [f"{word} {tag}\n" for word, tag in zip(doc, annotations)]
        result.append('\n')
    return result
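A hedged usage sketch; the Offset constructor is assumed here to take (start, end, label), which may differ from the project's actual signature:
import spacy

nlp = spacy.load("fr_core_news_sm")  # assumes this model is installed
# Offset(start, end, label) is an assumption about the constructor.
data = [("Jean habite à Paris.", [Offset(0, 4, "PERS"), Offset(14, 19, "LOC")])]
print("".join(convert_to_flair_format(nlp, data)))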
Example 8: main
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def main(data_folder: str, output_folder: str, model_folder: str) -> None:
    nlp: Language = spacy.blank('fr')
    nlp.tokenizer = get_tokenizer(nlp)
    tokenizer = build_spacy_tokenizer(nlp)
    filenames = [filename for filename in os.listdir(data_folder) if filename.endswith(".txt")]
    tagger: SequenceTagger = SequenceTagger.load(os.path.join(model_folder, 'best-model.pt'))
    for filename in tqdm(iterable=filenames, unit=" txt", desc="anonymize cases"):
        with open(os.path.join(data_folder, filename), 'r') as input_f:
            sentences = tagger.predict(sentences=input_f.readlines(),
                                       mini_batch_size=32,
                                       verbose=False,
                                       use_tokenizer=tokenizer)
            case_name = filename.split('.')[0]
            page_html = render_ner_html(sentences, colors=colors, title=case_name)
            with open(os.path.join(output_folder, case_name + ".html"), "w") as output:
                output.write(page_html)
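A hypothetical invocation, assuming the folders exist and model_folder contains the trained Flair model 'best-model.pt':
main(data_folder="./txt_cases",
     output_folder="./html_out",
     model_folder="./flair_model")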
Example 9: count_frequencies
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def count_frequencies(language_class: Language, input_path: Path):
    """
    Given a file containing single documents per line
    (for scispacy, these are Pubmed abstracts), split the text
    using a science specific tokenizer and compute word and
    document frequencies for all words.
    """
    print(f"Processing {input_path}.")
    tokenizer = combined_rule_tokenizer(language_class())
    counts = Counter()
    doc_counts = Counter()
    for line in open(input_path, "r"):
        words = [t.text for t in tokenizer(line)]
        counts.update(words)
        doc_counts.update(set(words))
    return counts, doc_counts
Example 10: ensure_proper_language_model
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def ensure_proper_language_model(nlp):
    # type: (Optional[Language]) -> None
    """Checks if the spacy language model is properly loaded.
    Raises an exception if the model is invalid."""
    if nlp is None:
        raise Exception("Failed to load spacy language model. "
                        "Loading the model returned 'None'.")
    if nlp.path is None:
        # Spacy sets the path to `None` if
        # it did not load the model from disk.
        # In this case `nlp` is an unusable stub.
        raise Exception("Failed to load spacy language model for "
                        "lang '{}'. Make sure you have downloaded the "
                        "correct model (https://spacy.io/docs/usage/)."
                        "".format(nlp.lang))
Example 11: test_neg_spacy_processor
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def test_neg_spacy_processor(self):
    spacy = Pipeline[DataPack]()
    spacy.set_reader(StringReader())
    config = {
        "processors": 'ner',
        "lang": "xx_ent_wiki_sm",
        # Language code for the language to build the Pipeline
        "use_gpu": False
    }
    spacy.add(SpacyProcessor(), config=config)
    spacy.initialize()
    sentences = ["This tool is called Forte.",
                 "The goal of this project to help you build NLP "
                 "pipelines.",
                 "NLP has never been made this easy before."]
    document = ' '.join(sentences)
    with self.assertRaises(ProcessExecutionException):
        _ = spacy.process(document)
Example 12: __init__
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def __init__(
    self,
    udpipe_model: UDPipeModel,
    meta: Optional[Dict] = None,
    **kwargs
):
    """Initialize the Language class.
    The language is called "udpipe_en" instead of "en" in order to
    avoid any potential conflicts with spaCy's built-in languages.
    Using entry points, this enables serializing and deserializing
    the language class; "lang": "udpipe_en" in the meta.json will
    automatically instantiate this class if this package is available.
    udpipe_model: The loaded UDPipe model.
    meta: spaCy model metadata.
    kwargs: Optional config parameters.
    """
    self.udpipe = udpipe_model
    self.Defaults = get_defaults(lang=udpipe_model._lang)
    self.lang = f"udpipe_{udpipe_model._lang}"
    ignore_tag_map = kwargs.get("ignore_tag_map", False)
    if ignore_tag_map:
        self.Defaults.tag_map = {}  # workaround for ValueError: [E167]
    self.vocab = self.Defaults.create_vocab()
    self.tokenizer = UDPipeTokenizer(model=self.udpipe, vocab=self.vocab)
    self.pipeline = []
    self.max_length = kwargs.get("max_length", 10 ** 6)
    self._meta = self.udpipe._meta if meta is None else dict(meta)
    self._path = None
    self._optimizer = None
Example 13: load
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def load(lang: str, **kwargs) -> UDPipeLanguage:
    """Convenience function for initializing the Language class that
    mimics spacy.load.
    lang: ISO 639-1 language code or shorthand UDPipe model name.
    kwargs: Optional config parameters.
    RETURNS: The UDPipeLanguage object.
    """
    model = UDPipeModel(lang=lang, path=None, meta=None)
    nlp = UDPipeLanguage(udpipe_model=model, meta=model._meta, **kwargs)
    return nlp
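Usage mirrors spacy.load; the sketch below assumes the spacy-udpipe package, whose download helper fetches the model on first use:
import spacy_udpipe

spacy_udpipe.download("en")  # fetch the English UDPipe model once
nlp = spacy_udpipe.load("en")
doc = nlp("The weather is nice today.")
print([(t.text, t.lemma_, t.pos_) for t in doc])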
Example 14: __init__
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def __init__(
    self, nlp: Language, input_id_col: str = "id", input_text_col: str = "text"
):
    """Initialize the SpacyExtractor pipeline.
    nlp (spacy.language.Language): pre-loaded spacy language model
    input_text_col (str): property on each document to run the model on
    input_id_col (str): property on each document to correlate with request
    RETURNS (EntityRecognizer): The newly constructed object.
    """
    self.nlp = nlp
    self.input_id_col = input_id_col
    self.input_text_col = input_text_col
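A minimal construction sketch, assuming en_core_web_sm is installed:
import spacy

extractor = SpacyExtractor(nlp=spacy.load("en_core_web_sm"))
print(extractor.input_id_col, extractor.input_text_col)  # "id" "text"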
Example 15: get_spacy_model
# Required import: from spacy import language [as alias]
# Or: from spacy.language import Language [as alias]
def get_spacy_model(
    spacy_model_name: str, pos_tags: bool, parse: bool, ner: bool
) -> SpacyModelType:
    """
    In order to avoid loading spacy models a whole bunch of times, we'll save references to them,
    keyed by the options we used to create the spacy model, so any particular configuration only
    gets loaded once.
    """
    options = (spacy_model_name, pos_tags, parse, ner)
    if options not in LOADED_SPACY_MODELS:
        disable = ["vectors", "textcat"]
        if not pos_tags:
            disable.append("tagger")
        if not parse:
            disable.append("parser")
        if not ner:
            disable.append("ner")
        try:
            spacy_model = spacy.load(spacy_model_name, disable=disable)
        except OSError:
            logger.warning(
                f"Spacy model '{spacy_model_name}' not found. Downloading and installing."
            )
            spacy_download(spacy_model_name)
            # Import the downloaded model module directly and load from there
            spacy_model_module = __import__(spacy_model_name)
            spacy_model = spacy_model_module.load(disable=disable)  # type: ignore
        LOADED_SPACY_MODELS[options] = spacy_model
    return LOADED_SPACY_MODELS[options]