Python language.Language方法代码示例

本文整理汇总了Python中spacy.language.Language类的典型用法代码示例。如果您正苦于以下问题：Python language.Language类的具体用法？Python language.Language怎么用？Python language.Language使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块spacy.language的用法示例。

在下文中一共展示了language.Language方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: load_from_path

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def load_from_path(
    lang: str,
    path: str,
    meta: Optional[Dict] = {"description": "custom model"},
    **kwargs
) -> UDPipeLanguage:
    """Convenience function for initializing the Language class and loading
    a custom UDPipe model via the path argument.

    lang: ISO 639-1 language code or shorthand UDPipe model name.
    path: Path to the UDPipe model.
    meta: Optional meta-information about the UDPipe model. A shallow copy
        is handed to the model, so the dict object passed in (or the shared
        default) is never the one stored by the model.
    kwargs: Optional config parameters, forwarded to UDPipeLanguage.
    RETURNS: The UDPipeLanguage object.
    """
    # The default value of `meta` is one dict object shared by every call.
    # Passing a copy keeps any later mutation of the model's metadata from
    # leaking into subsequent calls that rely on the default.
    model_meta = None if meta is None else dict(meta)
    model = UDPipeModel(lang=lang, path=path, meta=model_meta)
    nlp = UDPipeLanguage(udpipe_model=model, meta=model._meta, **kwargs)
    return nlp

开发者ID:TakeLab，项目名称:spacy-udpipe，代码行数:20，代码来源:language.py

示例2: load_nlp

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def load_nlp(vectors_loc, lang=None):
    """Build a pipeline object and fill its vocab with vectors from a file.

    vectors_loc: Path of the vectors file. The first line holds two
        whitespace-separated fields (row count and vector width); every
        following line holds a word followed by space-separated values.
    lang: Language code passed to `spacy.blank`. When None, a bare
        `Language()` is created instead.
    RETURNS: The pipeline object whose vocab received the vectors.
    """
    if lang is None:
        nlp = Language()
    else:
        # create empty language class – this is required if you're planning to
        # save the model to disk and load it back later (models always need a
        # "lang" setting). Use 'xx' for blank multi-language class.
        nlp = spacy.blank(lang)
    with open(vectors_loc, 'rb') as file_:
        header = file_.readline()
        _nr_row, nr_dim = header.split()
        # Parse the width once instead of on every line of the file.
        width = int(nr_dim)
        nlp.vocab.reset_vectors(width=width)
        for raw_line in file_:
            line = raw_line.rstrip().decode('utf8')
            if not line:
                # A blank (e.g. trailing) line would otherwise be registered
                # as the word '' with an empty vector.
                continue
            # Split from the right so a word containing spaces stays intact.
            pieces = line.rsplit(' ', width)
            word = pieces[0]
            vector = numpy.asarray([float(v) for v in pieces[1:]], dtype='f')
            nlp.vocab.set_vector(word, vector)  # add the vectors to the vocab
    return nlp

开发者ID:sonvx，项目名称:word2vecVN，代码行数:21，代码来源:spacy-fastext.py

示例3: ensure_proper_language_model

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def ensure_proper_language_model(nlp: Optional['Language']) -> None:
    """Validate that a spacy language model object is usable.

    Raises an `Exception` when `nlp` is `None` or when `nlp.path` is `None`;
    otherwise returns without doing anything."""

    if nlp is None:
        not_loaded_msg = ("Failed to load spacy language model. "
                          "Loading the model returned 'None'.")
        raise Exception(not_loaded_msg)

    # Spacy sets the path to `None` if it did not load the model from disk;
    # in that case `nlp` is an unusable stub.
    model_on_disk = nlp.path is not None
    if not model_on_disk:
        stub_msg_template = ("Failed to load spacy language model for "
                             "lang '{}'. Make sure you have downloaded the "
                             "correct model (https://spacy.io/docs/usage/).")
        raise Exception(stub_msg_template.format(nlp.lang))

开发者ID:weizhenzhao，项目名称:rasa_nlu，代码行数:18，代码来源:spacy_utils.py

示例4: get_spacy_model

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def get_spacy_model(spacy_model_name: str, pos_tags: bool, parse: bool, ner: bool) -> SpacyModelType:
    """
    Return the spacy model for the given name and component flags, loading it at most once.

    Loaded models are stored in ``LOADED_SPACY_MODELS`` keyed by
    ``(spacy_model_name, pos_tags, parse, ner)``. If loading raises ``OSError``,
    the model is downloaded and loading is attempted a second time.
    """

    cache_key = (spacy_model_name, pos_tags, parse, ner)
    if cache_key in LOADED_SPACY_MODELS:
        return LOADED_SPACY_MODELS[cache_key]

    # Components the caller did not ask for are disabled, on top of the two
    # that are always disabled.
    optional_components = ((pos_tags, 'tagger'), (parse, 'parser'), (ner, 'ner'))
    disabled = ['vectors', 'textcat']
    disabled.extend(component for wanted, component in optional_components if not wanted)

    try:
        loaded_model = spacy.load(spacy_model_name, disable=disabled)
    except OSError:
        logger.warning(f"Spacy models '{spacy_model_name}' not found.  Downloading and installing.")
        spacy_download(spacy_model_name)
        loaded_model = spacy.load(spacy_model_name, disable=disabled)

    LOADED_SPACY_MODELS[cache_key] = loaded_model
    return LOADED_SPACY_MODELS[cache_key]

开发者ID:jcyk，项目名称:gtos，代码行数:27，代码来源:file.py

示例5: count_frequencies

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def count_frequencies(language_class: Language, input_path: Path):
    """
    Given a file containing single documents per line
    (in this case, sentences for the ICLR case law corpus), split the text
    with a `Tokenizer` built on the vocab of an `English()` instance and
    compute word and document frequencies for all words.

    language_class: Not used by this implementation; kept so existing
        callers keep working.
    input_path: Path of the text file to read, one document per line.
    RETURNS: A `(counts, doc_counts)` tuple. `counts` tallies every token
        occurrence; `doc_counts` tallies each distinct token once per line.
    """
    print(f"Processing {input_path}.")
    nlp = English()
    tokenizer = Tokenizer(nlp.vocab)
    counts = Counter()
    doc_counts = Counter()
    # Open the file in a context manager so the handle is closed even when
    # tokenization raises part-way through.
    with open(input_path, "r") as input_file:
        for line in tqdm.tqdm(input_file):
            words = [t.text for t in tokenizer(line)]
            counts.update(words)
            # De-duplicate so a word counts once per document (line).
            doc_counts.update(set(words))

    return counts, doc_counts

开发者ID:ICLRandD，项目名称:Blackstone，代码行数:21，代码来源:word_freqs.py

示例6: ensure_proper_language_model

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def ensure_proper_language_model(nlp: Optional["Language"]) -> None:
    """Check that the given spacy language model can be used.

    An `Exception` is raised if `nlp` is `None` or if `nlp.path` is `None`;
    a valid model makes the function return `None`."""

    if nlp is not None and nlp.path is not None:
        return

    if nlp is None:
        message = (
            "Failed to load spacy language model. "
            "Loading the model returned 'None'."
        )
    else:
        # Spacy sets the path to `None` if it did not load the model
        # from disk. In this case `nlp` is an unusable stub.
        message = (
            "Failed to load spacy language model for "
            "lang '{}'. Make sure you have downloaded the "
            "correct model (https://spacy.io/docs/usage/)."
        ).format(nlp.lang)
    raise Exception(message)

开发者ID:botfront，项目名称:rasa-for-botfront，代码行数:22，代码来源:spacy_utils.py

示例7: convert_to_flair_format

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def convert_to_flair_format(spacy_model: Language, data: List[Tuple[str, List[Offset]]]) -> List[str]:
    """Convert annotated texts into "token tag" lines using the BIOES scheme.

    spacy_model: Callable pipeline used to tokenize each text.
    data: Pairs of (text, offsets) where the offsets describe the entities
        of that text.
    RETURNS: A flat list of strings: one "<token> <tag>\\n" entry per token,
        followed by a single "\\n" entry after each text.
    """
    # Flair uses BIOES and Spacy BILUO
    # BILUO for Begin, Inside, Last, Unit, Out
    # BIOES for Begin, Inside, Outside, End, Single
    # Only the two-character scheme prefix is rewritten; a blanket
    # str.replace would also alter labels that merely contain "L-" or "U-"
    # (e.g. "B-MODEL-X").
    biluo_to_bioes_prefix = {'L-': 'E-', 'U-': 'S-'}

    result: List[str] = list()
    for text, offsets in data:
        doc: Doc = spacy_model(text)
        # remove duplicated offsets
        offsets = normalize_offsets(offsets=offsets)
        offset_tuples = list({offset.to_tuple() for offset in offsets})
        gold_annotations = GoldParse(doc, entities=offset_tuples)
        annotations: List[str] = gold_annotations.ner
        assert len(annotations) == len(doc)

        bioes_tags: List[str] = []
        for tag in annotations:
            if tag == "-":
                # replace unknown
                bioes_tags.append("O")
                continue
            prefix, label = tag[:2], tag[2:]
            bioes_tags.append(biluo_to_bioes_prefix.get(prefix, prefix) + label)

        result += [f"{word} {tag}\n" for word, tag in zip(doc, bioes_tags)]
        result.append('\n')
    return result

开发者ID:ELS-RD，项目名称:anonymisation，代码行数:21，代码来源:import_annotations.py

示例8: main

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def main(data_folder: str, output_folder: str, model_folder: str) -> None:
    """Tag every ".txt" file of a folder and write one HTML rendering per file.

    data_folder: Folder scanned (non-recursively) for files ending in ".txt".
    output_folder: Folder receiving one "<name>.html" file per input file.
    model_folder: Folder containing the 'best-model.pt' tagger to load.
    """
    nlp: Language = spacy.blank('fr')
    nlp.tokenizer = get_tokenizer(nlp)
    tokenizer = build_spacy_tokenizer(nlp)
    filenames = [filename for filename in os.listdir(data_folder) if filename.endswith(".txt")]
    tagger: SequenceTagger = SequenceTagger.load(os.path.join(model_folder, 'best-model.pt'))

    for filename in tqdm(iterable=filenames, unit=" txt", desc="anonymize cases"):
        # Read the whole file first so the handle is released before the
        # tagging and rendering work starts.
        with open(os.path.join(data_folder, filename), 'r') as input_f:
            lines = input_f.readlines()

        sentences = tagger.predict(sentences=lines,
                                   mini_batch_size=32,
                                   verbose=False,
                                   use_tokenizer=tokenizer)
        # Strip only the final extension: splitting on the first "." would
        # map "a.1.txt" and "a.2.txt" to the same output file.
        case_name = os.path.splitext(filename)[0]
        page_html = render_ner_html(sentences, colors=colors, title=case_name)

        with open(os.path.join(output_folder, case_name + ".html"), "w") as output:
            output.write(page_html)

开发者ID:ELS-RD，项目名称:anonymisation，代码行数:20，代码来源:flair_generate_html_from_txt.py

示例9: count_frequencies

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def count_frequencies(language_class: Language, input_path: Path):
    """
    Given a file containing single documents per line
    (for scispacy, these are Pubmed abstracts), split the text
    using a science specific tokenizer and compute word and
    document frequencies for all words.

    language_class: Called with no arguments to create the object handed to
        `combined_rule_tokenizer`.
    input_path: Path of the text file to read, one document per line.
    RETURNS: A `(counts, doc_counts)` tuple. `counts` tallies every token
        occurrence; `doc_counts` tallies each distinct token once per line.
    """
    print(f"Processing {input_path}.")
    tokenizer = combined_rule_tokenizer(language_class())
    counts = Counter()
    doc_counts = Counter()
    # Open the file in a context manager so the handle is closed even when
    # tokenization raises part-way through.
    with open(input_path, "r") as input_file:
        for line in input_file:
            words = [t.text for t in tokenizer(line)]
            counts.update(words)
            # De-duplicate so a word counts once per document (line).
            doc_counts.update(set(words))

    return counts, doc_counts

开发者ID:allenai，项目名称:scispacy，代码行数:19，代码来源:count_word_frequencies.py

示例10: ensure_proper_language_model

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def ensure_proper_language_model(nlp):
    # type: (Optional[Language]) -> None
    """Verify that the spacy language model passed in is usable.

    Raises an `Exception` for a `None` model and for a model whose `path`
    attribute is `None`; returns `None` for anything else."""

    no_model_error = ("Failed to load spacy language model. "
                      "Loading the model returned 'None'.")
    stub_model_error = ("Failed to load spacy language model for "
                        "lang '{}'. Make sure you have downloaded the "
                        "correct model (https://spacy.io/docs/usage/).")

    if nlp is None:
        raise Exception(no_model_error)

    # Spacy sets the path to `None` if it did not load the model from disk.
    # In this case `nlp` is an unusable stub.
    if nlp.path is None:
        raise Exception(stub_model_error.format(nlp.lang))

开发者ID:crownpku，项目名称:Rasa_NLU_Chi，代码行数:19，代码来源:spacy_utils.py

示例11: test_neg_spacy_processor

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def test_neg_spacy_processor(self):
    """Processing a document with this SpacyProcessor config must raise
    ProcessExecutionException."""
    document = ' '.join([
        "This tool is called Forte.",
        "The goal of this project to help you build NLP pipelines.",
        "NLP has never been made this easy before.",
    ])

    processor_config = {
        "processors": 'ner',
        # Language code for the language to build the Pipeline
        "lang": "xx_ent_wiki_sm",
        "use_gpu": False
    }

    pipeline = Pipeline[DataPack]()
    pipeline.set_reader(StringReader())
    pipeline.add(SpacyProcessor(), config=processor_config)
    pipeline.initialize()

    with self.assertRaises(ProcessExecutionException):
        pipeline.process(document)

开发者ID:asyml，项目名称:forte，代码行数:22，代码来源:spacy_processors_test.py

示例12: __init__

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def __init__(
    self,
    udpipe_model: UDPipeModel,
    meta: Optional[Dict] = None,
    **kwargs
):
    """Set up the language object around a loaded UDPipe model.

    The language is named "udpipe_<lang>" (e.g. "udpipe_en" rather than
    "en") to avoid conflicts with spaCy's built-in languages. Using entry
    points, this enables serializing and deserializing the language class:
    a "lang": "udpipe_en" entry in meta.json will automatically
    instantiate this class if this package is available.

    udpipe_model: The loaded UDPipe model.
    meta: spaCy model metadata. When None, the model's own `_meta` is used;
        otherwise a dict copy of the given mapping is stored.
    kwargs: Optional config parameters. The keys read here are
        "ignore_tag_map" (default False) and "max_length" (default 10 ** 6).
    """
    model_lang = udpipe_model._lang
    self.udpipe = udpipe_model
    self.Defaults = get_defaults(lang=model_lang)
    self.lang = f"udpipe_{model_lang}"

    # The tag map has to be cleared before the vocab is created from the
    # defaults below.
    if kwargs.get("ignore_tag_map", False):
        self.Defaults.tag_map = {}  # workaround for ValueError: [E167]

    self.vocab = self.Defaults.create_vocab()
    self.tokenizer = UDPipeTokenizer(model=self.udpipe, vocab=self.vocab)
    self.pipeline = []
    self.max_length = kwargs.get("max_length", 10 ** 6)

    if meta is None:
        self._meta = self.udpipe._meta
    else:
        self._meta = dict(meta)

    self._path = None
    self._optimizer = None

开发者ID:TakeLab，项目名称:spacy-udpipe，代码行数:33，代码来源:language.py

示例13: load

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def load(lang: str, **kwargs) -> UDPipeLanguage:
    """Create a UDPipeLanguage for `lang`, in the manner of spacy.load.

    The underlying UDPipeModel is built without a path and without
    metadata; the language object then receives the model's `_meta`.

    lang: ISO 639-1 language code or shorthand UDPipe model name.
    kwargs: Optional config parameters, forwarded to UDPipeLanguage.
    RETURNS: The UDPipeLanguage object.
    """
    udpipe_model = UDPipeModel(lang=lang, path=None, meta=None)
    return UDPipeLanguage(
        udpipe_model=udpipe_model,
        meta=udpipe_model._meta,
        **kwargs
    )

开发者ID:TakeLab，项目名称:spacy-udpipe，代码行数:13，代码来源:language.py

示例14: __init__

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def __init__(
    self,
    nlp: Language,
    input_id_col: str = "id",
    input_text_col: str = "text",
):
    """Store the language model and the document property names to use.

    nlp (spacy.language.Language): pre-loaded spacy language model
    input_id_col (str): property on each document to correlate with request
    input_text_col (str): property on each document to run the model on
    """
    # Column names first, then the model; the three assignments are independent.
    self.input_text_col = input_text_col
    self.input_id_col = input_id_col
    self.nlp = nlp

开发者ID:microsoft，项目名称:cookiecutter-spacy-fastapi，代码行数:16，代码来源:spacy_extractor.py

示例15: get_spacy_model

# 需要导入模块: from spacy import language [as 别名]
# 或者: from spacy.language import Language [as 别名]
def get_spacy_model(
    spacy_model_name: str, pos_tags: bool, parse: bool, ner: bool
) -> SpacyModelType:
    """
    Return the spacy model for the given name and component flags, loading it at most once.

    Loaded models are stored in ``LOADED_SPACY_MODELS`` keyed by
    ``(spacy_model_name, pos_tags, parse, ner)``. If loading raises ``OSError``,
    the model is downloaded and then loaded through its own package module.
    """

    cache_key = (spacy_model_name, pos_tags, parse, ner)
    if cache_key in LOADED_SPACY_MODELS:
        return LOADED_SPACY_MODELS[cache_key]

    # Components the caller did not ask for are disabled, on top of the two
    # that are always disabled.
    requested = {"tagger": pos_tags, "parser": parse, "ner": ner}
    disabled = ["vectors", "textcat"] + [
        component for component, wanted in requested.items() if not wanted
    ]

    try:
        loaded_model = spacy.load(spacy_model_name, disable=disabled)
    except OSError:
        logger.warning(
            f"Spacy models '{spacy_model_name}' not found.  Downloading and installing."
        )
        spacy_download(spacy_model_name)

        # Import the downloaded model module directly and load from there
        downloaded_package = __import__(spacy_model_name)
        loaded_model = downloaded_package.load(disable=disabled)  # type: ignore

    LOADED_SPACY_MODELS[cache_key] = loaded_model
    return LOADED_SPACY_MODELS[cache_key]

开发者ID:allenai，项目名称:allennlp，代码行数:34，代码来源:util.py

注：本文中的spacy.language.Language方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。