This article collects typical usage examples of the Python method nltk.data.load. If you have been wondering what data.load does, how to call it, or what real code using it looks like, the curated examples below should help. You can also explore further usage examples from the containing module, nltk.data.
The text below presents 13 code examples of data.load, sorted by popularity by default.
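Before the examples, a quick orientation: nltk.data.load resolves a resource name (or an nltk:, file:, or http: URL) against the nltk_data search path, infers the format from the file extension unless one is given explicitly, and returns the unpickled or parsed object. A minimal sketch, assuming the Punkt models have been installed via nltk.download('punkt'):

from nltk import data

# Load the English Punkt sentence tokenizer from the nltk_data search path
tokenizer = data.load('tokenizers/punkt/english.pickle')
print(tokenizer.tokenize("Dr. Smith arrived. He was late."))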
Example 1: demo_sent_subjectivity
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def demo_sent_subjectivity(text):
    """
    Classify a single sentence as subjective or objective using a stored
    SentimentAnalyzer.

    :param text: a sentence whose subjectivity has to be classified.
    """
    from nltk.classify import NaiveBayesClassifier
    from nltk.tokenize import regexp
    word_tokenizer = regexp.WhitespaceTokenizer()
    try:
        sentim_analyzer = load('sa_subjectivity.pickle')
    except LookupError:
        print('Cannot find the sentiment analyzer you want to load.')
        print('Training a new one using NaiveBayesClassifier.')
        sentim_analyzer = demo_subjectivity(NaiveBayesClassifier.train, True)

    # Tokenize and convert to lower case
    tokenized_text = [word.lower() for word in word_tokenizer.tokenize(text)]
    print(sentim_analyzer.classify(tokenized_text))
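A hedged usage sketch for the function above, which lives in nltk.sentiment.util; it assumes the subjectivity corpus and NLTK's sentiment utilities are available, since a missing sa_subjectivity.pickle triggers training of a fresh classifier:

from nltk.sentiment.util import demo_sent_subjectivity

# Prints 'subj' or 'obj' for the given sentence
demo_sent_subjectivity("The plot was predictable but the acting felt genuine.")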
Example 2: _load_universal_map
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def _load_universal_map(fileid):
    contents = load(join(_UNIVERSAL_DATA, fileid + '.map'), format="text")

    # When mapping to the Universal Tagset,
    # map unknown inputs to 'X' not 'UNK'
    _MAPPINGS[fileid]['universal'].default_factory = lambda: 'X'

    for line in contents.splitlines():
        line = line.strip()
        if line == '':
            continue
        fine, coarse = line.split('\t')

        assert coarse in _UNIVERSAL_TAGS, 'Unexpected coarse tag: {}'.format(coarse)
        assert fine not in _MAPPINGS[fileid]['universal'], 'Multiple entries for original tag: {}'.format(fine)

        _MAPPINGS[fileid]['universal'][fine] = coarse
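The table loaded above is what nltk.tag.map_tag (used in Example 3 below) consults; a small sketch, assuming the universal_tagset resource has been downloaded:

from nltk.tag import map_tag

# Penn Treebank fine-grained tags collapse onto the Universal Tagset
print(map_tag('en-ptb', 'universal', 'NNS'))   # NOUN
print(map_tag('en-ptb', 'universal', 'VBZ'))   # VERB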
Example 3: pos_tag
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def pos_tag(tokens, tagset=None):
    """
    Use NLTK's currently recommended part of speech tagger to
    tag the given list of tokens.

        >>> from nltk.tag import pos_tag # doctest: +SKIP
        >>> from nltk.tokenize import word_tokenize # doctest: +SKIP
        >>> pos_tag(word_tokenize("John's big idea isn't all that bad.")) # doctest: +SKIP
        [('John', 'NNP'), ("'s", 'POS'), ('big', 'JJ'), ('idea', 'NN'), ('is',
        'VBZ'), ("n't", 'RB'), ('all', 'DT'), ('that', 'DT'), ('bad', 'JJ'),
        ('.', '.')]

    :param tokens: Sequence of tokens to be tagged
    :type tokens: list(str)
    :return: The tagged tokens
    :rtype: list(tuple(str, str))
    """
    tagger = load(_POS_TAGGER)
    if tagset:
        return [(token, map_tag('en-ptb', tagset, tag)) for (token, tag) in tagger.tag(tokens)]
    return tagger.tag(tokens)
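A usage sketch through the public nltk.tag.pos_tag; this snippet is from an older NLTK release where _POS_TAGGER named a pickled Treebank tagger, so the sketch assumes that resource (or the current default tagger) has been downloaded:

from nltk.tag import pos_tag
from nltk.tokenize import word_tokenize

tokens = word_tokenize("The quick brown fox jumps over the lazy dog.")
# Pass tagset='universal' to get the coarse tags from Example 2's mapping
print(pos_tag(tokens, tagset='universal'))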
Example 4: __init__
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def __init__(self, language):
    """
    :param str language: ISO 639-1 language code. See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
    """
    self.language = language
    model = self.supported_models.get(language)
    if model:
        self.splitter = load(model)
    else:
        raise ValueError(
            "Invalid or unsupported language: '%s'. Please use one of the currently supported ones: %s" % (
                language, self.supported_models.keys()))
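supported_models is not shown in this snippet; below is a self-contained sketch of how such a splitter might map ISO 639-1 codes onto Punkt pickles. The class name and mapping are assumptions for illustration only, not the original project's code:

from nltk.data import load

class SentenceSplitter:
    # Hypothetical table; the original class defines its own
    supported_models = {
        'en': 'tokenizers/punkt/english.pickle',
        'de': 'tokenizers/punkt/german.pickle',
    }

    def __init__(self, language):
        self.language = language
        model = self.supported_models.get(language)
        if model:
            self.splitter = load(model)
        else:
            raise ValueError("Unsupported language: %r" % language)

splitter = SentenceSplitter('en')
print(splitter.splitter.tokenize("First sentence. Second one."))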
Example 5: ne_chunk
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def ne_chunk(tagged_tokens, binary=False):
    """
    Use NLTK's currently recommended named entity chunker to
    chunk the given list of tagged tokens.
    """
    if binary:
        chunker_pickle = _BINARY_NE_CHUNKER
    else:
        chunker_pickle = _MULTICLASS_NE_CHUNKER
    chunker = load(chunker_pickle)
    return chunker.parse(tagged_tokens)
Example 6: ne_chunk_sents
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def ne_chunk_sents(tagged_sentences, binary=False):
    """
    Use NLTK's currently recommended named entity chunker to chunk the
    given list of tagged sentences, each consisting of a list of tagged tokens.
    """
    if binary:
        chunker_pickle = _BINARY_NE_CHUNKER
    else:
        chunker_pickle = _MULTICLASS_NE_CHUNKER
    chunker = load(chunker_pickle)
    return chunker.parse_sents(tagged_sentences)
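A usage sketch through the public API (nltk.ne_chunk and nltk.ne_chunk_sents wrap the same pickled chunkers), assuming the maxent_ne_chunker and words resources have been downloaded:

from nltk import ne_chunk, pos_tag, word_tokenize

tree = ne_chunk(pos_tag(word_tokenize("Barack Obama visited Paris in 2009.")))
print(tree)   # a Tree whose subtrees carry labels such as PERSON and GPE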
Example 7: _format_tagset
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def _format_tagset(tagset, tagpattern=None):
    tagdict = load("help/tagsets/" + tagset + ".pickle")
    if not tagpattern:
        _print_entries(sorted(tagdict), tagdict)
    elif tagpattern in tagdict:
        _print_entries([tagpattern], tagdict)
    else:
        tagpattern = re.compile(tagpattern)
        tags = [tag for tag in sorted(tagdict) if tagpattern.match(tag)]
        if tags:
            _print_entries(tags, tagdict)
        else:
            print("No matching tags found.")
Example 8: read_rule
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def read_rule(self, filename):
    rules = load('nltk:stemmers/rslp/' + filename, format='raw').decode("utf8")
    lines = rules.split("\n")
    lines = [line for line in lines if line != ""]       # remove blank lines
    lines = [line for line in lines if line[0] != "#"]   # remove comments

    # NOTE: a simple but ugly hack to make this parser happy with double '\t's
    lines = [line.replace("\t\t", "\t") for line in lines]

    # parse rules
    rules = []
    for line in lines:
        rule = []
        tokens = line.split("\t")

        # text to be searched for at the end of the string
        rule.append(tokens[0][1:-1])   # remove quotes

        # minimum stem size to perform the replacement
        rule.append(int(tokens[1]))

        # text to be replaced into
        rule.append(tokens[2][1:-1])   # remove quotes

        # exceptions to this rule
        rule.append([token[1:-1] for token in tokens[3].split(",")])

        # append to the results
        rules.append(rule)

    return rules
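read_rule belongs to NLTK's RSLP stemmer for Portuguese, which loads its rule files this way; a usage sketch, assuming the rslp resource has been downloaded via nltk.download('rslp'):

from nltk.stem import RSLPStemmer

stemmer = RSLPStemmer()           # rule files are fetched through nltk.data.load
print(stemmer.stem("cantando"))   # prints the stem of the Portuguese word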
Example 9: load
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def load(self, path):
    '''Load the pickled model weights.'''
    self.weights = load(path)
Example 10: __init__
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def __init__(self, load=True):
    '''
    :param load: Load the pickled model upon instantiation.
    '''
    self.model = AveragedPerceptron()
    self.tagdict = {}
    self.classes = set()
    if load:
        AP_MODEL_LOC = str(find('taggers/averaged_perceptron_tagger/' + PICKLE))
        self.load(AP_MODEL_LOC)
Example 11: __init__
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def __init__(self, load=True):
    '''
    :param load: Load the pickled model upon instantiation.
    '''
    self.model = AveragedPerceptron()
    self.tagdict = {}
    self.classes = set()
    if load:
        AP_MODEL_LOC = 'file:' + str(find('taggers/averaged_perceptron_tagger/' + PICKLE))
        self.load(AP_MODEL_LOC)
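Examples 10 and 11 are two revisions of PerceptronTagger.__init__ from nltk.tag.perceptron; the later one adds a 'file:' prefix so that nltk.data.load treats the located path as a plain filesystem URL. A usage sketch, assuming the averaged_perceptron_tagger resource has been downloaded:

from nltk.tag.perceptron import PerceptronTagger

tagger = PerceptronTagger()   # load=True unpickles the bundled English model
print(tagger.tag(['The', 'quick', 'brown', 'fox']))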
Example 12: sent_tokenize
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def sent_tokenize(text, language='english'):
    """
    Return a sentence-tokenized copy of *text*,
    using NLTK's recommended sentence tokenizer
    (currently :class:`.PunktSentenceTokenizer`
    for the specified language).

    :param text: text to split into sentences
    :param language: the model name in the Punkt corpus
    """
    tokenizer = load('tokenizers/punkt/{0}.pickle'.format(language))
    return tokenizer.tokenize(text)
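A usage sketch for the public nltk.tokenize.sent_tokenize defined above, assuming the punkt models are installed:

from nltk.tokenize import sent_tokenize

text = "NLTK ships one Punkt model per language. This call picks the English one."
print(sent_tokenize(text))                        # splits into two sentences
print(sent_tokenize(text, language='english'))    # explicit model name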
Example 13: pos_tag_sents
# Required import: from nltk import data [as alias]
# Or: from nltk.data import load [as alias]
def pos_tag_sents(sentences):
    """
    Use NLTK's currently recommended part of speech tagger to tag the
    given list of sentences, each consisting of a list of tokens.
    """
    tagger = load(_POS_TAGGER)
    return tagger.tag_sents(sentences)
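A usage sketch for the public nltk.tag.pos_tag_sents, which loads the tagger once and reuses it across all sentences; it assumes the recommended tagger resource is installed:

from nltk.tag import pos_tag_sents

sents = [['I', 'like', 'NLTK', '.'], ['It', 'works', 'well', '.']]
print(pos_tag_sents(sents))   # one list of (token, tag) pairs per input sentence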