

Python data.find Method Code Examples

This article collects typical usage examples of the Python method nltk.data.find. If you are wondering how data.find is used in practice, how to call it, or what real-world examples look like, the selected code examples below should help. You can also explore further usage examples for the nltk.data module, where this method lives.


The sections below present 15 code examples of the data.find method, sorted by popularity by default.
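As a quick orientation before the examples, here is a minimal sketch of the pattern that recurs throughout them: look a resource up with nltk.data.find(), which raises LookupError when the resource is not on the local NLTK data path, and fall back to nltk.download(). The helper name locate_or_download and the wordnet resource in the usage comment are illustrative choices only, not part of the NLTK API.

from nltk import download
from nltk.data import find

def locate_or_download(resource_path, package_name):
    """Return the local path of an NLTK resource, downloading the package if it is missing."""
    try:
        # find() searches the NLTK data directories and raises LookupError on a miss.
        return find(resource_path)
    except LookupError:
        download(package_name)
        return find(resource_path)

# Example usage (the first call may trigger a download):
# wordnet_path = locate_or_download('corpora/wordnet', 'wordnet')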

Example 1: test_vocabulary_martin_mode

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def test_vocabulary_martin_mode(self):
        """Tests all words from the test vocabulary provided by M Porter
        
        The sample vocabulary and output were sourced from:
            http://tartarus.org/martin/PorterStemmer/voc.txt
            http://tartarus.org/martin/PorterStemmer/output.txt
        and are linked to from the Porter Stemmer algorithm's homepage
        at
            http://tartarus.org/martin/PorterStemmer/
        """
        self._test_against_expected_output(
            PorterStemmer.MARTIN_EXTENSIONS,
            data.find('stemmers/porter_test/porter_martin_output.txt')
                .open(encoding='utf-8')
                .read()
                .splitlines()
        ) 
Developer ID: sdoran35, Project: hate-to-hugs, Lines of code: 19, Source file: test_stem.py

Example 2: __init__

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def __init__(self):
        """
        Contains various synset related functions.
        """
        try:
            data.find(os.path.join("corpora", "wordnet"))
        except LookupError:
            download("wordnet")

        self.API = ImageNetAPI() 
Developer ID: zevisert, Project: Imagyn, Lines of code: 12, Source file: lexicon.py

Example 3: build_model

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def build_model(fmt='binary'):
    print('Loading training data...')
    train_paths = [find('corpora/ace_data/ace.dev'),
                   find('corpora/ace_data/ace.heldout'),
                   find('corpora/ace_data/bbn.dev'),
                   find('corpora/ace_data/muc.dev')]
    train_trees = load_ace_data(train_paths, fmt)
    train_data = [postag_tree(t) for t in train_trees]
    print('Training...')
    cp = NEChunkParser(train_data)
    del train_data

    print('Loading eval data...')
    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print('Evaluating...')
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
        chunkscore.score(correct, guess)
        if i < 3: cmp_chunks(correct, guess)
    print(chunkscore)

    outfilename = '/tmp/ne_chunker_%s.pickle' % fmt
    print('Saving chunker to %s...' % outfilename)

    with open(outfilename, 'wb') as outfile:
        pickle.dump(cp, outfile, -1)

    return cp 
Developer ID: Thejas-1, Project: Price-Comparator, Lines of code: 34, Source file: named_entity.py

Example 4: _chunk_parse

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def _chunk_parse(self, grammar=None, root_label='record', trace=0, **kwargs):
        """
        Returns an element tree structure corresponding to a toolbox data file
        parsed according to the chunk grammar.

        :type grammar: str
        :param grammar: Contains the chunking rules used to parse the
            database.  See ``chunk.RegExp`` for documentation.
        :type root_label: str
        :param root_label: The node value that should be used for the
            top node of the chunk structure.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            ``1`` will generate normal tracing output; and ``2`` or
            higher will generate verbose tracing output.
        :type kwargs: dict
        :param kwargs: Keyword arguments passed to ``toolbox.StandardFormat.fields()``
        :rtype: ElementTree._ElementInterface
        """
        from nltk import chunk
        from nltk.tree import Tree

        cp = chunk.RegexpParser(grammar, root_label=root_label, trace=trace)
        db = self.parse(**kwargs)
        tb_etree = Element('toolbox_data')
        header = db.find('header')
        tb_etree.append(header)
        for record in db.findall('record'):
            parsed = cp.parse([(elem.text, elem.tag) for elem in record])
            tb_etree.append(self._tree2etree(parsed))
        return tb_etree 
Developer ID: Thejas-1, Project: Price-Comparator, Lines of code: 34, Source file: toolbox.py

Example 5: add_default_fields

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def add_default_fields(elem, default_fields):
    """
    Add blank elements and subelements specified in default_fields.

    :param elem: toolbox data in an elementtree structure
    :type elem: ElementTree._ElementInterface
    :param default_fields: fields to add to each type of element and subelement
    :type default_fields: dict(tuple)
    """
    for field in default_fields.get(elem.tag,  []):
        if elem.find(field) is None:
            SubElement(elem, field)
    for child in elem:
        add_default_fields(child, default_fields) 
Developer ID: Thejas-1, Project: Price-Comparator, Lines of code: 16, Source file: toolbox.py

Example 6: demo

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def demo():
    from itertools import islice

#    zip_path = find('corpora/toolbox.zip')
#    lexicon = ToolboxData(ZipFilePathPointer(zip_path, 'toolbox/rotokas.dic')).parse()
    file_path = find('corpora/toolbox/rotokas.dic')
    lexicon = ToolboxData(file_path).parse()
    print('first field in fourth record:')
    print(lexicon[3][0].tag)
    print(lexicon[3][0].text)

    print('\nfields in sequential order:')
    for field in islice(lexicon.find('record'), 10):
        print(field.tag, field.text)

    print('\nlx fields:')
    for field in islice(lexicon.findall('record/lx'), 10):
        print(field.text)

    settings = ToolboxSettings()
    file_path = find('corpora/toolbox/MDF/MDF_AltH.typ')
    settings.open(file_path)
#    settings.open(ZipFilePathPointer(zip_path, entry='toolbox/MDF/MDF_AltH.typ'))
    tree = settings.parse(unwrap=False, encoding='cp1252')
    print(tree.find('expset/expMDF/rtfPageSetup/paperSize').text)
    settings_tree = ElementTree(tree)
    print(to_settings_string(settings_tree).encode('utf8')) 
Developer ID: Thejas-1, Project: Price-Comparator, Lines of code: 29, Source file: toolbox.py

Example 7: __init__

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def __init__(self, load=True):
        '''
        :param load: Load the pickled model upon instantiation.
        '''
        self.model = AveragedPerceptron()
        self.tagdict = {}
        self.classes = set()
        if load:
            AP_MODEL_LOC = str(find('taggers/averaged_perceptron_tagger/'+PICKLE))
            self.load(AP_MODEL_LOC) 
Developer ID: Thejas-1, Project: Price-Comparator, Lines of code: 12, Source file: perceptron.py

Example 8: __init__

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def __init__(self, load=True):
        '''
        :param load: Load the pickled model upon instantiation.
        '''
        self.model = AveragedPerceptron()
        self.tagdict = {}
        self.classes = set()
        if load:
            AP_MODEL_LOC = 'file:'+str(find('taggers/averaged_perceptron_tagger/'+PICKLE))
            self.load(AP_MODEL_LOC) 
Developer ID: SignalMedia, Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, Lines of code: 12, Source file: perceptron.py

Example 9: nltk_download_corpus

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def nltk_download_corpus(resource_path):
    """
    Download the specified NLTK corpus file
    unless it has already been downloaded.

    Returns True if the corpus needed to be downloaded.
    """
    from nltk.data import find
    from nltk import download
    from os.path import split

    # Download the wordnet data only if it is not already downloaded
    _, corpus_name = split(resource_path)

    ## From http://www.nltk.org/api/nltk.html ##
    # When using find() to locate a directory contained in a zipfile,
    # the resource name must end with the forward slash character.
    # Otherwise, find() will not locate the directory.
    ####
    # Helps when resource_path == 'sentiment/vader_lexicon'
    if not resource_path.endswith('/'):
        resource_path = resource_path + '/'

    downloaded = False

    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True

    return downloaded 
Developer ID: isipalma, Project: Tutorial-Chatterbot, Lines of code: 34, Source file: utils.py

Example 10: build_model

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def build_model(fmt='binary'):
    print('Loading training data...')
    train_paths = [find('corpora/ace_data/ace.dev'),
                   find('corpora/ace_data/ace.heldout'),
                   find('corpora/ace_data/bbn.dev'),
                   find('corpora/ace_data/muc.dev')]
    train_trees = load_ace_data(train_paths, fmt)
    train_data = [postag_tree(t) for t in train_trees]
    print('Training...')
    cp = NEChunkParser(train_data)
    del train_data

    print('Loading eval data...')
    eval_paths = [find('corpora/ace_data/ace.eval')]
    eval_trees = load_ace_data(eval_paths, fmt)
    eval_data = [postag_tree(t) for t in eval_trees]

    print('Evaluating...')
    chunkscore = ChunkScore()
    for i, correct in enumerate(eval_data):
        guess = cp.parse(correct.leaves())
        chunkscore.score(correct, guess)
        if i < 3: cmp_chunks(correct, guess)
    print(chunkscore)

    outfilename = '/tmp/ne_chunker_{0}.pickle'.format(fmt)
    print('Saving chunker to {0}...'.format(outfilename))

    with open(outfilename, 'wb') as outfile:
        pickle.dump(cp, outfile, -1)

    return cp 
Developer ID: sdoran35, Project: hate-to-hugs, Lines of code: 34, Source file: named_entity.py

Example 11: test_corpus_bleu

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def test_corpus_bleu(self):
        ref_file = find('models/wmt15_eval/ref.ru')
        hyp_file = find('models/wmt15_eval/google.ru')
        mteval_output_file = find('models/wmt15_eval/mteval-13a.output')

        # Reads the BLEU scores from the `mteval-13a.output` file.
        # The order of the list corresponds to the order of the ngrams.
        with open(mteval_output_file, 'r') as mteval_fin:
            # The numbers are located in the last 2nd line of the file.
            # The first and 2nd item in the list are the score and system names.
            # Wrap in list() so the scores can be iterated over in both loops below.
            mteval_bleu_scores = list(map(float, mteval_fin.readlines()[-2].split()[1:-1]))

        with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
            with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
                # Whitespace tokenize the file.
                # Note: split() automatically strip().
                hypothesis = list(map(lambda x: x.split(), hyp_fin))
                # Note that the corpus_bleu input is list of list of references.
                references = list(map(lambda x: [x.split()], ref_fin))
                # Without smoothing.
                for i, mteval_bleu in zip(range(1,10), mteval_bleu_scores):
                    nltk_bleu = corpus_bleu(references, hypothesis, weights=(1.0/i,)*i)
                    # Check that the BLEU scores difference is less than 0.005 .
                    # Note: This is an approximate comparison; as much as
                    #       +/- 0.01 BLEU might be "statistically significant",
                    #       the actual translation quality might not be.
                    assert abs(mteval_bleu - nltk_bleu) < 0.005

                # With the same smoothing method used in mteval-v13a.pl
                chencherry = SmoothingFunction()
                for i, mteval_bleu in zip(range(1,10), mteval_bleu_scores):
                    nltk_bleu = corpus_bleu(references, hypothesis,
                                            weights=(1.0/i,)*i,
                                            smoothing_function=chencherry.method3)
                    assert abs(mteval_bleu - nltk_bleu) < 0.005 
Developer ID: sdoran35, Project: hate-to-hugs, Lines of code: 37, Source file: test_bleu.py

Example 12: _vocabulary

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def _vocabulary(self):
        return (
            data.find('stemmers/porter_test/porter_vocabulary.txt')
                .open(encoding='utf-8')
                .read()
                .splitlines()
        ) 
Developer ID: sdoran35, Project: hate-to-hugs, Lines of code: 9, Source file: test_stem.py

Example 13: test_vocabulary_original_mode

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def test_vocabulary_original_mode(self):
        # The list of stems for this test was generated by taking the
        # Martin-blessed stemmer from
        # http://tartarus.org/martin/PorterStemmer/c.txt
        # and removing all the --DEPARTURE-- sections from it and
        # running it against Martin's test vocabulary.
        self._test_against_expected_output(
            PorterStemmer.ORIGINAL_ALGORITHM,
            data.find('stemmers/porter_test/porter_original_output.txt')
                .open(encoding='utf-8')
                .read()
                .splitlines()
        ) 
Developer ID: sdoran35, Project: hate-to-hugs, Lines of code: 15, Source file: test_stem.py

Example 14: demo

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def demo():
    """This assumes the Python module bllipparser is installed."""

    # download and install a basic unified parsing model (Wall Street Journal)
    # sudo python -m nltk.downloader bllip_wsj_no_aux

    from nltk.data import find
    model_dir = find('models/bllip_wsj_no_aux').path

    print('Loading BLLIP Parsing models...')
    # the easiest way to get started is to use a unified model
    bllip = BllipParser.from_unified_model_dir(model_dir)
    print('Done.')

    sentence1 = 'British left waffles on Falklands .'.split()
    sentence2 = 'I saw the man with the telescope .'.split()
    # this sentence is known to fail under the WSJ parsing model
    fail1 = '# ! ? : -'.split()
    for sentence in (sentence1, sentence2, fail1):
        print('Sentence: %r' % ' '.join(sentence))
        try:
            tree = next(bllip.parse(sentence))
            print(tree)
        except StopIteration:
            print("(parse failed)")

    # n-best parsing demo
    for i, parse in enumerate(bllip.parse(sentence1)):
        print('parse %d:\n%s' % (i, parse))

    # using external POS tag constraints
    print("forcing 'tree' to be 'NN':",
          next(bllip.tagged_parse([('A', None), ('tree', 'NN')])))
    print("forcing 'A' to be 'DT' and 'tree' to be 'NNP':",
          next(bllip.tagged_parse([('A', 'DT'), ('tree', 'NNP')])))
    # constraints don't have to make sense... (though on more complicated
    # sentences, they may cause the parse to fail)
    print("forcing 'A' to be 'NNP':",
          next(bllip.tagged_parse([('A', 'NNP'), ('tree', None)]))) 
Developer ID: sdoran35, Project: hate-to-hugs, Lines of code: 41, Source file: bllip.py

Example 15: _get_tagger

# Required module import: from nltk import data [as alias]
# Or: from nltk.data import find [as alias]
def _get_tagger(lang=None):
    if lang == 'rus':
        tagger = PerceptronTagger(False)
        ap_russian_model_loc = 'file:' + str(find(RUS_PICKLE))
        tagger.load(ap_russian_model_loc)
    elif lang == 'eng':
        tagger = PerceptronTagger()
    else:
        tagger = PerceptronTagger()
    return tagger 
Developer ID: sdoran35, Project: hate-to-hugs, Lines of code: 12, Source file: __init__.py


Note: The nltk.data.find examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult each project's License before distributing or using the code; do not reproduce without permission.