当前位置: 首页>>代码示例>>Python>>正文


Python porter2.stem方法代码示例

本文整理汇总了Python中stemming.porter2.stem方法的典型用法代码示例。如果您正苦于以下问题:Python porter2.stem方法的具体用法?Python porter2.stem怎么用?Python porter2.stem使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在stemming.porter2的用法示例。


在下文中一共展示了porter2.stem方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _calculate_word_scores

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def _calculate_word_scores(self, phrase_list):
        """Score each stemmed word by frequency, weighted by its tendency to
        appear in multi-word candidate phrases.

        Args:
            phrase_list: iterable of phrases, each a sequence of word strings.

        Returns:
            dict mapping stemmed word -> frequency * average multi-word bonus.
        """
        word_freq = nltk.FreqDist()
        word_multiplier = nltk.FreqDist()
        for phrase in phrase_list:
            # Bonus (capped at 2) for phrases with two or more non-numeric
            # words.  NOTE: the original used len(filter(...)), which raises
            # TypeError on Python 3 because filter returns an iterator; count
            # with a generator expression instead.
            multi_word = min(2, sum(1 for x in phrase if not is_numeric(x)))
            for word in phrase:
                stemmed = stem(word)  # normalize inflected forms to one key
                word_freq[stemmed] += 1
                word_multiplier[stemmed] += multi_word
        # Convert the accumulated bonus into a per-word average.
        for word in word_freq.keys():
            word_multiplier[word] = word_multiplier[word] / float(word_freq[word])
        word_scores = {}
        for word in word_freq.keys():
            word_scores[word] = word_freq[word] * word_multiplier[word]

        return word_scores
开发者ID:mhbuehler,项目名称:resume-optimizer,代码行数:20,代码来源:keyword_extractor.py

示例2: _calculate_phrase_scores

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def _calculate_phrase_scores(self, phrase_list, word_scores, metric='avg'):
        """Score each phrase as the average, sum, or max of its word scores."""
        phrase_scores = {}
        for phrase in phrase_list:
            key = " ".join(phrase)
            if metric in ['avg', 'sum']:
                total = 0
                for word in phrase:
                    total += word_scores[stem(word)]
                if metric == 'avg':
                    phrase_scores[key] = total / float(len(phrase))
                else:
                    phrase_scores[key] = total
            elif metric == 'max':
                # Track the running maximum of the word scores.
                best = 0
                for word in phrase:
                    current = word_scores[stem(word)]
                    if current > best:
                        best = current
                phrase_scores[key] = best

        return phrase_scores
开发者ID:mhbuehler,项目名称:resume-optimizer,代码行数:19,代码来源:keyword_extractor.py

示例3: retrieve_dataset

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def retrieve_dataset(index_name, doc_type, weight={'title': 5, 'abstract': 1}):
    """Fetch up to 10000 docs of *doc_type* from *index_name* and build a
    zone-weighted, stemmed term-frequency profile per document id."""
    es = Elasticsearch()
    hits = es.search(index=index_name, doc_type=doc_type, size=10000)['hits']['hits']
    dataset = {}
    for hit in hits:
        doc_id = hit['_id']
        info = DocumentInfo(doc_id)
        vectors = es.termvectors(index=index_name, doc_type=doc_type, id=doc_id,
                                 offsets=False, payloads=False, positions=False,
                                 fields='title,abstract',
                                 field_statistics=False)['term_vectors']
        for zone in {'abstract', 'title'}:
            terms = vectors[zone]['terms']
            for term in terms:
                stemmed = stem(term)
                # Skip tokens that are not purely alphabetic after stemming.
                if not stemmed.isalpha():
                    continue
                contribution = terms[term]['term_freq'] * weight[zone]
                if stemmed in info.tf:
                    info.tf[stemmed] += contribution
                else:
                    info.tf[stemmed] = contribution
        dataset[doc_id] = info
    return dataset
开发者ID:moinfar,项目名称:ResearchGate-Analyser,代码行数:22,代码来源:tasks.py

示例4: input_f_word

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def input_f_word():
    """Print each whitespace token of *input_file* with its stem, tab-separated,
    after stripping common punctuation characters."""
    with open(input_file) as lines:
        for raw_line in lines:
            tokens = raw_line.strip().split()
            for token in tokens:
                # Remove commas, periods, parentheses, and quotes before stemming.
                cleaned = re.sub(r"(\,|\.|\(|\)|\'|\"|)", '', token)
                # TODO: need to handle '? -' ?
                print('{}\t{}'.format(cleaned, stem(cleaned)))
开发者ID:tmu-nlp,项目名称:100knock2017,代码行数:19,代码来源:knock52.py

示例5: get_feature

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def get_feature():
    """Parse sentiment.txt into parallel label/feature lists.

    Each line is '<label> tok tok ...'; tokens are stemmed and stopwords dropped.
    """
    target = []
    feature = []
    for line in open("sentiment.txt"):
        parts = line.strip("\n").split(" ")
        target.append(int(parts[0]))
        feature.append([stem(tok) for tok in parts[1:] if not if_stopword(tok)])
    return target, feature
开发者ID:tmu-nlp,项目名称:100knock2016,代码行数:11,代码来源:knock72.py

示例6: word_stem

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def word_stem():
    """Print 'stem <TAB> word' for every word from get_word(), commas removed."""
    for token in get_word():
        cleaned = token.replace(',', '')
        print(stem(cleaned), '\t', cleaned)
开发者ID:tmu-nlp,项目名称:100knock2016,代码行数:6,代码来源:knock52.py

示例7: get_feature

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def get_feature(sentence):
    """Build a bag-of-stems count dict from *sentence*, skipping stoplist words."""
    counts = defaultdict(int)
    for raw in sentence.split():
        lowered = raw.lower()
        if in_stoplist(lowered):
            continue
        counts[stem(lowered)] += 1
    return dict(counts)
开发者ID:tmu-nlp,项目名称:100knock2016,代码行数:9,代码来源:knock72.py

示例8: preprocess

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def preprocess(s):
    """Lowercase and split *s* on single spaces, trim each token, drop any
    token found in foobar, and return the stems of the rest."""
    tokens = s.lower().strip().split(" ")
    trimmed = (t.strip() for t in tokens)
    kept = (t for t in trimmed if t not in foobar)
    return [stem(t) for t in kept]
开发者ID:r9y9,项目名称:nlp100,代码行数:8,代码来源:76.py

示例9: clean_text

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def clean_text(text):
    """Clean *text* for TF-IDF: strip punctuation, lowercase, stem, and drop
    any token containing a digit.

    Args:
        text: input string.

    Returns:
        Space-joined string of stemmed, digit-free tokens.
    """
    # The original pattern ur'\p{P}+' is a SyntaxError on Python 3 (the 'ur'
    # prefix was removed) and \p{..} Unicode property classes are not
    # supported by the stdlib `re` module (they require the third-party
    # `regex` package).  [^\w\s]+ is a close stdlib approximation that
    # replaces runs of punctuation/symbol characters with a space.
    new_text = re.sub(r'[^\w\s]+', ' ', text)

    # Keep only stemmed tokens that contain no digits.
    new_text = [stem(i) for i in new_text.lower().split() if not
                re.findall(r'[0-9]', i)]

    new_text = ' '.join(new_text)

    return new_text


############################################################################### 
开发者ID:marcolagi,项目名称:quantulum,代码行数:15,代码来源:classifier.py

示例10: tokenize

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def tokenize(self, text, use_stem=True):
        """Tokenize *text*, dropping punctuation, stopwords, and tokens that do
        not start with an alphanumeric character; lowercase everything and stem
        when *use_stem* is true."""
        stopwords = sqlpie.global_cache[sqlpie.Config.STOPWORDS]

        def _keep(w):
            # Same filter as both original branches shared.
            return (w not in punctuation
                    and not stopwords.get(w.lower())
                    and re.match('[A-Z0-9]', w, re.IGNORECASE))

        if use_stem:
            return [stem(w.lower()) for w in self.word_tokenize(text) if _keep(w)]
        return [w.lower() for w in self.word_tokenize(text) if _keep(w)]
开发者ID:lessaworld,项目名称:SQLpie,代码行数:8,代码来源:summarizer.py

示例11: normalize_term

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def normalize_term(s, is_query_term=False):
        """Run the non-stemming normalization pipeline, then apply the stemmer."""
        normalized = Indexer.normalize_term_without_stemming(s, is_query_term)
        return stem(normalized)
开发者ID:lessaworld,项目名称:SQLpie,代码行数:6,代码来源:indexer.py

示例12: make_word_stem

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def make_word_stem(data_in):
    """Generator yielding [word, stem(word)] for each whitespace-stripped item."""
    for raw in data_in:
        token = raw.strip()
        yield [token, stem(token)]
开发者ID:tmu-nlp,项目名称:100knock2017,代码行数:6,代码来源:knock52.py

示例13: create_features

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def create_features(x):
    """Count unigram features: map 'UNI:<stem>' to its frequency in *x*."""
    phi = defaultdict(lambda: 0)
    for token in x:
        phi["UNI:" + stem(token)] += 1
    return phi
开发者ID:tmu-nlp,项目名称:100knock2017,代码行数:11,代码来源:knock76.py

示例14: create_features

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def create_features(x, feature_set):
    """Count unigram features, keeping only words whose stem is in *feature_set*.

    NOTE(review): membership tests the stem but the feature key uses the raw
    word — presumably intentional, but worth confirming against knock76.
    """
    phi = defaultdict(lambda: 0)
    for word in x:
        if stem(word) not in feature_set:
            continue
        phi["UNI:" + word] += 1
    return phi
开发者ID:tmu-nlp,项目名称:100knock2017,代码行数:11,代码来源:knock78.py

示例15: test_create_features

# 需要导入模块: from stemming import porter2 [as 别名]
# 或者: from stemming.porter2 import stem [as 别名]
def test_create_features(x):
    """Build 'UNI:<stem>' count features for evaluation data."""
    phi = defaultdict(lambda: 0)
    for w in x:
        stemmed = stem(w)
        phi["UNI:" + stemmed] += 1
    return phi
开发者ID:tmu-nlp,项目名称:100knock2017,代码行数:11,代码来源:knock78.py


注:本文中的stemming.porter2.stem方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。