当前位置: 首页>>代码示例>>Python>>正文


Python textblob.Word方法代码示例

本文整理汇总了Python中textblob.Word方法的典型用法代码示例。如果您正苦于以下问题:Python textblob.Word方法的具体用法?Python textblob.Word怎么用?Python textblob.Word使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在textblob的用法示例。


在下文中一共展示了textblob.Word方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: make_mine

# 需要导入模块: import textblob [as 别名]
# 或者: from textblob import Word [as 别名]
def make_mine(yours, swap_rate):
    """Rebuild the word sequence *yours* as a single string, replacing
    roughly 1-in-*swap_rate* words with a randomly chosen WordNet synonym.

    A word is only swapped when it wins the 1/swap_rate draw AND has at
    least one synset; otherwise it is kept verbatim.
    """
    rewritten = []
    for original in yours:
        swapped = False
        if random.randrange(swap_rate) == 0:
            synsets = Word(original).synsets
            if synsets:
                # pick a random sense, then a random lemma of that sense
                lemmas = random.choice(synsets).lemmas()
                rewritten.append(random.choice(lemmas).name())
                swapped = True
        if not swapped:
            rewritten.append(original)

    return ' '.join(rewritten)
开发者ID:paubric,项目名称:python-sirajnet,代码行数:19,代码来源:main.py

示例2: get_synsets

# 需要导入模块: import textblob [as 别名]
# 或者: from textblob import Word [as 别名]
def get_synsets(text):
    """Return the WordNet synsets for the lemma form of *text*."""
    lemma = to_lemma(text)
    return Word(lemma).synsets
开发者ID:arguman,项目名称:arguman.org,代码行数:4,代码来源:utils.py

示例3: get_lemmas

# 需要导入模块: import textblob [as 别名]
# 或者: from textblob import Word [as 别名]
def get_lemmas(text):
    """Return an iterator over the (de-lemmatized) lemma names of all
    WordNet synsets of *text*.

    Bug fix: the original folded the per-synset sets together with
    ``reduce(operator.or_, sets)``, which raises ``TypeError`` when the
    word has no synsets (empty sequence, no initializer).
    ``set().union(*sets)`` is equivalent for the non-empty case and
    simply yields an empty set for unknown words.
    """
    word = Word(to_lemma(text))
    lemma_sets = [set(synset.lemma_names()) for synset in word.synsets]
    merged = set().union(*lemma_sets)
    # keep the original's lazy `map` return type for callers
    return map(from_lemma, merged)
开发者ID:arguman,项目名称:arguman.org,代码行数:8,代码来源:utils.py

示例4: normalize

# 需要导入模块: import textblob [as 别名]
# 或者: from textblob import Word [as 别名]
def normalize(self, word, tag="N"):
    """Lemmatize *word* according to its POS *tag* and lowercase it.

    Treebank-style tag prefixes map to WordNet POS kinds: V* -> VERB,
    RB* -> ADV, J* -> ADJ; anything else (including the default "N")
    is treated as a noun.
    """
    # first matching prefix wins; fall back to NOUN
    pos = NOUN
    for prefix, wn_pos in (("V", VERB), ("RB", ADV), ("J", ADJ)):
        if tag.startswith(prefix):
            pos = wn_pos
            break
    return Word(word).lemmatize(pos).lower()
开发者ID:orionmelt,项目名称:sherlock,代码行数:16,代码来源:text_parser.py

示例5: resolution

# 需要导入模块: import textblob [as 别名]
# 或者: from textblob import Word [as 别名]
def resolution(surfaceName):
    '''
    input: a surface name of entity
    output: the "normalized" entity name
    process: 1) lowercase each whitespace-separated token
             2) lemmatize it
             then rejoin the tokens with single spaces
    '''
    normalized = []
    for token in surfaceName.split():
        normalized.append(Word(token.lower()).lemmatize())
    return " ".join(normalized)
开发者ID:mickeystroller,项目名称:SetExpan,代码行数:12,代码来源:entityResolutionAndFilter.py

示例6: iter_filth

# 需要导入模块: import textblob [as 别名]
# 或者: from textblob import Word [as 别名]
def iter_filth(self, text):
    """Detect likely Skype usernames near mentions of 'skype' in *text*,
    install them as the filth regex on ``self.filth_cls``, then delegate
    to the parent detector's ``iter_filth``.

    Two fixes over the original:
    1. All-caps tokens are lowercased ONLY for the spellcheck; the
       original token is kept for the regex (the old code appended the
       lowercased copy, which could never match the all-caps text).
    2. Usernames are ``re.escape``d before joining, since valid Skype
       usernames may contain regex metacharacters such as '.' or '-'.
    """
    # find 'skype' in the text using a customized tokenizer. this makes
    # sure that all valid skype usernames are kept as tokens and not split
    # into different words
    tokenizer = nltk.tokenize.regexp.RegexpTokenizer(
        self.filth_cls.SKYPE_TOKEN
    )
    blob = textblob.TextBlob(text, tokenizer=tokenizer)
    skype_indices, tokens = [], []
    for i, token in enumerate(blob.tokens):
        tokens.append(token)
        if 'skype' in token.lower():
            skype_indices.append(i)

    # go through the words before and after skype words to identify
    # potential skype usernames.
    skype_usernames = []
    for i in skype_indices:
        jmin = max(i - self.word_radius, 0)
        jmax = min(i + self.word_radius + 1, len(tokens))
        for j in list(range(jmin, i)) + list(range(i + 1, jmax)):
            token = tokens[j]
            if self.filth_cls.SKYPE_USERNAME.match(token):

                # this token is a valid skype username. Most skype
                # usernames appear to be misspelled words. Word.spellcheck
                # does not handle an all caps word very well, so spellcheck
                # a lowercased copy -- but keep the ORIGINAL token for the
                # regex so it can still match the text.
                check = token.lower() if token.isupper() else token
                suggestions = textblob.Word(check).spellcheck()
                corrected_word, score = suggestions[0]
                if score < 0.5:
                    skype_usernames.append(token)

    # replace all skype usernames; escape them so characters legal in
    # usernames are not treated as regex metacharacters
    if skype_usernames:
        self.filth_cls.regex = re.compile(
            '|'.join(re.escape(username) for username in skype_usernames)
        )
    else:
        self.filth_cls.regex = None
    return super(SkypeDetector, self).iter_filth(text)
开发者ID:datascopeanalytics,项目名称:scrubadub,代码行数:46,代码来源:skype.py

示例7: keywords

# 需要导入模块: import textblob [as 别名]
# 或者: from textblob import Word [as 别名]
def keywords(self, keywords):
        """Normalize a keyword collection for filtering.

        Accepts a list of strings or a textblob WordList; anything else
        raises TypeError, and None raises ValueError. Words are
        lowercased, NLTK English stopwords are removed, words on
        keyword_filterlist are skipped, and trailing U+2019 apostrophes
        (left by textblob's contraction splitting) are stripped.

        Returns the resulting list of words.
        """

        try:
            if keywords is not None:
                if not (isinstance(keywords, list) or isinstance(keywords, WordList)):
                    raise TypeError('supplied keyword object of type that is not list or TextBlob.WordList')
                else:
                    if isinstance(keywords, list):
                        keywords = [Word(word.lower()) for word in keywords]
            else:
                raise ValueError('no input keywords supplied')
    
            # normalize case
            words = [word.lower() for word in keywords]
    
            # remove all stopwords
            stopwords = sw.words("english")
            words = [word for word in words if word not in stopwords] 
            #words = [word for word in keywords] 
            nwords = []
            for word in words:
                if word in keyword_filterlist:
                #if word.string in keyword_filterlist:
                    continue
                # NOTE(review): the for/else below ALWAYS takes the else
                # branch because the loop never breaks (the break is
                # commented out), so global_filterlist currently filters
                # nothing. This looks like a deliberately disabled filter
                # -- confirm intent before "fixing" it.
                for term in self.global_filterlist:
                    #if word.string in term:
                    if word in term:
                        pass
                        #break
                else:
                    nwords.append(word)
    
            # remove plural, reduce to stems
            # textblob breaks possessives and other contractions into 
            # two distinct words, but sometimes leaves a trailing unicode 
            # apostrophe - if so, strip it
    
            words = [word.strip(u'\u2019') for word in nwords]
    
            return words

        except Exception as e:
            # NOTE(review): re-raising unchanged adds nothing; the
            # try/except wrapper could be dropped.
            raise e 
开发者ID:ministryofpromise,项目名称:tlp,代码行数:45,代码来源:tlp_filter.py


注:本文中的textblob.Word方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。