當前位置: 首頁>>代碼示例>>Python>>正文


Python textblob.Word方法代碼示例

本文整理匯總了Python中textblob.Word方法的典型用法代碼示例。如果您正苦於以下問題:Python textblob.Word方法的具體用法?Python textblob.Word怎麼用?Python textblob.Word使用的例子?那麼, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在textblob的用法示例。


在下文中一共展示了textblob.Word方法的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: make_mine

# 需要導入模塊: import textblob [as 別名]
# 或者: from textblob import Word [as 別名]
def make_mine(yours, swap_rate):
    """Join the words of *yours* with spaces, randomly swapping in synonyms.

    Each word is selected for a swap when a uniform draw from
    ``0 .. swap_rate - 1`` comes up 0 (i.e. with probability
    ``1 / swap_rate``). A selected word is replaced by the name of a
    randomly chosen lemma taken from one of its randomly chosen synsets.
    Words without any synsets, and words that are not selected, are kept
    unchanged.
    """
    rewritten = []
    for original in yours:
        candidate = Word(original)
        replacement = original
        if random.randint(0, swap_rate - 1) == 0:
            senses = candidate.synsets
            if len(senses) > 0:
                # Pick a meaning first, then one synonym within that meaning.
                sense = senses[random.randint(0, len(senses) - 1)]
                options = sense.lemmas()
                pick = random.randint(0, len(options) - 1)
                replacement = options[pick].name()
        rewritten.append(replacement)

    return ' '.join(rewritten)
開發者ID:paubric,項目名稱:python-sirajnet,代碼行數:19,代碼來源:main.py

示例2: get_synsets

# 需要導入模塊: import textblob [as 別名]
# 或者: from textblob import Word [as 別名]
def get_synsets(text):
    """Return the ``synsets`` of the ``Word`` built from ``to_lemma(text)``."""
    lemma = to_lemma(text)
    word = Word(lemma)
    return word.synsets
開發者ID:arguman,項目名稱:arguman.org,代碼行數:4,代碼來源:utils.py

示例3: get_lemmas

# 需要導入模塊: import textblob [as 別名]
# 或者: from textblob import Word [as 別名]
def get_lemmas(text):
    """Return the distinct lemma names of *text*, each mapped through ``from_lemma``.

    *text* is converted with ``to_lemma`` and wrapped in a ``Word``; the
    lemma names of all of its synsets are merged into one set, and
    ``from_lemma`` is mapped over that set. The order of the results is
    unspecified because they come from a set.

    A word with no synsets produces an empty result.
    """
    word = Word(to_lemma(text))
    # Start the union from an empty set: reducing an empty sequence with
    # ``operator.or_`` and no initial value raises TypeError, which made
    # this function crash for any word that has no synsets.
    names = set().union(*(synset.lemma_names() for synset in word.synsets))

    return map(from_lemma, names)
開發者ID:arguman,項目名稱:arguman.org,代碼行數:8,代碼來源:utils.py

示例4: normalize

# 需要導入模塊: import textblob [as 別名]
# 或者: from textblob import Word [as 別名]
def normalize(self, word, tag="N"):
    """
    Return the lowercased lemma of ``word`` for the part of speech in ``tag``.

    The tag prefix selects the lemmatization kind: "V" -> VERB,
    "RB" -> ADV, "J" -> ADJ. Any other tag (including the default "N")
    falls back to NOUN.

    """
    # Checked in order; the first matching prefix wins.
    prefix_kinds = (("V", VERB), ("RB", ADV), ("J", ADJ))
    kind = next(
        (pos for prefix, pos in prefix_kinds if tag.startswith(prefix)),
        NOUN,
    )
    lemma = Word(word).lemmatize(kind)
    return lemma.lower()
開發者ID:orionmelt,項目名稱:sherlock,代碼行數:16,代碼來源:text_parser.py

示例5: resolution

# 需要導入模塊: import textblob [as 別名]
# 或者: from textblob import Word [as 別名]
def resolution(surfaceName):
    '''
    Normalize the surface name of an entity.

    input: a surface name of entity
    output: the "normalized" entity name, i.e. every whitespace-separated
            token lowercased and then lemmatized, re-joined with single
            spaces
    '''
    normalized = []
    for token in surfaceName.split():
        lowered = token.lower()
        normalized.append(Word(lowered).lemmatize())
    return " ".join(normalized)
開發者ID:mickeystroller,項目名稱:SetExpan,代碼行數:12,代碼來源:entityResolutionAndFilter.py

示例6: iter_filth

# 需要導入模塊: import textblob [as 別名]
# 或者: from textblob import Word [as 別名]
def iter_filth(self, text):
        """Find likely Skype usernames in ``text`` and delegate to the parent.

        Tokens within ``self.word_radius`` positions of a token containing
        'skype' are treated as usernames when they match
        ``self.filth_cls.SKYPE_USERNAME`` and look misspelled (best
        spellcheck score below 0.5). ``self.filth_cls.regex`` is set to a
        pattern matching exactly those usernames, or to ``None`` when none
        were found, and the result of the parent class's ``iter_filth`` is
        returned.

        NOTE: this assigns ``regex`` on ``self.filth_cls``, so the pattern
        is state on that object and is overwritten by every call.
        """

        # find 'skype' in the text using a customized tokenizer. this makes
        # sure that all valid skype usernames are kept as tokens and not split
        # into different words
        tokenizer = nltk.tokenize.regexp.RegexpTokenizer(
            self.filth_cls.SKYPE_TOKEN
        )
        blob = textblob.TextBlob(text, tokenizer=tokenizer)
        skype_indices, tokens = [], []
        for i, token in enumerate(blob.tokens):
            tokens.append(token)
            if 'skype' in token.lower():
                skype_indices.append(i)

        # go through the words before and after skype words to identify
        # potential skype usernames.
        skype_usernames = []
        for i in skype_indices:
            jmin = max(i-self.word_radius, 0)
            jmax = min(i+self.word_radius+1, len(tokens))
            for j in list(range(jmin, i)) + list(range(i+1, jmax)):
                token = tokens[j]
                if self.filth_cls.SKYPE_USERNAME.match(token):

                    # this token is a valid skype username. Most skype
                    # usernames appear to be misspelled words. Word.spellcheck
                    # does not handle the situation of an all caps word very
                    # well, so we spellcheck a lower-cased copy. The original
                    # token is what gets recorded: the pattern built below is
                    # case-sensitive, so storing the lower-cased copy would
                    # never match an all-caps username in the text.
                    spelling = token.lower() if token.isupper() else token
                    word = textblob.Word(spelling)
                    suggestions = word.spellcheck()
                    corrected_word, score = suggestions[0]
                    if score < 0.5:
                        skype_usernames.append(token)

        # replace all skype usernames. Each name is escaped so characters
        # such as '.' are matched literally rather than as regex operators.
        if skype_usernames:
            self.filth_cls.regex = re.compile(
                '|'.join(re.escape(name) for name in skype_usernames)
            )
        else:
            self.filth_cls.regex = None
        return super(SkypeDetector, self).iter_filth(text)
開發者ID:datascopeanalytics,項目名稱:scrubadub,代碼行數:46,代碼來源:skype.py

示例7: keywords

# 需要導入模塊: import textblob [as 別名]
# 或者: from textblob import Word [as 別名]
def keywords(self, keywords):
        """Normalize and filter a collection of keywords.

        :param keywords: a ``list`` of strings or a ``WordList``.
        :returns: a list of lower-cased keywords with English stopwords and
            members of ``keyword_filterlist`` removed, and with any leading
            or trailing U+2019 apostrophes stripped.
        :raises ValueError: if ``keywords`` is ``None``.
        :raises TypeError: if ``keywords`` is neither a ``list`` nor a
            ``WordList``.
        """
        # Validation errors propagate directly; the former
        # ``except Exception as e: raise e`` wrapper added nothing and, on
        # Python 2, replaced the original traceback.
        if keywords is None:
            raise ValueError('no input keywords supplied')
        if isinstance(keywords, list):
            keywords = [Word(word.lower()) for word in keywords]
        elif not isinstance(keywords, WordList):
            raise TypeError('supplied keyword object of type that is not list or TextBlob.WordList')

        # A set gives constant-time stopword lookups.
        stopwords = set(sw.words("english"))

        # NOTE(review): self.global_filterlist is not consulted here. The
        # previous substring check against it had its ``break`` commented
        # out, so it never excluded a word; confirm whether that filter
        # should be re-enabled.
        words = []
        for word in keywords:
            # normalize case
            word = word.lower()
            if word in stopwords or word in keyword_filterlist:
                continue
            # textblob breaks possessives and other contractions into
            # two distinct words, but sometimes leaves a trailing unicode
            # apostrophe - if so, strip it
            words.append(word.strip(u'\u2019'))

        return words
開發者ID:ministryofpromise,項目名稱:tlp,代碼行數:45,代碼來源:tlp_filter.py


注:本文中的textblob.Word方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。