

Python TextBlob.lower Method Code Examples

This article collects typical usage examples of the textblob.TextBlob.lower method in Python. If you are wondering what TextBlob.lower does, how to call it, or how it is used in real code, the curated examples below should help. You can also explore further usage examples of textblob.TextBlob, the class the method belongs to.


The 13 TextBlob.lower code examples below are ordered by popularity by default. You can upvote the ones you like or find useful; your votes help the system recommend better Python code examples.
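
As a quick orientation before the examples: TextBlob.lower mirrors str.lower but returns a new TextBlob rather than a plain string, so NLP properties such as words and noun_phrases remain available on the result. A minimal sketch:

from textblob import TextBlob

blob = TextBlob("Natural Language Processing With TextBlob")
lowered = blob.lower()        # returns a TextBlob, not a plain str
print(lowered)                # natural language processing with textblob
print(lowered.noun_phrases)   # NLP properties still work on the result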

Example 1: extract

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
def extract(ngrams, dataset, doc_id):
    # extract keywords ('field' and kw_set_to_list() are presumably defined
    # at module level in the source project)
    print('Extracting keywords')
    for i, ngram in enumerate(ngrams):
        doc = doc_id[i]

        if field not in dataset[doc]:
            dataset[doc][field] = set()

            if doc > 0 and doc % 1000 == 0:
                print('\t', doc)

        for kw in filter(lambda k: '_' in k, ngram):
            keyword = kw.replace('_', ' ')

            kw_tb = TextBlob(keyword)

            # filter out punctuation etc. (require at least two non-punctuation words)
            if len(kw_tb.words) < 2:
                continue

            # add keywords which are all proper nouns
            distinct_tags = set(t[1] for t in kw_tb.tags)
            if distinct_tags <= {'NNP', 'NNPS'}:
                dataset[doc][field].add(kw_tb.lower())
                continue

            # add noun phrases
            for np in kw_tb.lower().noun_phrases:
                dataset[doc][field].add(np)

    return kw_set_to_list(dataset)
Author: MattL920 | Project: nasaMining | Lines: 34 | Source: extract.py
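
The core test in this example can be tried in isolation: keep a keyword whole when every tag is a proper noun, otherwise fall back to its noun phrases. A minimal sketch with the dataset/field plumbing from the project omitted:

from textblob import TextBlob

for keyword in ('New York', 'solar wind data'):
    kw_tb = TextBlob(keyword)
    distinct_tags = set(t[1] for t in kw_tb.tags)
    if distinct_tags <= {'NNP', 'NNPS'}:   # all tags are proper nouns
        print('kept whole:', kw_tb.lower())
    else:
        print('noun phrases:', kw_tb.lower().noun_phrases)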

Example 2: __init__

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
class SexxiBot:
    """ Main ChatBot class to take in user input and return an appropriate response.

    Contains methods: fix_typos, to correct any user's typos;
    help_check, to check if the user has asked for 'help' (a list of possible commands);
    check_phrase_similarity, to compare user inputs to keywords to generate basic responses;
    create_response, to generate a new response based on the users input.
    """

    def __init__(self):
        self.user_input = str()
        self.input_len = int()
        self.response = str()

    def fix_typos(self):
        # Fix lazy user typos or slang before further processing
        tagged = TextBlob(self.user_input.lower()).tags
        words = [slang_typo_dict.get(word, word) for word, _ in tagged]
        self.user_input = ' '.join(words)
        return False  # Returns False to move on to help_check

    def help_check(self):
        if self.user_input.lower() == "help":
            self.response = responses.HELP
            return True
        return False  # User didn't ask for help, move on to check_phrase_similarity

    def check_phrase_similarity(self):
        self.user_input = TextBlob(self.user_input.lower()).tags
        self.input_len = len(self.user_input)
        for phrase_type in PHRASE_TYPES:
            for phrase in getattr(keywords, phrase_type):
                score = float()
                for word in self.user_input:
                    for n in phrase:
                        if word[0] not in unimportant_words and n not in unimportant_words:
                            score += liquidmetal.score(n, word[0]) / self.input_len
                if score >= 0.7:  # Could be increased/ decreased through testing to find more optimal value
                    self.response = random.choice(getattr(responses, phrase_type))
                    return True
        return False

    def create_response(self):  # NOT WORKING YET!
        # Craft a response based on the user's message
        noun, pronoun, verb, adj, prep, text_len = check_pos_tags.pos_tags(self.user_input)
        self.response = format_response.craft_response(noun, pronoun, verb, adj, prep, text_len)
        print(self.response)
        return False if self.response == ' ' else True
Author: sexxis | Project: goose | Lines: 56 | Source: response.py
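
check_phrase_similarity leans on the liquidmetal fuzzy-matching library that the project imports; a quick sketch of the kind of score it produces (exact values depend on the library version):

import liquidmetal

# liquidmetal.score(string, abbreviation) returns a float between 0 and 1
print(liquidmetal.score("hello", "helo"))  # high: the letters match in order
print(liquidmetal.score("hello", "xyz"))   # 0.0: no match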

Example 3: fit

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
 def fit(self, X, y=None):
     words = []
     for x in X:
         x = TextBlob(x.lower())
         words += [word.lemmatize() for word in x.words]
     if self.num_words:
         words = Counter(words)
         self._vocab = [word for word, _ in words.most_common(self.num_words)]
     else:
         self._vocab = list(set(words))
     return self
Author: andrewmpierce | Project: polyglot | Lines: 13 | Source: polyglot_lib.py

Example 4: transform

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
 def transform(self, X):
     vectors = []
     for x in X:
         x = TextBlob(x.lower())
         word_count = Counter(x.words)
         vector = [0] * len(self._vocab)
         for word, count in word_count.items():
             try:
                 idx = self._vocab.index(word)
                 vector[idx] = count
             except ValueError:
                 pass
         vectors.append(vector)
     return vectors
Author: andrewmpierce | Project: polyglot | Lines: 16 | Source: polyglot_lib.py
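
Examples 3 and 4 are the two halves of a small bag-of-words vectorizer. A runnable sketch that puts them together; the BlobVectorizer name and constructor are assumptions, while the method bodies mirror the excerpts above:

from collections import Counter
from textblob import TextBlob

class BlobVectorizer(object):
    """Hypothetical class wrapping the fit/transform excerpts above."""

    def __init__(self, num_words=None):
        self.num_words = num_words
        self._vocab = []

    def fit(self, X, y=None):
        # build a vocabulary of lemmatized, lowercased words
        words = []
        for x in X:
            blob = TextBlob(x.lower())
            words += [word.lemmatize() for word in blob.words]
        if self.num_words:
            counts = Counter(words)
            self._vocab = [word for word, _ in counts.most_common(self.num_words)]
        else:
            self._vocab = list(set(words))
        return self

    def transform(self, X):
        # count occurrences of each vocabulary word per document
        vectors = []
        for x in X:
            word_count = Counter(TextBlob(x.lower()).words)
            vector = [0] * len(self._vocab)
            for word, count in word_count.items():
                if word in self._vocab:
                    vector[self._vocab.index(word)] = count
            vectors.append(vector)
        return vectors

docs = ["the cat sat", "the cat ran fast"]
print(BlobVectorizer().fit(docs).transform(docs))
# note: fit() stores lemmas, so transform() matches exactly only for words
# that are already in lemma form (e.g. "cat", not "cats")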

Example 5: textblob_ngrams

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
def textblob_ngrams(sentence, n=3, remove_stopwords=False, all_lower_case=False):
    ''' Takes in a sentence and returns its word n-grams as features.

        @Arguments:
            sentence -- sentence to tokenize, type(sentence) = String

            n (optional) -- length of each n-gram; 3 by default

            remove_stopwords (optional) -- if True, stopwords in the sentence
                are not included as features. Currently only for English
                text. False by default.

            all_lower_case (optional) -- if True, the sentence is lowercased
                before feature extraction. False by default.

        @Return:
            Dict of features of the following form:
                {ngram_1: True, ngram_2: True, ... , ngram_n: True}
    '''

    sentence = TextBlob(sentence)
    features = dict()
    clean_string = ''

    # Changes all word features to lower case if true
    if all_lower_case:
        sentence = sentence.lower()

    # Removes stopwords
    for word in sentence.words:
        # Skips the word if it is in nltk.corpus.stopwords.words('english')
        if remove_stopwords and word.string in stopwords:
            continue
        clean_string += word + ' '

    # each feature is a space-joined word n-gram of length n
    for ngram in TextBlob(clean_string).ngrams(n=n):
        features[' '.join(ngram)] = True
    return features
Author: AimVoma | Project: sunny-side-up | Lines: 44 | Source: textblob_ngrams.py
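
With the feature loop using TextBlob.ngrams as above, a hypothetical call looks like this:

print(textblob_ngrams("The quick brown fox jumps"))
# {'The quick brown': True, 'quick brown fox': True, 'brown fox jumps': True}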

Example 6: hello_monkey

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
def hello_monkey():
    """Respond to incoming calls with a simple text message."""
    resp = twiml.Response()
    message=""
    name=""

    fromNumber = request.values.get('From',None)
    myNumber = request.values.get('To',None)
    
    body = request.values.get('Body')
    body = body.decode("ascii", errors="ignore")
    blob = TextBlob(body)


    NLPObject = NLPStuff(resp, blob, message)

    counter = storeCookies(blob)
    message += salutationToCaller(message, fromNumber, myNumber, counter)

    if "help" in blob.lower():
        message = "This is an information HELP message please tell me what to do"

    return setMessage(message, name, myNumber, counter, body, blob, resp)
Author: sugeerth | Project: Podolski | Lines: 25 | Source: nlpFun.py

Example 7: feature_extractor

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
def feature_extractor(text):
    if not isinstance(text, TextBlob):
        text = TextBlob(text.lower())

    return {
        'has_rumor': 'rumor' in text.words,
        'has_gosip': 'gosip' in text.words,
        'has_urbanesia': 'urbanesia' in text.words,
        'has_batista': 'batista' in text.words,
        'has_harahap': 'harahap' in text.words,
        'has_pemasaran': 'pemasaran' in text.words,
        'has_saham': 'saham' in text.words,
        'has_hackathon': 'hackathon' in text.words,
        'has_ipo': 'ipo' in text.words,
        'has_akuisisi': 'akuisisi' in text.words,
        'has_startup': 'startup' in text.words,
        'has_android': 'android' in text.words,
        'has_aplikasi': 'aplikasi' in text.words,
        'has_payment': 'payment' in text.words,
        'has_pembayaran': 'pembayaran' in text.words,
        'has_api': 'api' in text.words,
        'has_kompetisi': 'kompetisi' in text.words,
        'has_ide': 'ide' in text.words,
        'has_permainan': 'permainan' in text.words,
        'has_game': 'game' in text.words,
        'has_fundraising': 'fundraising' in text.words,
        'has_askds': '[[email protected]]' in text.words,
        'has_investasi': 'investasi' in text.words,
        'has_musik': 'musik' in text.words,
        'has_lagu': 'lagu' in text.words,
        'has_bhinneka': 'bhinneka' in text.words,
        'has_marketplace': 'marketplace' in text.words,
        'has_mobile': 'mobile' in text.words,
        'has_cto': 'cto' in text.words,
        'has_traffic': 'traffic' in text.words,
        'starts_with_[': text[0] == '['
    }
Author: tistaharahap | Project: ds-for-me | Lines: 39 | Source: extractor.py
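
A hypothetical call on an Indonesian-style headline, just to show the shape of the output:

feats = feature_extractor("Rumor akuisisi startup lokal")
print(feats['has_rumor'], feats['has_startup'], feats['has_game'])
# True True False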

Example 8: filter

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
            # (excerpt: the enclosing function and loop are truncated in the
            # original listing; compare Example 1 above for the full context)
            if doc > 0 and doc % 1000 == 0:
                print('\t', doc)

        for kw in filter(lambda k: '_' in k, ngram):
            keyword = kw.replace('_', ' ')

            kw_tb = TextBlob(keyword)

            # filter out punctuation etc. (require at least two non-punctuation words)
            if len(kw_tb.words) < 2:
                continue

            # add keywords which are all proper nouns
            distinct_tags = set(t[1] for t in kw_tb.tags)
            if distinct_tags <= {'NNP', 'NNPS'}:
                dataset[doc][field].add(kw_tb.lower())
                continue

            # add noun phrases
            for np in kw_tb.lower().noun_phrases:
                dataset[doc][field].add(np)

    # convert set into list for json serialization
    for d in dataset:
        d[field] = list(d[field])

        # strip trailing " 's" from noun phrases
        for i, np in enumerate(d[field]):
            if np.endswith(" 's"):
                d[field][i] = np[:-3]
Author: MattL920 | Project: nasaMining | Lines: 32 | Source: commerce_ngrams_np.py

Example 9: TextBlob

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]

# save it as a TextBlob object
review = TextBlob(yelp_best_worst.text[0])


# list the words
review.words


# list the sentences
review.sentences


# some string methods are available
review.lower()


# ## Part 6: Stemming and Lemmatization

# **Stemming:**
# 
# - **What:** Reduce a word to its base/stem/root form
# - **Why:** Often makes sense to treat related words the same way
# - **Notes:**
#     - Uses a "simple" and fast rule-based approach
#     - Stemmed words are usually not shown to users (used for analysis/indexing)
#     - Some search engines treat words with the same stem as synonyms

# initialize stemmer
stemmer = SnowballStemmer('english')
Author: AhlamMD | Project: DAT8 | Lines: 32 | Source: 15_natural_language_processing_nb.py
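
The notebook excerpt stops right after the stemmer is initialized. A minimal sketch of the step that typically follows, assuming the review blob and the SnowballStemmer import from the notebook are in scope:

# stem each word of the review with the Snowball stemmer
print([stemmer.stem(word) for word in review.words])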

Example 10: TextBlob

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
    now = datetime.datetime.now()

    normSource = normmd.tables.get('Source')
    sources = [dict(row) for row in normcon.execute(select([
            normSource.c.Id,
            normSource.c.Name,
            normSource.c.Content
        ]).where(
            normSource.c.Content.isnot(None)
        ))]
    lemmafrequency = {}
    for source in sources:
        if args.verbosity > 1:
            print "Reading source: " + source['Name']
        content = TextBlob(source['Content'])
        noun_phrases = content.lower().noun_phrases
        lemmas = noun_phrases.lemmatize()
        for lemma in lemmas:
            lemmafrequency[lemma] = lemmafrequency.get(lemma, 0) + 1

        if args.limit > 0:
            args.limit -= 1
            if args.limit == 0:
                break

    normNode = normmd.tables.get('Node')
    normTagging = normmd.tables.get('Tagging')
    nounPhraseNode = normcon.execute(select([
Author: jschultz | Project: nvivotools | Lines: 33 | Source: textblobExampleCode.py
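
Isolated from the NVivo database plumbing, the TextBlob part of this excerpt reduces to counting lemmatized noun phrases. A minimal sketch on an inline string:

from textblob import TextBlob

lemmafrequency = {}
content = TextBlob("Solar panels and more solar panels were installed.")
for lemma in content.lower().noun_phrases.lemmatize():
    lemmafrequency[lemma] = lemmafrequency.get(lemma, 0) + 1
print(lemmafrequency)  # e.g. {'solar panels': 2}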

Example 11: tweet_to_feat

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
def tweet_to_feat(tweet, features):
    tb = TextBlob(tweet)
    #lang = tb.detect_language()
    words = [word.lemma for word in tb.lower().tokenize()]
    return [words.count(feature) for feature in features]
Author: c0g | Project: TweetClassifier | Lines: 7 | Source: util.py
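
A hypothetical call, assuming the function above is in scope and a small hand-picked feature list:

features = ['cat', 'dog']  # hypothetical feature vocabulary
print(tweet_to_feat("Dogs chasing cats. Dogs everywhere!", features))
# counts of each lemmatized feature word, e.g. [1, 2]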

Example 12: find_tweet

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]
def find_tweet(tweet, place_list):
    t = TextBlob(tweet)
    tweet_loc = []
    for word in t.lower().tokenize():
        if word in place_list:
            tweet_loc.append(word)
    return tweet_loc
Author: c0g | Project: TweetClassifier | Lines: 8 | Source: util.py

Example 13: WordNetLemmatizer

# Required import: from textblob import TextBlob [as alias]
# Or: from textblob.TextBlob import lower [as alias]

from __future__ import print_function

from textblob import TextBlob
from nltk.stem.wordnet import WordNetLemmatizer
import sys

lmtzr = WordNetLemmatizer()

for line in sys.stdin.readlines():
    blob = TextBlob(line.strip())

    sys.stdout.write("Detected language: {}\n".format(blob.detect_language()))
    sys.stdout.write("This message had {} words.\n".format(len(blob.words)))
    sys.stdout.write("Corrected sentence\n{}\n".format(blob.lower().correct()))
    proper_nouns = [tag[0] for tag in blob.tags if tag[1] == 'NNP']
    verbs = [lmtzr.lemmatize(tag[0], 'v') for tag in blob.tags if 'V' in tag[1]]
    sys.stdout.write("I found these proper nouns: {}\n".format(proper_nouns))
    sys.stdout.write("I found these verbs: {}\n".format(verbs))

    sentiment = blob.sentiment
    sys.stdout.write("Sentiment for that message: {}\n".format(sentiment))
    if sentiment.polarity > 0 and sentiment.subjectivity > 0.7:
        sys.stdout.write("That sounds amazing!\n")
    elif sentiment.polarity < 0 and sentiment.subjectivity > 0.7:
        sys.stdout.write("It'll get better.\n")
    else:
        sys.stdout.write("Meh.\n")

    sys.stdout.flush()
Author: MathYourLife | Project: emoti-reader | Lines: 32 | Source: play.py
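
To try the script, pipe a line of text in on stdin, for example: echo "I love this library" | python play.py. Note that blob.detect_language() calls an online translation service, so it needs network access and is deprecated in recent TextBlob releases.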


Note: The textblob.TextBlob.lower method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their developers; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.