本文整理汇总了 Python 中 emoji.UNICODE_EMOJI 属性的典型用法代码示例。如果您正苦于以下问题：Python emoji.UNICODE_EMOJI 属性的具体用法？Python emoji.UNICODE_EMOJI 怎么用？Python emoji.UNICODE_EMOJI 使用的例子？那么恭喜您，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块 emoji 的用法示例。
在下文中一共展示了emoji.UNICODE_EMOJI属性的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _script_category
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def _script_category(char):
    """Classify a single character by its Unicode script.

    Returns ``"Emoji"`` when the character is a key of
    ``emoji.UNICODE_EMOJI``, ``"Han"`` for a colon, ``"default"`` when the
    script reported by ``unicodedata2.script_cat`` is Latin or Common, and
    that script name otherwise (e.g. Cyrillic, Greek, Han, Hiragana).
    """
    if char in emoji.UNICODE_EMOJI:
        return "Emoji"

    # The script lookup runs before the colon special case, so any error it
    # raises for an unsupported character still surfaces.
    script = unicodedata2.script_cat(char)[0]
    if char == u':':
        return 'Han'
    return 'default' if script in ('Latin', 'Common') else script
示例2: on_message
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
async def on_message(self, message):
    """React to a message with every emoji found in its text.

    The message content is split on single spaces. Tokens of the form
    ``<:name:id>`` are reduced to ``:name:id``; tokens that are keys of
    ``UNICODE_EMOJI`` are used as they are. Each collected emoji is then
    passed to ``self.bot.add_reaction``.

    Nothing happens when the message's server has no entry, or a falsy
    entry, in ``self.settings``.

    Args:
        message: The incoming message; ``server.id`` and ``content`` are read.
    """
    server_id = message.server.id
    if server_id not in self.settings or not self.settings[server_id]:
        return

    reactions = []
    for word in message.content.split(" "):
        if word.startswith("<:") and word.endswith(">"):
            # "<:name:id>" -> ":name:id"
            reactions.append(word.rpartition(">")[0].partition("<")[2])
        if word in UNICODE_EMOJI:
            reactions.append(word)

    for reaction in reactions:
        try:
            await self.bot.add_reaction(message, reaction)
        except Exception:
            # Best effort: one failed reaction must not stop the others.
            # ``Exception`` rather than a bare ``except`` so task
            # cancellation and KeyboardInterrupt still propagate.
            continue
示例3: screening
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def screening(text):
    """Strip tweet noise from *text* and normalise what is left.

    Steps, in order: drop a leading "RT " marker, remove @mentions, remove
    newlines and URLs, remove non-BMP characters and emoji, collapse runs
    of "。", cut everything from the first "#" onwards, normalise with
    ``neologdn.normalize(..., repeat=4)`` and finally delete every
    character outside the allowed character class.

    Args:
        text: Raw tweet text.

    Returns:
        The cleaned string (possibly empty).
    """
    s = text

    # Drop only the retweet prefix. ``str.replace`` would also delete every
    # later "RT " in the text (for example inside "SMART people").
    if s[0:3] == "RT ":
        s = s[3:]

    # Remove each mention up to and including the following space, or up to
    # the end of the string when no space follows it.
    index_at = s.find("@")
    while index_at != -1:
        index_sp = s.find(" ", index_at)
        if index_sp != -1:
            s = s.replace(s[index_at:index_sp + 1], "")
        else:
            s = s.replace(s[index_at:], "")
        index_at = s.find("@")

    s = s.replace('\n', '')
    s = re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", s)

    # Remove everything outside the Basic Multilingual Plane. A character
    # class avoids building a translation table with roughly one million
    # entries on every call.
    s = re.sub('[\U00010000-\U0010ffff]', '', s)
    s = ''.join(c for c in s if c not in emoji.UNICODE_EMOJI)
    s = re.sub('。+', '。', s)

    # Keep only the text that precedes the first hashtag.
    s = s.partition('#')[0]

    s = neologdn.normalize(s, repeat=4)
    # NOTE(review): the class lists 1-9, so the digit 0 is removed as well;
    # confirm that this is intended.
    s = re.sub(r'[^、。!?ー〜1-9a-zA-Zぁ-んァ-ヶ亜-腕纊-黑一-鿕]', '', s)
    return s
示例4: separate_emojis_and_text
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def separate_emojis_and_text(text):
    """Split *text* into its emoji characters and all remaining characters.

    Returns:
        A ``(emojis, rest)`` tuple of strings; each keeps the characters in
        their original order.
    """
    # Index 0 collects ordinary characters, index 1 collects emoji; the
    # membership test result (False/True) selects the bucket.
    buckets = ([], [])
    for ch in text:
        buckets[ch in emoji.UNICODE_EMOJI].append(ch)
    plain_chars, emoji_chars = buckets
    return ''.join(emoji_chars), ''.join(plain_chars)
示例5: read_english
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def read_english(path="english_words.txt", add_emojis=True):
    """Load the word set used for filtering.

    Every non-empty line of the UTF-8 file at *path* is stripped and
    lower-cased before being added to the set.

    Args:
        path: Location of the word list, one entry per line.
        add_emojis: When true, every key of ``UNICODE_EMOJI`` is added too.

    Returns:
        set of words (and optionally emoji).
    """
    with codecs.open(path, "r", "utf-8") as handle:
        cleaned = (raw.strip().lower().replace('\n', '') for raw in handle)
        words = {entry for entry in cleaned if entry}

    if add_emojis:
        words.update(UNICODE_EMOJI)
    return words
示例6: convert_emoji
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
async def convert_emoji(self, name: str) -> str:
    """Resolve *name* to an emoji.

    A *name* that is a key of ``UNICODE_EMOJI`` is returned unchanged.
    Anything else has surrounding colons stripped and is handed to
    ``commands.EmojiConverter`` with a minimal context built from this bot
    and ``self.modmail_guild``.

    Args:
        name: A unicode emoji or the name of a custom emoji.

    Returns:
        *name* itself, or whatever the converter produced for it.

    Raises:
        commands.BadArgument: The converter rejected *name*; the failure is
            logged as a warning and re-raised.
    """
    ctx = SimpleNamespace(bot=self, guild=self.modmail_guild)
    converter = commands.EmojiConverter()

    if name in UNICODE_EMOJI:
        return name

    try:
        return await converter.convert(ctx, name.strip(":"))
    except commands.BadArgument as e:
        # The format string has two placeholders, so both the offending
        # name and the error must be supplied or the record fails to format.
        logger.warning("%s is not a valid emoji. %s.", name, e)
        raise
示例7: get_emoji_countered
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def get_emoji_countered(msgs):
    """Count every emoji character across the given messages.

    Args:
        msgs: Iterable of objects exposing a ``text`` attribute.

    Returns:
        collections.Counter mapping each emoji character to the number of
        times it occurs.
    """
    return Counter(
        ch
        for msg in msgs
        for ch in msg.text
        if ch in emoji.UNICODE_EMOJI
    )
示例8: get_data_for_entity
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def get_data_for_entity(self, entity_data):
    """Enrich *entity_data* with aliases and description sentences.

    The record is looked up in ``self.data`` by
    ``entity_data["wikipedia_title"]``.

    * ``entity_data["aliases"]`` becomes the record's wikidata aliases with
      every entry that is a key of ``emoji.UNICODE_EMOJI`` removed, or
      ``None`` when the record has no ``wikidata_info``, no ``aliases``
      key, or a ``None`` alias list.
    * ``entity_data["sentences"]`` becomes a list of ten strings taken from
      ``sent_desc_1`` .. ``sent_desc_10``; missing keys yield ``""``.

    Returns:
        The same *entity_data* dict, modified in place.
    """
    record = self.data[entity_data["wikipedia_title"]]

    aliases = None
    if "wikidata_info" in record:
        info = record["wikidata_info"]
        if "aliases" in info and info["aliases"] is not None:
            aliases = [
                candidate
                for candidate in info["aliases"]
                if candidate not in emoji.UNICODE_EMOJI
            ]
    entity_data["aliases"] = aliases

    entity_data["sentences"] = [
        record.get("sent_desc_{}".format(number), "")
        for number in range(1, 11)
    ]
    return entity_data
示例9: str_has_emoji
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def str_has_emoji(s: str) -> bool:
    """Return True when at least one character of *s* is a key of ``emoji.UNICODE_EMOJI``."""
    return any(ch in emoji.UNICODE_EMOJI for ch in s)
示例10: get_pragmatic_features
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def get_pragmatic_features(tweet_tokens):
    """Compute count-based pragmatic features for one tokenised tweet.

    Args:
        tweet_tokens: Sequence of string tokens.

    Returns:
        dict with the keys ``tw_len_ch`` (total characters), ``tw_len_tok``
        (number of tokens), ``avg_len``, ``capitalized``, ``user_specific``
        and ``intensifiers``.
    """
    n_chars = n_capitalized = n_user_specific = n_intensifiers = 0

    for token in tweet_tokens:
        lowered = token.lower()
        n_chars += len(token)

        # Fully upper-case tokens longer than one character.
        if len(token) > 1 and token.isupper():
            n_capitalized += 1

        # Mentions, hashtags, laughter, punctuation signs and emoji each
        # add one to the same counter.
        n_user_specific += sum((
            token.startswith("@"),
            token.startswith("#"),
            bool(lowered.startswith("haha") or re.match('l(o)+l$', lowered)),
            token in helper.punctuation,
            token in emoji.UNICODE_EMOJI,
        ))

        # Strong negations, strong affirmatives, interjections and
        # intensifiers each add one to the same counter.
        n_intensifiers += sum((
            token in helper.strong_negations,
            token in helper.strong_affirmatives,
            token in helper.interjections,
            token in helper.intensifiers,
        ))

    n_tokens = len(tweet_tokens)
    # NOTE(review): this is tokens per character, i.e. the inverse of an
    # average token length; confirm which ratio the consumers expect.
    avg_len = float(n_tokens) / max(1.0, float(n_chars))

    return {
        'tw_len_ch': n_chars,
        'tw_len_tok': n_tokens,
        'avg_len': avg_len,
        'capitalized': n_capitalized,
        'user_specific': n_user_specific,
        'intensifiers': n_intensifiers,
    }
# Extract the n-grams (specified as a list n = [1, 2, 3, ...])
# e.g if n = [1,2,3] then n-gram_features is a dictionary of all uni-grams, bi-grams and tri-grams
# This n-gram extractor works for any kind of tokens i.e both words and pos tags
示例11: extract_emojis
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def extract_emojis(tweets):
    """Collect the emoji of each tweet.

    Args:
        tweets: Iterable of tweets, each an iterable of word strings.

    Returns:
        list with one string per tweet: that tweet's emoji characters, in
        order of appearance, joined by single spaces ("" when it has none).
    """
    return [
        ' '.join(
            ch
            for word in tweet
            for ch in word
            if ch in emoji.UNICODE_EMOJI
        )
        for tweet in tweets
    ]
# Replace a contraction (coming from possessives, verbs, emphasis or just bad language) by its longer form
示例12: process_emojis
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def process_emojis(word, emoji_dict, translate_emojis=True):
    """Separate the emoji inside *word* from the surrounding text.

    A character counts as an emoji when it is a key of *emoji_dict* or of
    ``emoji.UNICODE_EMOJI``. Runs of ordinary characters are kept together
    as one piece.

    Args:
        word: The token to process.
        emoji_dict: Maps an emoji to a sequence whose element at index 3 is
            its textual description.
        translate_emojis: When true, an emoji found in *emoji_dict* is
            replaced by the lower-cased words of its description and any
            other emoji is dropped. When false, the emoji character itself
            is kept as a separate piece.

    Returns:
        The pieces joined by single spaces, or *word* unchanged when no
        piece was produced.
    """
    pieces = []
    pending = ""
    for ch in word:
        if not (ch in emoji_dict or ch in emoji.UNICODE_EMOJI):
            pending += ch
            continue

        # An emoji ends the current run of ordinary characters.
        if pending:
            pieces.append(pending)
            pending = ""

        if not translate_emojis:
            pieces.extend(ch)
        elif ch in emoji_dict:
            pieces.extend(emoji_dict[ch][3].lower().split())

    if pending:
        pieces.append(pending)
    return ' '.join(pieces) if pieces else word
# TODO: Numerals - sarcasm heavily relies on them so find a way to extract meaning behind numbers
# Attempt to clean each tweet and make it as grammatical as possible
示例13: clean_tweet
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def clean_tweet(tweet, word_list, split_hashtag_method, replace_user_mentions=True,
                remove_hashtags=False, remove_emojis=False, all_to_lower_case=False):
    """Tokenise *tweet* on whitespace and drop or rewrite noisy tokens.

    Args:
        tweet: Raw tweet text.
        word_list: Passed through to *split_hashtag_method*.
        split_hashtag_method: Callable ``(hashtag_without_hash, word_list)``
            returning the words a hashtag is split into.
        replace_user_mentions: Replace every token starting with "@" by
            "@user".
        remove_hashtags: Drop hashtags instead of splitting them.
        remove_emojis: Drop tokens that are keys of ``emoji.UNICODE_EMOJI``.
        all_to_lower_case: Lower-case every kept token.

    Returns:
        The kept tokens joined by single spaces.
    """
    # Surround each punctuation sign with spaces so that splitting keeps it
    # as a token of its own.
    tweet = re.sub('([!?*&%"~`^+{}])', r' \1 ', tweet)
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    tweet = re.sub(r'\s{2,}', ' ', tweet)

    valid_tokens = []
    for word in tweet.split():
        # Never include #sarca* hashtags
        if word.lower().startswith('#sarca'):
            continue
        # Never include URLs
        if 'http' in word:
            continue
        # Replace specific user mentions with a general user name
        if replace_user_mentions and word.startswith('@'):
            word = '@user'
        # Split or remove hashtags
        if word.startswith('#'):
            if remove_hashtags:
                continue
            splits = split_hashtag_method(word[1:], word_list)
            if all_to_lower_case:
                splits = [split.lower() for split in splits]
            valid_tokens.extend(splits)
            continue
        if remove_emojis and word in emoji.UNICODE_EMOJI:
            continue
        if all_to_lower_case:
            word = word.lower()
        valid_tokens.append(word)
    return ' '.join(valid_tokens)
示例14: check_if_emoji
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def check_if_emoji(word, emoji_dict):
    """Return True when any character of *word* is a key of *emoji_dict* or of ``emoji.UNICODE_EMOJI``."""
    return any(
        ch in emoji_dict or ch in emoji.UNICODE_EMOJI
        for ch in word
    )
# A strict clean of the twitter data - removing emojis, hashtags, URLs, user mentions
示例15: extract_emojis
# 需要导入模块: import emoji [as 别名]
# 或者: from emoji import UNICODE_EMOJI [as 别名]
def extract_emojis(self, string=""):
    """Return the characters of *string* that are keys of ``emoji.UNICODE_EMOJI``, in order, as a list."""
    return [ch for ch in string if ch in emoji.UNICODE_EMOJI]