This article compiles typical usage examples of Statement.search_text from the Python package chatterbot (module chatterbot.conversation). If you have been wondering what Statement.search_text is for or how it is used in practice, the curated examples below may help. Note that search_text is an attribute of Statement that holds an indexed, searchable form of the statement's text; you can also explore further usage examples of the enclosing class, chatterbot.conversation.Statement.
The following presents 2 code examples involving Statement.search_text, sorted by popularity by default.
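Before the full examples, here is a minimal sketch of the pattern both of them follow: search_text is a plain attribute on Statement, populated with the bigram pair string that a tagger derives from the statement's text. This sketch assumes chatterbot 1.1+ (where chatterbot.tagging.PosLemmaTagger provides get_bigram_pair_string) with spaCy and its English model installed; the sample sentence is made up.

from chatterbot.conversation import Statement
from chatterbot.tagging import PosLemmaTagger

# PosLemmaTagger requires spaCy plus a language model; language=None defaults to English
tagger = PosLemmaTagger(language=None)

statement = Statement(text='Hello, how are you today?')

# The searchable form: a space-separated string of part-of-speech/lemma bigram pairs
statement.search_text = tagger.get_bigram_pair_string(statement.text)
print(statement.search_text)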
Example 1: read_file

# Required import: from chatterbot.conversation import Statement [as alias]
# Or: from chatterbot.conversation.Statement import search_text [as alias]
import csv

from dateutil import parser as date_parser
from chatterbot.conversation import Statement


def read_file(files, queue, preprocessors, stemmer):
    statements_from_file = []

    for tsv_file in files:
        with open(tsv_file, 'r', encoding='utf-8') as tsv:
            reader = csv.reader(tsv, delimiter='\t')

            previous_statement_text = None
            previous_statement_search_text = ''

            for row in reader:
                if len(row) > 0:
                    statement = Statement(
                        text=row[3],
                        in_response_to=previous_statement_text,
                        conversation='training',
                        created_at=date_parser.parse(row[0]),
                        persona=row[1]
                    )

                    # Apply each preprocessor in turn before indexing
                    for preprocessor in preprocessors:
                        statement = preprocessor(statement)

                    # Index this statement's text, and point its response index
                    # at the indexed form of the previous statement
                    statement.search_text = stemmer.get_bigram_pair_string(statement.text)
                    statement.search_in_response_to = previous_statement_search_text

                    previous_statement_text = statement.text
                    previous_statement_search_text = statement.search_text

                    statements_from_file.append(statement)

    # Hand the finished batch back to the parent process
    queue.put(tuple(statements_from_file))
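Despite its parameter name, stemmer only needs to expose a get_bigram_pair_string method, which PosLemmaTagger provides. Since read_file reports its results through a queue rather than a return value, it is shaped for use as a multiprocessing worker. The following driver is a hedged sketch: the file paths, the empty preprocessor list, and the process setup are illustrative, not from the original.

import multiprocessing

from chatterbot.tagging import PosLemmaTagger

if __name__ == '__main__':
    queue = multiprocessing.Queue()
    tagger = PosLemmaTagger(language=None)

    # Hypothetical corpus files; a real run would glob them from disk
    worker = multiprocessing.Process(
        target=read_file,
        args=(['dialogs/1.tsv', 'dialogs/2.tsv'], queue, [], tagger),
    )
    worker.start()

    # Drain the queue before joining so the worker can flush its buffer
    statements = queue.get()
    worker.join()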
Example 2: train

# Required import: from chatterbot.conversation import Statement [as alias]
# Or: from chatterbot.conversation.Statement import search_text [as alias]
import csv
import glob
import os
import time

from dateutil import parser as date_parser
from chatterbot.conversation import Statement
from chatterbot.tagging import PosLemmaTagger


def train(self):
    # Method of a corpus trainer class: self.download, self.extract,
    # self.is_extracted, and the path attributes are defined on the trainer
    tagger = PosLemmaTagger(language=self.chatbot.storage.tagger.language)

    # Download and extract the Ubuntu dialog corpus if needed
    corpus_download_path = self.download(self.data_download_url)

    # Extract if the directory does not already exist
    if not self.is_extracted(self.extracted_data_directory):
        self.extract(corpus_download_path)

    # Without recursive=True, each '**' in glob matches exactly one directory level
    extracted_corpus_path = os.path.join(
        self.extracted_data_directory,
        '**', '**', '*.tsv'
    )

    def chunks(items, items_per_chunk):
        for start_index in range(0, len(items), items_per_chunk):
            end_index = start_index + items_per_chunk
            yield items[start_index:end_index]

    file_list = glob.glob(extracted_corpus_path)
    file_groups = tuple(chunks(file_list, 10000))

    start_time = time.time()

    for tsv_files in file_groups:
        statements_from_file = []

        for tsv_file in tsv_files:
            with open(tsv_file, 'r', encoding='utf-8') as tsv:
                reader = csv.reader(tsv, delimiter='\t')

                previous_statement_text = None
                previous_statement_search_text = ''

                for row in reader:
                    if len(row) > 0:
                        statement = Statement(
                            text=row[3],
                            in_response_to=previous_statement_text,
                            conversation='training',
                            created_at=date_parser.parse(row[0]),
                            persona=row[1]
                        )

                        for preprocessor in self.chatbot.preprocessors:
                            statement = preprocessor(statement)

                        # Index the text and link it to the previous turn's index
                        statement.search_text = tagger.get_bigram_pair_string(statement.text)
                        statement.search_in_response_to = previous_statement_search_text

                        previous_statement_text = statement.text
                        previous_statement_search_text = statement.search_text

                        statements_from_file.append(statement)

        # Persist one group of files per storage call to bound memory use
        self.chatbot.storage.create_many(statements_from_file)

    print('Training took', time.time() - start_time, 'seconds.')
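Both examples thread conversation context through two rolling variables: each statement's search_in_response_to is set to the search_text computed for the previous row, so a stored response can later be matched against the indexed form of the prompt that preceded it. Here is a toy illustration of that chaining, with a made-up stand-in indexer so it runs without chatterbot installed:

# Stand-in for get_bigram_pair_string, purely for illustration
def index_text(text):
    return text.lower()

rows = ['Hi there', 'How can I help?', 'My sound is broken']
previous_search_text = ''

for text in rows:
    search_text = index_text(text)
    print(repr(text), 'responds to index', repr(previous_search_text))
    previous_search_text = search_text

# 'Hi there' responds to index ''
# 'How can I help?' responds to index 'hi there'
# 'My sound is broken' responds to index 'how can i help?'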