This article collects typical usage examples of the Python method helper.Helper.file_exists. If you are asking yourself what Helper.file_exists does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also look further into the enclosing class helper.Helper for more usage examples.
Two code examples of the Helper.file_exists method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
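The helper module itself is project-specific and is not shown on this page. As orientation before the examples, here is a minimal sketch of what Helper.file_exists might look like; the directory attributes and the os.path-based check are assumptions inferred from how the examples below call the method, not the project's actual code.

import os

class Helper:
    # Hypothetical directory attributes; the examples below reference
    # attributes with these names, but their real values are unknown.
    directory_original_books = "books/original/"
    stemming_directory = "books/stemmed/"
    lemmatization_directory = "books/lemmatized/"

    def file_exists(self, directory, filename):
        # Assumed behavior: report whether a regular file named
        # "filename" exists inside "directory".
        return os.path.isfile(os.path.join(directory, filename))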
Example 1: __init__
# Required imports: from helper import Helper [as alias]
# Or: from helper.Helper import file_exists [as alias]
import time
import urllib2  # Python 2 standard library; these examples target Python 2

from helper import Helper

class BookFetcher:
    def __init__(self, skip_existing_files=True, sleep_timer=10):
        self.Helper = Helper()
        self.url = "https://www.gutenberg.org/files/"
        self.skip_existing_files = skip_existing_files
        self.sleep_timer = sleep_timer
        self.ignored_books = [69]

    def run(self, from_id=1, to_id=100):
        try:
            for index in range(from_id, to_id):
                filename = str(index) + ".txt"
                # skip books that were already downloaded in a previous run
                if self.skip_existing_files and self.Helper.file_exists(self.Helper.directory_original_books, filename):
                    print "File " + filename + " already fetched!"
                    continue
                if index in self.ignored_books:
                    continue
                response = urllib2.urlopen(self.url + str(index) + "/" + filename)
                if response.info().type != "text/plain":
                    raise Exception("Server returned Captcha. Stopping now.")
                book = response.read()
                self.Helper.write_file(book, filename)
                print "Waiting for " + str(self.sleep_timer) + " seconds..."
                time.sleep(self.sleep_timer)
        except urllib2.HTTPError as err:
            # if a book is not found (HTTP status 404), resume with the next ID;
            # "index" is still bound to the failing ID after the loop aborts
            if err.code == 404:
                self.run(index + 1, to_id)
        except Exception as err:
            # stop as soon as the server returns a captcha page, so we do not get blocked
            print err
            exit(1)
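For context, a hypothetical caller could drive the class like this; the ID range and timer value are illustrative and not taken from the original project.

# Illustrative usage: fetch Gutenberg books 1 through 99, skipping files
# that already exist locally and sleeping 10 seconds between downloads.
fetcher = BookFetcher(skip_existing_files=True, sleep_timer=10)
fetcher.run(from_id=1, to_id=100)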
Example 2: __init__
# Required imports: from helper import Helper [as alias]
# Or: from helper.Helper import file_exists [as alias]
import os

import nltk
from nltk.stem import SnowballStemmer, WordNetLemmatizer

from helper import Helper

class TextAnalyzer:
def __init__(self):
self.Helper = Helper()
self.Stemmer = SnowballStemmer("english")
self.Lemmatizer = WordNetLemmatizer()
self.filtered_books = []
self.stemmed_books = []
self.lemmatized_books = []
self.all_tokens_counts = []
self.distinct_tokens_counts = []
self.distinct_tokens_counts_with_stemming = []
self.distinct_tokens_counts_with_lemmatization = []
def process_text(self, specific_id=-1, perform_stemming=True, perform_lemmatization=True):
for filename in os.listdir(self.Helper.directory_original_books):
if filename.endswith(".txt"):
                # only analyze the book with the requested ID (if one was given)
                if specific_id >= 0 and filename != str(specific_id) + ".txt":
                    continue
book = self.Helper.read_file(filename)
# analyze books without stemming
tokens = self.tokenize(book)
# join tokens again to have the filtered text for stemming
book = " ".join(tokens)
self.Helper.write_file(book, filename, "f")
                if perform_stemming and not self.Helper.file_exists(self.Helper.stemming_directory, filename):
stemmed_book = self.perform_stemming(book, filename)
self.stemmed_books.append(stemmed_book)
self.Helper.write_file(stemmed_book, filename, "s")
else:
print self.Helper.stemming_directory + filename, "already exists."
                if perform_lemmatization and not self.Helper.file_exists(self.Helper.lemmatization_directory, filename):
lemmatized_book = self.perform_lemmatization(book, filename)
self.lemmatized_books.append(lemmatized_book)
self.Helper.write_file(lemmatized_book, filename, "l")
else:
print self.Helper.lemmatization_directory + filename, "already exists."
# tokenization currently without stopword removal
def tokenize(self, string):
book_lower_case = string.lower()
tokens = nltk.wordpunct_tokenize(book_lower_case)
tokens_filtered = filter(lambda t: t.isalpha(), tokens)
return tokens_filtered
def perform_stemming(self, book, filename):
print "INFO: STEMMING", filename
stemmed_book = ""
for word in book.split():
stemmed_word = self.Stemmer.stem(word)
stemmed_book += stemmed_word + " "
return stemmed_book
def perform_lemmatization(self, book, filename):
print "INFO: LEMMATIZING", filename
lemmatized_book = ""
for word in book.split():
lemmatized_word = self.Lemmatizer.lemmatize(word)
lemmatized_book += lemmatized_word + " "
return lemmatized_book
    def count_distinct_tokens(self, tokens):
        # build a set to drop duplicates, then return its size
        return len(set(tokens))
def count_documents(self):
return len(self.filtered_books)
def count_all_tokens_of_all_books(self):
return sum(self.all_tokens_counts)
def count_distinct_tokens_of_all_books(self):
return sum(self.distinct_tokens_counts)
def count_distinct_tokens_of_all_books_with_stemming(self):
return sum(self.distinct_tokens_counts_with_stemming)
def count_distinct_tokens_of_all_books_with_lemmatization(self):
return sum(self.distinct_tokens_counts_with_lemmatization)
def calculate_word_frequencies(self, identifier):
combined_books = ""
if identifier == "f":
for book in self.filtered_books:
combined_books += book + "\n"
elif identifier == "s":
for book in self.stemmed_books:
combined_books += book + "\n"
        #......... part of the code omitted here .........
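The example above is truncated by the source page. For orientation only, a hypothetical call into the class might look like this; the arguments shown are the defaults declared in process_text, and nothing here is taken from the omitted code.

# Illustrative usage: tokenize, stem and lemmatize every .txt book found
# in Helper.directory_original_books, skipping results that already exist.
analyzer = TextAnalyzer()
analyzer.process_text(specific_id=-1, perform_stemming=True, perform_lemmatization=True)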