This article compiles typical usage examples of the Python smart_open.open method. If you have been wondering exactly how to use smart_open.open, what it is good for, or what real calling code looks like, the curated examples below may help. You can also explore further usage examples from the smart_open module the method belongs to.
Below are 15 code examples of the smart_open.open method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
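As context, smart_open.open is a drop-in replacement for the built-in open() that streams from local files, S3, HTTP(S), and other transports, with transparent compression handling based on the file extension. A minimal sketch (the S3 bucket and key below are placeholders):
# Minimal sketch; the S3 URI is a placeholder.
from smart_open import open

with open('s3://my-bucket/logs/app.log.gz', 'r') as f:  # .gz is decompressed transparently
    for line in f:
        print(line.rstrip())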
Example 1: __getitem__
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def __getitem__(self, i):
""" Returns the line indexed by i. Primarily used for
:meth:`~fse.models.sentencevectors.SentenceVectors.most_similar`
Parameters
----------
i : int
The line index used to index the file
Returns
-------
str
line at the current index
"""
if not self.get_able:
raise RuntimeError("To index the lines, you must contruct with get_able=True")
with open(self.path, "rb") as f:
f.seek(self.line_offset[i])
output = f.readline()
f.seek(0)
return any2unicode(output).rstrip()
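A minimal usage sketch; the class name IndexedLineDocument and its import path follow the fse conventions hinted at in the docstring, but are assumptions here:
# Hypothetical usage; the class name and corpus path are assumptions.
from fse.inputs import IndexedLineDocument

doc = IndexedLineDocument("corpus.txt", get_able=True)
print(doc[3])  # the fourth line of corpus.txt, as a stripped unicode string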
Example 2: bigrammer
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def bigrammer(source_file, outfile, mincount=100, threshold=0.99, scoring='npmi',
commonfile='common_tagged.txt'):
"""
:param source_file:
:param outfile:
:param mincount:
:param threshold:
:param scoring:
:param commonfile:
:return:
"""
common = set([word.strip() for word in open(commonfile, 'r').readlines()])
data = LineSentence(source_file)
bigram_transformer = Phrases(sentences=data, min_count=mincount, threshold=threshold,
scoring=scoring, max_vocab_size=400000000, delimiter=b':::',
progress_per=100000, common_terms=common)
bigrams = Phraser(bigram_transformer)
tempfile = open(outfile, 'a')
print('Writing bigrammed text to %s' % outfile, file=sys.stderr)
for i in bigrams[data]:
tempfile.write(' '.join(i) + '\n')
tempfile.close()
return len(bigrams.phrasegrams)
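A hypothetical invocation; the file names are placeholders, and thanks to smart_open the corpus may be gzip-compressed:
# Hypothetical call; all paths are placeholders.
n_phrases = bigrammer('corpus_tagged.txt.gz', 'corpus_bigrams.txt',
                      mincount=50, threshold=0.5, scoring='npmi')
print('Detected %d phrases' % n_phrases)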
Example 3: load_glove_from_file
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def load_glove_from_file(glove_filepath: Path) -> Tuple[Dict[str, int], np.ndarray]:
w2i = {}
embeddings = []
with open(glove_filepath, "r") as fp:
iterator = tqdm(enumerate(fp), "Embeddings") if TQDM else enumerate(fp)
for index, line in iterator:
            line = line.rstrip().split(" ")  # each line: word num1 num2 ...
w2i[line[0]] = index # word = line[0]
embedding_i = np.array([float(val) for val in line[1:]])
embeddings.append(embedding_i)
return w2i, np.stack(embeddings)
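A brief usage sketch, assuming a standard GloVe text file (the filename is a placeholder):
# Hypothetical usage; the GloVe file name is a placeholder.
w2i, embeddings = load_glove_from_file(Path("glove.6B.100d.txt"))
print(len(w2i), embeddings.shape)  # e.g. 400000 (400000, 100)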
Example 4: report
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def report(self, name: str, experiment: ExperimentConfig, report_dir: Path):
with open(report_dir / 'metrics_report.txt', 'w') as reporting:
reporting.write(f"Metrics reporting for experiment {name}\n")
reporting.write("#"*50 + '\n')
for mode, metrics in experiment['trainer'].metrics_history.items():
reporting.write(f"Reporting metrics in {mode} mode\n")
for metric, values in metrics.items():
reporting.write(f"{metric}: [{', '.join([str(value) for value in values])}]\n")
Example 5: open_file
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def open_file(path, mode, num_tries=20, encoding='utf-8', auto_decompression=True):
import warnings
if is_s3_path(path) and 'r' in mode:
client = _get_fsclient_bypath(path)
client.wait_for_path(path)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
import smart_open
nTry = 0
while nTry <= num_tries:
try:
# TODO: support append mode for s3
return smart_open.open(
path,
mode,
encoding=encoding,
ignore_ext=not auto_decompression,
transport_params=get_smart_open_transport_params(path)
)
except Exception as e:
if nTry >= num_tries:
raise
if 'w' in mode:
remove_file(path)
                nTry += 1
time.sleep(1)
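A hypothetical call; the S3 URI is a placeholder. The helper waits for the key to appear and retries the open up to num_tries times:
# Hypothetical usage; bucket and key are placeholders.
with open_file('s3://my-bucket/data/train.csv.gz', 'r') as f:
    header_line = f.readline()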
Example 6: get_file_size
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def get_file_size(path):
client = _get_fsclient_bypath(path)
return client.get_file_size(path)
# @classmethod
# def openFile(cls, path, mode):
# client = cls._get_fsclient_bypath(path)
# return client.open(path, mode)
Example 7: post_image_to_actress
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def post_image_to_actress(actress_id, image_f, emby_url, api_key):
with open(image_f, 'rb') as f:
        b6_pic = base64.b64encode(f.read())  # read the file contents and encode them as base64
url = f'{emby_url}emby/Items/{actress_id}/Images/Primary?api_key={api_key}'
if image_f.endswith('png'):
header = {"Content-Type": 'image/png', }
else:
header = {"Content-Type": 'image/jpeg', }
requests.post(url=url, data=b6_pic, headers=header)
    print(f'successfully posted image for actress ID: {actress_id}')
return 1
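A hypothetical call; the server URL, actress ID, and API key are placeholders. Note the Emby URL must end with a slash, since the endpoint is built by plain string concatenation:
# Hypothetical usage; every argument below is a placeholder.
post_image_to_actress('12345', 'actress.jpg',
                      'http://localhost:8096/', 'YOUR_API_KEY')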
Example 8: load_model
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def load_model(self, model: str, model_path: str, max_seq_length: int):
try:
encoding = 'utf-8'
unicode_errors = 'strict'
model_file = [f for f in os.listdir(model_path) if os.path.isfile(os.path.join(model_path, f))]
        with open(os.path.join(model_path, model_file[0]), 'rb') as f:
            header = to_unicode(f.readline(), encoding=encoding)
            vocab_size, vector_size = (int(x) for x in header.split())  # throws for invalid file format
            binary_len = dtype(real).itemsize * vector_size
            for _ in tqdm(range(vocab_size)):
                word = []
                while True:
                    ch = f.read(1)
                    if ch == b' ':
                        break
                    if ch == b'':
                        raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?")
                    if ch != b'\n':  # ignore newlines in front of words (some binary files have them)
                        word.append(ch)
                word = to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors)
                # np.fromstring is deprecated in favor of np.frombuffer; kept here as in the original
                weights = fromstring(f.read(binary_len), dtype=real).astype(real)
                self.word_vectors[word] = weights
        self.model_name = model
        self.max_seq_length = max_seq_length
        print("Model loaded successfully!")
        return self
except Exception as e:
        print('Error loading model:', str(e))
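The loop above parses the classic word2vec binary format: a text header line "vocab_size vector_size", followed by vocab_size records, each a space-terminated word and vector_size packed float32 values. A hypothetical call; the wrapper class name and paths are placeholders:
# Hypothetical usage; the class name and paths are placeholders.
encoder = SentenceEncoder()
encoder = encoder.load_model('word2vec-google-news', '/models/word2vec/', max_seq_length=128)
vector = encoder.word_vectors.get('apple')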
Example 9: read_index
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def read_index(pack_index_path: str) -> Dict[str, str]:
page_idx: Dict[str, str] = {}
logging.info("Reading pack index from %s", pack_index_path)
with open(pack_index_path) as idx:
for page_name, page_path in csv.reader(idx, delimiter='\t'):
page_idx[page_name] = page_path
return page_idx
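The index is expected to be a tab-separated file with one page name and pack path per line. A hypothetical call; the path is a placeholder:
# Hypothetical usage; the index path is a placeholder.
pack_index = read_index('dbpedia_packs/article.idx')
print(len(pack_index), 'packs indexed')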
Example 10: _parse_pack
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def _parse_pack(
self, collection: Tuple[str, Dict[str, List[state_type]]]
) -> Iterator[DataPack]:
resource_name, info_box_data = collection
if resource_name in self.redirects:
resource_name = self.redirects[resource_name]
if resource_name in self.pack_index:
print_progress(f'Add infobox to resource: [{resource_name}]')
pack_path = os.path.join(
self.pack_dir,
self.pack_index[resource_name]
)
if os.path.exists(pack_path):
with open(pack_path) as pack_file:
pack = data_utils.deserialize(
self._pack_manager, pack_file.read())
add_info_boxes(pack, info_box_data['literals'])
add_info_boxes(pack, info_box_data['objects'])
add_property(pack, info_box_data['properties'])
yield pack
else:
print_notice(f"Resource {resource_name} is not in the raw packs.")
self.logger.warning("Resource %s is not in the raw packs.",
resource_name)
Example 11: _build_offsets
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def _build_offsets(self):
""" Builds an offset table to index the file """
with open(self.path, "rb") as f:
offset = f.tell()
for line in f:
self.line_offset.append(offset)
offset += len(line)
Example 12: __iter__
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def __iter__(self):
"""Iterate through the lines in the source.
Yields
------
tuple : (list[str], int)
            The tokenized line and its index.
"""
with open(self.path, "rb") as f:
for i, line in enumerate(f):
yield (any2unicode(line).split(), i)
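Since smart_open handles the transport, the same iterator works for local, compressed, or remote corpora. A hypothetical loop, reusing the IndexedLineDocument sketch from Example 1 (the class name and path remain assumptions):
# Hypothetical usage; class name and corpus path are assumptions.
for tokens, i in IndexedLineDocument("corpus.txt"):
    print(i, tokens[:5])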
Example 13: load_object_from_file
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def load_object_from_file(path, use_local_cache=False):
import joblib
path_to_load = None
if is_s3_path(path):
if use_local_cache:
local_path = path.replace(
"s3://"+os.environ.get('S3_DATA_PATH'), os.environ.get("AUGER_LOCAL_TMP_DIR", ''))
#logging.info("Local cache path: %s"%local_path)
if not is_file_exists(local_path):
local_lock_path = local_path + '.lock'
create_parent_folder(local_lock_path)
f_lock = None
try:
f_lock = open(local_lock_path, 'x')
except Exception as e:
#logging.exception("Open lock file failed.")
pass
if f_lock:
try:
if not is_file_exists(local_path):
with save_atomic(local_path) as local_tmp_path:
logging.info("Download file from s3 to: %s, temp folder: %s" % (
local_path, local_tmp_path))
download_file(path, local_tmp_path)
finally:
f_lock.close()
remove_file(local_lock_path)
else:
wait_for_file(local_path, True,
num_tries=300, interval_sec=10)
path_to_load = local_path
else:
with save_atomic(path, move_file=False) as local_tmp_path:
download_file(path, local_tmp_path)
return joblib.load(local_tmp_path)
else:
path_to_load = path
return joblib.load(path_to_load)
Example 14: load_df_from_s3
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def load_df_from_s3(
aws_key: str,
aws_secret: str,
bucket_name: str,
file_path: str,
skiprows: Optional[int] = 0,
skipfooter: Optional[int] = 0,
) -> pd.DataFrame:
"""Load data from a S3 bucket.
Given a file object, try to read the content and transform it into a data
frame.
It also tries to convert as many columns as possible to date/time format
(testing the conversion on every string column).
:param aws_key: Key to access the S3 bucket
:param aws_secret: Secret to access the S3 bucket
:param bucket_name: Bucket name
:param file_path: Path to access the file within the bucket
:param skiprows: Number of lines to skip at the top of the document
:param skipfooter: Number of lines to skip at the bottom of the document
:return: Resulting data frame, or an Exception.
"""
path_prefix = ''
if aws_key and aws_secret:
# If key/secret are given, create prefix
path_prefix = '{0}:{1}@'.format(aws_key, aws_secret)
if settings.ONTASK_TESTING:
uri = 'file:///{0}/{1}'.format(
bucket_name,
file_path)
else:
uri = 's3://{0}{1}/{2}'.format(
path_prefix,
bucket_name,
file_path)
data_frame = pd.read_csv(
smart_open.open(uri),
index_col=False,
infer_datetime_format=True,
quotechar='"',
skiprows=skiprows,
skipfooter=skipfooter,
encoding='utf-8',
)
# Strip white space from all string columns and try to convert to
# datetime just in case
return pandas.detect_datetime_columns(data_frame)
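A hypothetical call; the credentials, bucket, and path are placeholders:
# Hypothetical usage; every argument below is a placeholder.
df = load_df_from_s3(
    aws_key='AKIA...',
    aws_secret='secret...',
    bucket_name='my-bucket',
    file_path='exports/table.csv',
    skiprows=1,
)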
Example 15: _parse
# Required module: import smart_open [as alias]
# Or: from smart_open import open [as alias]
def _parse(self, path: Union[Path, str]) -> None:
section_name: str = ''
sample_header: Optional[List[str]] = None
with open(path, encoding=self._encoding) as handle:
lines = list(csv.reader(handle, skipinitialspace=True))
for i, line in enumerate(lines):
# Skip to next line if this line is empty to support formats of
            # sample sheets with multiple newlines as section separators.
#
# https://github.com/clintval/sample-sheet/issues/46
#
if not ''.join(line).strip():
continue
# Raise exception if we encounter invalid characters.
if any(
character not in VALID_ASCII
for character in set(''.join(line))
):
raise ValueError(
f'Sample sheet contains invalid characters on line '
f'{i + 1}: {"".join(line)}'
)
header_match = self._section_header_re.match(line[0])
            # If we enter a section, save its name and continue to the next line.
if header_match:
section_name, *_ = header_match.groups()
if (
section_name not in self._sections
and section_name not in REQUIRED_SECTIONS
):
self.add_section(section_name)
continue
# [Reads] - vertical list of integers.
if section_name == 'Reads':
self.Reads.append(int(line[0]))
continue
# [Data] - delimited data with the first line a header.
elif section_name == 'Data':
if sample_header is not None:
self.add_sample(Sample(dict(zip(sample_header, line))))
elif any(key == '' for key in line):
raise ValueError(
f'Header for [Data] section is not allowed to '
f'have empty fields: {line}'
)
else:
sample_header = line
continue
# [<Other>] - keys in first column and values in second column.
elif len(line) >= 2:
key, value = (line[0], line[1])
section: Section = getattr(self, section_name)
section[key] = value
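This parser belongs to the sample-sheet library (clintval/sample-sheet); because it opens files via smart_open, the path may also be a URL or S3 URI. A hedged usage sketch, assuming the package's public SampleSheet class:
# A sketch assuming the public SampleSheet API from the sample-sheet package.
from sample_sheet import SampleSheet

sheet = SampleSheet('SampleSheet.csv')  # local path, URL, or S3 URI
for sample in sheet.samples:
    print(sample)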