This article collects typical usage examples of the Python attribute config.DATA_PATH, drawn from open-source projects. If you are wondering what config.DATA_PATH is for or how to use it, the curated snippets below should help; you can also look further into the config module that the attribute belongs to.
The code examples below are ordered roughly by popularity.
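For orientation, here is a minimal sketch of what such a config module might look like. The values are assumptions (every project defines its own), including the extra attributes that some examples below reference:

# config.py -- hypothetical sketch, not taken from any of the projects below
import os

DATA_PATH = './data/'   # trailing slash matters: several examples concatenate strings onto it

# Assumed values for other config attributes used by the examples:
RANDOM_SEED = 2018
TRAIN_I = os.path.join(DATA_PATH, 'train_i.txt')
LINE_FILE = 'movie_lines.txt'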
Example 1: obj_edge_vectors
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def obj_edge_vectors(names, wv_type='glove.6B', wv_dir=DATA_PATH, wv_dim=300):
    wv_dict, wv_arr, wv_size = load_word_vectors(wv_dir, wv_type, wv_dim)

    vectors = torch.Tensor(len(names), wv_dim)
    vectors.normal_(0, 1)

    for i, token in enumerate(names):
        wv_index = wv_dict.get(token, None)
        if wv_index is not None:
            vectors[i] = wv_arr[wv_index]
        else:
            # Try the longest word (hopefully it won't be a preposition)
            lw_token = sorted(token.split(' '), key=lambda x: len(x), reverse=True)[0]
            print("{} -> {}".format(token, lw_token))
            wv_index = wv_dict.get(lw_token, None)
            if wv_index is not None:
                vectors[i] = wv_arr[wv_index]
            else:
                print("fail on {}".format(token))

    return vectors
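A small usage sketch, assuming torch, the GloVe files under DATA_PATH, and the snippet's load_word_vectors helper are all available (the label list is made up):

names = ['person', 'traffic light', 'fire hydrant']   # hypothetical object labels
vecs = obj_edge_vectors(names, wv_dim=300)
print(vecs.shape)   # torch.Size([3, 300]); multiword labels fall back to their longest word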
Example 2: save_x_y
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def save_x_y(fold_index, train_x, train_y):
    _get = lambda x, l: [x[i] for i in l]
    for i in range(len(fold_index)):
        print("now part %d" % (i + 1))
        part_index = fold_index[i]
        Xv_train_, y_train_ = _get(train_x, part_index), _get(train_y, part_index)
        save_dir_Xv = config.DATA_PATH + "part" + str(i + 1) + "/"
        save_dir_y = config.DATA_PATH + "part" + str(i + 1) + "/"
        if not os.path.exists(save_dir_Xv):
            os.makedirs(save_dir_Xv)
        if not os.path.exists(save_dir_y):
            os.makedirs(save_dir_y)
        save_path_Xv = save_dir_Xv + train_x_name
        save_path_y = save_dir_y + train_y_name
        np.save(save_path_Xv, Xv_train_)
        np.save(save_path_y, y_train_)

# def save_test(test_x, test_y):
#     np.save("../data/test/test_x.npy", test_x)
#     np.save("../data/test/test_y.npy", test_y)
Example 3: main
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def main():
    train_x, train_y = _load_data()
    print('loading data done!')

    folds = list(StratifiedKFold(n_splits=10, shuffle=True,
                                 random_state=config.RANDOM_SEED).split(train_x, train_y))

    fold_index = []
    for i, (train_id, valid_id) in enumerate(folds):
        fold_index.append(valid_id)
    print("fold num: %d" % len(fold_index))
    fold_index = np.array(fold_index)
    np.save(config.DATA_PATH + "fold_index.npy", fold_index)

    save_x_y(fold_index, train_x, train_y)
    print("save train_x_y done!")

    fold_index = np.load(config.DATA_PATH + "fold_index.npy")
    save_i(fold_index)
    print("save index done!")
Example 4: save_i
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def save_i(fold_index):
    _get = lambda x, l: [x[i] for i in l]
    train_i = pd.read_csv(config.TRAIN_I, header=None, sep=' ', nrows=None, dtype=np.int32)
    train_i = train_i.values
    feature_size = train_i.max() + 1
    print("feature_size = %d" % feature_size)
    feature_size = np.array([feature_size])
    np.save(config.DATA_PATH + "feature_size.npy", feature_size)
    print("train_i size: %d" % len(train_i))
    for i in range(len(fold_index)):
        print("now part %d" % (i + 1))
        part_index = fold_index[i]
        Xi_train_ = _get(train_i, part_index)
        save_path_Xi = config.DATA_PATH + "part" + str(i + 1) + '/train_i.npy'
        np.save(save_path_Xi, Xi_train_)
Example 5: number_gold_credit
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def number_gold_credit():
    credit = 0
    db = TinyDB(config.DATA_PATH + 'reddit_gold.json')
    data = db.all()
    db.close()
    for gold in data:
        if gold['status'] == "buy":
            # the user bought credits
            credit = credit - int(gold['quantity'])
        if gold['status'] == "refill":
            # the user refilled credits
            credit = credit + int(gold['quantity'])
    return credit
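For illustration, records in reddit_gold.json would look roughly like this; the exact fields are inferred from the code above, not from the source project:

{"status": "buy", "quantity": "1"}      # spends one credit
{"status": "refill", "quantity": "10"}  # adds ten credits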
Example 6: get_lines
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def get_lines():
    id2line = {}
    file_path = os.path.join(config.DATA_PATH, config.LINE_FILE)
    print(config.LINE_FILE)
    with open(file_path, 'r', errors='ignore') as f:
        # lines = f.readlines()
        # for line in lines:
        i = 0
        try:
            for line in f:
                parts = line.split(' +++$+++ ')
                if len(parts) == 5:
                    if parts[4][-1] == '\n':
                        parts[4] = parts[4][:-1]
                    id2line[parts[0]] = parts[4]
                i += 1
        except UnicodeDecodeError:
            print(i, line)
    return id2line
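The ' +++$+++ ' separator is the field delimiter of the Cornell Movie-Dialogs Corpus, so LINE_FILE is presumably its movie_lines.txt. An illustrative input line and the entry it produces:

L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!
# -> id2line['L1045'] == 'They do not!'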
Example 7: index_subset
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def index_subset(subset):
    """Index a subset by looping through all of its files and recording relevant information.

    # Arguments
        subset: Name of the subset

    # Returns
        A list of dicts containing information about all the image files in a particular subset of the
        Omniglot dataset
    """
    images = []
    print('Indexing {}...'.format(subset))
    # Quick first pass to find total for tqdm bar
    subset_len = 0
    for root, folders, files in os.walk(DATA_PATH + '/Omniglot/images_{}/'.format(subset)):
        subset_len += len([f for f in files if f.endswith('.png')])

    progress_bar = tqdm(total=subset_len)
    for root, folders, files in os.walk(DATA_PATH + '/Omniglot/images_{}/'.format(subset)):
        if len(files) == 0:
            continue

        alphabet = root.split('/')[-2]
        class_name = '{}.{}'.format(alphabet, root.split('/')[-1])

        for f in files:
            progress_bar.update(1)
            images.append({
                'subset': subset,
                'alphabet': alphabet,
                'class_name': class_name,
                'filepath': os.path.join(root, f)
            })

    progress_bar.close()
    return images
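A usage sketch, assuming the Omniglot images are unpacked under DATA_PATH; 'background' and 'evaluation' are the usual Omniglot split names, an assumption not stated in this snippet:

images = index_subset('background')
print(len(images), images[0]['class_name'], images[0]['filepath'])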
Example 8: scale_each_fold
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def scale_each_fold():
    for i in range(1, 11):
        print('now part %d' % i)
        data = np.load(config.DATA_PATH + 'part' + str(i) + '/train_x.npy')
        part = data[:, 0:13]
        for j in range(part.shape[0]):
            if j % 100000 == 0:
                print(j)
            part[j] = list(map(scale, part[j]))
        np.save(config.DATA_PATH + 'part' + str(i) + '/train_x2.npy', data)
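The scale function is not part of this snippet. A plausible stand-in is the squared-log transform commonly applied to Criteo-style numeric features; this is an assumption, not the source project's definition:

import math

def scale(x):
    # hypothetical: compress large counts, leave small values untouched
    if x > 2:
        x = int(math.log(float(x)) ** 2)
    return x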
Example 9: save_i
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def save_i(fold_index):
    _get = lambda x, l: [x[i] for i in l]
    train_i = pd.read_csv(config.TRAIN_I, header=None, sep=' ', nrows=None, dtype=np.int32)
    train_i = train_i.values
    feature_size = train_i.max() + 1
    print("feature_size = %d" % feature_size)
    feature_size = np.array([feature_size])
    np.save(config.DATA_PATH + "feature_size.npy", feature_size)
    # pivot = 40000000
    # test_i = train_i[pivot:]
    # train_i = train_i[:pivot]
    # print("test_i size: %d" % len(test_i))
    print("train_i size: %d" % len(train_i))
    # np.save("../data/test/test_i.npy", test_i)
    for i in range(len(fold_index)):
        print("now part %d" % (i + 1))
        part_index = fold_index[i]
        Xi_train_ = _get(train_i, part_index)
        save_path_Xi = config.DATA_PATH + "part" + str(i + 1) + '/train_i.npy'
        np.save(save_path_Xi, Xi_train_)
Example 10: query_time_speed_test
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def query_time_speed_test():
    from tika import parser as tp
    import re
    import numpy as np

    alec_bills = [json.loads(x) for x in open("{0}/model_legislation/alec_bills.json".format(DATA_PATH))]
    test_queries = [base64.b64decode(s['source']) for s in alec_bills]
    pattern = re.compile(r"[0-9]\.\s.*")
    for i, t in enumerate(test_queries):
        test_queries[i] = tp.from_buffer(t)['content']
        test_queries[i] = " ".join(re.findall(pattern, test_queries[i]))
        test_queries[i] = test_queries[i].split()

    test_queries = [x for x in test_queries if len(x) >= 1500]
    query_sizes = np.arange(50, 1050, 50)
    ec = ElasticConnection()
    avg_times = []
    for query_size in query_sizes:
        temp_times = []
        for query in test_queries:
            query = " ".join(query[0:query_size])
            t1 = time.time()
            ec.similar_doc_query(query, num_results=1000)
            temp_times.append(time.time() - t1)
        avg_times.append(np.mean(temp_times))
        print("query size {0}, avg time (s) {1}".format(query_size, np.mean(temp_times)))

    for i in avg_times:
        print(i)
Example 11: parallel_requests_test
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def parallel_requests_test():
    alec_bills = [json.loads(x) for x in open("{0}/model_legislation/alec_bills.json".format(DATA_PATH))]
    test_queries = [base64.b64decode(s['source']) for s in alec_bills]
    pattern = re.compile(r"[0-9]\.\s.*")
    for i, t in enumerate(test_queries):
        test_queries[i] = tp.from_buffer(t)['content']
        test_queries[i] = " ".join(re.findall(pattern, test_queries[i]))
        # test_queries[i] = test_queries[i].split()
        # test_queries[i] = " ".join(test_queries[i][0:200])

    test_queries = test_queries[0:100]
    ec = ElasticConnection()

    serial_time = time.time()
    for test_query in test_queries:
        ec.similar_doc_query(test_query)
    print("serial time: ", time.time() - serial_time)

    pool = Pool(processes=7)
    parallel_time = time.time()
    pool.map(parallel_query, test_queries)
    print("parallel time: ", time.time() - parallel_time)
    exit()
## main function that manages unix interface
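parallel_query is not shown in the snippet; since Pool.map needs a picklable module-level function, a plausible helper might be (an assumption):

def parallel_query(query):
    # hypothetical helper: each worker process opens its own connection
    return ElasticConnection().similar_doc_query(query)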
Example 12: scrape_ALEC_model_legislation
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def scrape_ALEC_model_legislation():
    url = 'http://www.alec.org/model-legislation/'
    response = urllib2.urlopen(url).read()
    bs = BeautifulSoup(response, 'html5')

    # Get all links from the website
    ALEClist = []
    for link in bs.find_all('a'):
        if link.has_attr('href'):
            ALEClist.append(link.attrs['href'])

    # Filter the list so that only model-legislation links remain
    ALEClinks = []
    for i in range(0, len(ALEClist)):
        if ALEClist[i][20:38] == "model-legislation/":
            ALEClinks.append(ALEClist[i])

    # Keep only unique links (get rid of duplicates)
    ALEClinks = set(ALEClinks)

    # Save to a json file
    with open('{0}/data/model_legislation/alec_bills.json'.format(DATA_PATH), 'w') as f:
        for line in ALEClinks:
            source = urllib2.urlopen(line).read()
            url = line
            date = 2015
            Jsonbill = bill_source_to_json(url, source, date)
            f.write("{0}\n".format(Jsonbill))

# Save old alec bills (from the Center for Media and Democracy)
Example 13: sensor_index
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def sensor_index():
    with open(os.path.join(DATA_PATH, 'sensor_graph/graph_sensor_ids.txt')) as f:
        sensor_ids = f.read().strip().split(',')
    sensor_idx = {}
    for i, sensor_id in enumerate(sensor_ids):
        sensor_idx[sensor_id] = i
    return sensor_idx
Example 14: sensor_location
# Required import: import config [as alias]
# or: from config import DATA_PATH [as alias]
def sensor_location():
    sensor_idx = sensor_index()
    sensor_locs = np.loadtxt(os.path.join(DATA_PATH, 'sensor_graph/graph_sensor_locations.csv'),
                             delimiter=',', skiprows=1)
    n = len(sensor_idx)
    loc = np.zeros((n, 2))
    for i in range(n):
        sensor_id = str(int(sensor_locs[i, 1]))  # sensor id stored in column 1 of the CSV
        loc[sensor_idx[sensor_id], :] = sensor_locs[i, 2:4]
    return loc
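A combined usage sketch, assuming DCRNN-style sensor_graph files exist under DATA_PATH and that columns 2:4 of the locations CSV hold latitude and longitude:

sensor_idx = sensor_index()
loc = sensor_location()
sid = next(iter(sensor_idx))        # some sensor id from the file
print(sid, loc[sensor_idx[sid]])    # its (latitude, longitude) pair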