This article collects typical usage examples of the Python function scipy.io.mmwrite. If you have been wondering what exactly mmwrite does, how to call it, or what real-world uses of it look like, the hand-picked code examples below may help.
A total of 15 code examples of the mmwrite function are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
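Before the collected examples, here is a minimal, self-contained sketch of the basic call pattern (the file name example.mtx and the toy matrix are illustrative only, not taken from any example below): mmwrite accepts a target file name (the .mtx extension is appended automatically if missing) or an open file object, plus a dense ndarray or SciPy sparse matrix, along with optional comment, field, precision, and symmetry arguments; mmread reads the file back as a sparse COO matrix.

import numpy as np
from scipy import sparse
from scipy.io import mmwrite, mmread

# A small sparse matrix, purely for illustration.
A = sparse.coo_matrix(np.array([[0.0, 1.5, 0.0],
                                [2.0, 0.0, 0.0],
                                [0.0, 0.0, 3.0]]))

# Write it in Matrix Market format; "example" becomes "example.mtx".
mmwrite("example", A, comment="toy matrix", field="real")

# Read it back and verify the round trip.
B = mmread("example.mtx")
assert np.allclose(A.toarray(), B.toarray())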
Example 1: RWWR
def RWWR(alpha, nbBasket, nbReco):
    data = load()
    ###############################################################
    # CREATE MODELS
    ###############################################################
    print 'Create the model based on the training set'
    modelRWWR = processing.RandomWalkWithRestartRecoModel(data.getUserItemMatrix(), alpha)
    ###############################################################
    # SET RECOMMENDATION
    ###############################################################
    if nbBasket == -1:
        evalRWWR = processing.Evaluation(modelRWWR, data.getBasketItemList(), nbReco)
    else:
        evalRWWR = processing.Evaluation(modelRWWR, data.getBasketItemList()[:nbBasket], nbReco)
    ###############################################################
    # LAUNCH RECOMMENDATION + SAVE RESULTS
    ###############################################################
    t = time.time()
    evalRWWR.newEval()
    RWWRTime = time.time() - t
    mmwrite('RWWR_a%s_nb%s' % (alpha, nbBasket), evalRWWR.perf)
    print 'RWWR Execution time:', RWWRTime
    print 'Performances :'
    print evalRWWR.testNames
    print evalRWWR.meanPerf()
    evalRWWR.savePerf('RWWR_a%s_nb%s' % (alpha, nbBasket))
    return evalRWWR
Example 2: genJaccard
def genJaccard(feature_matrix):
    jaccard_matrix_pre = []
    # jaccard_matrix_pre is a list of arrays that contain the non-zero indices of each article in the corpus
    for i in feature_matrix[0:test_num]:
        indicies = np.flatnonzero(i)
        jaccard_matrix_pre.append(indicies)
    S = sparse.dok_matrix((test_num, test_num))
    t0 = time.time()
    numi = 0
    for i in jaccard_matrix_pre:
        jnum = 0
        for j in jaccard_matrix_pre[0:numi+1]:  # decrease number of calculations to n choose 2 instead of n^2
            diviser = float(len(set(i).union(set(j))))
            if diviser != 0:
                actual_jaccard = float(len(set(i).intersection(set(j)))) / diviser
                if actual_jaccard != 0 and actual_jaccard != 1:
                    S[numi, jnum] = actual_jaccard
            jnum = jnum + 1
        numi = numi + 1
    with open('pickled_minhash/actual_jaccard_matrix_small.mtx', 'wb') as f:
        # size of feature_matrix_large: 1261 x 19043
        io.mmwrite(f, S)
    print("TIME to generate jaccard_matrix: {}".format(time.time() - t0))
Example 3: test1D2
def test1D2():
    spl = splineRefMat(DIM_1D)
    # list_r = list(np.random.random(20))
    list_r = [0.1, 0.2, 0.3]
    nx = 3
    px = 2
    geo = line(n=[nx], p=[px])
    nrb = geo[0]
    knots = nrb.knots[0]
    n = nrb.shape[0]
    p = nrb.degree[0]
    P = nrb.points
    M = spl.construct(list_r, p, n, knots)
    from scipy.io import mmwrite
    mmwrite('M.mtx', M)
    R = M.dot(nrb.points[:, 0])
    geo = line(n=[nx], p=[px])
    geo.refine(id=0, list_t=[list_r])
    nrb = geo[0]
    P = np.asarray(nrb.points[:, 0])
    assert np.allclose(P, R)
    print("test1D2: OK")
Example 4: run
def run(self, ratio, input_db, output_mat):
    db = sqlite3.connect(input_db)
    # assume no empty users
    users = db.execute("""SELECT Users.[Id] FROM Users""").fetchall()
    # pick <ratio> of them for the training db, pick <ratio/10> of them for the test db
    train_ids = []
    test_ids = []
    test_threshold = ratio / 10
    train_threshold = test_threshold + ratio
    for u in users:
        rnd = random.random()
        if rnd <= test_threshold:
            test_ids.append(u[0])
        elif rnd <= train_threshold:
            train_ids.append(u[0])
    train_matrix = self.data_to_matrix(db, train_ids).tocsc()
    test_matrix = self.data_to_matrix(db, test_ids).tocsc()
    (train_matrix, test_matrix) = self.trim_matrices(train_matrix, test_matrix)
    savemat(output_mat, {'train': train_matrix, 'test': test_matrix}, oned_as='row')
    mmwrite(output_mat + '.train', train_matrix)
    mmwrite(output_mat + '.test', test_matrix)
    print("Done!")
Example 5: __init__
def __init__(self, programEntities, sim=ssd.correlation):
    cleaner = DataCleaner()
    nusers = len(programEntities.userIndex.keys())
    fin = open("../Data/users.csv", 'rb')
    colnames = fin.readline().strip().split(",")
    self.userMatrix = ss.dok_matrix((nusers, len(colnames) - 1))
    for line in fin:
        cols = line.strip().split(",")
        # consider the user only if he exists in train.csv
        if programEntities.userIndex.has_key(cols[0]):
            i = programEntities.userIndex[cols[0]]
            self.userMatrix[i, 0] = cleaner.getLocaleId(cols[1])
            self.userMatrix[i, 1] = cleaner.getBirthYearInt(cols[2])
            self.userMatrix[i, 2] = cleaner.getGenderId(cols[3])
            self.userMatrix[i, 3] = cleaner.getJoinedYearMonth(cols[4])
            self.userMatrix[i, 4] = cleaner.getCountryId(cols[5])
            self.userMatrix[i, 5] = cleaner.getTimezoneInt(cols[6])
    fin.close()
    # normalize the user matrix
    self.userMatrix = normalize(self.userMatrix, norm="l1", axis=0, copy=False)
    sio.mmwrite("../Models/US_userMatrix", self.userMatrix)
    # calculate the user similarity matrix and save it for later
    self.userSimMatrix = ss.dok_matrix((nusers, nusers))
    for i in range(0, nusers):
        self.userSimMatrix[i, i] = 1.0
    for u1, u2 in programEntities.uniqueUserPairs:
        i = programEntities.userIndex[u1]
        j = programEntities.userIndex[u2]
        if not self.userSimMatrix.has_key((i, j)):
            usim = sim(self.userMatrix.getrow(i).todense(),
                       self.userMatrix.getrow(j).todense())
            self.userSimMatrix[i, j] = usim
            self.userSimMatrix[j, i] = usim
    sio.mmwrite("../Models/US_userSimMatrix", self.userSimMatrix)
Example 6: make_author_vectors
def make_author_vectors(crawl_fname, doc_vec_fname, auth_vec_fname):
    docs = np.load(doc_vec_fname)
    doc_vecs = docs["vectors"][()]
    # Convert to LIL, because modifying CSR is slow
    doc_vecs = doc_vecs.tolil()
    # Create mapping from label (=DOI) to row number (=doc vector)
    doi2n = dict((l, i) for i, l in enumerate(docs["labels"]))
    # Collect authors
    tree = etree.parse(crawl_fname)
    authors = np.array(list(set(tree.xpath("//author/text()"))))
    # Create empty author vectors
    shape = (len(authors), doc_vecs.shape[1])
    auth_vecs = sp.lil_matrix(shape)
    # Create mapping from authors to row number (=author vector)
    auth2n = dict((a, i) for i, a in enumerate(authors))
    ## author to group mapping
    ##auth2group = {}
    # Fill author vectors by adding doc vectors
    for item in tree.findall("//item"):
        author = item.find("author").text
        ##group = item.find("group")
        ##auth2group[author] = group
        url = item.find("url").text
        query = urlparse.urlparse(url).query
        doi = urlparse.parse_qs(query)["doi"][0]
        log.debug(u"DOI={} author={}".format(doi, author))
        try:
            auth_vecs[auth2n[author]] += doc_vecs[doi2n[doi]]
        except KeyError:
            log.warning(u"No document with DOI={} for author {}".format(
                doi, author))
    auth_vecs = auth_vecs.tocsr()
    ##group_labels = [auth2group[auth] for auth in authors]
    log.info("saving matrix in Numpy format to " + auth_vec_fname)
    np.savez(auth_vec_fname,
             vectorizer=docs["vectorizer"],
             vectors=auth_vecs,
             author_labels=authors,
             ##group_labels=group_labels
             )
    base_fname = splitext(auth_vec_fname)[0]
    mm_fname = base_fname + ".mtx"
    log.info("saving matrix in Matrix Market format to " + mm_fname)
    mmwrite(mm_fname, auth_vecs, "IDIScape document vectors", "integer")
    label_fname = base_fname + "_labels.txt"
    log.info("saving labels to " + label_fname)
    # assumes codecs.open (imported as open), so the "utf8" encoding argument is valid
    open(label_fname, "w", "utf8").write(u"\n".join(authors))
Example 7: RW_POP
def RW_POP(alpha, nbBasket, nbReco):
    data = load()
    ###############################################################
    # CREATE MODELS
    ###############################################################
    print 'Create the model based on the training set'
    modelRW = processing.BasketRandomWalk_POP(data.getUserItemMatrix(), alpha)
    ###############################################################
    # SET RECOMMENDATION
    ###############################################################
    if nbBasket == -1:
        evalRW = processing.Evaluation(modelRW, data.getBasketItemList(), nbReco)
    else:
        evalRW = processing.Evaluation(modelRW, data.getBasketItemList()[:nbBasket], nbReco)
    ###############################################################
    # LAUNCH RECOMMENDATION + SAVE RESULTS
    ###############################################################
    t = time.time()
    evalRW.newEval()
    RWTime = time.time() - t
    mmwrite(resultFolder + 'RW_POP_a%s_nb%s' % (alpha, nbBasket), evalRW.perf)
    print 'RW_POP Execution time:', RWTime
    print 'Performances :'
    print evalRW.testNames
    print evalRW.computePerf()
    evalRW.savePerf(resultFolder + 'RW_POP_a%s_nb%s.txt' % (alpha, nbBasket))
    return evalRW
Example 8: make_doc_vectors
def make_doc_vectors(fname_pat, out_fname):
    fnames = glob(fname_pat)
    labels = [splitext(basename(fn))[0] for fn in fnames]
    stop_words = frozenset(list(ENGLISH_STOP_WORDS) + OTHER_STOPWORDS)
    vectorizer = CountVectorizer(input="filename",
                                 ngram_range=(1, 3),
                                 min_df=5,
                                 max_df=0.7,
                                 stop_words=stop_words,
                                 token_pattern=r"(?u)\b[A-Za-z]\w+\b")
    vectors = vectorizer.fit_transform(fnames)
    log.info("saving matrix in Numpy format to " + out_fname)
    np.savez(out_fname,
             vectorizer=vectorizer,
             vectors=vectors,
             labels=labels)
    base_fname = splitext(out_fname)[0]
    mm_fname = base_fname + ".mtx"
    log.info("saving matrix in Matrix Market format to " + mm_fname)
    mmwrite(mm_fname, vectors, "IDIScape document vectors", "integer")
    feat_fname = base_fname + "_features.txt"
    log.info("saving features to " + feat_fname)
    feat_names = vectorizer.get_feature_names()
    # assumes codecs.open (imported as open), so the "utf8" encoding argument is valid
    open(feat_fname, "w", "utf8").write(u"\n".join(feat_names))
    label_fname = base_fname + "_labels.txt"
    log.info("saving labels to " + label_fname)
    open(label_fname, "w", "utf8").write(u"\n".join(labels))
Example 9: main
def main():
    """
    Main entry point to the script that performs kmeans.

    Returns:
    - `0` or `1` on success or failure respectively.
    - Saves `centroids`, `centroiddict`, and `clusters` in the working dir.
    """
    parser = gen_args()
    args = parser.parse_args()
    sessionid = args.sessionid
    data = spio.mmread(args.data).tocsc()
    logger = logging.getLogger(__name__)
    logger.addHandler(logging.StreamHandler())
    if args.verbose:
        logger.setLevel(logging.DEBUG)
    if args.k:
        k = args.k
    kmeans = KMeans(data, k, args.n, args.delta, args.randomcentroids,
                    args.classical, args.verbose)
    result = kmeans.run()
    clusters = result['clusters']
    centroids = result['centroids']
    centroiddict = result['centroiddict']
    cPickle.dump(clusters, open("data_clusters_" + sessionid + '.pck', 'w'))
    cPickle.dump(centroiddict, open("centroid_dict_" + sessionid + '.pck', 'w'))
    spio.mmwrite(open("data_centroids_" + sessionid + '.mtx', 'w'),
                 centroids, comment="CSC Matrix", field='real')
    logger.info(" %d Clusters Generated ", len(clusters))
    return 0
Example 10: __init__
def __init__(self, programEvents):
    nevents = len(programEvents.eventIndex.keys())
    self.eventPopularity = ss.dok_matrix((nevents, 5))
    self.eventAttendees = collections.defaultdict(list)
    f = open("/users/chaitanya/PyCharmProjects/EventRec/data/event_attendees.csv", 'rb')
    f.readline()  # skip header
    for line in f:
        cols = line.strip().split(",")
        eventId = cols[0]
        if programEvents.eventIndex.has_key(eventId):
            i = programEvents.eventIndex[eventId]
            self.eventPopularity[i, 0] = len(cols[1].split(" ")) - len(cols[4].split(" "))  # number of yes minus no
            self.eventPopularity[i, 1] = len(cols[3].split(" "))  # number of invited folks
            self.eventAttendees[i].append(cols[1].split(" "))  # list of yes folks
            self.eventAttendees[i].append(cols[2].split(" "))  # list of maybe folks
            self.eventAttendees[i].append(cols[3].split(" "))  # list of invited folks
    f.close()
    self.eventPopularity = normalize(self.eventPopularity, norm="l1", axis=0, copy=False)
    sio.mmwrite("/users/chaitanya/PyCharmProjects/EventRec/Models/EA_eventPopularity", self.eventPopularity)
    cPickle.dump(self.eventAttendees, open("/users/chaitanya/PyCharmProjects/EventRec/Models/PE_eventAttendees.pkl", 'wb'))
Example 11: encode
def encode():
    """
    Generate extra features from pairs, triplets, and common
    quadruplets of the existing features and then save those features
    in a sparse matrix to disk.
    """
    dftrain = load_dataframe('train')
    dftest = load_dataframe('test')
    lentrain = len(dftrain)
    all_data = np.vstack((dftrain.ix[:, 1:-1], dftest.ix[:, 1:-1]))
    np.array(dftrain.ACTION).dump('{}/train_truth.dat'.format(ddir))
    dp = group_data(all_data, degree=2, remove_unique=True)
    dt = group_data(all_data, degree=3, remove_unique=True)
    dq = group_data(all_data, degree=4, remove_unique=True)
    dq = remove_rare(dq, 15)
    X = all_data[:lentrain]
    X_2 = dp[:lentrain]
    X_3 = dt[:lentrain]
    X_4 = dq[:lentrain]
    X_train_all = np.hstack((X, X_2, X_3, X_4))
    mmwrite('{}/train_encoded'.format(ddir), X_train_all)
    X_test = all_data[lentrain:]
    X_test_2 = dp[lentrain:]
    X_test_3 = dt[lentrain:]
    X_test_4 = dq[lentrain:]
    X_test_all = np.hstack((X_test, X_test_2, X_test_3, X_test_4))
    mmwrite('{}/test_encoded'.format(ddir), X_test_all)
Example 12: get_content_similarity_scores
def get_content_similarity_scores(readmes, dataset_dir, profile="tfidf",
                                  similarity="cos"):
    """Return a CSR matrix of similarity_{r,r} for all r in `readmes`.

    `dataset_dir` is the directory where the similarity scores are stored.
    `profile` is "bool" or "tfidf".
    `similarity` is "cos" or "ijd" (inverse Jaccard distance).
    """
    if profile == "tfidf":
        sim_fn = join(dataset_dir, TF_IDF_FN)
        if exists(sim_fn):
            return mmread(sim_fn).tocsr()
    if profile == "bool":
        # readme_words = COUNTVECTORIZER readmes
        pass
    else:
        tfidf = TfidfVectorizer(input='file',  # sublinear_tf=True,
                                max_df=0.5, stop_words='english',
                                decode_error="ignore")
        # max_df=0.5: if a word occurs in more than half of the readmes it is
        # ignored
        readme_words = tfidf.fit_transform(readmes)
    if similarity == "cos":
        similarity_scores = csr_matrix(cosine_similarity(readme_words))
    else:
        # similarity_scores = csr_matrix(ijd(readme_words))
        pass
    mmwrite(sim_fn, similarity_scores, comment=profile + "_" + similarity + "_similarity_{r,r}")
    return similarity_scores
Example 13: store_matrix
def store_matrix(matrix='',
                 output_dir_path='',
                 out_file_name='',
                 output_format=''):
    """store_matrix."""
    if not os.path.exists(output_dir_path):
        os.mkdir(output_dir_path)
    full_out_file_name = os.path.join(output_dir_path, out_file_name)
    if output_format == "MatrixMarket":
        if len(matrix.shape) == 1:
            raise Exception(
                "'MatrixMarket' format supports only 2D arrays and not vectors")
        else:
            io.mmwrite(full_out_file_name, matrix, precision=None)
    elif output_format == "numpy":
        np.save(full_out_file_name, matrix)
    elif output_format == "joblib":
        joblib.dump(matrix, full_out_file_name)
    elif output_format == "text":
        with open(full_out_file_name, "w") as f:
            if len(matrix.shape) == 1:
                for x in matrix:
                    f.write("%s\n" % (x))
            else:
                raise Exception(
                    "'text' format supports only one-dimensional arrays and not matrices")
    logger.info("Written file: %s" % full_out_file_name)
Example 14: SOP
def SOP(alpha, teta, nbBasket, nbReco):
    data = load()
    ###############################################################
    # CREATE MODELS
    ###############################################################
    print 'Create the model based on the training set'
    modelSOP = processing.SOPRecoModel(data.getUserItemMatrix(), alpha, teta)
    modelSOP.launch()
    ###############################################################
    # SET RECOMMENDATION
    ###############################################################
    if nbBasket == -1:
        evalSOP = processing.Evaluation(modelSOP, data.getBasketItemList(), nbReco)
    else:
        evalSOP = processing.Evaluation(modelSOP, data.getBasketItemList()[:nbBasket], nbReco)
    ###############################################################
    # LAUNCH RECOMMENDATION + SAVE RESULTS
    ###############################################################
    t = time.time()
    evalSOP.newEval()
    SOPTime = time.time() - t
    mmwrite('SOPPerf_a%s_t%s_nb%s_nr%s' % (alpha, teta, nbBasket, nbReco), evalSOP.perf)
    print 'SOP Execution time:', SOPTime
    print 'Performances : '
    print evalSOP.testNames
    print evalSOP.meanPerf()
    evalSOP.savePerf('SOPPerf_a%s_t%s_nb%s_nr%s.txt' % (alpha, teta, nbBasket, nbReco))
    return evalSOP
Example 15: save_new_ref
def save_new_ref(filename, data):
    """ Saves a new version of the reference data, and backs up the old """
    ext = filename.split('.')[-1]
    if data is None:
        print("WARNING: Error generating file: %s" % filename)
        print("Skipped... try again.")
        return
    if os.path.exists(filename):
        os.system('mv %s %s' % (filename, BACKUP_DIR))
    if ext in ['h5', 'lh5']:
        if scipy.sparse.issparse(data):
            data = data.toarray()
        Serializer.SaveData(filename, data)
    elif ext == 'mtx':
        io.mmwrite(filename, data)
    elif ext == 'pkl':
        f = open(filename, 'w')
        pickle.dump(data, f)  # pickle.dump expects (obj, file_object)
        f.close()
    else:
        raise ValueError('Could not understand extension (.%s) for %s' % (ext, filename))
    return