

Python io.mmwrite Function Code Examples

This article collects typical usage examples of the scipy.io.mmwrite function in Python. If you are wondering what mmwrite does, how exactly to call it, or what real-world uses look like, the curated code examples below should help.


The following presents 15 code examples of the mmwrite function, drawn from open-source projects and ordered by popularity.
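Before the project examples, here is a minimal round-trip sketch of what mmwrite does: it writes a dense or sparse matrix to a Matrix Market (.mtx) text file, which scipy.io.mmread can load back. The matrix and the file name example_matrix below are illustrative only, not taken from any of the projects:

import numpy as np
from scipy import sparse
from scipy.io import mmwrite, mmread

# build a small random sparse matrix and write it in Matrix Market format;
# mmwrite appends the ".mtx" extension automatically when it is missing
A = sparse.random(5, 5, density=0.2, format='csr')
mmwrite('example_matrix', A, comment='a 5x5 random sparse matrix')

# round-trip check: mmread returns a COO matrix, so convert before comparing
B = mmread('example_matrix.mtx').tocsr()
assert np.allclose(A.toarray(), B.toarray())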

Example 1: RWWR

def RWWR(alpha, nbBasket, nbReco):
    data = load()
    ###############################################################
    # CREATE MODELS
    ###############################################################
    print('Create the model based on the training set')

    modelRWWR = processing.RandomWalkWithRestartRecoModel(data.getUserItemMatrix(), alpha)

    ###############################################################
    # SET RECOMMENDATION
    ###############################################################
    if nbBasket == -1:
        evalRWWR = processing.Evaluation(modelRWWR, data.getBasketItemList(), nbReco)
    else:
        evalRWWR = processing.Evaluation(modelRWWR, data.getBasketItemList()[:nbBasket], nbReco)

    ###############################################################
    # LAUNCH RECOMMENDATION + SAVE RESULTS
    ###############################################################
    t = time.time()
    evalRWWR.newEval()
    RWWRTime = time.time() - t
    # persist the evaluation performance matrix in Matrix Market format
    mmwrite('RWWR_a%s_nb%s' % (alpha, nbBasket), evalRWWR.perf)

    print('RWWR Execution time:', RWWRTime)
    print('Performances:')
    print(evalRWWR.testNames)
    print(evalRWWR.meanPerf())
    evalRWWR.savePerf('RWWR_a%s_nb%s' % (alpha, nbBasket))
    return evalRWWR
Author: kfrancoi | Project: phd-retailreco | Lines: 31 | Source: Experiences_modif.py

Example 2: genJaccard

def genJaccard(feature_matrix):
    jaccard_matrix_pre = []

    # jaccard_matrix_pre is a list of arrays holding the non-zero indices of each article in the corpus
    for i in feature_matrix[0:test_num]:
        indices = np.flatnonzero(i)
        jaccard_matrix_pre.append(indices)

    S = sparse.dok_matrix((test_num, test_num))
    t0 = time.time()
    numi = 0
    for i in jaccard_matrix_pre:
        jnum = 0
        for j in jaccard_matrix_pre[0:numi+1]:  # compute only n choose 2 pairs instead of n^2
            divisor = float(len(set(i).union(set(j))))
            if divisor != 0:
                actual_jaccard = float(len(set(i).intersection(set(j)))) / divisor
                if actual_jaccard != 0 and actual_jaccard != 1:
                    S[numi, jnum] = actual_jaccard
            jnum = jnum + 1
        numi = numi + 1
    with open('pickled_minhash/actual_jaccard_matrix_small.mtx', 'wb') as f:
        # size of feature_matrix_large: 1261 x 19043
        io.mmwrite(f, S)

    print("TIME to generate jaccard_matrix: {}".format(time.time() - t0))
Author: rosensts | Project: reuters-articles-machine-learning | Lines: 27 | Source: minhash.py

Example 3: test1D2

    def test1D2():
        spl = splineRefMat(DIM_1D)
    #    list_r = list(np.random.random(20))
        list_r = [0.1,0.2,0.3]

        nx = 3
        px = 2
        geo = line(n=[nx], p=[px])

        nrb     = geo[0]
        knots   = nrb.knots[0]
        n       = nrb.shape[0]
        p       = nrb.degree[0]
        P       = nrb.points

        M = spl.construct(list_r, p, n, knots)
        from scipy.io import mmwrite
        mmwrite('M.mtx', M)
        R = M.dot(nrb.points[:,0])

        geo = line(n=[nx], p=[px])
        geo.refine(id=0, list_t=[list_r])
        nrb     = geo[0]
        P = np.asarray(nrb.points[:,0])

        assert(np.allclose(P,R))
        print("test1D2: OK")
Author: sommaric | Project: caid | Lines: 27 | Source: extraction.py

Example 4: run

    def run(self, ratio, input_db, output_mat):
        db = sqlite3.connect(input_db)
        # assume no empty users
        users = db.execute("""SELECT Users.[Id] FROM Users""").fetchall()
        # pick <ratio> of them for training db, pick <ratio/10> of them for test db
        train_ids = []
        test_ids = []

        test_threshold = ratio/10
        train_threshold = test_threshold + ratio
        for u in users:
            rnd = random.random()
            if (rnd <= test_threshold):
                test_ids.append(u[0])
            elif (rnd <= train_threshold):
                train_ids.append(u[0])

        train_matrix = self.data_to_matrix(db, train_ids).tocsc()
        test_matrix = self.data_to_matrix(db, test_ids).tocsc()

        (train_matrix, test_matrix) = self.trim_matrices(train_matrix, test_matrix)

        savemat(output_mat, {'train': train_matrix, 'test': test_matrix}, oned_as='row')
        mmwrite(output_mat + '.train', train_matrix)
        mmwrite(output_mat + '.test', test_matrix)
        print("Done!")
Author: vosen | Project: Juiz | Lines: 26 | Source: pick_test_db.py

Example 5: __init__

def __init__(self, programEntities, sim=ssd.correlation):
    cleaner = DataCleaner()
    nusers = len(programEntities.userIndex.keys())
    fin = open("../Data/users.csv", 'r')
    colnames = fin.readline().strip().split(",")
    self.userMatrix = ss.dok_matrix((nusers, len(colnames) - 1))
    for line in fin:
        cols = line.strip().split(",")
        # consider the user only if he exists in train.csv
        if cols[0] in programEntities.userIndex:
            i = programEntities.userIndex[cols[0]]
            self.userMatrix[i, 0] = cleaner.getLocaleId(cols[1])
            self.userMatrix[i, 1] = cleaner.getBirthYearInt(cols[2])
            self.userMatrix[i, 2] = cleaner.getGenderId(cols[3])
            self.userMatrix[i, 3] = cleaner.getJoinedYearMonth(cols[4])
            self.userMatrix[i, 4] = cleaner.getCountryId(cols[5])
            self.userMatrix[i, 5] = cleaner.getTimezoneInt(cols[6])
    fin.close()
    # normalize the user matrix
    self.userMatrix = normalize(self.userMatrix, norm="l1", axis=0, copy=False)
    sio.mmwrite("../Models/US_userMatrix", self.userMatrix)
    # calculate the user similarity matrix and save it for later
    self.userSimMatrix = ss.dok_matrix((nusers, nusers))
    for i in range(0, nusers):
        self.userSimMatrix[i, i] = 1.0
    for u1, u2 in programEntities.uniqueUserPairs:
        i = programEntities.userIndex[u1]
        j = programEntities.userIndex[u2]
        if (i, j) not in self.userSimMatrix:
            usim = sim(self.userMatrix.getrow(i).todense(),
                       self.userMatrix.getrow(j).todense())
            self.userSimMatrix[i, j] = usim
            self.userSimMatrix[j, i] = usim
    sio.mmwrite("../Models/US_userSimMatrix", self.userSimMatrix)
Author: ChrisBg | Project: mlia-examples | Lines: 34 | Source: BaseData.py

Example 6: make_author_vectors

def make_author_vectors(crawl_fname, doc_vec_fname, auth_vec_fname):
    docs = np.load(doc_vec_fname)
    doc_vecs = docs["vectors"][()]
    # Convert to LIL, because modifying CSR is slow
    doc_vecs = doc_vecs.tolil()
    
    # Create mapping from label (=DOI) to row number (=doc vector)  
    doi2n = dict((l,i) for i,l in enumerate(docs["labels"]))
    
    # Collect authors         
    tree = etree.parse(crawl_fname)
    authors = np.array(list(set(tree.xpath("//author/text()"))))

    # Create empty author vectors
    shape = (len(authors), doc_vecs.shape[1])
    auth_vecs = sp.lil_matrix(shape)     
    
    # Create mapping from authors to row number (=author vector)
    auth2n = dict((a,i) for i,a in enumerate(authors))
    
    ## author to group mapping
    ##auth2group = {}
    
    # Fill author vectors by adding doc vectors 
    for item in tree.findall("//item"):
        author = item.find("author").text
        ##group = item.find("group")
        ##auth2group[author] = group
        url = item.find("url").text
        query = urlparse.urlparse(url).query
        doi = urlparse.parse_qs(query)["doi"][0]
        log.debug(u"DOI={} author={}".format(doi, author))
        
        try:
            auth_vecs[auth2n[author]] += doc_vecs[doi2n[doi]]
        except KeyError:
            log.warning(u"No document with DOI={} for author {}".format(
                doi, author))
            
    auth_vecs = auth_vecs.tocsr()
    
    ##group_labels = [auth2group[auth] for auth in authors]
           
    log.info("saving matrix in Numpy format to " + auth_vec_fname)
    np.savez(auth_vec_fname, 
             vectorizer=docs["vectorizer"],
             vectors=auth_vecs,
             author_labels=authors,
             ##group_labels=group_labels
             ) 
    
    base_fname = splitext(auth_vec_fname)[0]
    
    mm_fname = base_fname + ".mtx"
    log.info("saving matrix in Matrix Market format to " + mm_fname)
    mmwrite(mm_fname, auth_vecs, "IDIScape document vectors", "integer")
    
    label_fname = base_fname + "_labels.txt"
    log.info("saving labels to " + label_fname)
    with open(label_fname, "w", encoding="utf8") as f:  # the original passed "utf8" positionally (codecs.open style); builtin open needs the encoding keyword
        f.write(u"\n".join(authors))
Author: emsrc | Project: idiscape | Lines: 60 | Source: make_author_vecs.py

Example 7: RW_POP

def RW_POP(alpha, nbBasket, nbReco):
    data = load()
    ###############################################################
    # CREATE MODELS
    ###############################################################
    print('Create the model based on the training set')

    modelRW = processing.BasketRandomWalk_POP(data.getUserItemMatrix(), alpha)

    ###############################################################
    # SET RECOMMENDATION
    ###############################################################
    if nbBasket == -1:
        evalRW = processing.Evaluation(modelRW, data.getBasketItemList(), nbReco)
    else:
        evalRW = processing.Evaluation(modelRW, data.getBasketItemList()[:nbBasket], nbReco)

    ###############################################################
    # LAUNCH RECOMMENDATION + SAVE RESULTS
    ###############################################################
    t = time.time()
    evalRW.newEval()
    RWTime = time.time() - t
    mmwrite(resultFolder + 'RW_POP_a%s_nb%s' % (alpha, nbBasket), evalRW.perf)

    print('RW_POP Execution time:', RWTime)
    print('Performances:')
    print(evalRW.testNames)
    print(evalRW.computePerf())
    evalRW.savePerf(resultFolder + 'RW_POP_a%s_nb%s.txt' % (alpha, nbBasket))
    return evalRW
Author: kfrancoi | Project: phd-retailreco | Lines: 31 | Source: Experiences.py

Example 8: make_doc_vectors

def make_doc_vectors(fname_pat, out_fname):
    fnames = glob(fname_pat)
    labels = [splitext(basename(fn))[0] for fn in fnames]
    stop_words = frozenset(list(ENGLISH_STOP_WORDS) + OTHER_STOPWORDS)
    vectorizer = CountVectorizer(input="filename", 
                                 ngram_range=(1,3),
                                 min_df=5, 
                                 max_df=0.7,
                                 stop_words=stop_words,
                                 token_pattern=r"(?u)\b[A-Za-z]\w+\b")
    vectors = vectorizer.fit_transform(fnames)
    
    log.info("saving matrix in Numpy format to " + out_fname)
    np.savez(out_fname, 
             vectorizer=vectorizer,
             vectors=vectors,
             labels=labels)
    
    base_fname = splitext(out_fname)[0]
    
    mm_fname = base_fname + ".mtx"
    log.info("saving matrix in Matrix Market format to " + mm_fname)
    mmwrite(mm_fname, vectors, "IDIScape document vectors", "integer")

    feat_fname = base_fname + "_features.txt"
    log.info("saving features to " + feat_fname)
    feat_names = vectorizer.get_feature_names() 
    with open(feat_fname, "w", encoding="utf8") as f:  # "utf8" must be passed as the encoding keyword to the builtin open
        f.write(u"\n".join(feat_names))
    
    label_fname = base_fname + "_labels.txt"
    log.info("saving labels to " + label_fname)
    with open(label_fname, "w", encoding="utf8") as f:
        f.write(u"\n".join(labels))
Author: emsrc | Project: idiscape | Lines: 32 | Source: make_doc_vectors.py

Example 9: main

def main():
    """
        Main entry point to script to perform kmeans.

        Returns:

        - `0` or `1` on success or failure respectively.
        - Saves `centroids`, `centroiddict`, and `clusters` in working dir.

    """
    parser = gen_args()
    args = parser.parse_args()
    sessionid = args.sessionid
    data = spio.mmread(args.data).tocsc()
    logger = logging.getLogger(__name__)
    logger.addHandler(logging.StreamHandler())
    if args.verbose:
        logger.setLevel(logging.DEBUG)
    if args.k:
        k = args.k
    kmeans = KMeans(data, k, args.n, args.delta, args.randomcentroids, \
                    args.classical, args.verbose)
    result = kmeans.run()
    clusters = result['clusters']
    centroids = result['centroids']
    centroiddict = result['centroiddict']
    cPickle.dump(clusters, open("data_clusters_" + sessionid + '.pck', 'w'))
    cPickle.dump(centroiddict, open("centroid_dict_" + \
                                    sessionid + '.pck', 'w'))
    spio.mmwrite(open("data_centroids_" + sessionid + '.mtx', 'w'), \
                 centroids, comment="CSC Matrix", field='real')
    logger.info(" %d Clusters Generated ", len(clusters))
    return 0
Author: eshwaran | Project: matsya | Lines: 33 | Source: kmeans.py

Example 10: __init__

def __init__(self, programEvents):
    nevents = len(programEvents.eventIndex.keys())
    self.eventPopularity = ss.dok_matrix((nevents, 5))
    self.eventAttendees = collections.defaultdict(list)
    f = open("/users/chaitanya/PyCharmProjects/EventRec/data/event_attendees.csv", 'r')
    f.readline()  # skip header

    for line in f:
        cols = line.strip().split(",")
        eventId = cols[0]

        if eventId in programEvents.eventIndex:
            i = programEvents.eventIndex[eventId]
            self.eventPopularity[i, 0] = len(cols[1].split(" ")) - len(cols[4].split(" "))  # number of yes minus no responses
            self.eventPopularity[i, 1] = len(cols[3].split(" "))  # number of invited folks

            self.eventAttendees[i].append(cols[1].split(" "))  # list of yes folks
            self.eventAttendees[i].append(cols[2].split(" "))  # list of no folks
            self.eventAttendees[i].append(cols[3].split(" "))  # list of invited folks

    f.close()

    self.eventPopularity = normalize(self.eventPopularity, norm="l1", axis=0, copy=False)
    sio.mmwrite("/users/chaitanya/PyCharmProjects/EventRec/Models/EA_eventPopularity", self.eventPopularity)
    cPickle.dump(self.eventAttendees, open("/users/chaitanya/PyCharmProjects/EventRec/Models/PE_eventAttendees.pkl", 'wb'))
Author: chaitanyamalaviya | Project: recsys | Lines: 29 | Source: rabbi.py

Example 11: encode

def encode():
    """
    Generate extra features from pairs, triplets, and common
    quadruplets of the existing features and then save those features
    in a sparse matrix to disk.
    """
    dftrain = load_dataframe('train')
    dftest = load_dataframe('test')
    lentrain = len(dftrain)
    all_data = np.vstack((dftrain.iloc[:, 1:-1], dftest.iloc[:, 1:-1]))  # .ix was removed from pandas; .iloc keeps the positional slicing
    np.array(dftrain.ACTION).dump('{}/train_truth.dat'.format(ddir))
    
    dp = group_data(all_data, degree=2, remove_unique=True)
    dt = group_data(all_data, degree=3, remove_unique=True)
    dq = group_data(all_data, degree=4, remove_unique=True)
    dq = remove_rare(dq, 15)

    X = all_data[:lentrain]
    X_2 = dp[:lentrain]
    X_3 = dt[:lentrain]
    X_4 = dq[:lentrain]
    X_train_all = np.hstack((X, X_2, X_3, X_4))
    mmwrite('{}/train_encoded'.format(ddir), X_train_all)

    X_test = all_data[lentrain:]
    X_test_2 = dp[lentrain:]
    X_test_3 = dt[lentrain:]
    X_test_4 = dq[lentrain:]
    X_test_all = np.hstack((X_test, X_test_2, X_test_3, X_test_4))
    mmwrite('{}/test_encoded'.format(ddir), X_test_all)
Author: jamesjohnson92 | Project: kaggle-amazonaccess | Lines: 30 | Source: utility.py

Example 12: get_content_similarity_scores

def get_content_similarity_scores(readmes, dataset_dir, profile="tfidf",
                                  similarity="cos"):
    """Return CSR matrix of similarity_{r,r} for all r in `readmes`.

       `dataset_dir`      the directory where the similarity scores are
       `profile`    bool or tfidf
       `similarity` cos or ijd (inverse Jacquard Distance)
    """
    if profile == "tfidf":
        sim_fn = join(dataset_dir, TF_IDF_FN)

    if exists(sim_fn):
        return mmread(sim_fn).tocsr()

    if profile == "bool":
        #readme_words = COUNTVECTORIZER readmes
        pass
    else:
        tfidf = TfidfVectorizer(input='file', #sublinear_tf=True,
                                max_df=0.5, stop_words='english',
                                decode_error="ignore")
        #max_df=0.5: if a word occurs in more than half of the readmes it is
        #            ignored
        readme_words = tfidf.fit_transform(readmes)

    if similarity == "cos":
        similarity_scores = csr_matrix(cosine_similarity(readme_words))
    else:
        # similarity_scores = csr_matrix(ijd(readme_words))
        pass

    mmwrite(sim_fn, similarity_scores, comment=profile+"_"+similarity+"_similarity_{r,r}")
    return similarity_scores
Author: fenekku | Project: Masters | Lines: 33 | Source: content_similarity_scores.py

Example 13: store_matrix

def store_matrix(matrix='',
                 output_dir_path='',
                 out_file_name='',
                 output_format=''):
    """store_matrix."""
    if not os.path.exists(output_dir_path):
        os.mkdir(output_dir_path)
    full_out_file_name = os.path.join(output_dir_path, out_file_name)
    if output_format == "MatrixMarket":
        if len(matrix.shape) == 1:
            raise Exception(
                "'MatrixMarket' format supports only two-dimensional arrays, "
                "not vectors")
        else:
            io.mmwrite(full_out_file_name, matrix, precision=None)
    elif output_format == "numpy":
        np.save(full_out_file_name, matrix)
    elif output_format == "joblib":
        joblib.dump(matrix, full_out_file_name)
    elif output_format == "text":
        with open(full_out_file_name, "w") as f:
            if len(matrix.shape) == 1:
                for x in matrix:
                    f.write("%s\n" % (x))
            else:
                raise Exception(
                    "'text' format supports only one-dimensional arrays, "
                    "not matrices")
    logger.info("Written file: %s" % full_out_file_name)
Author: fabriziocosta | Project: EDeN | Lines: 29 | Source: util.py

Example 14: SOP

def SOP(alpha, teta, nbBasket, nbReco):
    data = load()
    ###############################################################
    # CREATE MODELS
    ###############################################################
    print('Create the model based on the training set')

    modelSOP = processing.SOPRecoModel(data.getUserItemMatrix(), alpha, teta)
    modelSOP.launch()

    ###############################################################
    # SET RECOMMENDATION
    ###############################################################
    if nbBasket == -1:
        evalSOP = processing.Evaluation(modelSOP, data.getBasketItemList(), nbReco)
    else:
        evalSOP = processing.Evaluation(modelSOP, data.getBasketItemList()[:nbBasket], nbReco)

    ###############################################################
    # LAUNCH RECOMMENDATION + SAVE RESULTS
    ###############################################################
    t = time.time()
    evalSOP.newEval()
    SOPTime = time.time() - t
    mmwrite('SOPPerf_a%s_t%s_nb%s_nr%s' % (alpha, teta, nbBasket, nbReco), evalSOP.perf)

    print('SOP Execution time:', SOPTime)
    print('Performances:')
    print(evalSOP.testNames)
    print(evalSOP.meanPerf())
    evalSOP.savePerf('SOPPerf_a%s_t%s_nb%s_nr%s.txt' % (alpha, teta, nbBasket, nbReco))
    return evalSOP
Author: kfrancoi | Project: phd-retailreco | Lines: 33 | Source: Experiences_modif.py

Example 15: save_new_ref

def save_new_ref(filename, data):
    """ Saves a new version of the reference data, and backs up the old """
    
    ext = filename.split('.')[-1]
    
    if data is None:
        print("WARNING: Error generating file: %s" % filename)
        print("Skipped... try again.")
        return
    
    if os.path.exists(filename):
        os.system( 'mv %s %s' % (filename, BACKUP_DIR) )
    
    if ext in ['h5', 'lh5']:
        if scipy.sparse.issparse(data):
            data = data.toarray()
        Serializer.SaveData(filename, data)
    elif ext == 'mtx':
        io.mmwrite(filename, data)
    elif ext == 'pkl':
        f = open(filename, 'wb')  # binary mode for pickle
        pickle.dump(data, f)  # pickle.dump expects (obj, file); the original arguments were swapped
        f.close()
    else:
        raise ValueError('Could not understand extension (.%s) for %s' % (ext, filename))
    
    return
Author: AgnesHH | Project: msmbuilder | Lines: 27 | Source: generate_tpt_ref.py


Note: The scipy.io.mmwrite examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are taken from open-source projects contributed by their respective developers, and copyright remains with the original authors; consult each project's license before using or redistributing the code. Do not republish without permission.