本文整理汇总了C++中SuperLU_timer_函数的典型用法代码示例。如果您正苦于以下问题:C++ SuperLU_timer_函数的具体用法?C++ SuperLU_timer_怎么用?C++ SuperLU_timer_使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了SuperLU_timer_函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: NewNsuper
int NewNsuper(const int pnum, mutex_t *lock, int *data)
{
register int i;
#ifdef PROFILE
double t = SuperLU_timer_();
#endif
#if ( MACH==SUN )
mutex_lock(lock);
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_lock(lock);
#elif ( MACH==SGI || MACH==ORIGIN )
#pragma critical lock(lock)
#elif ( MACH==CRAY_PVP )
#pragma _CRI guard (*lock)
#endif
{
i = ++(*data);
}
#if ( MACH==SUN )
mutex_unlock(lock);
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock(lock);
#elif ( MACH==CRAY_PVP )
#pragma _CRI endguard (*lock)
#endif
#ifdef PROFILE
Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
#endif
return i;
}
示例2: main
main()
{
/* Parameters */
#define NMAX 100
#define ITS 10000
int i, j;
double alpha, avg, t1, t2, tnotim;
double x[NMAX], y[NMAX];
double SuperLU_timer_();
/* Initialize X and Y */
for (i = 0; i < NMAX; ++i) {
x[i] = 1.0 / (double)(i+1);
y[i] = (double)(NMAX - i) / (double)NMAX;
}
alpha = 0.315;
/* Time 1,000,000 DAXPY operations */
t1 = SuperLU_timer_();
for (j = 0; j < ITS; ++j) {
for (i = 0; i < NMAX; ++i)
y[i] += alpha * x[i];
alpha = -alpha;
}
t2 = SuperLU_timer_();
printf("Time for 1,000,000 DAXPY ops = %10.3g seconds\n", t2-t1);
if ( t2-t1 > 0. )
printf("DAXPY performance rate = %10.3g mflops\n", 2./(t2-t1));
else
printf("*** Error: Time for operations was zero\n");
tnotim = t2 - t1;
/* Time 1,000,000 DAXPY operations with SuperLU_timer_()
in the outer loop */
t1 = SuperLU_timer_();
for (j = 0; j < ITS; ++j) {
for (i = 0; i < NMAX; ++i)
y[i] += alpha * x[i];
alpha = -alpha;
t2 = SuperLU_timer_();
}
/* Compute the time in milliseconds used by an average call to
SuperLU_timer_(). */
printf("Including DSECND, time = %10.3g seconds\n", t2-t1);
avg = ( (t2 - t1) - tnotim )*1000. / (double)ITS;
printf("Average time for DSECND = %10.3g milliseconds\n", avg);
/* Compute the equivalent number of floating point operations used
by an average call to DSECND. */
if ( tnotim > 0. )
printf("Equivalent floating point ops = %10.3g ops\n",
1000.*avg / tnotim);
mysub(NMAX, x, y);
return 0;
}
示例3: NewNsuper
int NewNsuper(const int pnum, pxgstrf_shared_t *pxgstrf_shared, int *data)
{
register int i;
mutex_t *lock = &pxgstrf_shared->lu_locks[NSUPER_LOCK];
Gstat_t *Gstat = pxgstrf_shared->Gstat;
#ifdef PROFILE
double t = SuperLU_timer_();
#endif
#if ( MACH==SUN )
mutex_lock(lock);
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_lock(lock);
#elif ( MACH==SGI || MACH==ORIGIN )
#pragma critical lock(lock)
#elif ( MACH==CRAY_PVP )
#pragma _CRI guard (*lock)
#elif ( MACH==OPENMP )
#pragma omp critical ( NSUPER_LOCK )
#endif
{
i = ++(*data);
}
#if ( MACH==SUN )
mutex_unlock(lock);
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock(lock);
#elif ( MACH==CRAY_PVP )
#pragma _CRI endguard (*lock)
#endif
#ifdef PROFILE
Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
#endif
return i;
}
示例4: pzgstrs_Bglobal
void
pzgstrs_Bglobal(int_t n, LUstruct_t *LUstruct, gridinfo_t *grid,
doublecomplex *B, int_t ldb, int nrhs,
SuperLUStat_t *stat, int *info)
{
Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
LocalLU_t *Llu = LUstruct->Llu;
doublecomplex alpha = {1.0, 0.0};
doublecomplex zero = {0.0, 0.0};
doublecomplex *lsum; /* Local running sum of the updates to B-components */
doublecomplex *x; /* X component at step k. */
doublecomplex *lusup, *dest;
doublecomplex *recvbuf, *tempv;
doublecomplex *rtemp; /* Result of full matrix-vector multiply. */
int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr;
int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */
Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */
int_t kcol, krow, mycol, myrow;
int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr;
int_t nb, nlb, nub, nsupers;
int_t *xsup, *lsub, *usub;
int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/
int Pc, Pr, iam;
int knsupc, nsupr;
int ldalsum; /* Number of lsum entries locally owned. */
int maxrecvsz, p, pi;
int_t **Lrowind_bc_ptr;
doublecomplex **Lnzval_bc_ptr;
MPI_Status status;
#if defined (ISEND_IRECV) || defined (BSEND)
MPI_Request *send_req, recv_req;
#endif
/*-- Counts used for L-solve --*/
int_t *fmod; /* Modification count for L-solve. */
int_t **fsendx_plist = Llu->fsendx_plist;
int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */
int_t *frecv; /* Count of modifications to be recv'd from
processes in this row. */
int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */
int_t nleaf = 0, nroot = 0;
/*-- Counts used for U-solve --*/
int_t *bmod; /* Modification count for L-solve. */
int_t **bsendx_plist = Llu->bsendx_plist;
int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */
int_t *brecv; /* Count of modifications to be recv'd from
processes in this row. */
int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. */
double t;
#if ( DEBUGlevel>=2 )
int_t Ublocks = 0;
#endif
int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */
t = SuperLU_timer_();
/* Test input parameters. */
*info = 0;
if ( n < 0 ) *info = -1;
else if ( nrhs < 0 ) *info = -9;
if ( *info ) {
pxerr_dist("PZGSTRS_BGLOBAL", grid, -*info);
return;
}
/*
* Initialization.
*/
iam = grid->iam;
Pc = grid->npcol;
Pr = grid->nprow;
myrow = MYROW( iam, grid );
mycol = MYCOL( iam, grid );
nsupers = Glu_persist->supno[n-1] + 1;
xsup = Glu_persist->xsup;
Lrowind_bc_ptr = Llu->Lrowind_bc_ptr;
Lnzval_bc_ptr = Llu->Lnzval_bc_ptr;
nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */
stat->ops[SOLVE] = 0.0;
Llu->SolveMsgSent = 0;
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(iam, "Enter pzgstrs_Bglobal()");
#endif
/* Save the count to be altered so it can be used by
subsequent call to PDGSTRS_BGLOBAL. */
if ( !(fmod = intMalloc_dist(nlb)) )
ABORT("Calloc fails for fmod[].");
for (i = 0; i < nlb; ++i) fmod[i] = Llu->fmod[i];
if ( !(frecv = intMalloc_dist(nlb)) )
ABORT("Malloc fails for frecv[].");
Llu->frecv = frecv;
#if defined (ISEND_IRECV) || defined (BSEND)
k = SUPERLU_MAX( Llu->nfsendx, Llu->nbsendx ) + nlb;
if ( !(send_req = (MPI_Request*) SUPERLU_MALLOC(k*sizeof(MPI_Request))) )
//.........这里部分代码省略.........
示例5: mpc_my_threadnum
//.........这里部分代码省略.........
#endif
#if ( DEBUGlevel>=1 )
printf("(%d) thr_arg-> pnum %d, info %d\n", pnum, thr_arg->pnum, thr_arg->info);
#endif
singular = 0;
m = A->nrow;
n = A->ncol;
lsub = Glu->lsub;
xlsub = Glu->xlsub;
xlsub_end = Glu->xlsub_end;
/* Allocate and initialize the per-process working storage. */
if ( (*info = pzgstrf_WorkInit(m, panel_size, &iwork, &dwork)) ) {
*info += pzgstrf_memory_use(Glu->nzlmax, Glu->nzumax, Glu->nzlumax);
return 0;
}
pxgstrf_SetIWork(m, panel_size, iwork, &segrep, &parent, &xplore,
&repfnz, &panel_lsub, &marker, &lbusy);
pzgstrf_SetRWork(m, panel_size, dwork, &dense, &tempv);
/* New data structures to facilitate parallel algorithm */
spa_marker = intMalloc(m * panel_size);
w_lsub_end = intMalloc(panel_size);
ifill (spa_marker, m * panel_size, EMPTY);
ifill (marker, m * NO_MARKER, EMPTY);
ifill (lbusy, m, EMPTY);
jcol = EMPTY;
marker1 = marker + m;
marker2 = marker + 2*m;
#ifdef PROFILE
stime = SuperLU_timer_();
#endif
/* -------------------------
Main loop: repeatedly ...
------------------------- */
while ( pxgstrf_shared->tasks_remain > 0 ) {
#ifdef PROFILE
TIC(t);
#endif
/* Get a panel from the scheduler. */
pxgstrf_scheduler(pnum, n, etree, &jcol, &bcol, pxgstrf_shared);
#if ( DEBUGlevel>=1 )
if ( jcol>=LOCOL && jcol<=HICOL ) {
printf("(%d) Scheduler(): jcol %d, bcol %d, tasks_remain %d\n",
pnum, jcol, bcol, pxgstrf_shared->tasks_remain);
fflush(stdout);
}
#endif
#ifdef PROFILE
TOC(t2, t);
Gstat->procstat[pnum].skedtime += t2;
#endif
if ( jcol != EMPTY ) {
w = pxgstrf_shared->pan_status[jcol].size;
#if ( DEBUGlevel>=3 )
printf("P%2d got panel %5d-%5d\ttime %.4f\tpanels_left %d\n",
pnum, jcol, jcol+w-1, SuperLU_timer_(),
示例6: get_perm_c_dist
/*! \brief
*
* <pre>
* Purpose
* =======
*
* GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple
* minimum degree ordering code by Joseph Liu to matrix A'*A or A+A',
* or using approximate minimum degree column ordering by Davis et. al.
* The LU factorization of A*Pc tends to have less fill than the LU
* factorization of A.
*
* Arguments
* =========
*
* ispec (input) colperm_t
* Specifies what type of column permutation to use to reduce fill.
* = NATURAL: natural ordering (i.e., Pc = I)
* = MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A
* = MMD_ATA: minimum degree ordering on structure of A'*A
* = METIS_AT_PLUS_A: MeTis on A'+A
*
* A (input) SuperMatrix*
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
* of the linear equations is A->nrow. Currently, the type of A
* can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
* In the future, more general A can be handled.
*
* perm_c (output) int*
* Column permutation vector of size A->ncol, which defines the
* permutation matrix Pc; perm_c[i] = j means column i of A is
* in position j in A*Pc.
* </pre>
*/
void
get_perm_c_dist(int_t pnum, int_t ispec, SuperMatrix *A, int_t *perm_c)
{
NCformat *Astore = A->Store;
int_t m, n, bnz = 0, *b_colptr, *b_rowind, i;
int_t delta, maxint, nofsub, *invp;
int_t *dhead, *qsize, *llist, *marker;
double t, SuperLU_timer_();
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(pnum, "Enter get_perm_c_dist()");
#endif
m = A->nrow;
n = A->ncol;
t = SuperLU_timer_();
switch ( ispec ) {
case NATURAL: /* Natural ordering */
for (i = 0; i < n; ++i) perm_c[i] = i;
#if ( PRNTlevel>=1 )
if ( !pnum ) printf(".. Use natural column ordering\n");
#endif
return;
case MMD_AT_PLUS_A: /* Minimum degree ordering on A'+A */
if ( m != n ) ABORT("Matrix is not square");
at_plus_a_dist(n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
t = SuperLU_timer_() - t;
/*printf("Form A'+A time = %8.3f\n", t);*/
#if ( PRNTlevel>=1 )
if ( !pnum ) printf(".. Use minimum degree ordering on A'+A.\n");
#endif
break;
case MMD_ATA: /* Minimum degree ordering on A'*A */
getata_dist(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
t = SuperLU_timer_() - t;
/*printf("Form A'*A time = %8.3f\n", t);*/
#if ( PRNTlevel>=1 )
if ( !pnum ) printf(".. Use minimum degree ordering on A'*A\n");
#endif
break;
case METIS_AT_PLUS_A: /* METIS ordering on A'+A */
if ( m != n ) ABORT("Matrix is not square");
at_plus_a_dist(n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
if ( bnz ) { /* non-empty adjacency structure */
get_metis(n, bnz, b_colptr, b_rowind, perm_c);
} else { /* e.g., diagonal matrix */
for (i = 0; i < n; ++i) perm_c[i] = i;
SUPERLU_FREE(b_colptr);
/* b_rowind is not allocated in this case */
}
#if ( PRNTlevel>=1 )
if ( !pnum ) printf(".. Use METIS ordering on A'+A\n");
#endif
return;
//.........这里部分代码省略.........
示例7: pxgstrf_scheduler
//.........这里部分代码省略.........
} /* while */
}
} else {
/*
* jcol was EMPTY; Try to get a panel from the task Q.
*/
while ( 1 ) {
/*>>if ( (j = Dequeue(taskq, &item)) == EMPTY ) {*/
if ( taskq->count <= 0 ) {
jcol = EMPTY;
break;
} else {
jcol = taskq->queue[taskq->head++];
--taskq->count;
if ( STATE( jcol ) >= CANGO ) { /* CANGO or CANPIPE */
#ifdef DEBUG
printf("(%d) Dequeue[2] Got %d, STATE %d, Qcount %d\n",
pnum, jcol, STATE(jcol), j);
#endif
#ifdef PROFILE
if (STATE( jcol ) == CANGO) ++(Gstat->panhows[NOPIPE]);
else ++(Gstat->panhows[PIPE]);
#endif
break;
}
}
} /* while */
}
/*
* Update the status of the new panel "jcol" and its parent "dad".
*/
if ( jcol != EMPTY ) {
--pxgstrf_shared->tasks_remain;
#ifdef DOMAINS
if ( in_domain[jcol] == TREE_DOMAIN ) {
/* Dequeue the first descendant of this domain */
*bcol = taskq->queue[taskq->head++];
--taskq->count;
} else
#endif
{
STATE( jcol ) = BUSY;
w = pxgstrf_shared->pan_status[jcol].size;
for (j = jcol; j < jcol+w; ++j) pxgstrf_shared->spin_locks[j] = 1;
dad = DADPANEL (jcol);
if ( dad < n && pxgstrf_shared->pan_status[dad].ukids == 1 ) {
STATE( dad ) = CANPIPE;
/*>> j = Enqueue(taskq, dad);*/
taskq->queue[taskq->tail++] = dad;
++taskq->count;
#ifdef DEBUG
printf("(%d) Enqueue() %d's dad %d ->CANPIPE, Qcount %d\n",
pnum, jcol, dad, j);
#endif
}
#ifdef PROFILE
Gstat->procstat[pnum].panels++;
#endif
/* Find the farthest busy descendant of the new panel
and its parent.*/
*bcol = fb_cols[jcol];
#ifdef DEBUG
printf("(%d) Scheduler[2] fb_cols[%d]=%d, STATE %d\n",
pnum, jcol, *bcol, STATE( *bcol ));
#endif
while ( STATE( *bcol ) == DONE ) *bcol = DADPANEL (*bcol);
fb_cols[dad] = *bcol;
} /* else regular_panel */
} /* if jcol != empty */
*cur_pan = jcol;
#ifdef DEBUG
printf("(%d) Exit C.S. tasks_remain %d, cur_pan %d\n",
pnum, pxgstrf_shared->tasks_remain, jcol);
#endif
} /* ---- END CRITICAL SECTION ---- */
#if ( MACH==SUN )
/* Exit C.S. */
mutex_unlock( &pxgstrf_shared->lu_locks[SCHED_LOCK] );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock( &pxgstrf_shared->lu_locks[SCHED_LOCK] );
#elif ( MACH==CRAY_PVP )
#pragma _CRI endguard SCHED_LOCK
#endif
#ifdef PROFILE
Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
#endif
return;
}
示例8: dgssv
//.........这里部分代码省略.........
* Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR).
* Uses column-wise storage scheme, i.e., U has types:
* Stype = SLU_NC, Dtype = SLU_D, Mtype = TRU.
*
* B (input/output) SuperMatrix*
* B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
* On entry, the right hand side matrix.
* On exit, the solution matrix if info = 0;
*
* info (output) int*
* = 0: successful exit
* > 0: if info = i, and i is
* <= A->ncol: U(i,i) is exactly zero. The factorization has
* been completed, but the factor U is exactly singular,
* so the solution could not be computed.
* > A->ncol: number of bytes allocated when memory allocation
* failure occurred, plus A->ncol.
*
*/
double t1; /* Temporary time */
char refact[1], trans[1];
DNformat *Bstore;
SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
SuperMatrix AC; /* Matrix postmultiplied by Pc */
int lwork = 0, *etree, i;
/* Set default values for some parameters */
double diag_pivot_thresh = 1.0;
double drop_tol = 0;
int panel_size; /* panel size */
int relax; /* no of columns in a relaxed snodes */
double *utime;
extern SuperLUStat_t SuperLUStat;
/* Test the input parameters ... */
*info = 0;
Bstore = B->Store;
if ( A->nrow != A->ncol || A->nrow < 0 ||
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
A->Dtype != SLU_D || A->Mtype != SLU_GE )
*info = -1;
else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
*info = -6;
if ( *info != 0 ) {
i = -(*info);
xerbla_("dgssv", &i);
return;
}
*refact = 'N';
*trans = 'N';
panel_size = sp_ienv(1);
relax = sp_ienv(2);
StatInit(panel_size, relax);
utime = SuperLUStat.utime;
/* Convert A to SLU_NC format when necessary. */
if ( A->Stype == SLU_NR ) {
NRformat *Astore = A->Store;
AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
Astore->nzval, Astore->colind, Astore->rowptr,
SLU_NC, A->Dtype, A->Mtype);
*trans = 'T';
} else if ( A->Stype == SLU_NC ) AA = A;
etree = intMalloc(A->ncol);
t1 = SuperLU_timer_();
sp_preorder(refact, AA, perm_c, etree, &AC);
utime[ETREE] = SuperLU_timer_() - t1;
/*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
relax, panel_size, sp_ienv(3), sp_ienv(4));*/
t1 = SuperLU_timer_();
/* Compute the LU factorization of A. */
dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size,
etree, NULL, lwork, perm_r, perm_c, L, U, info);
utime[FACT] = SuperLU_timer_() - t1;
t1 = SuperLU_timer_();
if ( *info == 0 ) {
/* Solve the system A*X=B, overwriting B with X. */
dgstrs (trans, L, U, perm_r, perm_c, B, info);
}
utime[SOLVE] = SuperLU_timer_() - t1;
SUPERLU_FREE (etree);
Destroy_CompCol_Permuted(&AC);
if ( A->Stype == SLU_NR ) {
Destroy_SuperMatrix_Store(AA);
SUPERLU_FREE(AA);
}
/*PrintStat( &SuperLUStat );*/
StatFree();
}
示例9: ddistribute
//.........这里部分代码省略.........
#endif
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(iam, "Enter ddistribute()");
#endif
if ( fact == SamePattern_SameRowPerm ) {
/* ---------------------------------------------------------------
* REUSE THE L AND U DATA STRUCTURES FROM A PREVIOUS FACTORIZATION.
* --------------------------------------------------------------- */
#if ( PROFlevel>=1 )
t_l = t_u = 0; u_blks = 0;
#endif
/* We can propagate the new values of A into the existing
L and U data structures. */
ilsum = Llu->ilsum;
ldaspa = Llu->ldalsum;
if ( !(dense = doubleCalloc_dist(((size_t)ldaspa) * sp_ienv_dist(3))) )
ABORT("Calloc fails for SPA dense[].");
nrbu = CEILING( nsupers, grid->nprow ); /* No. of local block rows */
if ( !(Urb_length = intCalloc_dist(nrbu)) )
ABORT("Calloc fails for Urb_length[].");
if ( !(Urb_indptr = intMalloc_dist(nrbu)) )
ABORT("Malloc fails for Urb_indptr[].");
Lrowind_bc_ptr = Llu->Lrowind_bc_ptr;
Lnzval_bc_ptr = Llu->Lnzval_bc_ptr;
Ufstnz_br_ptr = Llu->Ufstnz_br_ptr;
Unzval_br_ptr = Llu->Unzval_br_ptr;
#if ( PRNTlevel>=1 )
mem_use += 2.0*nrbu*iword + ldaspa*sp_ienv_dist(3)*dword;
#endif
#if ( PROFlevel>=1 )
t = SuperLU_timer_();
#endif
/* Initialize Uval to zero. */
for (lb = 0; lb < nrbu; ++lb) {
Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */
index = Ufstnz_br_ptr[lb];
if ( index ) {
uval = Unzval_br_ptr[lb];
len = index[1];
for (i = 0; i < len; ++i) uval[i] = zero;
} /* if index != NULL */
} /* for lb ... */
for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */
pc = PCOL( jb, grid );
if ( mycol == pc ) { /* Block column jb in my process column */
fsupc = FstBlockC( jb );
nsupc = SuperSize( jb );
/* Scatter A into SPA (for L), or into U directly. */
for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) {
for (i = xa_begin[j]; i < xa_end[j]; ++i) {
irow = asub[i];
gb = BlockNum( irow );
if ( myrow == PROW( gb, grid ) ) {
lb = LBi( gb, grid );
if ( gb < jb ) { /* in U */
index = Ufstnz_br_ptr[lb];
uval = Unzval_br_ptr[lb];
while ( (k = index[Urb_indptr[lb]]) < jb ) {
/* Skip nonzero values in this block */
Urb_length[lb] += index[Urb_indptr[lb]+1];
示例10: sgssv
//.........这里部分代码省略.........
* info (output) int*
* = 0: successful exit
* > 0: if info = i, and i is
* <= A->ncol: U(i,i) is exactly zero. The factorization has
* been completed, but the factor U is exactly singular,
* so the solution could not be computed.
* > A->ncol: number of bytes allocated when memory allocation
* failure occurred, plus A->ncol.
*
*/
DNformat *Bstore;
SuperMatrix *AA = NULL;/* A in SLU_NC format used by the factorization routine.*/
SuperMatrix AC; /* Matrix postmultiplied by Pc */
int lwork = 0, *etree, i;
/* Set default values for some parameters */
int panel_size; /* panel size */
int relax; /* no of columns in a relaxed snodes */
int permc_spec;
trans_t trans = NOTRANS;
double *utime;
double t; /* Temporary time */
/* Test the input parameters ... */
*info = 0;
Bstore = B->Store;
if ( options->Fact != DOFACT ) *info = -1;
else if ( A->nrow != A->ncol || A->nrow < 0 ||
(A->Stype != SLU_NC && A->Stype != SLU_NR) ||
A->Dtype != SLU_S || A->Mtype != SLU_GE )
*info = -2;
else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
*info = -7;
if ( *info != 0 ) {
i = -(*info);
xerbla_("sgssv", &i);
return;
}
utime = stat->utime;
/* Convert A to SLU_NC format when necessary. */
if ( A->Stype == SLU_NR ) {
NRformat *Astore = A->Store;
AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
Astore->nzval, Astore->colind, Astore->rowptr,
SLU_NC, A->Dtype, A->Mtype);
trans = TRANS;
} else {
if ( A->Stype == SLU_NC ) AA = A;
}
t = SuperLU_timer_();
/*
* Get column permutation vector perm_c[], according to permc_spec:
* permc_spec = NATURAL: natural ordering
* permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
* permc_spec = MMD_ATA: minimum degree on structure of A'*A
* permc_spec = COLAMD: approximate minimum degree column ordering
* permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
*/
permc_spec = options->ColPerm;
if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
get_perm_c(permc_spec, AA, perm_c);
utime[COLPERM] = SuperLU_timer_() - t;
etree = intMalloc(A->ncol);
t = SuperLU_timer_();
sp_preorder(options, AA, perm_c, etree, &AC);
utime[ETREE] = SuperLU_timer_() - t;
panel_size = sp_ienv(1);
relax = sp_ienv(2);
/*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n",
relax, panel_size, sp_ienv(3), sp_ienv(4));*/
t = SuperLU_timer_();
/* Compute the LU factorization of A. */
sgstrf(options, &AC, relax, panel_size,
etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
utime[FACT] = SuperLU_timer_() - t;
t = SuperLU_timer_();
if ( *info == 0 ) {
/* Solve the system A*X=B, overwriting B with X. */
sgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
}
utime[SOLVE] = SuperLU_timer_() - t;
SUPERLU_FREE (etree);
Destroy_CompCol_Permuted(&AC);
if ( A->Stype == SLU_NR ) {
Destroy_SuperMatrix_Store(AA);
SUPERLU_FREE(AA);
}
}
示例11: zgsisx
//.........这里部分代码省略.........
utime = stat->utime;
/* Convert A to SLU_NC format when necessary. */
if ( A->Stype == SLU_NR ) {
NRformat *Astore = A->Store;
AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz,
Astore->nzval, Astore->colind, Astore->rowptr,
SLU_NC, A->Dtype, A->Mtype);
if ( notran ) { /* Reverse the transpose argument. */
trant = TRANS;
notran = 0;
} else {
trant = NOTRANS;
notran = 1;
}
} else { /* A->Stype == SLU_NC */
trant = options->Trans;
AA = A;
}
if ( nofact ) {
register int i, j;
NCformat *Astore = AA->Store;
int nnz = Astore->nnz;
int *colptr = Astore->colptr;
int *rowind = Astore->rowind;
doublecomplex *nzval = (doublecomplex *)Astore->nzval;
int n = AA->nrow;
if ( mc64 ) {
*equed = 'B';
rowequ = colequ = 1;
t0 = SuperLU_timer_();
if ((perm = intMalloc(n)) == NULL)
ABORT("SUPERLU_MALLOC fails for perm[]");
info1 = zldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C);
if (info1 > 0) { /* MC64 fails, call zgsequ() later */
mc64 = 0;
SUPERLU_FREE(perm);
perm = NULL;
} else {
for (i = 0; i < n; i++) {
R[i] = exp(R[i]);
C[i] = exp(C[i]);
}
/* permute and scale the matrix */
for (j = 0; j < n; j++) {
for (i = colptr[j]; i < colptr[j + 1]; i++) {
zd_mult(&nzval[i], &nzval[i], R[rowind[i]] * C[j]);
rowind[i] = perm[rowind[i]];
}
}
}
utime[EQUIL] = SuperLU_timer_() - t0;
}
if ( !mc64 & equil ) {
t0 = SuperLU_timer_();
/* Compute row and column scalings to equilibrate the matrix A. */
zgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
if ( info1 == 0 ) {
/* Equilibrate matrix A. */
zlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
示例12: pddistribute
//.........这里部分代码省略.........
ABORT("Calloc fails for Urb_length[].");
if ( !(Urb_indptr = intMalloc_dist(nrbu)) )
ABORT("Malloc fails for Urb_indptr[].");
for (lb = 0; lb < nrbu; ++lb)
Urb_indptr[lb] = BR_HEADER; /* Skip header in U index[]. */
Lrowind_bc_ptr = Llu->Lrowind_bc_ptr;
Lnzval_bc_ptr = Llu->Lnzval_bc_ptr;
Ufstnz_br_ptr = Llu->Ufstnz_br_ptr;
Unzval_br_ptr = Llu->Unzval_br_ptr;
#if ( PRNTlevel>=1 )
mem_use += 2*nrbu*iword + ldaspa*sp_ienv_dist(3)*dword;
#endif
for (jb = 0; jb < nsupers; ++jb) { /* Loop through each block column */
pc = PCOL( jb, grid );
if ( mycol == pc ) { /* Block column jb in my process column */
fsupc = FstBlockC( jb );
nsupc = SuperSize( jb );
/* Scatter A into SPA. */
for (j = fsupc, dense_col = dense; j < FstBlockC(jb+1); ++j) {
for (i = xa[j]; i < xa[j+1]; ++i) {
irow = asub[i];
gb = BlockNum( irow );
if ( myrow == PROW( gb, grid ) ) {
lb = LBi( gb, grid );
irow = ilsum[lb] + irow - FstBlockC( gb );
dense_col[irow] = a[i];
}
}
dense_col += ldaspa;
}
#if ( PROFlevel>=1 )
t = SuperLU_timer_();
#endif
/* Gather the values of A from SPA into Unzval[]. */
for (lb = 0; lb < nrbu; ++lb) {
index = Ufstnz_br_ptr[lb];
if ( index && index[Urb_indptr[lb]] == jb ) {
uval = Unzval_br_ptr[lb];
len = Urb_indptr[lb] + UB_DESCRIPTOR;
gb = lb * grid->nprow + myrow;/* Global block number */
k = FstBlockC( gb+1 );
irow = ilsum[lb] - FstBlockC( gb );
for (jj = 0, dense_col = dense; jj < nsupc; ++jj) {
j = index[len+jj];
for (i = j; i < k; ++i) {
uval[Urb_length[lb]++] = dense_col[irow+i];
dense_col[irow+i] = zero;
}
dense_col += ldaspa;
}
Urb_indptr[lb] += UB_DESCRIPTOR + nsupc;
} /* if index != NULL */
} /* for lb ... */
#if ( PROFlevel>=1 )
t_u += SuperLU_timer_() - t;
t = SuperLU_timer_();
#endif
/* Gather the values of A from SPA into Lnzval[]. */
ljb = LBj( jb, grid ); /* Local block number */
index = Lrowind_bc_ptr[ljb];
if ( index ) {
nrbl = index[0]; /* Number of row blocks. */
len = index[1]; /* LDA of lusup[]. */
lusup = Lnzval_bc_ptr[ljb];
示例13: pzgstrs
void
pzgstrs(int_t n, LUstruct_t *LUstruct,
ScalePermstruct_t *ScalePermstruct,
gridinfo_t *grid, doublecomplex *B,
int_t m_loc, int_t fst_row, int_t ldb, int nrhs,
SOLVEstruct_t *SOLVEstruct,
SuperLUStat_t *stat, int *info)
{
Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
LocalLU_t *Llu = LUstruct->Llu;
doublecomplex alpha = {1.0, 0.0};
doublecomplex zero = {0.0, 0.0};
doublecomplex *lsum; /* Local running sum of the updates to B-components */
doublecomplex *x; /* X component at step k. */
/* NOTE: x and lsum are of same size. */
doublecomplex *lusup, *dest;
doublecomplex *recvbuf, *tempv;
doublecomplex *rtemp; /* Result of full matrix-vector multiply. */
int_t **Ufstnz_br_ptr = Llu->Ufstnz_br_ptr;
int_t *Urbs, *Urbs1; /* Number of row blocks in each block column of U. */
Ucb_indptr_t **Ucb_indptr;/* Vertical linked list pointing to Uindex[] */
int_t **Ucb_valptr; /* Vertical linked list pointing to Unzval[] */
int_t iam, kcol, krow, mycol, myrow;
int_t i, ii, il, j, jj, k, lb, ljb, lk, lptr, luptr;
int_t nb, nlb, nub, nsupers;
int_t *xsup, *supno, *lsub, *usub;
int_t *ilsum; /* Starting position of each supernode in lsum (LOCAL)*/
int_t Pc, Pr;
int knsupc, nsupr;
int ldalsum; /* Number of lsum entries locally owned. */
int maxrecvsz, p, pi;
int_t **Lrowind_bc_ptr;
doublecomplex **Lnzval_bc_ptr;
MPI_Status status;
MPI_Request *send_req, recv_req;
pxgstrs_comm_t *gstrs_comm = SOLVEstruct->gstrs_comm;
/*-- Counts used for L-solve --*/
int_t *fmod; /* Modification count for L-solve --
Count the number of local block products to
be summed into lsum[lk]. */
int_t **fsendx_plist = Llu->fsendx_plist;
int_t nfrecvx = Llu->nfrecvx; /* Number of X components to be recv'd. */
int_t *frecv; /* Count of lsum[lk] contributions to be received
from processes in this row.
It is only valid on the diagonal processes. */
int_t nfrecvmod = 0; /* Count of total modifications to be recv'd. */
int_t nleaf = 0, nroot = 0;
/*-- Counts used for U-solve --*/
int_t *bmod; /* Modification count for U-solve. */
int_t **bsendx_plist = Llu->bsendx_plist;
int_t nbrecvx = Llu->nbrecvx; /* Number of X components to be recv'd. */
int_t *brecv; /* Count of modifications to be recv'd from
processes in this row. */
int_t nbrecvmod = 0; /* Count of total modifications to be recv'd. */
double t;
#if ( DEBUGlevel>=2 )
int_t Ublocks = 0;
#endif
int_t *mod_bit = Llu->mod_bit; /* flag contribution from each row block */
t = SuperLU_timer_();
/* Test input parameters. */
*info = 0;
if ( n < 0 ) *info = -1;
else if ( nrhs < 0 ) *info = -9;
if ( *info ) {
pxerbla("PZGSTRS", grid, -*info);
return;
}
/*
* Initialization.
*/
iam = grid->iam;
Pc = grid->npcol;
Pr = grid->nprow;
myrow = MYROW( iam, grid );
mycol = MYCOL( iam, grid );
xsup = Glu_persist->xsup;
supno = Glu_persist->supno;
nsupers = supno[n-1] + 1;
Lrowind_bc_ptr = Llu->Lrowind_bc_ptr;
Lnzval_bc_ptr = Llu->Lnzval_bc_ptr;
nlb = CEILING( nsupers, Pr ); /* Number of local block rows. */
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(iam, "Enter pzgstrs()");
#endif
stat->ops[SOLVE] = 0.0;
Llu->SolveMsgSent = 0;
/* Save the count to be altered so it can be used by
subsequent call to PDGSTRS. */
if ( !(fmod = intMalloc_dist(nlb)) )
ABORT("Calloc fails for fmod[].");
//.........这里部分代码省略.........
示例14: get_perm_c
void
get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
/*
* Purpose
* =======
*
* GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
* minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
* or using approximate minimum degree column ordering by Davis et. al.
* The LU factorization of A*Pc tends to have less fill than the LU
* factorization of A.
*
* Arguments
* =========
*
* ispec (input) int
* Specifies the type of column ordering to reduce fill:
* = 1: minimum degree on the structure of A^T * A
* = 2: minimum degree on the structure of A^T + A
* = 3: approximate minimum degree for unsymmetric matrices
* If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
*
* A (input) SuperMatrix*
* Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
* of the linear equations is A->nrow. Currently, the type of A
* can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
* more general A can be handled.
*
* perm_c (output) int*
* Column permutation vector of size A->ncol, which defines the
* permutation matrix Pc; perm_c[i] = j means column i of A is
* in position j in A*Pc.
*
*/
{
NCformat *Astore = A->Store;
int m, n, bnz = 0, *b_colptr, i;
int delta, maxint, nofsub, *invp;
int *b_rowind, *dhead, *qsize, *llist, *marker;
double t, SuperLU_timer_();
m = A->nrow;
n = A->ncol;
t = SuperLU_timer_();
switch ( ispec ) {
case 0: /* Natural ordering */
for (i = 0; i < n; ++i) perm_c[i] = i;
#if ( PRNTlevel>=1 )
printf("Use natural column ordering.\n");
#endif
return;
case 1: /* Minimum degree ordering on A'*A */
getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
#if ( PRNTlevel>=1 )
printf("Use minimum degree ordering on A'*A.\n");
#endif
t = SuperLU_timer_() - t;
/*printf("Form A'*A time = %8.3f\n", t);*/
break;
case 2: /* Minimum degree ordering on A'+A */
if ( m != n ) ABORT("Matrix is not square");
at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
&bnz, &b_colptr, &b_rowind);
#if ( PRNTlevel>=1 )
printf("Use minimum degree ordering on A'+A.\n");
#endif
t = SuperLU_timer_() - t;
/*printf("Form A'+A time = %8.3f\n", t);*/
break;
case 3: /* Approximate minimum degree column ordering. */
get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
perm_c);
#if ( PRNTlevel>=1 )
printf(".. Use approximate minimum degree column ordering.\n");
#endif
return;
default:
ABORT("Invalid ISPEC");
}
if ( bnz != 0 ) {
t = SuperLU_timer_();
/* Initialize and allocate storage for GENMMD. */
delta = 1; /* DELTA is a parameter to allow the choice of nodes
whose degree <= min-degree + DELTA. */
maxint = 2147483647; /* 2**31 - 1 */
invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp.");
dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead.");
qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize.");
llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist.");
marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker.");
//.........这里部分代码省略.........
示例15: pdgssvx_ABglobal
//.........这里部分代码省略.........
R = ScalePermstruct->R;
C = ScalePermstruct->C;
if ( Equil ) {
/* Allocate storage if not done so before. */
switch ( ScalePermstruct->DiagScale ) {
case NOEQUIL:
if ( !(R = (double *) doubleMalloc_dist(m)) )
ABORT("Malloc fails for R[].");
if ( !(C = (double *) doubleMalloc_dist(n)) )
ABORT("Malloc fails for C[].");
ScalePermstruct->R = R;
ScalePermstruct->C = C;
break;
case ROW:
if ( !(C = (double *) doubleMalloc_dist(n)) )
ABORT("Malloc fails for C[].");
ScalePermstruct->C = C;
break;
case COL:
if ( !(R = (double *) doubleMalloc_dist(m)) )
ABORT("Malloc fails for R[].");
ScalePermstruct->R = R;
break;
}
}
/* ------------------------------------------------------------
Diagonal scaling to equilibrate the matrix.
------------------------------------------------------------*/
if ( Equil ) {
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(iam, "Enter equil");
#endif
t = SuperLU_timer_();
if ( Fact == SamePattern_SameRowPerm ) {
/* Reuse R and C. */
switch ( ScalePermstruct->DiagScale ) {
case NOEQUIL:
break;
case ROW:
for (j = 0; j < n; ++j) {
for (i = colptr[j]; i < colptr[j+1]; ++i) {
irow = rowind[i];
a[i] *= R[irow]; /* Scale rows. */
}
}
break;
case COL:
for (j = 0; j < n; ++j)
for (i = colptr[j]; i < colptr[j+1]; ++i)
a[i] *= C[j]; /* Scale columns. */
break;
case BOTH:
for (j = 0; j < n; ++j) {
for (i = colptr[j]; i < colptr[j+1]; ++i) {
irow = rowind[i];
a[i] *= R[irow] * C[j]; /* Scale rows and columns. */
}
}
break;
}
} else {
if ( !iam ) {
/* Compute row and column scalings to equilibrate matrix A. */
dgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo);