本文整理汇总了C++中teuchos::BLAS::GEMV方法的典型用法代码示例。如果您正苦于以下问题:C++ BLAS::GEMV方法的具体用法?C++ BLAS::GEMV怎么用?C++ BLAS::GEMV使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类teuchos::BLAS
的用法示例。
在下文中一共展示了BLAS::GEMV方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: timer
double
do_time_teuchos_double_gemv(unsigned int m, unsigned int n, unsigned int nloop)
{
Sacado::Random<double> urand(0.0, 1.0);
Teuchos::BLAS<int,double> blas;
std::vector<double> A(m*n), B(n), C(m);
for (unsigned int j=0; j<n; j++) {
for (unsigned int i=0; i<m; i++)
A[i+j*m] = urand.number();
B[j] = urand.number();
}
for (unsigned int i=0; i<m; i++)
C[i] = urand.number();
double alpha = urand.number();
double beta = urand.number();
Teuchos::Time timer("Teuchos Double GEMV", false);
timer.start(true);
for (unsigned int j=0; j<nloop; j++) {
blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
}
timer.stop();
return timer.totalElapsedTime() / nloop;
}
示例2: GEMM
static void
GEMM (const Teuchos::ETransp transA,
const Teuchos::ETransp transB,
const Scalar& alpha,
const View<const Scalar**, LayoutLeft, DeviceType>& A,
const View<const Scalar**, LayoutLeft, DeviceType>& B,
const Scalar& beta,
const View<Scalar**, LayoutLeft, DeviceType>& C)
{
const int n = static_cast<int> (C.dimension_1 ());
const int lda = static_cast<int> (Impl::getStride2DView (A));
Teuchos::BLAS<int,Scalar> blas;
// For some BLAS implementations (e.g., MKL), GEMM when B has
// one column may be signficantly less efficient than GEMV.
if (n == 1 && transB == Teuchos::NO_TRANS) {
blas.GEMV (transA, A.dimension_0 (), A.dimension_1 (),
alpha, A.ptr_on_device (), lda,
B.ptr_on_device (), static_cast<int> (1),
beta, C.ptr_on_device (), static_cast<int> (1));
}
else {
const int m = static_cast<int> (C.dimension_0 ());
const int k = static_cast<int> (transA == Teuchos::NO_TRANS ?
A.dimension_1 () : A.dimension_0 ());
const int ldb = static_cast<int> (Impl::getStride2DView (B));
const int ldc = static_cast<int> (Impl::getStride2DView (C));
blas.GEMM (transA, transB, m, n, k, alpha,
A.ptr_on_device(), lda,
B.ptr_on_device(), ldb,
beta, C.ptr_on_device(), ldc);
}
}
示例3:
void
Stokhos::MonoProjPCEBasis<ordinal_type, value_type>::
transformCoeffs(const value_type *in, value_type *out) const
{
// Transform coefficients to normalized basis
Teuchos::BLAS<ordinal_type, value_type> blas;
blas.GEMV(Teuchos::NO_TRANS, pce_sz, this->p+1,
value_type(1.0), basis_vecs.values(), pce_sz,
in, ordinal_type(1), value_type(0.0), out, ordinal_type(1));
// Transform from normalized to original
for (ordinal_type i=0; i<pce_sz; i++)
out[i] /= pce_norms[i];
}
示例4: alpha
double
do_time_teuchos_fad_gemv(unsigned int m, unsigned int n, unsigned int ndot,
unsigned int nloop)
{
Sacado::Random<double> urand(0.0, 1.0);
Teuchos::BLAS<int,FadType> blas;
std::vector<FadType> A(m*n), B(n), C(m);
for (unsigned int j=0; j<n; j++) {
for (unsigned int i=0; i<m; i++) {
//A[i+j*m] = urand.number();
A[i+j*m] = FadType(ndot, urand.number());
for (unsigned int k=0; k<ndot; k++)
A[i+j*m].fastAccessDx(k) = urand.number();
}
B[j] = FadType(ndot, urand.number());
for (unsigned int k=0; k<ndot; k++)
B[j].fastAccessDx(k) = urand.number();
}
for (unsigned int i=0; i<m; i++) {
C[i] = FadType(ndot, urand.number());
for (unsigned int k=0; k<ndot; k++)
C[i].fastAccessDx(k) = urand.number();
}
FadType alpha(ndot, urand.number());
FadType beta(ndot, urand.number());
for (unsigned int k=0; k<ndot; k++) {
alpha.fastAccessDx(k) = urand.number();
beta.fastAccessDx(k) = urand.number();
}
Teuchos::Time timer("Teuchos Fad GEMV", false);
timer.start(true);
for (unsigned int j=0; j<nloop; j++) {
blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &A[0], m, &B[0], 1, beta, &C[0], 1);
}
timer.stop();
return timer.totalElapsedTime() / nloop;
}
示例5: computeIntegral
/*
Computes integrals of monomials over a given reference cell.
*/
void computeIntegral(Teuchos::Array<double>& testIntFixDeg, shards::CellTopology & cellTopology, int cubDegree) {
DefaultCubatureFactory<double> cubFactory; // create factory
Teuchos::RCP<Cubature<double> > myCub = cubFactory.create(cellTopology, cubDegree); // create default cubature
int cubDim = myCub->getDimension();
int numCubPoints = myCub->getNumPoints();
int numPolys = (cubDegree+1)*(cubDegree+2)*(cubDegree+3)/6;
FieldContainer<double> point(cubDim);
FieldContainer<double> cubPoints(numCubPoints, cubDim);
FieldContainer<double> cubWeights(numCubPoints);
FieldContainer<double> functValues(numCubPoints, numPolys);
myCub->getCubature(cubPoints, cubWeights);
int polyCt = 0;
for (int xDeg=0; xDeg <= cubDegree; xDeg++) {
for (int yDeg=0; yDeg <= cubDegree-xDeg; yDeg++) {
for (int zDeg=0; zDeg <= cubDegree-xDeg-yDeg; zDeg++) {
for (int i=0; i<numCubPoints; i++) {
for (int j=0; j<cubDim; j++) {
point(j) = cubPoints(i,j);
}
functValues(i,polyCt) = computeMonomial(point, xDeg, yDeg, zDeg);
}
polyCt++;
}
}
}
Teuchos::BLAS<int, double> myblas;
int inc = 1;
double alpha = 1.0;
double beta = 0.0;
myblas.GEMV(Teuchos::NO_TRANS, numPolys, numCubPoints, alpha, &functValues(0,0), numPolys,
&cubWeights(0), inc, beta, &testIntFixDeg[0], inc);
}
示例6: computeIntegral
/*
Computes integrals of monomials over a given reference cell.
*/
void computeIntegral(Teuchos::Array<double>& testIntFixDeg, int cubDegree) {
CubatureGenSparse<double,3> myCub(cubDegree);
int cubDim = myCub.getDimension();
int numCubPoints = myCub.getNumPoints();
int numPolys = (cubDegree+1)*(cubDegree+2)*(cubDegree+3)/6;
FieldContainer<double> point(cubDim);
FieldContainer<double> cubPoints(numCubPoints, cubDim);
FieldContainer<double> cubWeights(numCubPoints);
FieldContainer<double> functValues(numCubPoints, numPolys);
myCub.getCubature(cubPoints, cubWeights);
int polyCt = 0;
for (int xDeg=0; xDeg <= cubDegree; xDeg++) {
for (int yDeg=0; yDeg <= cubDegree-xDeg; yDeg++) {
for (int zDeg=0; zDeg <= cubDegree-xDeg-yDeg; zDeg++) {
for (int i=0; i<numCubPoints; i++) {
for (int j=0; j<cubDim; j++) {
point(j) = cubPoints(i,j);
}
functValues(i,polyCt) = computeMonomial(point, xDeg, yDeg, zDeg);
}
polyCt++;
}
}
}
Teuchos::BLAS<int, double> myblas;
int inc = 1;
double alpha = 1.0;
double beta = 0.0;
myblas.GEMV(Teuchos::NO_TRANS, numPolys, numCubPoints, alpha, &functValues(0,0), numPolys,
&cubWeights(0), inc, beta, &testIntFixDeg[0], inc);
}
示例7: valuesAll
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Apply(const Matrix& P, Matrix& Projected) const {
// We check only row maps. Column may be different.
TEUCHOS_TEST_FOR_EXCEPTION(!P.getRowMap()->isSameAs(*Projected.getRowMap()), Exceptions::Incompatible,
"Row maps are incompatible");
const size_t NSDim = X_->getNumVectors();
const size_t numRows = P.getNodeNumRows();
const Map& colMap = *P.getColMap();
const Map& PColMap = *Projected.getColMap();
Projected.resumeFill();
Teuchos::ArrayView<const LO> indices, pindices;
Teuchos::ArrayView<const SC> values, pvalues;
Teuchos::Array<SC> valuesAll(colMap.getNodeNumElements()), newValues;
LO invalid = Teuchos::OrdinalTraits<LO>::invalid();
LO oneLO = Teuchos::OrdinalTraits<LO>::one();
SC zero = Teuchos::ScalarTraits<SC> ::zero();
SC one = Teuchos::ScalarTraits<SC> ::one();
std::vector<const SC*> Xval(NSDim);
for (size_t j = 0; j < NSDim; j++)
Xval[j] = X_->getData(j).get();
for (size_t i = 0; i < numRows; i++) {
P .getLocalRowView(i, indices, values);
Projected.getLocalRowView(i, pindices, pvalues);
size_t nnz = indices.size(); // number of nonzeros in the supplied matrix
size_t pnnz = pindices.size(); // number of nonzeros in the constrained matrix
newValues.resize(pnnz);
// Step 1: fix stencil
// Projected *must* already have the correct stencil
// Step 2: copy correct stencil values
// The algorithm is very similar to the one used in the calculation of
// Frobenius dot product, see src/Transfers/Energy-Minimization/Solvers/MueLu_CGSolver_def.hpp
// NOTE: using extra array allows us to skip the search among indices
for (size_t j = 0; j < nnz; j++)
valuesAll[indices[j]] = values[j];
for (size_t j = 0; j < pnnz; j++) {
LO ind = colMap.getLocalElement(PColMap.getGlobalElement(pindices[j])); // FIXME: we could do that before the full loop just once
if (ind != invalid)
// index indices[j] is part of template, copy corresponding value
newValues[j] = valuesAll[ind];
else
newValues[j] = zero;
}
for (size_t j = 0; j < nnz; j++)
valuesAll[indices[j]] = zero;
// Step 3: project to the space
Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];
Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, pnnz, false);
for (size_t j = 0; j < pnnz; j++)
for (size_t k = 0; k < NSDim; k++)
locX(k,j) = Xval[k][pindices[j]];
Teuchos::SerialDenseVector<LO,SC> val(pnnz, false), val1(NSDim, false), val2(NSDim, false);
for (size_t j = 0; j < pnnz; j++)
val[j] = newValues[j];
Teuchos::BLAS<LO,SC> blas;
// val1 = locX * val;
blas.GEMV(Teuchos::NO_TRANS, NSDim, pnnz,
one, locX.values(), locX.stride(),
val.values(), oneLO,
zero, val1.values(), oneLO);
// val2 = XXtInv * val1
blas.GEMV(Teuchos::NO_TRANS, NSDim, NSDim,
one, XXtInv.values(), XXtInv.stride(),
val1.values(), oneLO,
zero, val2.values(), oneLO);
// val = X^T * val2
blas.GEMV(Teuchos::CONJ_TRANS, NSDim, pnnz,
one, locX.values(), locX.stride(),
val2.values(), oneLO,
zero, val.values(), oneLO);
for (size_t j = 0; j < pnnz; j++)
newValues[j] -= val[j];
Projected.replaceLocalValues(i, pindices, newValues);
}
Projected.fillComplete(Projected.getDomainMap(), Projected.getRangeMap()); //FIXME: maps needed?
}
示例8: main
int main(int argc, char **argv)
{
const unsigned int n = 5;
Sacado::Fad::Vector<unsigned int, FadType> A(n*n,0),B(n,n), C(n,n);
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
A[i+j*n] = FadType(Teuchos::ScalarTraits<double>::random());
B[i] = FadType(n, Teuchos::ScalarTraits<double>::random());
for (unsigned int j=0; j<n; j++)
B[i].fastAccessDx(j) = Teuchos::ScalarTraits<double>::random();
C[i] = 0.0;
}
double *a = A.vals();
double *b = B.vals();
double *bdx = B.dx();
std::vector<double> c(n), cdx(n*n);
Teuchos::BLAS<int,double> blas;
blas.GEMV(Teuchos::NO_TRANS, n, n, 1.0, &a[0], n, &b[0], 1, 0.0, &c[0], 1);
blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, n, n, n, 1.0, &a[0], n, &bdx[0], n, 0.0, &cdx[0], n);
// Teuchos::BLAS<int,FadType> blas_fad;
// blas_fad.GEMV(Teuchos::NO_TRANS, n, n, 1.0, &A[0], n, &B[0], 1, 0.0, &C[0], 1);
Teuchos::BLAS<int,FadType> sacado_fad_blas(false);
sacado_fad_blas.GEMV(Teuchos::NO_TRANS, n, n, 1.0, &A[0], n, &B[0], 1, 0.0, &C[0], 1);
// Print the results
int p = 4;
int w = p+7;
std::cout.setf(std::ios::scientific);
std::cout.precision(p);
std::cout << "BLAS GEMV calculation:" << std::endl;
std::cout << "a = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << a[i+j*n];
std::cout << std::endl;
}
std::cout << "b = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << b[i];
}
std::cout << std::endl;
std::cout << "bdot = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << bdx[i+j*n];
std::cout << std::endl;
}
std::cout << "c = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << c[i];
}
std::cout << std::endl;
std::cout << "cdot = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << cdx[i+j*n];
std::cout << std::endl;
}
std::cout << std::endl << std::endl;
std::cout << "FAD BLAS GEMV calculation:" << std::endl;
std::cout << "A.val() (should = a) = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << A[i+j*n].val();
std::cout << std::endl;
}
std::cout << "B.val() (should = b) = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << B[i].val();
}
std::cout << std::endl;
std::cout << "B.dx() (should = bdot) = " << std::endl;
double *Bdx = B.dx();
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << Bdx[i+j*n];
std::cout << std::endl;
}
std::cout << "C.val() (should = c) = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << C[i].val();
}
std::cout << std::endl;
std::cout << "C.dx() (should = cdot) = " << std::endl;
double *Cdx = C.dx();
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << Cdx[i+j*n];
std::cout << std::endl;
}
double tol = 1.0e-14;
bool failed = false;
for (unsigned int i=0; i<n; i++) {
//.........这里部分代码省略.........