本文整理汇总了C++中Teuchos::BLAS::HERK方法的典型用法代码示例。如果您正苦于以下问题:C++ BLAS::HERK方法的具体用法?C++ BLAS::HERK怎么用?C++ BLAS::HERK使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Teuchos::BLAS的用法示例。
在下文中一共展示了BLAS::HERK方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: exampleDenseCholByBlocks
// Driver that builds a sequence of square test matrices and factorizes them with
// dense Cholesky, reporting timing as GFLOPs.
//
// NOTE(review): this excerpt is incomplete. DeviceSpaceType, ArgUplo, value_type,
// ordinal_type and size_type are not declared in the visible code (presumably
// template parameters or outer typedefs), and the body is cut off inside the size
// loop, so the by-blocks part and the return statement are not shown.
//
// Parameters (as used by the visible code):
//   mmin, mmax, minc    - matrix sizes tested: m = mmin, mmin+minc, ... while m <= mmax.
//   mb                  - printed in the banner; further use is not visible here
//                         (presumably the block size - confirm).
//   max_concurrency,
//   max_task_dependence,
//   team_size           - forwarded unchanged to the PolicyType constructor.
//   mkl_nthreads        - passed to mkl_set_num_threads() when HAVE_SHYLUTACHO_MKL is defined.
//   check               - when true, a timed reference factorization using the
//                         AlgoChol::ExternalLapack variant is run on a copy of the matrix.
//   verbose             - not referenced in the visible portion.
// Returns: int; the return statement is outside the excerpt (presumably r_val - confirm).
int exampleDenseCholByBlocks(const ordinal_type mmin,
const ordinal_type mmax,
const ordinal_type minc,
const ordinal_type mb,
const int max_concurrency,
const int max_task_dependence,
const int team_size,
const int mkl_nthreads,
const bool check,
const bool verbose) {
// Host execution space associated with the device space (taken from the
// host_mirror_space of DeviceSpaceType).
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
// Print both space configurations with the detail flag turned off.
const bool detail = false;
std::cout << "DeviceSpace:: "; DeviceSpaceType::print_configuration(std::cout, detail);
std::cout << "HostSpace:: "; HostSpaceType::print_configuration(std::cout, detail);
typedef Kokkos::Experimental::TaskPolicy<DeviceSpaceType> PolicyType;
// Flat (scalar-valued) matrix storage and views on the host and on the device.
typedef DenseMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> DenseMatrixBaseHostType;
typedef DenseMatrixView<DenseMatrixBaseHostType> DenseMatrixViewHostType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,DeviceSpaceType> DenseMatrixBaseDeviceType;
typedef DenseMatrixView<DenseMatrixBaseDeviceType> DenseMatrixViewDeviceType;
// Hierarchical matrix types: a matrix whose entries are task views of flat matrix views.
typedef TaskView<DenseMatrixViewDeviceType> DenseTaskViewDeviceType;
typedef DenseMatrixBase<DenseTaskViewDeviceType,ordinal_type,size_type,DeviceSpaceType> DenseHierMatrixBaseDeviceType;
typedef DenseMatrixView<DenseHierMatrixBaseDeviceType> DenseHierMatrixViewDeviceType;
typedef TaskView<DenseHierMatrixViewDeviceType> DenseHierTaskViewDeviceType;
// r_val is not modified in the visible portion.
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
std::cout << "DenseCholByBlocks:: test matrices "
<<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc
<< " , mb = " << mb << std::endl;
// Per-task size handed to the policy: three task views, one policy object and 128
// extra bytes. NOTE(review): the rationale for these constants is not shown here.
const size_t max_task_size = (3*sizeof(DenseTaskViewDeviceType)+sizeof(PolicyType)+128);
PolicyType policy(max_concurrency,
max_task_size,
max_task_dependence,
team_size);
// Report buffer: scientific notation with precision 3; cleared at the top of each iteration.
std::ostringstream os;
os.precision(3);
os << std::scientific;
for (ordinal_type m=mmin;m<=mmax;m+=minc) {
os.str("");
// host matrices: AA_host is m x m; AB_host and TT_host are shaped later via createConfTo()
DenseMatrixBaseHostType AA_host("AA_host", m, m), AB_host("AB_host"), TT_host("TT_host");
// random T matrix: entries in [-1, 1] from rand(); after each column is filled its
// diagonal entry is replaced by its absolute value
{
TT_host.createConfTo(AA_host);
for (ordinal_type j=0;j<TT_host.NumCols();++j) {
for (ordinal_type i=0;i<TT_host.NumRows();++i)
TT_host.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
TT_host.Value(j,j) = std::fabs(TT_host.Value(j,j));
}
}
// create SPD matrix: HERK with CONJ_TRANS, alpha = 1, beta = 0 writes the rank-k
// product of TT_host with itself (TT^H * TT under standard BLAS xHERK semantics)
// into the triangle of AA_host selected by ArgUplo
{
Teuchos::BLAS<ordinal_type,value_type> blas;
blas.HERK(ArgUplo == Uplo::Upper ? Teuchos::UPPER_TRI : Teuchos::LOWER_TRI,
Teuchos::CONJ_TRANS,
m, m,
1.0,
TT_host.ValuePtr(), TT_host.ColStride(),
0.0,
AA_host.ValuePtr(), AA_host.ColStride());
// preserve a copy of A: the reference factorization below runs on AB_host,
// so AA_host is not touched by it
AB_host.createConfTo(AA_host);
DenseMatrixTools::copy(AB_host, AA_host);
}
// Flop count for an m x m Cholesky factorization; used to turn seconds into GFLOPs.
const double flop = DenseFlopCount<value_type>::Chol(m);
#ifdef HAVE_SHYLUTACHO_MKL
mkl_set_num_threads(mkl_nthreads);
#endif
os << "DenseCholByBlocks:: m = " << m << " ";
int ierr = 0;
if (check) {
// Reference run using the AlgoChol::ExternalLapack variant on a view of AB_host.
// The timer is reset before the view is constructed, so view construction is
// included in the measured time.
timer.reset();
DenseMatrixViewHostType A_host(AB_host);
ierr = Chol<ArgUplo,AlgoChol::ExternalLapack,Variant::One>::invoke
(policy, policy.member_single(),
A_host);
t = timer.seconds();
// A nonzero ierr is reported through TACHO_TEST_FOR_ABORT (presumably aborts - confirm).
TACHO_TEST_FOR_ABORT( ierr, "Fail to perform Cholesky (serial)");
os << ":: Serial Performance = " << (flop/t/1.0e9) << " [GFLOPs] ";
}
//.........这里部分代码省略.........
示例2: exampleDenseCholByBlocks
// Driver that builds a sequence of square test matrices, optionally factorizes a
// copy with the AlgoChol::ExternalLapack variant as a timed reference, and then
// spawns a task-based by-blocks Cholesky on a hierarchical view of the matrix.
//
// NOTE(review): this excerpt is incomplete. ValueType, OrdinalType, SizeType,
// SpaceType and MemoryTraits are not declared in the visible code (presumably
// template parameters), cout/endl are used unqualified (presumably via a
// using-directive elsewhere), and the body is cut off right after the task is
// spawned, so the wait, the reporting and the return statement are not shown.
//
// Parameters (as used by the visible code):
//   mmin, mmax, minc    - matrix sizes tested: m = mmin, mmin+minc, ... while m <= mmax.
//   mb                  - passed twice to DenseMatrixHelper::flat2hier
//                         (presumably the block height and width - confirm).
//   max_concurrency,
//   max_task_dependence,
//   team_size           - forwarded to the task policy constructor; max_task_dependence
//                         is also passed to TaskFactoryType::setMaxTaskDependence.
//   check               - when true, the timed reference factorization is run.
//   verbose             - not referenced in the visible portion.
// Returns: int; the return statement is outside the excerpt (presumably r_val - confirm).
KOKKOS_INLINE_FUNCTION
int exampleDenseCholByBlocks(const OrdinalType mmin,
const OrdinalType mmax,
const OrdinalType minc,
const OrdinalType mb,
const int max_concurrency,
const int max_task_dependence,
const int team_size,
const bool check,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
// Flat (scalar-valued) matrix, its view, and the task view wrapping that view.
typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;
typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;
// Hierarchical matrix: a matrix whose entries are task views of flat matrix views.
typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;
typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;
// r_val is not modified in the visible portion.
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
cout << "DenseCholByBlocks:: test matrices "
<<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << endl;
// Per-task size handed to the policy: three task views plus 196 extra bytes.
const size_t max_task_size = (3*sizeof(DenseTaskViewType)+196); // NOTE: original author reports an error when the padding is 128
//cout << "max task size = "<< max_task_size << endl;
typename TaskFactoryType::policy_type policy(max_concurrency,
max_task_size,
max_task_dependence,
team_size);
// Register the dependence limit and the policy with the factory. The factory is
// given the address of the local `policy`, so that pointer is only valid while
// this function is executing.
TaskFactoryType::setMaxTaskDependence(max_task_dependence);
TaskFactoryType::setPolicy(&policy);
for (ordinal_type m=mmin;m<=mmax;m+=minc) {
// TT: random factor; AA: matrix to factorize; AB: copy used by the reference run.
DenseMatrixBaseType TT("TT", m, m), AA("AA", m, m), AB("AB", m, m);
// random T matrix: entries in [-1, 1] from rand(); after each column is filled its
// diagonal entry is replaced by its absolute value.
// NOTE(review): `abs` is unqualified; if only the integer overload is visible it
// would truncate the floating-point value - confirm that std::abs/std::fabs is
// what gets selected.
for (ordinal_type j=0;j<AA.NumCols();++j) {
for (ordinal_type i=0;i<AA.NumRows();++i)
TT.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
TT.Value(j,j) = abs(TT.Value(j,j));
}
// Fill the upper triangle of AA via HERK with CONJ_TRANS, alpha = 1, beta = 0
// (TT^H * TT under standard BLAS xHERK semantics), then copy AA into AB.
{
Teuchos::BLAS<ordinal_type,value_type> blas;
// should be square: nn and kk are both m here because TT and AA are m x m
const ordinal_type nn = AA.NumRows();
const ordinal_type kk = TT.NumRows();
blas.HERK(Teuchos::UPPER_TRI, Teuchos::CONJ_TRANS,
nn, kk,
1.0,
TT.ValuePtr(), TT.ColStride(),
0.0,
AA.ValuePtr(), AA.ColStride());
AB.copy(AA);
}
// Flop count for an m x m Cholesky factorization; used to turn seconds into GFLOPs.
const double flop = get_flop_chol<value_type>(m);
#ifdef HAVE_SHYLUTACHO_MKL
mkl_set_num_threads(1);
#endif
int ierr = 0;
if (check) {
// Reference run using the AlgoChol::ExternalLapack variant on a task view of AB.
// The timer is reset before the view is constructed, so view construction is
// included in the measured time.
timer.reset();
DenseTaskViewType A(&AB);
ierr = Chol<Uplo::Upper,AlgoChol::ExternalLapack>::invoke
(TaskFactoryType::Policy(),
TaskFactoryType::Policy().member_single(),
A);
t = timer.seconds();
// A nonzero ierr is reported through the ERROR macro (its behavior is not visible here).
if (ierr)
ERROR(">> Fail to perform Cholesky (serial) : no reference solution -> no numeric error information");
cout << "DenseCholByBlocks:: Serial Performance = " << (flop/t/1.0e9) << " [GFLOPs]" << endl;
}
// By-blocks run: build the hierarchical matrix HA from AA with flat2hier, wrap it
// in a task view, then create and spawn a team task running the DenseByBlocks
// Cholesky functor. The timer is reset after the hierarchy is built, so that setup
// is not included in the measured time.
{
DenseHierMatrixBaseType HA;
DenseMatrixHelper::flat2hier(AA, HA, mb, mb);
DenseHierTaskViewType TA(&HA);
timer.reset();
// The second argument to create_team is 0 (presumably the dependence count - confirm).
auto future = TaskFactoryType::Policy().create_team
(Chol<Uplo::Upper,AlgoChol::DenseByBlocks>
::TaskFunctor<DenseHierTaskViewType>(TA), 0);
TaskFactoryType::Policy().spawn(future);
//.........这里部分代码省略.........