本文整理汇总了C++中OptionParser::getOptionVecInt方法的典型用法代码示例。如果您正苦于以下问题：C++ OptionParser::getOptionVecInt方法的具体用法？C++ OptionParser::getOptionVecInt怎么用？C++ OptionParser::getOptionVecInt使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类OptionParser的用法示例。
在下文中一共展示了OptionParser::getOptionVecInt方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: GPUSetup
// ****************************************************************************
// Function: GPUSetup
//
// Purpose:
//   Do the necessary OpenCL setup for the GPU part of the test: register the
//   benchmark-specific options, choose the device for this task, and create
//   the device, context and profiling-enabled command queue that are stored
//   in the _mpicontention_* variables.
//
// Arguments:
//   op: the options parser / parameter database
//   mympirank: used only when printing the device-selection warning
//   mynoderank: index into the --device list; this is typically the device ID
//               (the mapping is done in main)
//
// Returns: 0 on every path visible here (including the "infoDevices" early
//          exit, which only prints the platform/device summary).
//
// Creation: 2009
//
// Modifications:
//   Read the --device list once. A node rank outside the list (negative or
//   too large) falls back to the first entry, and an empty list falls back
//   to device 0 instead of indexing past the end of the vector.
//
// ****************************************************************************
//
int GPUSetup(OptionParser &op, int mympirank, int mynoderank)
{
    addBenchmarkSpecOptions(op);

    if (op.getOptionBool("infoDevices"))
    {
        OpenCLNodePlatformContainer ndc1;
        ndc1.Print (cout);
        return (0);
    }

    // The device option supports specifying more than one device
    int platform = op.getOptionInt("platform");
    const std::vector<long long> deviceList = op.getOptionVecInt( "device" );

    // Use the entry matching our node rank when there is one; otherwise warn
    // and share the first entry. The range test is done on size_t only after
    // ruling out negative ranks, so no signed/unsigned surprise can occur.
    size_t deviceIdx = 0;
    const bool rankHasOwnDevice =
        (mynoderank >= 0) &&
        (static_cast<size_t>(mynoderank) < deviceList.size());
    if( rankHasOwnDevice )
    {
        deviceIdx = static_cast<size_t>(mynoderank);
    }
    else
    {
        std::ostringstream estr;
        estr << "Warning: not enough devices specified with --device flag for task "
             << mympirank
             << " ( node rank " << mynoderank
             << ") to claim its own device; forcing to use first device ";
        std::cerr << estr.str() << std::endl;
    }

    // With an empty list there is no "first device" to read; use id 0.
    const int device = deviceList.empty()
                           ? 0
                           : static_cast<int>(deviceList[deviceIdx]);

    // Initialization
    // NOTE(review): these objects are allocated with new and stored in
    // variables declared outside this function; their release is not visible
    // here - confirm a matching cleanup exists.
    _mpicontention_ocldev = new cl::Device( ListDevicesAndGetDevice(platform, device) );
    std::vector<cl::Device> ctxDevices;
    ctxDevices.push_back( *_mpicontention_ocldev );
    _mpicontention_ocldriver_ctx = new cl::Context( ctxDevices );
    _mpicontention_ocldriver_queue = new cl::CommandQueue( *_mpicontention_ocldriver_ctx, *_mpicontention_ocldev, CL_QUEUE_PROFILING_ENABLE );

    // Keep a copy of the parsed options for later use by the GPU part.
    _mpicontention_gpuop = op;
    return 0;
}
示例2: InvalidArgValue
// Validate the option values that do not depend on a particular stencil
// implementation.
//
// Checks are performed in the order below and the first violation throws
// InvalidArgValue:
//   - "customSize" must have exactly two entries, neither of them negative
//   - "val-threshold" must be greater than zero
//   - "val-print-limit" must not be negative
//   - "warmupPasses" must not be negative
void
CheckOptions( const OptionParser& opts )
{
    // problem size: exactly two dimensions, negative values rejected
    const std::vector<long long> sizeDims = opts.getOptionVecInt( "customSize" );
    if( sizeDims.size() != 2 )
    {
        throw InvalidArgValue( "overall size must have two dimensions" );
    }
    const bool rowsNegative = ( sizeDims[0] < 0 );
    const bool colsNegative = ( sizeDims[1] < 0 );
    if( rowsNegative || colsNegative )
    {
        throw InvalidArgValue( "each size dimension must be positive" );
    }

    // threshold used when validating results
    const float threshold = opts.getOptionFloat( "val-threshold" );
    if( threshold <= 0.0f )
    {
        throw InvalidArgValue( "validation threshold must be positive" );
    }

    // cap on how many validation errors get printed
    const int printLimit = opts.getOptionInt( "val-print-limit" );
    if( printLimit < 0 )
    {
        throw InvalidArgValue( "number of validation errors to print must be non-negative" );
    }

    // number of warmup passes
    const int warmups = opts.getOptionInt( "warmupPasses" );
    if( warmups < 0 )
    {
        throw InvalidArgValue( "number of warmup passes must be non-negative" );
    }
}
示例3: InvalidArgValue
// Validate the options used by the MIC stencil factory.
//
// Runs the StencilFactory<T> checks first, then verifies that a user-supplied
// "customSize" is compatible with the compiled-in local block size
// LROWS x LCOLS.
//
// Throws InvalidArgValue when a custom row (column) count is not a multiple
// of LROWS (LCOLS) or is smaller than it.
void
CommonMICStencilFactory<T>::CheckOptions( const OptionParser& opts ) const
{
    // let base class check its options first
    StencilFactory<T>::CheckOptions( opts );

    // check our options
    std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" );
    assert( arrayDims.size() == 2 );

    // If both of these are zero, we're using a non-custom size, skip this test.
    // Both entries have to be inspected: a size such as (0, N) is a custom
    // size and must go through the divisibility checks below.
    if (arrayDims[0] == 0 && arrayDims[1] == 0)
    {
        return;
    }

    size_t gRows = (size_t)arrayDims[0];
    size_t gCols = (size_t)arrayDims[1];
    size_t lRows = LROWS;
    size_t lCols = LCOLS;

    // verify that local dimensions evenly divide global dimensions
    // (the "l > g" test additionally rejects a global extent of zero)
    if( ((gRows % lRows) != 0) || (lRows > gRows) )
    {
        throw InvalidArgValue( "number of rows must be even multiple of lsize rows" );
    }
    if( ((gCols % lCols) != 0) || (lCols > gCols) )
    {
        throw InvalidArgValue( "number of columns must be even multiple of lsize columns" );
    }

    // TODO ensure local dims are smaller than CUDA implementation limits
}
示例4: InvalidArgValue
// Validate the options used by the MPI + CUDA stencil factory.
//
// Runs the CommonCUDAStencilFactory<T> checks first, then verifies that the
// problem size extended by the MPI halo ("iters-per-exchange") is compatible
// with the local work size given by "lsize".
//
// Throws InvalidArgValue when ((size + 2*halo) - 2) is not a multiple of the
// corresponding lsize entry, for rows or for columns.
void
MPICUDAStencilFactory<T>::CheckOptions( const OptionParser& opts ) const
{
    // let base class check its options first
    CommonCUDAStencilFactory<T>::CheckOptions( opts );

    // check our options
    std::vector<long long> shDims = opts.getOptionVecInt( "lsize" );
    std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" );

    // Sanity-check the vector lengths before any element is read
    // (previously arrayDims[0] was indexed ahead of these assertions).
    assert( shDims.size() == 2 );
    assert( arrayDims.size() == 2 );

    if( arrayDims[0] == 0 )
    {
        // custom size was not specified - we are using a standard size
        int sizeClass = opts.getOptionInt("size");
        arrayDims = StencilFactory<T>::GetStandardProblemSize( sizeClass );

        // the replacement must be two-dimensional as well
        assert( arrayDims.size() == 2 );
    }

    size_t gRows = (size_t)arrayDims[0];
    size_t gCols = (size_t)arrayDims[1];
    // NOTE(review): the modulo operations below assume both lsize entries are
    // non-zero; presumably the base-class check rejects such values - confirm.
    size_t lRows = shDims[0];
    size_t lCols = shDims[1];
    unsigned int haloWidth = (unsigned int)opts.getOptionInt( "iters-per-exchange" );

    // verify that MPI halo width will result in a matrix being passed
    // to the kernel that also has its global size as a multiple of
    // the local work size
    //
    // Because the MPI halo width is arbitrary, and the kernel halo width
    // is always 1, we have to ensure that:
    // ((size + 2*halo) - 2) % lsize == 0
    if( (((gRows + 2*haloWidth) - 2) % lRows) != 0 )
    {
        throw InvalidArgValue( "rows including halo must be even multiple of lsize (e.g., lsize rows evenly divides ((rows + 2*halo) - 2) )" );
    }
    if( (((gCols + 2*haloWidth) - 2) % lCols) != 0 )
    {
        throw InvalidArgValue( "columns including halo must be even multiple of lsize (e.g., lsize cols evenly divides ((cols + 2*halo) - 2) )" );
    }
}
示例5:
// Pull the values this factory needs out of the parsed options.
//
// wCenter, wCardinal and wDiagonal are passed straight through to
// StencilFactory<T>::ExtractOptions; devices receives the contents of the
// "device" option.
void
CommonMICStencilFactory<T>::ExtractOptions( const OptionParser& options,
                                            T& wCenter,
                                            T& wCardinal,
                                            T& wDiagonal,
                                            std::vector<long long>& devices )
{
    // The base class handles the options it owns.
    StencilFactory<T>::ExtractOptions( options, wCenter, wCardinal, wDiagonal );

    // The local size is hardcoded, so no lsize-style option is read here.
    //
    // The device list is fetched at this point rather than in main():
    // BuildStencil is a virtual function whose signature cannot change, and
    // OptionParser offers no way to override an option's value once parsing
    // has set it.
    std::vector<long long> requestedDevices = options.getOptionVecInt("device");
    devices.swap( requestedDevices );
}
示例6: main
//......... part of the code omitted here (the beginning of main is not shown) .........
// Visible fragment of a benchmark main(): it finishes an early-exit branch,
// then selects an OpenCL device for this process and creates the context and
// command queue used to run the benchmark.
//
// Tail of a branch whose opening is not visible. Judging by the #endif
// comments, the print/merge exercise of mnc1 and mnc2 is compiled only when
// DEBUG and PARALLEL are defined; the branch then returns 0.
mnc2.Print (cout);
mnc1.merge (mnc2);
cout << "==============\nObject1 after merging 1:\n";
mnc1.Print (cout);
mnc1.merge (mnc2);
cout << "==============\nObject1 after merging 2:\n";
mnc1.Print (cout);
#endif // DEBUG
#endif // PARALLEL
return (0);
}
bool verbose = op.getOptionBool("verbose");
// The device option supports specifying more than one device;
// the list is spread round-robin over the node-local ranks further below.
int platform = op.getOptionInt("platform");
#ifdef PARALLEL
// Parallel build: obtain this process's node rank from NodeInfo
// (reported as the "local rank" in the verbose message).
NodeInfo ni;
int myNodeRank = ni.nodeRank();
if (verbose)
cout << "Global rank "<<rank<<" is local rank "<<myNodeRank << endl;
#else
// Serial build: a single process, treated as node rank 0.
int myNodeRank = 0;
#endif
// If they haven't specified any devices, assume they
// want the process with in-node rank N to use device N
int deviceIdx = myNodeRank;
// If they have, then round-robin the list of devices
// among the processes on a node.
// Note: after this lookup deviceIdx holds the device id read from the list,
// not a position within the list.
vector<long long> deviceVec = op.getOptionVecInt("device");
if (deviceVec.size() > 0)
{
int len = deviceVec.size();
// NOTE(review): assumes myNodeRank is non-negative; a negative rank would
// produce a negative index here - confirm nodeRank() never returns one.
deviceIdx = deviceVec[myNodeRank % len];
}
// Check for an erroneous device
// NOTE(review): only the upper bound is tested; a negative device id from the
// command line is passed on unchanged.
if (deviceIdx >= GetNumOclDevices(platform)) {
cerr << "Warning: device index: " << deviceIdx
<< " out of range, defaulting to device 0.\n";
deviceIdx = 0;
}
// Initialization
if (verbose) cout << ">> initializing\n";
cl_device_id devID = ListDevicesAndGetDevice(platform, deviceIdx);
cl_int clErr;
// Create a context that contains only the selected device.
cl_context ctx = clCreateContext( NULL, // properties
1, // number of devices
&devID, // device
NULL, // notification function
NULL,
&clErr );
// CL_CHECK_ERROR is defined elsewhere; presumably it reports a failing status.
CL_CHECK_ERROR(clErr);
// Create a command queue on that device with profiling enabled.
cl_command_queue queue = clCreateCommandQueue( ctx,
devID,
CL_QUEUE_PROFILING_ENABLE,
&clErr );
CL_CHECK_ERROR(clErr);
// Results are collected in this database (its use is not shown here).
ResultDatabase resultDB;
// Run the benchmark
示例7: main
// Entry point of an MPI + GPU contention test (only the first part of the
// function is shown). The visible code initializes MPI, registers and parses
// the command-line options, sets up a per-node shared-memory flag, requires
// an even number of nodes, and works out how many devices are used per node
// and in total.
int main(int argc, char *argv[])
{
int numdev=0, totalnumdev=0, numtasks, mympirank, dest, source, rc,
mypair=0, count, tag=2, mynoderank,myclusterrank,nodenprocs;
int *grp1, *grp2;
int mygrprank,grpnumtasks;
MPI_Group orig_group,bmgrp;
MPI_Comm bmcomm,nlrcomm;
ResultDatabase resultDB,resultDBWU,resultDB1;
OptionParser op;
ParallelResultDatabase pardb, pardb1;
bool amGPUTask = false;
// Pointer to the flag kept in System V shared memory (attached further below).
volatile unsigned long long *mpidone;
int i,shmid;
/* Initialize MPI and query the world size, this task's rank and the world group */
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
MPI_Comm_rank(MPI_COMM_WORLD, &mympirank);
MPI_Comm_group(MPI_COMM_WORLD, &orig_group);
//Add shared options to the parser
op.addOption("device", OPT_VECINT, "0", "specify device(s) to run on",
'd');
op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
op.addOption("quiet", OPT_BOOL, "",
"write minimum necessary to standard output", 'q');
op.addOption("passes", OPT_INT, "10", "specify number of passes", 'z');
op.addOption("size", OPT_VECINT, "1", "specify problem size", 's');
// NOTE(review): the help text below misspells "minutes"; it is a runtime
// string and is left untouched here.
op.addOption("time", OPT_INT, "5", "specify running time in miuntes", 't');
op.addOption("outputFile", OPT_STRING, "output.txt", "specify output file",
'o');
op.addOption("infoDevices", OPT_BOOL, "", "show summary info for available devices",
'i');
op.addOption("fullInfoDevices", OPT_BOOL, "", "show full info for available devices");
op.addOption("MPIminmsg", OPT_INT, "0", "specify minimum MPI message size");
op.addOption("MPImaxmsg", OPT_INT, "16384",
"specify maximum MPI message size");
op.addOption("MPIiter", OPT_INT, "1000",
"specify number of MPI benchmark iterations for each size");
op.addOption("platform", OPT_INT, "0", "specify platform for device selection", 'y');
// When parsing fails, rank 0 prints the usage text and every task shuts
// MPI down and exits with status 0.
if (!op.parse(argc, argv))
{
if (mympirank == 0)
op.usage();
MPI_Finalize();
return 0;
}
int npasses = op.getOptionInt("passes");
//our simple mapping
NodeInfo NI;
mynoderank = NI.nodeRank(); // rank of my process within the node
myclusterrank = NI.clusterRank(); // cluster (essentially, node) id
MPI_Comm smpcomm = NI.getSMPComm();
// Allocate System V shared memory: node rank 0 creates a private segment
// holding one unsigned long long.
// NOTE(review): the return values of shmget and shmat are not checked.
if(mynoderank==0){
shmid = shmget(IPC_PRIVATE,
sizeof(unsigned long long),
(IPC_CREAT | 0600));
}
// Distribute the segment id from root 0 of the SMP communicator, then let
// every process attach the segment.
MPI_Bcast(&shmid, 1, MPI_INT, 0, NI.getSMPComm());
mpidone = ((volatile unsigned long long*) shmat(shmid, 0, 0));
// Node rank 0 marks the segment for removal right after attaching.
// NOTE(review): no barrier separates this from the shmat calls of the other
// ranks - confirm they cannot attach too late.
if (mynoderank == 0)
shmctl(shmid, IPC_RMID, 0);
// Every process stores 0 into the shared flag.
*mpidone = 0;
nlrcomm = NI.getNLRComm(); // communicator of all the lowest rank processes
// on all the nodes
int numnodes = NI.numNodes();
// The test requires an even node count; otherwise rank 0 prints a message
// and all tasks finalize MPI and exit with status 0.
if ( numnodes%2!=0 )
{
if(mympirank==0)
printf("\nThis test needs an even number of nodes\n");
MPI_Finalize();
exit(0);
}
int nodealr = NI.nodeALR();
nodenprocs=NI.nodeNprocs();
// determine how many GPU devices we are to use
int devsPerNode = op.getOptionVecInt( "device" ).size();
//cout<<mympirank<<":numgpus="<<devsPerNode<<endl;
// if there are as many or more devices as the nprocs, only use half of
// the nproc
if ( devsPerNode >= nodenprocs ) devsPerNode = nodenprocs/2;
// Only node rank 0 contributes its node's device count, so the sum over all
// tasks yields the total number of devices across the nodes.
numdev = (mynoderank == 0) ? devsPerNode : 0;
MPI_Allreduce(&numdev, &totalnumdev, 1, MPI_INT, MPI_SUM,
MPI_COMM_WORLD);
// After the reduction every task keeps the per-node device count.
numdev = devsPerNode;
//......... part of the code omitted here (the rest of main is not shown) .........
示例8: assert
// Run the stencil test: compute a reference result with the host stencil and
// set up the same computation for the MIC stencil (only the first part of the
// function is shown; the comparison and cleanup are not visible).
//
// timerDesc: not used in the visible part of the function
// resultDB:  not used in the visible part of the function
// opts:      parsed options; validated and read below
void
DoTest( const char* timerDesc, ResultDatabase& resultDB, OptionParser& opts )
{
StencilFactory<T>* stdStencilFactory = NULL;
Stencil<T>* stdStencil = NULL;
StencilFactory<T>* testStencilFactory = NULL;
Stencil<T>* testStencil = NULL;
//try
{
// Host factory produces the reference stencil, MIC factory the one under test.
stdStencilFactory = new HostStencilFactory<T>;
testStencilFactory = new MICStencilFactory<T>;
assert( (stdStencilFactory != NULL) && (testStencilFactory != NULL) );
// do a sanity check on option values
CheckOptions( opts );
stdStencilFactory->CheckOptions( opts );
testStencilFactory->CheckOptions( opts );
// extract and validate options
std::vector<long long> arrayDims = opts.getOptionVecInt( "customSize" );
// NOTE(review): a wrong dimension count is only reported (the throw is
// commented out), yet arrayDims[0] and arrayDims[1] are read right after.
if( arrayDims.size() != 2 )
{
cerr << "Dim size: " << arrayDims.size() << "\n";
//throw InvalidArgValue( "all overall dimensions must be positive" );
}
if (arrayDims[0] == 0) // User has not specified a custom size
{
// Square problem sizes for the standard size classes.
const int probSizes[4] = { 768, 1408, 2048, 4096 };
int sizeClass = opts.getOptionInt("size");
// NOTE(review): this range test has no effect (its body is commented out)
// and it accepts 0, for which probSizes[sizeClass - 1] below reads index -1;
// any value outside 1-4 indexes out of bounds.
if (!(sizeClass >= 0 && sizeClass < 5))
{
//throw InvalidArgValue( "Size class must be between 1-4" );
}
arrayDims[0] = arrayDims[1] =probSizes[sizeClass - 1];
}
long int seed = (long)opts.getOptionInt( "seed" );
bool beVerbose = opts.getOptionBool( "verbose" );
unsigned int nIters = (unsigned int)opts.getOptionInt( "num-iters" );
double valErrThreshold = (double)opts.getOptionFloat( "val-threshold" );
unsigned int nValErrsToPrint = (unsigned int)opts.getOptionInt( "val-print-limit" );
// The halo width is configurable only in the parallel build; otherwise it is 1.
#if defined(PARALLEL)
unsigned int haloWidth = (unsigned int)opts.getOptionInt( "iters-per-exchange" );
#else
unsigned int haloWidth = 1;
#endif // defined(PARALLEL)
float haloVal = (float)opts.getOptionFloat( "haloVal" );
// build a description of this experiment
// (format: <iterations>:<rows>x<cols>:<LROWS>x<LCOLS>)
std::ostringstream experimentDescriptionStr;
experimentDescriptionStr
<< nIters << ':'
<< arrayDims[0] << 'x' << arrayDims[1] << ':'
<< LROWS << 'x' << LCOLS;
unsigned int nPasses =(unsigned int)opts.getOptionInt( "passes" );
// Number of points is (rows + 2*halo - 2) * (cols + 2*halo - 2); the flop
// estimate multiplies it by 11 per iteration (11 is presumably the per-point
// operation count of the stencil - confirm).
unsigned long npts = (arrayDims[0] + 2*haloWidth - 2) *
(arrayDims[1] + 2*haloWidth - 2);
unsigned long nflops = npts * 11 * nIters;
cout<<"flops are = "<<nflops<<endl;
// compute the expected result on the host
// (in the parallel build only world rank 0 prints the notice)
#if defined(PARALLEL)
int cwrank;
MPI_Comm_rank( MPI_COMM_WORLD, &cwrank );
if( cwrank == 0 )
{
#endif // defined(PARALLEL)
std::cout << "\nPerforming stencil operation on host for later comparison with MIC output\n"
<< "Depending on host capabilities, this may take a while."
<< std::endl;
#if defined(PARALLEL)
}
#endif // defined(PARALLEL)
// Reference matrix: the problem size plus a halo of haloWidth on each side.
Matrix2D<T> exp( arrayDims[0] + 2*haloWidth,
arrayDims[1] + 2*haloWidth );
Initialize<T> init( seed,
haloWidth,
haloVal );
init( exp );
if( beVerbose )
{
std::cout << "initial state:\n" << exp << std::endl;
}
// NOTE(review): this declaration shadows the stdStencil pointer declared at
// the top of the function, which therefore stays NULL.
Stencil<T>* stdStencil = stdStencilFactory->BuildStencil( opts );
// Apply the host stencil to exp for nIters iterations.
(*stdStencil)( exp, nIters );
if( beVerbose )
{
std::cout << "expected result:\n" << exp << std::endl;
}
// compute the result on the MIC device
Matrix2D<T> data( arrayDims[0] + 2*haloWidth,
arrayDims[1] + 2*haloWidth );
//......... part of the code omitted here (the rest of DoTest is not shown) .........
示例9: main
// ****************************************************************************
// Function: main
//
// Purpose:
// The main function takes care of initialization (device and MPI), then
// performs the benchmark and prints results.
//
// Arguments:
// argc, argv: command line; handed to MPI_Init (PARALLEL builds) and to the
// option parser
//
// Returns (paths visible in this excerpt):
// after a failed parse, 0 if help was requested and 1 otherwise;
// 0 when only device information was requested ("infoDevices")
//
// Programmer: Jeremy Meredith
// Creation:
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
// Split timing reports into detailed and summary. For serial code, we
// report all trial values, and for parallel, skip the per-process vals.
// Also detect and print outliers from parallel runs.
//
// ****************************************************************************
int main(int argc, char *argv[])
{
int ret = 0;
bool noprompt = false;
// The matching catch handler lies in the part of the function not shown.
try
{
#ifdef PARALLEL
int rank, size;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
cerr << "MPI Task " << rank << "/" << size - 1 << " starting....\n";
#endif
// Get args
OptionParser op;
//Add shared options to the parser
op.addOption("device", OPT_VECINT, "0",
"specify device(s) to run on", 'd');
op.addOption("verbose", OPT_BOOL, "", "enable verbose output", 'v');
op.addOption("passes", OPT_INT, "10", "specify number of passes", 'n');
op.addOption("size", OPT_INT, "1", "specify problem size", 's');
op.addOption("infoDevices", OPT_BOOL, "",
"show info for available platforms and devices", 'i');
op.addOption("quiet", OPT_BOOL, "", "write minimum necessary to standard output", 'q');
#ifdef _WIN32
op.addOption("noprompt", OPT_BOOL, "", "don't wait for prompt at program exit");
#endif
// Let the benchmark register its own options before parsing.
addBenchmarkSpecOptions(op);
// On a parse failure print the usage text (rank 0 only in PARALLEL builds,
// which also finalize MPI) and leave.
if (!op.parse(argc, argv))
{
#ifdef PARALLEL
if (rank == 0)
op.usage();
MPI_Finalize();
#else
op.usage();
#endif
return (op.HelpRequested() ? 0 : 1);
}
bool verbose = op.getOptionBool("verbose");
bool infoDev = op.getOptionBool("infoDevices");
#ifdef _WIN32
noprompt = op.getOptionBool("noprompt");
#endif
// Choose the device id for this process.
int device;
#ifdef PARALLEL
NodeInfo ni;
int myNodeRank = ni.nodeRank();
vector<long long> deviceVec = op.getOptionVecInt("device");
// NOTE(review): signed/unsigned comparison; a negative myNodeRank would be
// converted to a large unsigned value and take the first branch.
if (myNodeRank >= deviceVec.size()) {
// Default is for task i to test device i
device = myNodeRank;
} else {
device = deviceVec[myNodeRank];
}
#else
// Serial build: use the first listed device.
// NOTE(review): assumes the list is non-empty (the registered default is "0").
device = op.getOptionVecInt("device")[0];
#endif
// Fall back to device 0 when the id is not below the CUDA device count.
// NOTE(review): the status returned by cudaGetDeviceCount is ignored and
// negative ids are not rejected.
int deviceCount;
cudaGetDeviceCount(&deviceCount);
if (device >= deviceCount) {
cerr << "Warning: device index: " << device <<
" out of range, defaulting to device 0.\n";
device = 0;
}
// Initialization
EnumerateDevicesAndChoose(device, infoDev);
// When only device information was requested, stop here.
// NOTE(review): no MPI_Finalize is visible on this path in PARALLEL builds -
// confirm against the part of the function not shown.
if( infoDev )
{
return 0;
}
ResultDatabase resultDB;
//......... part of the code omitted here (the rest of main is not shown) .........
示例10: if
// Set up the OpenCL state used by the FFT benchmark.
//
// Creates the context (fftCtx) and command queue (fftQueue) if they do not
// exist yet, builds the FFT program from cl_source_fft with options derived
// from the arguments, prints the build log, and creates the forward, inverse
// and check kernels (fftKrnl, ifftKrnl, chkKrnl).
//
// op:     parsed options; "platform", "device" and "use-native" are read
// _do_dp: true to build for double precision, false for single precision;
//         the value is also stored in do_dp
//
// If the program fails to build, the build log is printed, dumpPTXCode is
// called and the process exits with status -1.
void
init(OptionParser& op, bool _do_dp)
{
    cl_int err;

    do_dp = _do_dp;

    if (!fftCtx) {
        // first get the device: the first entry of the device list, or 0
        // when the list is empty
        int device, platform = op.getOptionInt("platform");
        if (op.getOptionVecInt("device").size() > 0) {
            device = op.getOptionVecInt("device")[0];
        }
        else {
            device = 0;
        }
        fftDev = ListDevicesAndGetDevice(platform, device);

        // now get the context
        fftCtx = clCreateContext(NULL, 1, &fftDev, NULL, NULL, &err);
        CL_CHECK_ERROR(err);
    }

    if (!fftQueue) {
        // get a queue
        fftQueue = clCreateCommandQueue(fftCtx, fftDev, CL_QUEUE_PROFILING_ENABLE,
                                        &err);
        CL_CHECK_ERROR(err);
    }

    // create the program...
    fftProg = clCreateProgramWithSource(fftCtx, 1, &cl_source_fft, NULL, &err);
    CL_CHECK_ERROR(err);

    // ...and build it
    string args = " -cl-mad-enable ";
    if (op.getOptionBool("use-native")) {
        args += " -cl-fast-relaxed-math ";
    }
    if (!do_dp) {
        args += " -DSINGLE_PRECISION ";
    }
    else if (checkExtension(fftDev, "cl_khr_fp64")) {
        args += " -DK_DOUBLE_PRECISION ";
    }
    else if (checkExtension(fftDev, "cl_amd_fp64")) {
        args += " -DAMD_DOUBLE_PRECISION ";
    }

    // Keep the build status in its own variable. The log queries below reuse
    // 'err', and the success/failure decision must reflect clBuildProgram
    // rather than the outcome of fetching the log.
    const cl_int buildErr = clBuildProgram(fftProg, 0, NULL, args.c_str(), NULL, NULL);

    // Fetch the build log: first ask for its size, then read it. The buffer is
    // zero-filled and one byte longer than requested, so it is always a valid
    // C string even when a query fails.
    size_t logBytes = 0;
    err = clGetProgramBuildInfo(fftProg,
                                fftDev,
                                CL_PROGRAM_BUILD_LOG,
                                0,
                                NULL,
                                &logBytes );
    if (err != CL_SUCCESS) {
        logBytes = 0;
    }
    string buildLog(logBytes + 1, '\0');
    if (logBytes > 0) {
        err = clGetProgramBuildInfo(fftProg,
                                    fftDev,
                                    CL_PROGRAM_BUILD_LOG,
                                    logBytes,
                                    &buildLog[0],
                                    NULL );
    }

    // The log is printed for successful builds too (it may carry warnings).
    std::cout << buildLog.c_str() << std::endl;

    if (buildErr != CL_SUCCESS) {
        cout << "Retsize: " << logBytes << endl;
        cout << "Log: " << buildLog.c_str() << endl;
        dumpPTXCode(fftCtx, fftProg, "oclFFT");
        exit(-1);
    }
    // (dumpPTXCode is deliberately not called after a successful build)

    // Create kernel for forward FFT
    fftKrnl = clCreateKernel(fftProg, "fft1D_512", &err);
    CL_CHECK_ERROR(err);

    // Create kernel for inverse FFT
    ifftKrnl = clCreateKernel(fftProg, "ifft1D_512", &err);
    CL_CHECK_ERROR(err);

    // Create kernel for check
    chkKrnl = clCreateKernel(fftProg, "chk1D_512", &err);
    CL_CHECK_ERROR(err);
}