This article collects typical usage examples of the C++ SourcePtr::begin method. If you have been wondering how SourcePtr::begin is used in practice, or what calling code for it looks like, the curated examples below may help. You can also explore further usage examples of the SourcePtr class that this method belongs to.
The following shows 13 code examples of the SourcePtr::begin method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ code examples.
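All of the examples below follow the same pattern: a Source is created from an R-side specification list, and SourcePtr::begin() / SourcePtr::end() expose the underlying buffer as a raw character range that is then handed to a tokenizer or an encoder. Here is a minimal sketch of that pattern, assuming the Source.h header and the count_source_bytes_ function name, which are illustrative and not part of the examples below:

#include <Rcpp.h>
#include "Source.h"   // assumed header providing Source / SourcePtr
using namespace Rcpp;

// [[Rcpp::export]]
int count_source_bytes_(List sourceSpec) {
  SourcePtr source = Source::create(sourceSpec);
  // begin() and end() delimit the raw character buffer backing the source,
  // so end() - begin() is the number of bytes it contains
  return source->end() - source->begin();
}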
Example 1: read_file_
// [[Rcpp::export]]
CharacterVector read_file_(List sourceSpec) {
  SourcePtr source = Source::create(sourceSpec);

  CharacterVector out(1);
  out[0] = Rf_mkCharLen(source->begin(), source->end() - source->begin());
  return out;
}
Example 2: tokenize_
// [[Rcpp::export]]
RObject tokenize_(List sourceSpec, List tokenizerSpec, int n_max) {
  Warnings warnings;

  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());
  tokenizer->setWarnings(&warnings);

  std::vector<std::vector<std::string> > rows;

  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (n_max > 0 && t.row() >= (size_t) n_max)
      break;

    if (t.row() >= rows.size()) {
      rows.resize(t.row() + 1);
    }

    std::vector<std::string>& row = rows[t.row()];
    if (t.col() >= row.size())
      row.resize(t.col() + 1);

    row[t.col()] = t.asString();
  }

  RObject out = wrap(rows);
  return warnings.addAsAttribute(out);
}
Example 3: guess_header_
// [[Rcpp::export]]
RObject guess_header_(List sourceSpec, List tokenizerSpec, List locale_) {
  Warnings warnings;
  LocaleInfo locale(locale_);

  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());
  tokenizer->setWarnings(&warnings);

  CollectorCharacter out(&locale.encoder_);
  out.setWarnings(&warnings);

  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (t.row() > (size_t) 0) // only read one row
      break;

    if (t.col() >= (size_t) out.size()) {
      out.resize(t.col() + 1);
    }

    if (t.type() == TOKEN_STRING) {
      out.setValue(t.col(), t);
    }
  }

  return out.vector();
}
Example 4: tokenize_
// [[Rcpp::export]]
std::vector<std::vector<std::string> > tokenize_(List sourceSpec, List tokenizerSpec, int n) {
  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());

  std::vector<std::vector<std::string> > rows;

  int i = 0;
  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (n > 0 && i > n)
      break;

    if (t.row() >= rows.size()) {
      rows.resize(t.row() + 1);
    }

    std::vector<std::string>& row = rows[t.row()];
    if (t.col() >= row.size())
      row.resize(t.col() + 1);

    row[t.col()] = t.asString();
    ++i;
  }

  return rows;
}
Example 5: read_lines_
// [[Rcpp::export]]
CharacterVector read_lines_(List sourceSpec, int n_max = -1) {
  SourcePtr source = Source::create(sourceSpec);

  TokenizerLine tokenizer;
  tokenizer.tokenize(source->begin(), source->end());

  int n = (n_max < 0) ? 1000 : n_max;
  CharacterVector out(n);

  int i = 0;
  for (Token t = tokenizer.nextToken(); t.type() != TOKEN_EOF; t = tokenizer.nextToken()) {
    if (i >= n) {
      if (n_max < 0) {
        // Grow the output by roughly 50% when the initial guess is too small
        n = (n * 3) / 2 + 1;
        out = Rf_lengthgets(out, n);
      } else {
        break;
      }
    }

    if (t.type() == TOKEN_STRING)
      out[i] = t.asString();

    ++i;
  }

  if (i < n) {
    out = Rf_lengthgets(out, i);
  }

  return out;
}
Example 6: collectorsGuess
// [[Rcpp::export]]
std::vector<std::string> collectorsGuess(List sourceSpec, List tokenizerSpec, int n = 100) {
  Warnings warnings;

  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());
  tokenizer->setWarnings(&warnings); // silence warnings

  std::vector<CollectorCharacter> collectors;
  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (t.row() >= (size_t) n)
      break;

    // Add new collectors, if needed
    if (t.col() >= collectors.size()) {
      int old_p = collectors.size();
      collectors.resize(t.col() + 1);
      for (size_t j = old_p; j < collectors.size(); ++j) {
        collectors[j].resize(n);
      }
    }

    collectors[t.col()].setValue(t.row(), t);
  }

  std::vector<std::string> out;
  for (size_t j = 0; j < collectors.size(); ++j) {
    CharacterVector col = as<CharacterVector>(collectors[j].vector());
    out.push_back(collectorGuess(col));
  }

  return out;
}
Example 7: read_file_
// [[Rcpp::export]]
CharacterVector read_file_(List sourceSpec, List locale_) {
  SourcePtr source = Source::create(sourceSpec);
  LocaleInfo locale(locale_);

  return CharacterVector::create(
    locale.encoder_.makeSEXP(source->begin(), source->end())
  );
}
Example 8: dim_tokens_
// [[Rcpp::export]]
IntegerVector dim_tokens_(List sourceSpec, List tokenizerSpec) {
  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());

  // Start from -1 so an empty source reports 0 rows and 0 columns.
  // (With size_t counters, -1 would wrap to SIZE_MAX and the column
  // comparison below would never update cols.)
  int rows = -1, cols = -1;

  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    rows = t.row();
    if ((int) t.col() > cols)
      cols = t.col();
  }

  return IntegerVector::create(rows + 1, cols + 1);
}
Example 9: read_lines_
// [[Rcpp::export]]
CharacterVector read_lines_(List sourceSpec, List locale_, int n_max = -1,
                            bool progress = true) {
  SourcePtr source = Source::create(sourceSpec);

  TokenizerLine tokenizer;
  tokenizer.tokenize(source->begin(), source->end());

  LocaleInfo locale(locale_);

  Progress progressBar;

  int n = (n_max < 0) ? 10000 : n_max;
  CharacterVector out(n);

  int i = 0;
  for (Token t = tokenizer.nextToken(); t.type() != TOKEN_EOF; t = tokenizer.nextToken()) {
    if (progress && (i + 1) % 25000 == 0)
      progressBar.show(tokenizer.progress());

    if (i >= n) {
      if (n_max < 0) {
        // Estimate rows in full dataset
        n = (i / tokenizer.progress().first) * 1.2;
        out = Rf_lengthgets(out, n);
      } else {
        break;
      }
    }

    if (t.type() == TOKEN_STRING)
      out[i] = t.asSEXP(&locale.encoder_);

    ++i;
  }

  if (i < n) {
    out = Rf_lengthgets(out, i);
  }

  if (progress)
    progressBar.show(tokenizer.progress());
  progressBar.stop();

  return out;
}
Example 10: parse_
// [[Rcpp::export]]
SEXP parse_(List sourceSpec, List tokenizerSpec, List collectorSpec) {
  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());

  boost::shared_ptr<Collector> out = Collector::create(collectorSpec);
  out->resize(100);

  int i = 0;
  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (i >= out->size())
      out->resize(i * 2);

    out->setValue(i, t);
    ++i;
  }

  if (i != out->size()) {
    out->resize(i);
  }

  return out->vector();
}
Example 11: guess_types_
// [[Rcpp::export]]
std::vector<std::string> guess_types_(List sourceSpec, List tokenizerSpec,
                                      Rcpp::List locale_, int n = 100) {
  Warnings warnings;

  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());
  tokenizer->setWarnings(&warnings); // silence warnings

  LocaleInfo locale(locale_);

  std::vector<CollectorPtr> collectors;
  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (t.row() >= (size_t) n)
      break;

    // Add new collectors, if needed
    if (t.col() >= collectors.size()) {
      // Number of new columns required so that index t.col() is valid
      int p = t.col() - collectors.size() + 1;
      for (int j = 0; j < p; ++j) {
        CollectorPtr col = CollectorPtr(new CollectorCharacter(&locale.encoder_));
        col->setWarnings(&warnings);
        col->resize(n);
        collectors.push_back(col);
      }
    }

    collectors[t.col()]->setValue(t.row(), t);
  }

  std::vector<std::string> out;
  for (size_t j = 0; j < collectors.size(); ++j) {
    CharacterVector col = as<CharacterVector>(collectors[j]->vector());
    out.push_back(collectorGuess(col, locale_));
  }

  return out;
}
Example 12: read_tokens
// [[Rcpp::export]]
RObject read_tokens(List sourceSpec, List tokenizerSpec, ListOf<List> colSpecs,
                    CharacterVector colNames, List locale_, int n_max = -1,
                    bool progress = true) {
  Warnings warnings;
  LocaleInfo locale(locale_);

  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());
  tokenizer->setWarnings(&warnings);

  std::vector<CollectorPtr> collectors = collectorsCreate(colSpecs, &locale, &warnings);

  Progress progressBar;

  // Work out how many output columns we have
  size_t p = collectors.size();
  size_t pOut = 0;
  for (size_t j = 0; j < p; ++j) {
    if (collectors[j]->skip())
      continue;
    pOut++;
  }

  // Match colNames to non-skipped collectors
  if (p != (size_t) colNames.size())
    stop("colSpec and colNames must be same size");

  CharacterVector outNames(pOut);
  int cj = 0;
  for (size_t j = 0; j < p; ++j) {
    if (collectors[j]->skip())
      continue;
    outNames[cj] = colNames[j];
    cj++;
  }

  size_t n = (n_max < 0) ? 1000 : n_max;
  collectorsResize(collectors, n);

  int i = -1, j = -1, cells = 0;
  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (progress && (cells++) % 250000 == 0)
      progressBar.show(tokenizer->progress());

    if (t.col() == 0 && i != -1)
      checkColumns(&warnings, i, j, p);

    if (t.row() >= n) {
      if (n_max >= 0)
        break;

      // Estimate rows in full dataset
      n = (i / tokenizer->progress().first) * 1.2;
      collectorsResize(collectors, n);
    }

    if (t.col() < p)
      collectors[t.col()]->setValue(t.row(), t);

    i = t.row();
    j = t.col();
  }
  if (i != -1)
    checkColumns(&warnings, i, j, p);

  if (progress)
    progressBar.show(tokenizer->progress());
  progressBar.stop();

  if (i != (int) n - 1) {
    collectorsResize(collectors, i + 1);
  }

  // Save individual columns into a data frame
  List out(pOut);
  j = 0;
  for (CollectorItr cur = collectors.begin(); cur != collectors.end(); ++cur) {
    if ((*cur)->skip())
      continue;
    out[j] = (*cur)->vector();
    j++;
  }
  out.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame");
  out.attr("row.names") = IntegerVector::create(NA_INTEGER, -(i + 1));
  out.attr("names") = outNames;

  return warnings.addAsAttribute(out);
}
Example 13: read_tokens
// [[Rcpp::export]]
RObject read_tokens(List sourceSpec, List tokenizerSpec, ListOf<List> colSpecs,
                    CharacterVector col_names, int n_max = -1,
                    bool progress = true) {
  Warnings warnings;

  SourcePtr source = Source::create(sourceSpec);
  TokenizerPtr tokenizer = Tokenizer::create(tokenizerSpec);
  tokenizer->tokenize(source->begin(), source->end());
  tokenizer->setWarnings(&warnings);

  std::vector<CollectorPtr> collectors = collectorsCreate(colSpecs, &warnings);

  Progress progressBar;

  size_t p = collectors.size();

  // Work out how many output columns we have
  size_t pOut = 0;
  for (CollectorItr cur = collectors.begin(); cur != collectors.end(); ++cur) {
    if (!(*cur)->skip())
      pOut++;
  }

  // Allow either one name per column, or one name per output column
  if (p != pOut && (size_t) col_names.size() == p) {
    CharacterVector col_names2(pOut);
    int cj = 0;
    for (size_t j = 0; j < p; ++j) {
      if (collectors[j]->skip())
        continue;
      col_names2[cj++] = col_names[j];
    }
    col_names = col_names2;
  }

  if (pOut != (size_t) col_names.size()) {
    Rcpp::stop("You have %i column names, but %i columns",
               col_names.size(), pOut);
  }

  size_t n = (n_max < 0) ? 1000 : n_max;
  collectorsResize(collectors, n);

  size_t i = 0, cells = 0;
  for (Token t = tokenizer->nextToken(); t.type() != TOKEN_EOF; t = tokenizer->nextToken()) {
    if (progress && (cells++) % 250000 == 0)
      progressBar.show(tokenizer->progress());

    if (t.col() >= p) {
      warnings.addWarning(t.row(), t.col(), tfm::format("Only %i columns", p), "");
      continue;
    }

    if (t.row() >= n) {
      if (n_max >= 0)
        break;

      // Estimate rows in full dataset
      n = (i / tokenizer->progress().first) * 1.2;
      collectorsResize(collectors, n);
    }

    collectors[t.col()]->setValue(t.row(), t);
    i = t.row();
  }

  progressBar.show(tokenizer->progress());
  progressBar.stop();

  if (i <= n) {
    collectorsResize(collectors, i + 1);
  }

  // Save individual columns into a data frame
  List out(pOut);
  int j = 0;
  for (CollectorItr cur = collectors.begin(); cur != collectors.end(); ++cur) {
    if ((*cur)->skip())
      continue;
    out[j] = (*cur)->vector();
    j++;
  }
  out.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame");
  out.attr("row.names") = IntegerVector::create(NA_INTEGER, -(i + 1));
  out.attr("names") = col_names;

  return warnings.addAsAttribute(out);
}
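Beyond handing the range to a tokenizer or an encoder, the begin()/end() pair can also be walked directly like any ordinary pointer range. The following sketch is an illustration only: the function name count_lines_ is hypothetical, and it assumes begin() and end() return const char*, as the Rf_mkCharLen call in Example 1 suggests.

// [[Rcpp::export]]
int count_lines_(List sourceSpec) {
  SourcePtr source = Source::create(sourceSpec);

  int lines = 0;
  // Treat begin()/end() as a plain const char* range and count line breaks
  for (const char* p = source->begin(); p != source->end(); ++p) {
    if (*p == '\n')
      ++lines;
  }
  return lines;
}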