本文整理汇总了 C++ 中 RegexMatcher 类的典型用法代码示例。如果您正苦于以下问题:C++ RegexMatcher 类的具体用法是什么?C++ RegexMatcher 怎么用?有哪些 C++ RegexMatcher 的使用示例?那么,这里精选的代码示例或许可以为您提供帮助。
下文共展示了 RegexMatcher 类的 12 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 C++ 代码示例。
示例1: stri_detect_regex
/**
 * Detect if a pattern occurs in a string
 *
 * For every (recycled) pair of string and pattern, runs the regex matcher's
 * find() on the string and stores the outcome as an R logical.
 *
 * @param str R character vector
 * @param pattern R character vector containing regular expressions
 * @param opts_regex list (passed to StriContainerRegexPattern::getRegexFlags)
 * @return logical vector of length given by the recycling rule applied to
 *         `str` and `pattern`
 *
 * @version 0.1 (Marcin Bujarski)
 * @version 0.2 (Marek Gagolewski) - use StriContainerUTF16
 * @version 0.3 (Marek Gagolewski) - use StriContainerUTF16's vectorization
 * @version 0.4 (Marek Gagolewski, 2013-06-18) use StriContainerRegexPattern + opts_regex
 */
SEXP stri_detect_regex(SEXP str, SEXP pattern, SEXP opts_regex)
{
    // The prepared arguments may be objects distinct from the ones passed in,
    // so they have to be protected from the garbage collector for as long as
    // they are in use (Rf_allocVector below may trigger a collection).
    PROTECT(str = stri_prepare_arg_string(str, "str"));
    PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern"));
    R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));
    // this will work for vectorize_length == 0:
    uint32_t pattern_flags = StriContainerRegexPattern::getRegexFlags(opts_regex);

    STRI__ERROR_HANDLER_BEGIN
    StriContainerUTF16 str_cont(str, vectorize_length);
    // MG: tried StriContainerUTF8 + utext_openUTF8 - this was slower
    StriContainerRegexPattern pattern_cont(pattern, vectorize_length, pattern_flags);

    SEXP ret;
    PROTECT(ret = Rf_allocVector(LGLSXP, vectorize_length));
    int* ret_tab = LOGICAL(ret);

    for (R_len_t i = pattern_cont.vectorize_init();
            i != pattern_cont.vectorize_end();
            i = pattern_cont.vectorize_next(i))
    {
        // Skips the element (via `continue`) after running one of the two
        // supplied statements; presumably NA input -> NA_LOGICAL and
        // empty input -> FALSE -- confirm against the macro definition.
        STRI__CONTINUE_ON_EMPTY_OR_NA_STR_PATTERN(str_cont, pattern_cont, ret_tab[i] = NA_LOGICAL, ret_tab[i] = FALSE)

        RegexMatcher *matcher = pattern_cont.getMatcher(i); // will be deleted automatically
        matcher->reset(str_cont.get(i));
        ret_tab[i] = (int)matcher->find();
    }

    UNPROTECT(3); // str, pattern, ret
    return ret;
    // NOTE(review): as before, the protect stack is not balanced explicitly on
    // the error path; presumably the handler raises an R error -- confirm.
    STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
示例2: findText
bool t4p::FinderClass::FindNextRegularExpression(const UnicodeString& text, int32_t start) {
if (U_SUCCESS(PatternErrorCode) && Pattern != NULL) {
UnicodeString findText(text);
if (start > 0 && start < text.length()) {
findText.setTo(text, start);
} else if (start > 0) {
findText = UNICODE_STRING_SIMPLE("");
}
int32_t foundPos = 0,
length = 0,
endPos = 0;
UErrorCode error = U_ZERO_ERROR;
RegexMatcher* matcher = Pattern->matcher(findText, error);
if (U_SUCCESS(error) && matcher) {
if (matcher->find()) {
foundPos = matcher->start(error);
endPos = matcher->end(error);
if (U_SUCCESS(error) && U_SUCCESS(error)) {
IsFound = true;
length = endPos - foundPos; // end is the index after the match
// if search was started from the middle of a string,
// need to correct the found position
LastPosition = start > 0 ? foundPos + start : foundPos;
LastLength = length;
}
}
delete matcher;
}
}
return IsFound;
}
示例3: matchedText
bool t4p::FinderClass::GetLastReplacementText(const UnicodeString& text, UnicodeString& replacementText) const {
UBool matchFound = FALSE;
if (IsFound && (LastPosition + LastLength) <= text.length()) {
UnicodeString matchedText(text, LastPosition, LastLength);
UnicodeString replaceWith = ReplaceExpression;
UErrorCode error = U_ZERO_ERROR;
RegexMatcher* matcher = NULL;
switch (Mode) {
case EXACT:
matchFound = Expression == matchedText;
if (matchFound) {
replacementText = replaceWith;
}
break;
case REGULAR_EXPRESSION:
matcher = Pattern->matcher(matchedText, error);
if (U_SUCCESS(error) && matcher && matcher->matches(error) && U_SUCCESS(error)) {
replacementText = matcher->replaceFirst(replaceWith, error);
matchFound = TRUE;
}
break;
}
if (matcher) {
delete matcher;
}
}
return matchFound == TRUE;
}
示例4: stri__match_firstlast_regex
/**
* Extract all capture groups of the first/last occurrence
* of a regex pattern in each string
*
* @param str character vector
* @param pattern character vector
* @param opts_regex list
* @param first logical - search for the first or the last occurrence?
* @param cg_missing single string
* @return character matrix
*
* @version 0.1-??? (Marek Gagolewski, 2013-06-22)
*
* @version 0.3-1 (Marek Gagolewski, 2014-11-05)
* Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
*
* @version 0.4-1 (Marek Gagolewski, 2014-12-06)
* new arg: cg_missing
*
* @version 1.0-2 (Marek Gagolewski, 2016-01-29)
* Issue #214: allow a regex pattern like `.*` to match an empty string
*/
SEXP stri__match_firstlast_regex(SEXP str, SEXP pattern, SEXP cg_missing, SEXP opts_regex, bool first)
{
// @TODO: capture_groups arg (integer vector/set - which capture groups to extract)
PROTECT(str = stri_prepare_arg_string(str, "str")); // prepare string argument
PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern")); // prepare string argument
PROTECT(cg_missing = stri_prepare_arg_string_1(cg_missing, "cg_missing"));
R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));
uint32_t pattern_flags = StriContainerRegexPattern::getRegexFlags(opts_regex);
UText* str_text = NULL; // may potentially be slower, but definitely is more convenient!
STRI__ERROR_HANDLER_BEGIN(3)
StriContainerUTF8 str_cont(str, vectorize_length);
StriContainerRegexPattern pattern_cont(pattern, vectorize_length, pattern_flags);
StriContainerUTF8 cg_missing_cont(cg_missing, 1);
STRI__PROTECT(cg_missing = STRING_ELT(cg_missing, 0));
// we don't know how many capture groups are there:
vector< vector< pair<const char*, const char*> > > occurrences(vectorize_length);
R_len_t occurrences_max = 1;
for (R_len_t i = pattern_cont.vectorize_init();
i != pattern_cont.vectorize_end();
i = pattern_cont.vectorize_next(i))
{
STRI__CONTINUE_ON_EMPTY_OR_NA_PATTERN(str_cont, pattern_cont,
/*do nothing*/; )
UErrorCode status = U_ZERO_ERROR;
RegexMatcher *matcher = pattern_cont.getMatcher(i); // will be deleted automatically
int pattern_cur_groups = matcher->groupCount();
if (occurrences_max < pattern_cur_groups+1) occurrences_max=pattern_cur_groups+1;
str_text = utext_openUTF8(str_text, str_cont.get(i).c_str(), str_cont.get(i).length(), &status);
STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
示例5: matcher
//---------------------------------------------------------------------
//
//   matcher(UnicodeString, err)
//
//   Creates a matcher through matcher(status) and, when one was returned,
//   resets it so that it operates on `input`.
//   Returns whatever matcher(status) returned, which may be NULL.
//
//---------------------------------------------------------------------
RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
                                    UErrorCode          &status) const {
    RegexMatcher *retMatcher = matcher(status);
    if (retMatcher != NULL) {
        retMatcher->reset(input);
    }
    return retMatcher;
}
示例6: matcher
//
//   matcher, UText mode
//
//   Obtains a matcher from matcher(status); when one is returned, stores the
//   current status in its fDeferredStatus and resets it onto `input`.
//   The flag parameter is unused and only selects this overload.
//
RegexMatcher *RegexPattern::matcher(UText               *input,
                                    PatternIsUTextFlag   /*flag*/,
                                    UErrorCode          &status) const {
    RegexMatcher *const created = matcher(status);
    if (created == NULL) {
        return NULL;
    }

    created->fDeferredStatus = status;
    created->reset(input);
    return created;
}
示例7: stri_detect_regex
/**
 * Detect if a pattern occurs in a string
 *
 * For each (recycled) pair of string and pattern the result is whether the
 * regex matcher's find() succeeds, optionally negated. Once `max_count`
 * positive results have been produced, all remaining elements are set to NA.
 *
 * @param str R character vector
 * @param pattern R character vector containing regular expressions
 * @param negate single bool
 * @param max_count single int
 * @param opts_regex list
 * @return logical vector
 *
 * @version 0.1-?? (Marcin Bujarski)
 *
 * @version 0.1-?? (Marek Gagolewski)
 *          use StriContainerUTF16
 *
 * @version 0.1-?? (Marek Gagolewski)
 *          use StriContainerUTF16's vectorization
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-06-18)
 *          use StriContainerRegexPattern + opts_regex
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-05)
 *          Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 *
 * @version 1.0-2 (Marek Gagolewski, 2016-01-29)
 *          Issue #214: allow a regex pattern like `.*` to match an empty string
 *
 * @version 1.0-3 (Marek Gagolewski, 2016-02-03)
 *          FR #216: `negate` arg added
 *
 * @version 1.3.1 (Marek Gagolewski, 2019-02-08)
 *          #232: `max_count` arg added
 */
SEXP stri_detect_regex(SEXP str, SEXP pattern, SEXP negate,
                       SEXP max_count, SEXP opts_regex)
{
    bool negate_1 = stri__prepare_arg_logical_1_notNA(negate, "negate");
    int max_count_1 = stri__prepare_arg_integer_1_notNA(max_count, "max_count");
    PROTECT(str = stri_prepare_arg_string(str, "str"));
    PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern"));

    R_len_t vectorize_length = stri__recycling_rule(
        true, 2, LENGTH(str), LENGTH(pattern));
    uint32_t pattern_flags = StriContainerRegexPattern::getRegexFlags(opts_regex);

    STRI__ERROR_HANDLER_BEGIN(2)
    // UTF-16 containers are used on purpose: a StriContainerUTF8 +
    // utext_openUTF8 variant was benchmarked and was slower
    // (mbmark-regex-detect1.R: UTF16 0.07171792 s; UText 0.10531605 s).
    StriContainerUTF16 str_cont(str, vectorize_length);
    StriContainerRegexPattern pattern_cont(pattern, vectorize_length, pattern_flags);

    SEXP ret;
    STRI__PROTECT(ret = Rf_allocVector(LGLSXP, vectorize_length));
    int* detected = LOGICAL(ret);

    for (R_len_t i = pattern_cont.vectorize_init();
            i != pattern_cont.vectorize_end();
            i = pattern_cont.vectorize_next(i))
    {
        // The budget of positive results is used up: everything else is NA.
        if (max_count_1 == 0) {
            detected[i] = NA_LOGICAL;
            continue;
        }

        STRI__CONTINUE_ON_EMPTY_OR_NA_PATTERN(str_cont,
            pattern_cont, detected[i] = NA_LOGICAL)

        RegexMatcher *cur_matcher = pattern_cont.getMatcher(i); // will be deleted automatically
        cur_matcher->reset(str_cont.get(i));

        int hit = static_cast<int>(cur_matcher->find()); // find() returns UBool
        if (negate_1) {
            hit = !hit;
        }
        detected[i] = hit;

        // A negative max_count_1 is never decremented, i.e. no limit applies.
        if (hit && max_count_1 > 0) {
            --max_count_1;
        }
    }

    STRI__UNPROTECT_ALL
    return ret;
    STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
示例8: matches
//---------------------------------------------------------------------
//
//   matches        Convenience function to test for a match, starting
//                  with a pattern string and a data string.
//
//                  Compiles `regex`, creates a matcher for `input` and
//                  returns the result of matcher->matches(). Returns FALSE
//                  when `status` already indicates a failure on entry, or
//                  when compiling the pattern / creating the matcher fails;
//                  the failure is reported through `status` (and `pe` for
//                  pattern syntax errors).
//
//---------------------------------------------------------------------
UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
                                      const UnicodeString   &input,
                                      UParseError           &pe,
                                      UErrorCode            &status) {
    if (U_FAILURE(status)) {return FALSE;}

    UBool         retVal = FALSE;
    RegexPattern *pat    = RegexPattern::compile(regex, 0, pe, status);

    // Never dereference the pattern or the matcher when their creation failed.
    if (U_SUCCESS(status) && pat != NULL) {
        RegexMatcher *matcher = pat->matcher(input, status);
        if (U_SUCCESS(status) && matcher != NULL) {
            retVal = matcher->matches(status);
        }
        delete matcher;   // deleting NULL is a no-op
    }

    delete pat;
    return retVal;
}
示例9: matches
//
//   matches, UText mode
//
//   Compiles `regex`, creates a UText-mode matcher for `input` and returns
//   the result of matcher->matches(). Returns FALSE when `status` already
//   indicates a failure on entry, or when compiling the pattern / creating
//   the matcher fails; the failure is reported through `status` (and `pe`
//   for pattern syntax errors).
//
UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
                                      UText                *input,
                                      UParseError          &pe,
                                      UErrorCode           &status) {
    if (U_FAILURE(status)) {return FALSE;}

    UBool         retVal = FALSE;
    RegexPattern *pat    = RegexPattern::compile(regex, 0, pe, status);

    // Never dereference the pattern or the matcher when their creation failed.
    if (U_SUCCESS(status) && pat != NULL) {
        RegexMatcher *matcher = pat->matcher(input, PATTERN_IS_UTEXT, status);
        if (U_SUCCESS(status) && matcher != NULL) {
            retVal = matcher->matches(status);
        }
        delete matcher;   // deleting NULL is a no-op
    }

    delete pat;
    return retVal;
}
示例10: dest
/**
 * Replaces every match in `text` with ReplaceExpression, in place.
 *
 * An empty ReplaceExpression is deliberately allowed: it lets the user
 * 'delete' parts of a string.
 *
 * @param text [in,out] the text to modify. Left untouched when the finder is
 *             not prepared or when the regular expression engine reports an
 *             error part-way through.
 * @return the number of replacements made; 0 when nothing was replaced or
 *         when an error prevented the replacement from completing
 */
int t4p::FinderClass::ReplaceAllMatches(UnicodeString& text) const {
    int matches = 0;
    if (!IsPrepared) {
        return matches;
    }

    const UnicodeString replacement = ReplaceExpression;
    if (EXACT == Mode || (REGULAR_EXPRESSION == Mode && ReplaceExpression.isEmpty())) {
        // NOTE(review): in regular expression mode with an empty replacement,
        // Expression is searched for literally rather than as a pattern --
        // behavior kept as-is; confirm that this is intended.
        int32_t pos = text.indexOf(Expression, 0);
        while (pos >= 0) {
            text.replaceBetween(pos, pos + Expression.length(), replacement);
            // resume after the inserted text so the replacement is never re-scanned
            pos = text.indexOf(Expression, pos + replacement.length());
            ++matches;
        }
        return matches;
    }

    UErrorCode error = U_ZERO_ERROR;
    RegexMatcher* matcher = Pattern != NULL ? Pattern->matcher(text, error) : NULL;
    if (U_SUCCESS(error) && matcher) {
        UnicodeString dest(text.length(), ' ', 0); // pre-sized, initially empty
        while (matcher->find()) {
            matcher->appendReplacement(dest, replacement, error);
            if (!U_SUCCESS(error)) {
                // e.g. a malformed replacement expression: stop right away
                break;
            }
            ++matches;
        }
        if (U_SUCCESS(error)) {
            matcher->appendTail(dest);
            text = dest;
        } else {
            // Do not publish a half-built result; report that nothing changed.
            matches = 0;
        }
    }
    delete matcher; // deleting NULL is a no-op
    return matches;
}
示例11: ReadAndConvertFile
void DecimalFormatTest::DataDrivenTests() {
char tdd[2048];
const char *srcPath;
UErrorCode status = U_ZERO_ERROR;
int32_t lineNum = 0;
//
// Open and read the test data file.
//
srcPath=getPath(tdd, "dcfmtest.txt");
if(srcPath==NULL) {
return; /* something went wrong, error already output */
}
int32_t len;
UChar *testData = ReadAndConvertFile(srcPath, len, status);
if (U_FAILURE(status)) {
return; /* something went wrong, error already output */
}
//
// Put the test data into a UnicodeString
//
UnicodeString testString(FALSE, testData, len);
RegexMatcher parseLineMat(UnicodeString(
"(?i)\\s*parse\\s+"
"\"([^\"]*)\"\\s+" // Capture group 1: input text
"([ild])\\s+" // Capture group 2: expected parsed type
"\"([^\"]*)\"\\s+" // Capture group 3: expected parsed decimal
"\\s*(?:#.*)?"), // Trailing comment
0, status);
RegexMatcher formatLineMat(UnicodeString(
"(?i)\\s*format\\s+"
"(\\S+)\\s+" // Capture group 1: pattern
"(ceiling|floor|down|up|halfeven|halfdown|halfup|default)\\s+" // Capture group 2: Rounding Mode
"\"([^\"]*)\"\\s+" // Capture group 3: input
"\"([^\"]*)\"" // Capture group 4: expected output
"\\s*(?:#.*)?"), // Trailing comment
0, status);
RegexMatcher commentMat (UNICODE_STRING_SIMPLE("\\s*(#.*)?$"), 0, status);
RegexMatcher lineMat(UNICODE_STRING_SIMPLE("(?m)^(.*?)$"), testString, 0, status);
if (U_FAILURE(status)){
dataerrln("Construct RegexMatcher() error.");
delete [] testData;
return;
}
//
// Loop over the test data file, once per line.
//
while (lineMat.find()) {
lineNum++;
if (U_FAILURE(status)) {
errln("File dcfmtest.txt, line %d: ICU Error \"%s\"", lineNum, u_errorName(status));
}
status = U_ZERO_ERROR;
UnicodeString testLine = lineMat.group(1, status);
// printf("%s\n", UnicodeStringPiece(testLine).data());
if (testLine.length() == 0) {
continue;
}
//
// Parse the test line. Skip blank and comment only lines.
// Separate out the three main fields - pattern, flags, target.
//
commentMat.reset(testLine);
if (commentMat.lookingAt(status)) {
// This line is a comment, or blank.
continue;
}
//
// Handle "parse" test case line from file
//
parseLineMat.reset(testLine);
if (parseLineMat.lookingAt(status)) {
execParseTest(lineNum,
parseLineMat.group(1, status), // input
parseLineMat.group(2, status), // Expected Type
parseLineMat.group(3, status), // Expected Decimal String
status
);
continue;
}
//
// Handle "format" test case line
//
formatLineMat.reset(testLine);
if (formatLineMat.lookingAt(status)) {
execFormatTest(lineNum,
formatLineMat.group(1, status), // Pattern
//.........这里部分代码省略.........
示例12: main
//------------------------------------------------------------------------------------------
//
// main for ugrep
//
// Structurally, all use of the ICU Regular Expression API is in main(),
// and all of the supporting stuff necessary to make a running program, but
// not directly related to regular expressions, is factored out into these other
// functions.
//
//------------------------------------------------------------------------------------------
int main(int argc, const char** argv) {
UBool matchFound = FALSE;
//
// Process the commmand line options.
//
processOptions(argc, argv);
//
// Create a RegexPattern object from the user supplied pattern string.
//
UErrorCode status = U_ZERO_ERROR; // All ICU operations report success or failure
// in a status variable.
UParseError parseErr; // In the event of a syntax error in the regex pattern,
// this struct will contain the position of the
// error.
RegexPattern *rePat = RegexPattern::compile(pattern, parseErr, status);
// Note that C++ is doing an automatic conversion
// of the (char *) pattern to a temporary
// UnicodeString object.
if (U_FAILURE(status)) {
fprintf(stderr, "ugrep: error in pattern: \"%s\" at position %d\n",
u_errorName(status), parseErr.offset);
exit(-1);
}
//
// Create a RegexMatcher from the newly created pattern.
//
UnicodeString empty;
RegexMatcher *matcher = rePat->matcher(empty, status);
if (U_FAILURE(status)) {
fprintf(stderr, "ugrep: error in creating RegexMatcher: \"%s\"\n",
u_errorName(status));
exit(-1);
}
//
// Loop, processing each of the input files.
//
for (int fileNum=firstFileNum; fileNum < argc; fileNum++) {
readFile(argv[fileNum]);
//
// Loop through the lines of a file, trying to match the regex pattern on each.
//
for (nextLine(0); lineStart<fileLen; nextLine(lineEnd)) {
UnicodeString s(FALSE, ucharBuf+lineStart, lineEnd-lineStart);
matcher->reset(s);
if (matcher->find()) {
matchFound = TRUE;
printMatch();
}
}
}
//
// Clean up
//
delete matcher;
delete rePat;
free(ucharBuf);
free(charBuf);
ucnv_close(outConverter);
u_cleanup(); // shut down ICU, release any cached data it owns.
return matchFound? 0: 1;
}