Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Simple Regex matching
#1
Lightbulb 
A simple wrapper for the standard library regex_match() function. Save all files into the QB64pe folder to test.

qbregex.h
Code: (Select All)
//====================================================================
//=== Regular Expressions support ====================================
//====================================================================

#include <regex>

// Test whether a string is matched, as a whole, by a regular expression.
// The pattern has to cover the complete input string; a match of only a
// part of it counts as "no match", so anchoring the pattern with ^ and $
// is unnecessary.
//  In: string, regex (both STRINGs, add CHR$(0) to end of strings)
// Out: match         (INTEGER, 0 = no match, 1 = positive match)
// Err: out < 0       (call RegexError() to get the error message)
//--------------------------------------------------------------------
int16_t RegexMatch(const char *qbStr, const char *qbRegex) {
    try {
        // Compiling the pattern is the step that throws on a malformed regex.
        const std::regex pattern(qbRegex);
        return std::regex_match(qbStr, pattern) ? 1 : 0;
    } catch (const std::regex_error& failure) {
        // Report the library's error code as its bitwise complement, which
        // RegexError() reverses to look up the message.
        return static_cast<int16_t>(~failure.code());
    }
}

// Translate a result code of RegexMatch() into a human readable message.
// Intended for the negative (error) codes, but the two regular results
// 0 and 1 are recognized as well and yield a "no error" text.
//  In: error code (INTEGER, usually the code returned by RegexMatch())
// Out: error text (STRING, description for the given error code)
//--------------------------------------------------------------------
const char *RegexError(int16_t errCode) {
    namespace rc = std::regex_constants;

    // RegexMatch() hands out the complement of the library's error code,
    // complementing once more restores the original value.
    const int libCode = ~errCode;

    switch (libCode) {
        // regular match results passed in by mistake (~1 == -2, ~0 == -1)
        case -2: return "No error, it was a positive RegEx match.";
        case -1: return "No error, the RegEx just didn't match.";
        // the genuine errors defined by the regex library
        case rc::error_collate:    return "RegEx has an invalid collating element name.";
        case rc::error_ctype:      return "RegEx has an invalid character class name.";
        case rc::error_escape:     return "RegEx has an invalid escaped character, or a trailing escape.";
        case rc::error_backref:    return "RegEx has an invalid back reference.";
        case rc::error_brack:      return "RegEx has mismatched brackets [ and ].";
        case rc::error_paren:      return "RegEx has mismatched parentheses ( and ).";
        case rc::error_brace:      return "RegEx has mismatched braces { and }.";
        case rc::error_badbrace:   return "RegEx has an invalid range between braces { and }.";
        case rc::error_range:      return "RegEx has an invalid character range.";
        case rc::error_space:      return "Out of memory while converting RegEx into a finite state machine.";
        case rc::error_badrepeat:  return "RegEx has a repeat specifier, one of *?+{, that was not preceded by a valid token.";
        case rc::error_complexity: return "Complexity of an attempted match exceeded a pre-set level.";
        case rc::error_stack:      return "Out of memory while trying to match the specified string.";
        // anything not listed above
        default: return "Unknown RegEx error.";
    }
}

qbregex.bi
Code: (Select All)
' Makes the two C++ functions defined in qbregex.h callable from BASIC.
' RegexMatch% returns 1 for a match, 0 for no match and a negative value
' on a regex error; RegexError$ turns such a result into a message text.
DECLARE LIBRARY "qbregex" 'Do not add .h here !!
    FUNCTION RegexMatch% (qbStr$, qbRegex$) 'add CHR$(0) to both
    FUNCTION RegexError$ (BYVAL errCode%)
END DECLARE

RE-Test.bas
Code: (Select All)
'$INCLUDE: 'qbregex.bi'

'ask the user for some text to test against the pattern
PRINT "Type a short phrase with your or others username in it: "
LINE INPUT "Phrase: "; userText$
PRINT

'remove one opening or closing parenthesis to try out the error branch
namePattern$ = "(.*)grymmjack(.*)"

'upper-casing both sides makes the match ignore case,
'CHR$(0) terminates the strings for the C++ side
subject$ = UCASE$(userText$) + CHR$(0)
pattern$ = UCASE$(namePattern$) + CHR$(0)
matchCode% = RegexMatch%(subject$, pattern$)

SELECT CASE matchCode%
    CASE IS > 0
        PRINT "Hey, must be you, grymmjack."
    CASE 0
        PRINT "Hello unknown user."
    CASE ELSE
        'negative results are error codes
        PRINT "Error: "; RegexError$(matchCode%)
END SELECT

END
Reply
#2
That is so cool how you put the function body into the .h file and avoid the difficulties of compilation flags, and target nuances.
No need to make separate Windows, Linux and Mac versions, or to access an object file when compiling.

I suppose it bloats the output a bit, with an inline body every time it is called, but the convenience feels worth it.

I made a little extension, to add RegexSearch to search for a pattern match within a string, and to allow flags for RegexMatch
for case insensitivity or other search options.
https://github.com/mstasak/QB64PE-Projec.../qbregex.h
https://github.com/mstasak/QB64PE-Projec...qbregex.bi
https://github.com/mstasak/QB64PE-Projec...extest.bas

Code: (Select All)
OPTION _EXPLICIT
'$INCLUDE: 'qbregex.bi'

'Whole-string matches. The first four calls pass no flags, so they are
'presumably case sensitive (verify against the extended qbregex.h).
PRINT RegexMatch%("It's Mark." + CHR$(0), "^.*Mark.*$" + CHR$(0)) 'explicit ^ and $ anchors, same case as the text
PRINT RegexMatch%("It's Mark." + CHR$(0), ".*Mark.*" + CHR$(0)) 'same pattern without the anchors
PRINT RegexMatch%("It's Mark." + CHR$(0), "Mark" + CHR$(0)) 'pattern covers only a part of the text
PRINT RegexMatch%("It's Mark." + CHR$(0), "^.*MARK.*$" + CHR$(0)) 'pattern differs from the text in case, no flags
PRINT RegexMatchEx%("It's Mark." + CHR$(0), "^.*MARK.*$" + CHR$(0), REGEX_icase) 'match ignoring case

'Substring searches with flags = 0 and patterns in varying case
PRINT RegExpSearch("This is a test", "is", 0)
PRINT RegExpSearch("This is a test", "Is", 0)
PRINT RegExpSearch("This is a test", "IS", 0)
PRINT RegExpSearch("This IS a test", "IS", 0)
PRINT RegExpSearch("This IS a test", "iS", 0)

'The same searches again, this time passing REGEX_icase (ignore case)
PRINT RegExpSearch("This is a test", "is", REGEX_icase)
PRINT RegExpSearch("This is a test", "Is", REGEX_icase)
PRINT RegExpSearch("This is a test", "IS", REGEX_icase)
PRINT RegExpSearch("This IS a test", "IS", REGEX_icase)
PRINT RegExpSearch("This IS a test", "iS", REGEX_icase)
END

'Searches s for pattern via the library function RegexSearch and returns
'a printable description of the outcome.
'  s       - text to search (CHR$(0) is appended here, not by the caller)
'  pattern - regular expression (CHR$(0) is appended here as well)
'  flags   - passed through unchanged to RegexSearch (e.g. 0 or REGEX_icase)
'Returns a "Found ..." text, "*** NO MATCH ***" or "Error: ..." text.
FUNCTION RegExpSearch$ (s AS STRING, pattern AS STRING, flags AS LONG)
'result buffer of 512 LONGs filled by RegexSearch
REDIM buffr(0 TO 511) AS LONG
DIM res AS INTEGER
DIM rslt AS STRING
'hand over the element count and the address of the buffer; whether case is ignored depends on flags
res = RegexSearch(s + CHR$(0), pattern + CHR$(0), flags, UBOUND(buffr) - LBOUND(buffr) + 1, _OFFSET(buffr(0)))
IF res > 0 THEN
'presumably buffr(0) is the 1-based start and buffr(1) the length of the match - confirm against qbregex.h
rslt = "Found '" + MID$(s, buffr(0), buffr(1)) + "' at position: " + STR$(buffr(0)) + ", length: " + STR$(buffr(1))
ELSEIF res = 0 THEN
rslt = "*** NO MATCH ***"
ELSE
'negative results are error codes
rslt = "Error: " + RegexError$(res%)
END IF
'NOTE(review): appends the value of buffr(31) to every result, looks like leftover debug output - confirm
rslt = rslt + STR$(buffr(31))
RegExpSearch = rslt
END FUNCTION
Reply
#3
Well done, Mark, that's certainly a useful addition which will come in handy in the future.
Reply


Forum Jump:


Users browsing this thread: 1 Guest(s)