General Information |
Anchoring Characters |
Character Class
|
Description
|
^ |
Anchor the expression to the start of a line |
$ |
Anchor the expression to the end of a line |
|
Equivalence Classes |
Character Class |
Description |
= = |
Oracle supports the equivalence classes through
the POSIX '[==]' syntax. A base letter and all of its accented versions
constitute an equivalence class. For example, the equivalence class
'[=a=]' matches ä and â. The equivalence classes are valid only inside the bracketed expression
|
|
Match Options |
Character Class
|
Description
|
c
|
Case sensitive matching
|
i |
Case insensitive matching |
m |
Treat source string as multi-line activating Anchor chars
|
n |
Allow the period (.) to match any newline
character |
|
Posix Characters |
Character Class |
Description |
[:alnum:] |
Alphanumeric characters |
[:alpha:] |
Alphabetic characters |
[:blank:] |
Blank Space Characters |
[:cntrl:] |
Control characters (nonprinting) |
[:digit:] |
Numeric digits |
[:graph:] |
Any [:punct:], [:upper:], [:lower:], and [:digit:]
chars |
[:lower:] |
Lowercase alphabetic characters |
[:print:] |
Printable characters |
[:punct:] |
Punctuation characters |
[:space:] |
Space characters (nonprinting), such as carriage return, newline,
vertical tab, and form feed |
[:upper:] |
Uppercase alphabetic characters |
[:xdigit:] |
Hexidecimal characters |
|
Quantifier Characters |
Character Class |
Description |
* |
Match 0 or more times |
? |
Match 0 or 1 time |
+ |
Match 1 or more times |
{m} |
Match exactly m times |
{m,} |
Match at least m times |
{m, n} |
Match at least m times but no more than n times |
\n |
Cause the previous expression to be repeated
n times |
|
Alternative Matching And
Grouping Characters |
Character
Class |
Description |
| |
Separates alternates, often used with grouping operator () |
( ) |
Groups subexpression into a unit for alternations, for quantifiers,
or for backreferencing (see "Backreferences" section) |
[char] |
Indicates a character list; most metacharacters inside a character
list are understood as literals, with the exception of character classes, and the ^ and -
metacharacters |
|
Demo Table |
CREATE TABLE test (
testcol VARCHAR2(50));
INSERT INTO test VALUES ('abcde');
INSERT INTO test VALUES ('12345');
INSERT INTO test VALUES ('1a4A5');
INSERT INTO test VALUES ('12a45');
INSERT INTO test VALUES ('12aBC');
INSERT INTO test VALUES ('12abc');
INSERT INTO test VALUES ('12ab5');
INSERT INTO test VALUES ('12aa5');
INSERT INTO test VALUES ('12AB5');
INSERT INTO test VALUES ('ABCDE');
INSERT INTO test VALUES ('123-5');
INSERT INTO test VALUES ('12.45');
INSERT INTO test VALUES ('1a4b5');
INSERT INTO test VALUES ('1 3 5');
INSERT INTO test VALUES ('1 45');
INSERT INTO test VALUES ('1 5');
INSERT INTO test VALUES ('a b c d');
INSERT INTO test VALUES ('a b c d e');
INSERT INTO test VALUES ('a
e');
INSERT INTO test VALUES ('Steven');
INSERT INTO test VALUES ('Stephen');
INSERT INTO test VALUES ('111.222.3333');
INSERT INTO test VALUES ('222.333.4444');
INSERT INTO test VALUES ('333.444.5555');
INSERT INTO test VALUES ('abcdefabcdefabcxyz');
COMMIT; |
|
REGEXP_COUNT |
Syntax |
REGEXP_COUNT(<source_string>,
<pattern>[[, <start_position>], [<match_parameter>]])
-- match parameter:
'c' = case sensitive
'i' = case insensitive search
'm' = treats the source string as multiple lines
'n' = allows the period (.) wild character to match newline
'x' = ignore whitespace characters |
Count's occurrences based on a regular expression |
SELECT REGEXP_COUNT(testcol,
'2a', 1, 'i') RESULT
FROM test;
SELECT REGEXP_COUNT(testcol, 'e', 1, 'i') RESULT
FROM test; |
|
REGEXP_INSTR |
Syntax |
REGEXP_INSTR(<source_string>,
<pattern>[[, <start_position>][, <occurrence>][, <return_option>][,
<match_parameter>][, <sub_expression>]]) |
Find character 'o' followed by any 3 alphabetic characters: case insensitiveOur thanks to Cassio for spotting a typo here. |
SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood
Shores, CA', '[o][[:alpha:]]{3}', 1, 1, 0, 'i') RESULT
FROM DUAL;
SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood
Shores, CA', '[o][[:alpha:]]{3}', 1, 1, 1, 'i') RESULT
FROM DUAL;
SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood Shores, CA',
'[o][[:alpha:]]{3}', 1, 2, 0, 'i') RESULT
FROM DUAL;
SELECT REGEXP_INSTR('500 Oracle Pkwy, Redwood Shores, CA',
'[o][[:alpha:]]{3}', 1, 2, 1, 'i') RESULT
FROM DUAL; |
Find the position of try, trying,
tried or tries |
SELECT REGEXP_INSTR('We are trying to make the subject
easier.', 'tr(y(ing)?|(ied)|(ies))') RESULTNUM
FROM DUAL; |
Using Sub-Expression option |
SELECT testcol, REGEXP_INSTR(testcol,
'ab', 1, 1, 0, 'i', 0)
FROM test;
SELECT testcol, REGEXP_INSTR(testcol, 'ab',
1, 1, 0, 'i', 1)
FROM test;
SELECT testcol, REGEXP_INSTR(testcol, 'a(b)',
1, 1, 0, 'i', 1)
FROM test; |
|
REGEXP_LIKE |
Syntax |
REGEXP_LIKE(<source_string>,
<pattern>, <match_parameter>) |
AlphaNumeric Characters |
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:alnum:]]');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:alnum:]]{3}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:alnum:]]{5}'); |
Alphabetic Characters |
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:alpha:]]');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:alpha:]]{3}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:alpha:]]{5}'); |
Control Characters |
INSERT INTO test
VALUES ('zyx' || CHR(13) || 'wvu');
COMMIT;
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:cntrl:]]{1}'); |
Digits |
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:digit:]]');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:digit:]]{3}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:digit:]]{5}'); |
Lower Case |
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:lower:]]');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:lower:]]{2}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:lower:]]{3}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:lower:]]{5}'); |
Printable Characters |
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:print:]]{5}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:print:]]{6}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:print:]]{7}'); |
Punctuation |
TRUNCATE TABLE test;
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:punct:]]'); |
Spaces |
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:space:]]');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:space:]]{2}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:space:]]{3}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:space:]]{5}'); |
Upper Case |
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:upper:]]');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:upper:]]{2}');
SELECT *
FROM test
WHERE REGEXP_LIKE(testcol, '[[:upper:]]{3}'); |
Values Starting with 'a%b' |
SELECT testcol
FROM test
WHERE REGEXP_LIKE(testcol, '^ab*'); |
'a' is the third value |
SELECT testcol
FROM test
WHERE REGEXP_LIKE(testcol, '^..a.'); |
Contains two consecutive occurances
of the letter 'a' or 'z' |
SELECT testcol
FROM test
WHERE REGEXP_LIKE(testcol, '([az])\1', 'i'); |
Begins with 'Ste' ends with 'en' and
contains either 'v' or 'ph' in the center |
SELECT testcol
FROM test
WHERE REGEXP_LIKE(testcol, '^Ste(v|ph)en$'); |
Use a regular expression in a check
constraint |
CREATE TABLE mytest (c1 VARCHAR2(20),
CHECK (REGEXP_LIKE(c1, '^[[:alpha:]]+$'))); |
Identify SSN
Thanks: Byron Bush HIOUG |
CREATE TABLE ssn_test (
ssn_col VARCHAR2(20));
INSERT INTO ssn_test VALUES ('111-22-3333');
INSERT INTO ssn_test VALUES ('111=22-3333');
INSERT INTO ssn_test VALUES ('111-A2-3333');
INSERT INTO ssn_test VALUES ('111-22-33339');
INSERT INTO ssn_test VALUES ('111-2-23333');
INSERT INTO ssn_test VALUES ('987-65-4321');
COMMIT;
SELECT ssn_col
from ssn_test
WHERE REGEXP_LIKE(ssn_col,'^[0-9]{3}-[0-9]{2}-[0-9]{4}$'); |
|
REGEXP_REPLACE |
Syntax |
REGEXP_REPLACE(<source_string>,
<pattern>,
<replace_string>, <position>, <occurrence>, <match_parameter>) |
Looks for the pattern xxx.xxx.xxxx
and reformats pattern to (xxx) xxx-xxxx |
col testcol
format a15
col result format a15
SELECT testcol, REGEXP_REPLACE(testcol,
'([[:digit:]]{3})\.([[:digit:]]{3})\.([[:digit:]]{4})',
'(\1) \2-\3') RESULT
FROM test
WHERE LENGTH(testcol) = 12; |
Put a space after every character |
SELECT testcol, REGEXP_REPLACE(testcol, '(.)', '\1 ') RESULT
FROM test
WHERE testcol like 'S%'; |
Replace multiple spaces with a single space |
SELECT REGEXP_REPLACE('500 Oracle Parkway, Redwood Shores, CA', '( ){2,}', ' ')
RESULT
FROM DUAL; |
Insert a space between a lower case character followed by an upper case character |
SELECT REGEXP_REPLACE('George McGovern', '([[:lower:]])([[:upper:]])', '\1
\2') CITY
FROM DUAL;
(Produces 'George Mc Govern')
|
Replace the period with a string (note use of '\') |
SELECT REGEXP_REPLACE('We are trying to make the subject easier.','\.',' for you.') REGEXT_SAMPLE
FROM DUAL; |
Demo |
CREATE TABLE t(
testcol VARCHAR2(10));
INSERT INTO t VALUES ('1');
INSERT INTO t VALUES ('2 ');
INSERT INTO t VALUES ('3 new ');
col newval format a10
SELECT LENGTH(testcol) len, testcol origval,
REGEXP_REPLACE(testcol, '\W+$', ' ')
newval,
LENGTH(REGEXP_REPLACE(testcol, '\W+$', ' '))
newlen
FROM t; |
Code snippet courtesy of Valentin Matak. This is a handy way to remove duplicate characters from a string. This example shows the 1+ repeatability qualifier in use. |
SELECT REGEXP_REPLACE('ABBBCABBCCCAABAAAAA', '(A|B|C)\1+', '\1')
FROM DUAL;
|
Code snippet courtesy of Jonathan Linder. This checks for a valid email address and then extracts the domain name. |
SELECT REGEXP_REPLACE('[email protected]','^(\S+)@(\S+)','\2')
FROM DUAL d
WHERE
REGEXP_LIKE('[email protected]','^[A-Za-z0-9._%+-]+@([A-Za-z0-9-]+.)?+[A-Za-z]{2,63}$')
|
Code snippet courtesy of Juraj Drusc. This example will convert Oracle's SYS_GUID() to a JAVA UUID.
|
SELECT lower(REGEXP_REPLACE(sys_guid(),
'([[:alnum:]]{8})([[:alnum:]]{4})([[:alnum:]]{4})([[:alnum:]]{4})([[:alnum:]]{12})',
'1-2-3-4-5'))
FROM DUAL;
|
|
REGEXP_SUBSTR |
Syntax |
REGEXP_SUBSTR(source_string, pattern
[, position [, occurrence
[, match_parameter]]]) |
Searches for a comma followed by one or more occurrences of non-comma characters followed by a comma |
SELECT REGEXP_SUBSTR('500 Oracle Parkway, Redwood Shores, CA', ',[^,]+,')
RESULT
FROM DUAL; |
Look for http:// followed by a substring of one or more alphanumeric characters and optionally, a period (.) |
col result format
a50
SELECT REGEXP_SUBSTR('Go to http://www.oracle.com/products and click
on database',
'http://([[:alnum:]]+\.?){3,4}/?') RESULT
FROM DUAL; |
Extracts try, trying, tried or tries |
SELECT REGEXP_SUBSTR('We are trying to make the subject
easier.','tr(y(ing)?|(ied)|(ies))')
FROM DUAL; |
Extract the 3rd field treating ':' as
a delimiter |
SELECT REGEXP_SUBSTR('system/pwd@orabase:1521:sidval',
'[^:]+', 1, 3) RESULT
FROM DUAL; |
Extract from string with vertical bar delimiter |
CREATE TABLE regexp (
testcol VARCHAR2(50));
INSERT INTO regexp
(testcol)
VALUES
('One|Two|Three|Four|Five');
SELECT * FROM regexp;
SELECT REGEXP_SUBSTR(testcol,'[^|]+', 1, 3)
FROM regexp; |
Equivalence classes |
SELECT REGEXP_SUBSTR('iSelfSchooling NOT
ISelfSchooling', '[[=i=]]SelfSchooling') RESULT
FROM DUAL;
|
Parsing Demo |
set serveroutput on
DECLARE
x VARCHAR2(2);
y VARCHAR2(2);
c VARCHAR2(40) := '1:3,4:6,8:10,3:4,7:6,11:12';
BEGIN
x := REGEXP_SUBSTR(c,'[^:]+', 1, 1);
y := REGEXP_SUBSTR(c,'[^,]+', 3, 1);
dbms_output.put_line(x ||' '|| y);
END;
/
Gary Whitaker wrote in with an addition to this parsing demo, and had the following comments:
The parsing demo above uses the regular expression '[^,]+' which does not work when there is a NULL element in the list. This could result in returning the wrong element's data. Consider this simple example with comments:
set serveroutput on
DECLARE
x VARCHAR2(1);
y VARCHAR2(1);
z VARCHAR2(1);
c VARCHAR2(40) := '1,2,,4,5,6,7';
BEGIN
-- Works as expected if the value you seek is before any null value in the list:
x := REGEXP_SUBSTR(c, '[^,]+', 1, 2);
-- This form only returns the 4th element when all elements are present.
-- It will return the 4th non-null element, which in this example is really '5',
-- which could be misleading.
-- if you are really after the 4th element regardless if there is a null element:
y := REGEXP_SUBSTR(c, '[^,]+', 1, 4);
-- This form gets the actual 4th element, allowing for the null element.
-- Get the 1st substring of the 4th instance of a set of characters that are not a comma,
-- when followed by a comma or the end of the line:
z := REGEXP_SUBSTR(c, '([^,]*)(,|$)', 1, 4, NULL, 1);
dbms_output.put_line(x);
dbms_output.put_line(y);
dbms_output.put_line(z);
END;
/
|