import re
# Regex Cheat sheet : https://www.dataquest.io/blog/regex-cheatsheet/
# Regex python tester : https://pythex.org/
# re doc : https://docs.python.org/3/library/re.html
text = "i like train"
reg = r"[a-c]"  # character class: any single character from a to c
# re.match() only tries the pattern at the START of the string. "i like train"
# begins with "i", which is outside [a-c], so this takes the else branch.
# Use re.search() instead to look for a match anywhere in the string.
if re.match(reg, text):
    print(text)
else:
    print("Not any match")
# You need to - (import re)
# ^ - Matches the beginning of the line
# $ - Matches the end of the line
# . - Matches any character
# \s - Matches whitespace
# \S - Matches any non-whitespace character
# * - Repeat a character zero or more times
# *? - Repeat a character zero or more times (non-greedy)
# + - Repeat a character one or more times
# +? - Repeat a character one or more times (non-greedy)
# [aeiou] - Matches a single character in the listed set
# [^XYZ] - Matches a single character not in the listed set
# [a-z0-9] - The set of characters can include a range
# ( - Indicates where string extraction is to start
# ) - Indicates where string extraction is to end
# 1. A fixed string -> abc123
# 2. Arbitrary repetition -> a*b ("*" means that you can have an arbitrary
#    number (possibly 0) of the previous char)
# 3. Repeat character at least once -> a+b # ab, aaaab
# 4. Repeat character at most once -> a?b # b, ab
# 5. Repeat a character a fixed number of times -> a{5} # aaaaa
# 6. Repeat a pattern a fixed number of times -> (a*b){3} # baabab, ababaaaab
# 7. Repeat a character or pattern a variable number of times -> a{2,4} # aa, aaa, aaaa
# 8. Choice of several characters -> [ab]c # ac, bc
# 9. Arbitrary mixture of several characters -> [ab]*c # c, aac, abbac
# 10. Ranges of characters -> [A-H][a-z]* # Aasdfalsd, Hb, G
# 11. Characters OTHER than particular one -> [^AB] # C, D
# 12. Choice of several expressions -> Dr|Mr|Ms|Mrs # Dr, Mr, Mrs, Ms
# 13. Nesting expressions -> ([A-Z][a-z][0-9])* # Aa0, Bc1Dz9
# 14. Start of a line -> ^ab
# 15. End of a line -> ab$
#Type of pattern
# 1. Special characters (escape them with a backslash) -> \[ # [
# 2. Any character 'except' newline -> . # a, *, -
# 3. Nongreedy evaluation -> <.*?> # in <h1></h2 name = "foo"> matches <h1>, then </h2 name = "foo">
# 4. Whitespace -> \s
import re
# The string you want to find a pattern within
test_string = 'Hello greppers!'
# Creating a regular expression pattern
# This is a simple one which finds "Hello"
pattern = re.compile(r'Hello')
# finditer() returns an iterator that yields one match object for each
# non-overlapping occurrence of the pattern within test_string
matches = pattern.finditer(test_string)
# Output every occurrence; each item is a match object
for match in matches:
    print(match)
import re
# re.search() returns a match object for the first match found anywhere in
# the string, else None
txt = "Hello world"
# [a-zA-Z] matches ASCII letters only. The range A-z (lowercase z) would also
# accept the punctuation characters that sit between "Z" and "a" in ASCII.
x = re.search(r"[a-zA-Z]+", txt)
if x:
    print("YES! We have a match!", x)
else:
    print("No match")
# output YES! We have a match! <re.Match object; span=(0, 5), match='Hello'>
# findall() collects every non-overlapping match into a list; this character
# class picks out each lowercase vowel
txt = "This is a test"
x = re.compile(r"[aeiou]").findall(txt)
print(x)
# output ['i', 'i', 'a', 'e']

# With IGNORECASE the alternation finds "is" or "test" in any letter case;
# because the pattern has one group, findall() returns that group's text
txt = "This iS a Test"
x = re.compile("(is|test)", re.IGNORECASE).findall(txt)
print(x)
# output ['is', 'iS', 'Test']

# split() cuts the string at every match and returns the pieces around it
txt = "This is a silly string"
x = re.compile(r"silly").split(txt)
print(x)
# output ['This is a ', ' string']

# sub() replaces each run of one or more "to" with a single "to"
txt = "We need tototo run "
x = re.compile(r"(to)+").sub("to", txt)
print(x)
# output We need to run
# Step-By-Step breakdown:
import re # We need this module
# The text that will be searched:
search_txt = "These are oranges and apples and pears"
# Build a compiled pattern object, substituting your own expression for
# REGEX_GOES_HERE. Keep only ONE of the two lines below; as written, the
# second assignment replaces the first.
regex_obj = re.compile(r'REGEX_GOES_HERE', re.IGNORECASE)  # ignores letter case
regex_obj = re.compile(r'REGEX_GOES_HERE')  # respects letter case
# Apply the pattern to the text to get the result/s:
regex_obj.findall(search_txt)
# The same call wrapped in print. findall() gives back a LIST holding every
# match, and an empty list when nothing matches.
print(regex_obj.findall(search_txt))
# 1. A fixed string -> abc123
# 2. Arbitrary repetition -> a*b ("*" means that you can have an arbitrary
#    number (possibly 0) of the previous char)
# 3. Repeat character at least once -> a+b # ab, aaaab
# 4. Repeat character at most once -> a?b # b, ab
# 5. Repeat a character a fixed number of times -> a{5} # aaaaa
# 6. Repeat a pattern a fixed number of times -> (a*b){3} # baabab, ababaaaab
# 7. Repeat a character or pattern a variable number of times -> a{2,4} # aa, aaa, aaaa
# 8. Choice of several characters -> [ab]c # ac, bc
# 9. Arbitrary mixture of several characters -> [ab]*c # c, aac, abbac
# 10. Ranges of characters -> [A-H][a-z]* # Aasdfalsd, Hb, G
# 11. Characters OTHER than particular one -> [^AB] # C, D
# 12. Choice of several expressions -> Dr|Mr|Ms|Mrs # Dr, Mr, Mrs, Ms
# 13. Nesting expressions -> ([A-Z][a-z][0-9])* # Aa0, Bc1Dz9
# 14. Start of a line -> ^ab
# 15. End of a line -> ab$
#Type of pattern
# 1. Special characters (escape them with a backslash) -> \[ # [
# 2. Any character 'except' newline -> . # a, *, -
# 3. Nongreedy evaluation -> <.*?> # in <h1></h2 name = "foo"> matches <h1>, then </h2 name = "foo">
# 4. Whitespace -> \s
# Pattern.match() only matches at the position it is asked to start from
# (index 0 unless a second argument is given).
pattern = re.compile("o")
print(pattern.match("dog"))  # No match as "o" is not at the start of "dog".
print(pattern.match("dog", 1))  # Match as "o" is the 2nd character of "dog".
# output:
# None
# <re.Match object; span=(1, 2), match='o'>
# You need to - (import re)
# ^ - Matches the beginning of the line
# $ - Matches the end of the line
# . - Matches any character
# \s - Matches whitespace
# \S - Matches any non-whitespace character
# * - Repeat a character zero or more times
# *? - Repeat a character zero or more times (non-greedy)
# + - Repeat a character one or more times
# +? - Repeat a character one or more times (non-greedy)
# [aeiou] - Matches a single character in the listed set
# [^XYZ] - Matches a single character not in the listed set
# [a-z0-9] - The set of characters can include a range
# ( - Indicates where string extraction is to start
# ) - Indicates where string extraction is to end
import re
text = "test1, test2, test3"
# \d matches any single digit, so this pattern covers test1, test2 and test3
# (a literal "test1" would only ever match the first item).
regex = re.compile(r"test\d")
# Returns the (start, end) range of the match at the beginning of the string
print(regex.match(text).span())
# Returns text with all matches replaced with other text
print(regex.sub("replace", text))
# Returns every match as a list
print(regex.findall(text))
# OUT:
#
# (0, 5)
# replace, replace, replace
# ['test1', 'test2', 'test3']
'''
Regex (Regular Expression) are incredibly powerful,
and can do much more than regular text search.
'''
import re
# a. The dot regex: "." stands in for one arbitrary character, so 'b...k'
#    is a "b", any three characters, then a "k".
text = '''A blockchain, originally block chain,
is a growing list of records, called blocks,
which are linked using cryptography.
'''
print(re.compile('b...k').findall(text)) # Output: ['block', 'block', 'block']
# b. The asterisk regex: ".*" stands for an arbitrary number of arbitrary
#    characters, so it can match text that begins (and optionally ends)
#    with given characters. The asterisk can be combined with other operators.
print(re.compile('cr.*').findall(text)) # Output: ['cryptography.']
print(re.compile('y.*y').findall(text)) # Output: ['yptography']
# c. The zero-or-one regex: "?" makes the preceding character optional,
#    i.e. it matches zero or one occurrence of it.
print(re.compile('blocks?').findall(text)) # Output: ['block', 'block', 'blocks']
# Let's say you want to check for a phone number in a string
# Note: Remove indentation
import re
# \d matches one digit (0-9). Without the backslashes the pattern would look
# for the literal letter "d" and never find a phone number.
phone_num_regex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
mobile_string = 'My number is 415-555-4242' # Not real number
# search() scans the whole string and returns a match object, or None
any_phone_numbers = phone_num_regex.search(mobile_string)
print(any_phone_numbers)
# The r in front of the string means it's a raw string (backslash escapes such as \n, \t are not processed)
# In regex, if we use \d, it will look for any digit in your string (0-9)
# If we search for \d\d\d-\d\d\d-\d\d\d\d, it will look for anywhere in the
# string where there is a digit, followed by a digit, followed by a digit, followed
# by a hyphen, ...
# You can also use it in an if statement to check if there is a match or not
# between a regex and a string with 're.match(regex, string)'
# (note: re.match only matches at the start of the string; use re.search to look anywhere)
# A Python program to demonstrate working of re.search().
import re
# Let's use a regular expression to match a date string
# in the form of Month name followed by day number.
# Group 1 captures a run of letters (the month), group 2 a run of digits
# (the day); \d needs its backslash, otherwise it means the letter "d".
regex = r"([a-zA-Z]+) (\d+)"
match = re.search(regex, "I was born on June 24")
if match is not None:
    # We reach here when the expression "([a-zA-Z]+) (\d+)"
    # matches the date string.
    # This will print [14, 21), since it matches at index 14
    # and ends at 21.
    print("Match at index %s, %s" % (match.start(), match.end()))
    # We use group() method to get all the matches and
    # captured groups. The groups contain the matched values.
    # In particular:
    # match.group(0) always returns the fully matched string
    # match.group(1) match.group(2), ... return the capture
    # groups in order from left to right in the input string
    # match.group() is equivalent to match.group(0)
    # So this will print "June 24"
    print("Full match: %s" % (match.group(0)))
    # So this will print "June"
    print("Month: %s" % (match.group(1)))
    # So this will print "24"
    print("Day: %s" % (match.group(2)))
else:
    print("The regex pattern does not match.")
# Recursive Python3 program to find if a given pattern is
# present in a text
def exactMatch(text, pat, text_index, pat_index):
    """Check whether ``pat[pat_index:]`` matches ``text`` at ``text_index``.

    Compares one character per recursive call, advancing both indices
    together, so the recursion depth grows with the pattern length.

    Args:
        text: The string being searched.
        pat: The pattern string to match literally.
        text_index: Position in ``text`` where the comparison starts.
        pat_index: Position in ``pat`` where the comparison starts.

    Returns:
        1 if every remaining pattern character matches, otherwise 0.
    """
    # Text is exhausted while pattern characters remain: no match.
    if text_index == len(text) and pat_index != len(pat):
        return 0
    # The last character of the pattern has been matched.
    if pat_index == len(pat):
        return 1
    # Current characters agree: compare the next pair.
    if text[text_index] == pat[pat_index]:
        return exactMatch(text, pat, text_index + 1, pat_index + 1)
    return 0
# This function returns 1 if 'text' contains 'pat', otherwise 0
def contains(text, pat, text_index, pat_index):
    """Check whether ``pat[pat_index:]`` occurs in ``text[text_index:]``.

    Tries each start position in turn, one recursive call per text
    character, so the recursion depth grows with the length of the text.

    Args:
        text: The string being searched.
        pat: The pattern string to look for literally.
        text_index: Position in ``text`` where the search starts.
        pat_index: Position in ``pat`` of the first character to match.

    Returns:
        1 if the pattern is found, otherwise 0.
    """
    # Nothing left of the pattern to find (e.g. an empty pattern): it is
    # trivially contained. Without this guard pat[pat_index] raises IndexError.
    if pat_index == len(pat):
        return 1
    # The end of the text was reached without finding the pattern.
    if text_index == len(text):
        return 0
    # Only attempt a full comparison where the first characters agree.
    if text[text_index] == pat[pat_index] and exactMatch(
        text, pat, text_index, pat_index
    ):
        return 1
    # No match starting here: retry from the next text position.
    return contains(text, pat, text_index + 1, pat_index)
# Driver program: run contains() on a few sample (text, pattern) pairs,
# searching from the start of both strings
for sample_text, sample_pat in (
    ("geeksforgeeks", "geeks"),
    ("geeksforgeeks", "geeksquiz"),
    ("geeksquizgeeksquiz", "quiz"),
):
    print(contains(sample_text, sample_pat, 0, 0))
# This code is contributed by ankush_953.
# Pattern.match() only matches at the position it is asked to start from
# (index 0 unless a second argument is given).
pattern = re.compile("o")
print(pattern.match("dog"))  # No match as "o" is not at the start of "dog".
print(pattern.match("dog", 1))  # Match as "o" is the 2nd character of "dog".
# output:
# None
# <re.Match object; span=(1, 2), match='o'>
import re
s = 'GeeksforGeeks: A computer science portal for geeks'
# Find the first occurrence of the literal word "portal" in s
match = re.compile(r'portal').search(s)
# start() is the index of the first matched character,
# end() is the index just past the last one
print('Start Index:', match.start())
print('End Index:', match.end())