import re
# Regex Cheat sheet : https://www.dataquest.io/blog/regex-cheatsheet/
# Regex python tester : https://pythex.org/
# re doc : https://docs.python.org/3/library/re.html
text = "i like train"
reg = r"[a-c]"  # character class: any single character from 'a' to 'c'
# re.search scans the whole string for the pattern. re.match would only
# test position 0 and therefore miss the 'a' inside "train".
found = re.search(reg, text)
if found:  # a match object is truthy, None (no match) is falsy
    print(text)
else:
    print("Not any match")
# You need to - (import re)
# ^ - Matches the beginning of the line
# $ - Matches the end of the line
# . - Matches any character
# \s - Matches whitespace
# \S - Matches any non-whitespace character
# * - Repeat a character zero or more times
# *? - Repeat a character zero or more times (non-greedy)
# + - Repeat a character one or more times
# +? - Repeat a character one or more times (non-greedy)
# [aeiou] - Matches a single character in the listed set
# [^XYZ] - Matches a single character not in the listed set
# [a-z0-9] - The set of characters can include a range
# ( - Indicates where string extraction is to start
# ) - Indicates where string extraction is to end
1. A fixed string -> abc123
2. Arbitrary repetition -> a*b ( "*" means that you can have an arbitrary
number (possibly 0) of the previous char )
3. Repeat character at least once -> a+b # ab, aaaab
4. Repeat character at most once -> a?b # b, ab
5. Repeat a character a fixed number of times -> a{5} # aaaaa
6. Repeat a pattern a fixed number of times -> (a*b){3} # baabab, ababaaaab
7. Repeat a character or pattern a variable number of times -> a{2,4} # aa, aaa, aaaa
8. Choice of several characters -> [ab]c # ac, bc
9. Arbitrary mixture of several characters -> [ab]*c # c, aac, abbac
10. Ranges of characters -> [A-H][a-z]* # Aasdfalsd, Hb, G
11. Characters OTHER than particular one -> [^AB] # C, D
12. Choice of several expressions -> Dr|Mr|Ms|Mrs # Dr, Mr, Mrs, Ms
13. Nesting expressions -> ([A-Z][a-z][0-9])* # Aa1, Aa1Bz9
14. Start of a line -> ^ab
15. End of a line -> ab$
#Type of pattern
1. Special characters -> \[ # [
2. Any character 'except' newline -> . # a, *, -
3. Nongreedy evaluation -> <.*?> # <h1>, </h2 name = "foo">
4. Whitespace -> \s
import re
text = "test1, test2, test3"
# \d matches any single digit, so this pattern covers test1, test2 and test3
# (a literal "test1" would only ever match the first item).
regex = re.compile(r"test\d")
# Returns range of first match (match() anchors at the start of the string)
print(regex.match(text).span())
# Returns text with all matches replaced with other text
print(regex.sub("replace", text))
# Returns every match as a list of strings
print(regex.findall(text))
# OUT:
#
# (0, 5)
# replace, replace, replace
# ['test1', 'test2', 'test3']
'''
Regular expressions (regex) are incredibly powerful,
and can do much more than regular text search.
'''
import re
# Sample paragraph that every pattern below is searched against.
text = '''A blockchain, originally block chain,
is a growing list of records, called blocks,
which are linked using cryptography.
'''

# Each pattern is handed to re.findall in turn and the list of hits printed.
for pattern in (
    # a. Dot: '.' stands for one arbitrary character (newline excluded),
    #    so this reads "a 'b', any three characters, then a 'k'".
    'b...k',    # Output: ['block', 'block', 'block']
    # b. Asterisk: '*' repeats the preceding element zero or more times.
    #    Combined with the dot it swallows an arbitrary run of characters
    #    between (or after) the fixed characters, up to the end of the line.
    'cr.*',     # Output: ['cryptography.']
    'y.*y',     # Output: ['yptography']
    # c. Zero-or-one: '?' makes the preceding character optional,
    #    so both "block" and "blocks" are found.
    'blocks?',  # Output: ['block', 'block', 'blocks']
):
    print(re.findall(pattern, text))
# Let's say you want to check for a phone number in a string
# Note: Remove indentation
import re
# \d matches one digit (0-9); {n} repeats it exactly n times.
# Without the backslashes the pattern would look for literal 'd' letters.
phone_num_regex = re.compile(r'\d{3}-\d{3}-\d{4}')
mobile_string = 'My number is 415-555-4242'  # Not real number
# search() scans the whole string and returns a match object, or None
any_phone_numbers = phone_num_regex.search(mobile_string)
print(any_phone_numbers)
The r in front of the string makes it a raw string, so backslash sequences (\n, \t, etc.) are not interpreted by Python and reach the regex engine unchanged
In regex, \d matches any digit (0-9) in your string
If we search for \d\d\d-\d\d\d-\d\d\d\d, it will look anywhere in the
string for a digit, followed by a digit, followed by a digit, followed
by a hyphen, ...
You can also use it in an if statement to check if there is a match or not
between a regex and a string with 're.search(regex, string)'
('re.match(regex, string)' only matches at the start of the string)
# Python3 program to find if a given pattern is
# present in a text
def exactMatch(text, pat, text_index, pat_index):
    """Check whether pat[pat_index:] occurs in text exactly at text_index.

    Args:
        text: sequence being searched.
        pat: pattern sequence to look for.
        text_index: position in ``text`` where the comparison starts.
        pat_index: position in ``pat`` where the comparison starts.

    Returns:
        1 if every remaining pattern character equals the corresponding
        text character, otherwise 0 (including when the text runs out
        before the pattern does).
    """
    # Walk both sequences in lockstep with a loop rather than recursion,
    # so long patterns cannot exhaust the interpreter's recursion limit.
    while pat_index != len(pat):
        # Text exhausted before the pattern, or a character mismatch
        if text_index == len(text) or text[text_index] != pat[pat_index]:
            return 0
        text_index += 1
        pat_index += 1
    # The whole pattern has been consumed
    return 1
# This function returns true if 'text' contain 'pat'
def contains(text, pat, text_index, pat_index):
    """Check whether pat[pat_index:] occurs anywhere in text[text_index:].

    Args:
        text: sequence being searched.
        pat: pattern sequence to look for.
        text_index: first position in ``text`` to consider.
        pat_index: position in ``pat`` where the pattern starts.

    Returns:
        1 if the pattern is found, otherwise 0. An exhausted (e.g. empty)
        pattern is treated as present and yields 1.
    """
    # Nothing left to match: avoids indexing past the end of the pattern
    if pat_index == len(pat):
        return 1
    # Try every start position with a loop rather than recursion, so long
    # texts cannot exhaust the interpreter's recursion limit.
    for start in range(text_index, len(text)):
        # Cheap first-character test before the full comparison
        if text[start] == pat[pat_index] and exactMatch(text, pat, start, pat_index):
            return 1
    # Reached the end of the text without a full match
    return 0
# Driver program: run contains() on a few (text, pattern) pairs,
# always starting from index 0 of both. Prints 1, 0, 1.
for haystack, needle in (
    ("geeksforgeeks", "geeks"),
    ("geeksforgeeks", "geeksquiz"),
    ("geeksquizgeeksquiz", "quiz"),
):
    print(contains(haystack, needle, 0, 0))
# This code is contributed by ankush_953.
# '$' anchors the pattern to the end of the string, so only the final
# "cake" is matched (the leading "Cake!" is ignored).
cake_at_end = re.search(r'cake$', "Cake! Let's eat cake")
print(cake_at_end.group())  # cake

## The next search returns None because the string ends with "home!".
## Calling .group() on None would raise AttributeError, so check first.
cake_missing = re.search(r'cake$', "Let's get some cake on our way home!")
print(cake_missing.group() if cake_missing else None)  # None