Python Regex Cheat Sheet
Quick-reference for Python's re module organized by function. Each section shows the API call with real-world pattern examples.
Quick Syntax Reference
Write regex patterns as raw strings (r'...') so backslashes pass through to the regex engine unchanged instead of being interpreted as Python string escapes.
import re
re.findall(r'\d', 'abc123') # => ['1', '2', '3'] (digits)
re.findall(r'\D', 'abc123') # => ['a', 'b', 'c'] (non-digits)
re.findall(r'\w', 'hi there!') # => ['h', 'i', 't', 'h', 'e', 'r', 'e']
re.findall(r'\W', 'hi there!') # => [' ', '!'] (non-word chars)
re.findall(r'\s', 'a b\tc') # => [' ', '\t'] (whitespace)import re
re.findall(r'\d+', 'a1 bb22 ccc333') # => ['1', '22', '333'] (one or more)
re.findall(r'\d?', '1a2') # => ['1', '', '2', ''] (zero or one)
re.findall(r'\d*', 'a12b') # => ['', '12', '', ''] (zero or more)
re.findall(r'\d{2,3}', '1 22 333') # => ['22', '333'] (2 to 3 digits)import re
re.search(r'^Hello', 'Hello world') # => <Match 'Hello'>
re.search(r'^Hello', 'Say Hello') # => None (not at start)
re.search(r'world$', 'Hello world') # => <Match 'world'>
re.findall(r'\bcat\b', 'cat concatenate') # => ['cat'] (word boundary)re.search() vs re.match() vs re.fullmatch()
search() scans the entire string. match() checks only the beginning. fullmatch() requires the whole string to match.
import re
text = 'abc123def'
re.search(r'\d+', text) # => <Match '123'> (found in middle)
re.match(r'\d+', text) # => None (not at position 0)
re.fullmatch(r'\d+', text) # => None (entire string is not digits)
re.fullmatch(r'\d+', '123') # => <Match '123'> (entire string matches)import re
m = re.search(r'\d+', 'order-456-abc')
if m:
m.group() # => '456'
m.start() # => 6
m.end() # => 9
m.span() # => (6, 9)re.findall() and re.finditer()
findall returns a list of strings (or a list of tuples when the pattern contains more than one group). finditer yields Match objects with position data.
import re
re.findall(r'\d+', 'age 25, score 100') # => ['25', '100']
# With groups, findall returns the group content (not the full match)
re.findall(r'(\d+)-(\d+)', '12-34 56-78')
# => [('12', '34'), ('56', '78')]import re
for m in re.finditer(r'\d+', 'age 25, score 100'):
print(f'{m.group()} at position {m.start()}')
# 25 at position 4
# 100 at position 14
Use finditer when you need match positions or when processing large strings to avoid building the full list in memory.
re.sub() and re.subn()
sub replaces all matches and returns the new string. subn also returns the count of replacements made.
import re
re.sub(r'\d', 'X', 'abc123') # => 'abcXXX'
re.sub(r'\d+', 'NUM', 'a1 b22') # => 'aNUM bNUM'
# Limit replacements with count
re.sub(r'\d+', 'NUM', 'a1 b2 c3', count=2) # => 'aNUM bNUM c3'import re
re.subn(r'\d', 'X', 'abc123') # => ('abcXXX', 3)import re
def double(m):
return str(int(m.group()) * 2)
re.sub(r'\d+', double, 'buy 3 get 1') # => 'buy 6 get 2'import re
# Swap first and last name
re.sub(r'(\w+) (\w+)', r'\2, \1', 'Jane Doe') # => 'Doe, Jane're.compile() and Pattern Objects
Compiling a pattern once and reusing it avoids recompilation on each call. The Pattern object exposes the same methods as the re module.
import re
email_pat = re.compile(r'[\w.+-]+@[\w-]+\.[\w.]+')
email_pat.search('contact us at hi@example.com') # => <Match 'hi@example.com'>
email_pat.findall('a@b.com and c@d.org') # => ['a@b.com', 'c@d.org']import re
pat = re.compile(r'^error:', re.IGNORECASE | re.MULTILINE)
pat.findall('Error: disk\nerror: memory') # => ['Error:', 'error:']Python caches recently used patterns internally, so compile() helps most when the same pattern runs in a tight loop or across many function calls.
Groups, Named Groups, and Backreferences
Parentheses capture substrings. Named groups add labels for readability. group(), groups(), and groupdict() extract captured values.
import re
m = re.search(r'(\d{4})-(\d{2})-(\d{2})', '2026-02-23')
m.group(0) # => '2026-02-23' (entire match)
m.group(1) # => '2026'
m.group(2) # => '02'
m.groups() # => ('2026', '02', '23')import re
m = re.search(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})', '2026-02-23')
m.group('year') # => '2026'
m.groupdict() # => {'year': '2026', 'month': '02', 'day': '23'}import re
# Find repeated words
re.search(r'\b(\w+)\s+\1\b', 'the the quick fox')
# => <Match 'the the'>
# Named backreference
re.search(r'(?P<word>\w+)\s+(?P=word)', 'the the quick fox')
# => <Match 'the the'>Lookahead and Lookbehind
Zero-width assertions that check what comes before or after a position without consuming characters.
import re
# Match digits followed by 'px'
re.findall(r'\d+(?=px)', '12px 5em 30px') # => ['12', '30']import re
# Match digits NOT followed by 'px'
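re.findall(r'\d+(?!px)', '12px 5em 30px') # => ['1', '5', '3']
The results are '1', '5', '3' because the engine backtracks: '12' is followed by 'px', so it gives up one digit and accepts '1', which is followed by '2' rather than 'px'. To match only whole numbers, also forbid a following digit in the lookahead, e.g. r'\d+(?!\d|px)' returns ['5'].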
import re
# Match digits preceded by '$'
re.findall(r'(?<=\$)\d+', 'costs $50 or $100') # => ['50', '100']import re
# Match 'cat' not preceded by 's'
re.findall(r'(?<!s)cat', 'cat scatter concatenate') # => ['cat', 'cat']Flags
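Flags modify how the regex engine interprets the pattern. Pass them through the flags argument or embed them inline with (?flag). For re.sub() and re.split(), pass flags by keyword (flags=...), because the next positional argument after the string is count / maxsplit.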
import re
re.findall(r'error', 'Error ERROR error', re.IGNORECASE)
# => ['Error', 'ERROR', 'error']import re
text = 'first line\nsecond line'
re.findall(r'^\w+', text) # => ['first'] (default: ^ = start of string)
re.findall(r'^\w+', text, re.MULTILINE) # => ['first', 'second'] (^ = start of each line)import re
text = 'start\nmiddle\nend'
re.search(r'start.*end', text) # => None (. doesn't match \n)
re.search(r'start.*end', text, re.DOTALL) # => <Match 'start\nmiddle\nend'>import re
email_pat = re.compile(r'''
[\w.+-]+ # local part
@ # at symbol
[\w-]+ # domain name
\. # dot
[\w.]+ # top-level domain
''', re.VERBOSE)
email_pat.search('user@example.com') # => <Match 'user@example.com'>import re
re.findall(r'(?i)error', 'Error ERROR error')
# => ['Error', 'ERROR', 'error']
# Combine flags
re.findall(r'(?im)^\w+', 'first\nSecond')
# => ['first', 'Second']Common Practical Patterns
Ready-to-use patterns for everyday validation and extraction. Each uses an re module function, not standalone regex strings.
import re
text = 'reach us at support@example.com or sales@company.co.uk'
re.findall(r'[\w.+-]+@[\w-]+\.[\w.]+', text)
# => ['support@example.com', 'sales@company.co.uk']This catches most common emails. For RFC-compliant validation, use a dedicated library.
import re
text = 'visit https://example.com/path?q=1 or http://test.org'
re.findall(r'https?://[\w.-]+(?:/[\w./?&=-]*)?', text)
# => ['https://example.com/path?q=1', 'http://test.org']import re
text = 'call 555-123-4567 or (555) 987-6543'
re.findall(r'\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}', text)
# => ['555-123-4567', '(555) 987-6543']import re
log = '2026-02-23 server started, 2026-02-24 backup done'
dates = re.findall(r'\d{4}-\d{2}-\d{2}', log)
# => ['2026-02-23', '2026-02-24']
# With named groups for structured extraction
for m in re.finditer(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})', log):
print(m.groupdict())
# {'year': '2026', 'month': '02', 'day': '23'}
# {'year': '2026', 'month': '02', 'day': '24'}Can you write this from memory?
Search for pattern 'error' anywhere in string `log_line`