# -*- coding: utf-8 -*-
import typing as T
import re
import textwrap
[docs]
def match(
name: str,
include: list[str],
exclude: list[str],
) -> bool:
"""
Match a name against include and exclude lists using wildcard or regex patterns.
The include/exclude pattern system works like a two-stage filter where name
must pass both inclusion and exclusion criteria to be selected for processing.
Pattern Types:
- Wildcard patterns: Use * to match any characters (e.g., ``"EMPLOYEE*"``, ``"*_TEMP"``)
- Regex patterns: Automatically detected when regex metacharacters are present
(e.g., ``"^EMP.*"``, ``".*_TABLE$"``, ``"[A-Z]+_\\d+"``)
Matching Rules:
- Default inclusion: When include list is empty, all names are included by default
- Include matching: When include patterns exist, a name must match ANY include
pattern (logical OR) to be considered
- Exclude override: If a name matches ANY exclude pattern, it's rejected
regardless of include matches
- Case insensitive: All pattern matching is case-insensitive
:param name: The name to match (e.g., table name, column name)
:param include: List of patterns to include. Empty list means include all.
:param exclude: List of patterns to exclude. Takes precedence over include.
Returns:
bool: True if the name matches the criteria, False otherwise.
Examples:
>>> # Include all employee tables
>>> match("EMPLOYEES", ["EMPLOYEE*"], [])
True
>>> match("EMPLOYEE_HISTORY", ["EMPLOYEE*"], [])
True
>>> match("MANAGERS", ["EMPLOYEE*"], [])
False
>>> # Include all, but exclude temporary tables
>>> match("USERS", [], ["*_TEMP", "*_TMP"])
True
>>> match("USERS_TEMP", [], ["*_TEMP", "*_TMP"])
False
>>> # Include specific tables with regex
>>> match("EMP_2023", ["^EMP_\\d{4}$"], [])
True
>>> match("EMP_ARCHIVE", ["^EMP_\\d{4}$"], [])
False
>>> # Complex filtering: include employee/manager tables, exclude history
>>> match("EMPLOYEE_CURRENT", ["EMPLOYEE*", "MANAGER*"], ["*_HISTORY"])
True
>>> match("EMPLOYEE_HISTORY", ["EMPLOYEE*", "MANAGER*"], ["*_HISTORY"])
False
>>> # Case insensitive matching
>>> match("employees", ["EMPLOYEES"], [])
True
>>> match("EMPLOYEES", ["employees"], [])
True
"""
# Convert wildcard patterns to regex patterns
def pattern_to_regex(pattern: str) -> T.Pattern:
# Check if pattern contains regex metacharacters (excluding *)
# If it does, treat it as a regex pattern, otherwise treat * as wildcard
regex_chars = r"[.+?^${}()|[\]\\]"
has_regex = bool(re.search(regex_chars, pattern.replace("*", "")))
if has_regex:
# It's a regex pattern, compile as-is
regex_pattern = pattern
else:
# It's a wildcard pattern, escape everything except *
regex_pattern = re.escape(pattern)
# Replace escaped \* with .* for wildcard matching
regex_pattern = regex_pattern.replace(r"\*", ".*")
# Compile with case-insensitive flag, use fullmatch to match entire string
compiled = re.compile(regex_pattern, re.IGNORECASE)
return compiled
# Convert all patterns to compiled regex objects
include_patterns = [pattern_to_regex(p) for p in include]
exclude_patterns = [pattern_to_regex(p) for p in exclude]
# Check exclude patterns first - if any match, return False
for pattern in exclude_patterns:
if pattern.fullmatch(name):
return False
# If no include patterns, everything is included by default
if not include_patterns:
return True
# If include patterns exist, name must match at least one
for pattern in include_patterns:
if pattern.fullmatch(name):
return True
return False
[docs]
def dedent(text: str) -> str:
"""
Dedent a string by removing common leading whitespace.
This is useful for cleaning up multi-line strings that may have inconsistent
indentation levels.
:param text: The input string to dedent.
:return: A dedented version of the input string.
"""
return textwrap.dedent(text).strip()