Skip to content

scaffold_kit.utils.pattern_processor

Converts glob-like patterns to regular expressions.

This module provides classes for processing .gitignore-style glob patterns and converting them into equivalent regular expressions. It uses a handler-based, “strategy” pattern to process different types of characters (e.g., wildcards, character classes, literals) and handles complex rules like recursive wildcards and root-anchored patterns.

Demo

To run the module’s demonstration code, use the following command:

$ uv run python -m scaffold_kit.utils.pattern_processor

CharacterClassHandler #

Bases: CharacterHandler

Handles ‘[…]’ character classes.

Captures the entire character class including its content and closing bracket.

Source code in src/scaffold_kit/utils/pattern_processor.py
class CharacterClassHandler(CharacterHandler):
    """Handles '[...]' character classes.

    Captures the entire character class including its content and closing
    bracket, and rewrites a glob negation marker ('!') to the regex negation
    marker ('^') so that negated classes keep their meaning in the regex.
    """

    def can_handle(self, char: str) -> bool:
        """Checks if the character is a '['.

        Args:
            char: The single character to check.

        Returns:
            True if the character is '[', False otherwise.
        """
        return char == "["

    def handle(self, text: str, position: int) -> Tuple[str, int]:
        """Extracts the entire character class from the text.

        A '!' or '^' directly after the opening bracket marks a negated
        class; either one is emitted as '^', because '!' is an ordinary
        literal inside a regex character class. A ']' that directly follows
        the opening bracket (or the negation marker) is taken as a member of
        the class rather than as its end.

        Args:
            text: The full text being processed.
            position: Index of the opening '[' in the text.

        Returns:
            A tuple containing:
                - The regex string for the character class, or an escaped
                  '[' when the class is never closed.
                - The new position in the text after processing.
        """
        length = len(text)
        cursor = position + 1  # Skip opening '['.

        # Detect a negation marker and remember where the members start.
        negated = cursor < length and text[cursor] in ("!", "^")
        if negated:
            cursor += 1
        members_start = cursor

        # A ']' in first position belongs to the class.
        if cursor < length and text[cursor] == "]":
            cursor += 1

        closing = text.find("]", cursor)
        if closing == -1:
            # No closing bracket found, treat the '[' as a literal.
            return re.escape("["), position + 1

        opener = "[^" if negated else "["
        return f"{opener}{text[members_start:closing]}]", closing + 1

can_handle(char) #

Checks if the character is a ‘[’.

Parameters:

Name Type Description Default
char str

The single character to check.

required

Returns:

Type Description
bool

True if the character is a character class, False otherwise.

Source code in src/scaffold_kit/utils/pattern_processor.py
def can_handle(self, char: str) -> bool:
    """Reports whether the character opens a character class.

    Args:
        char: The single character to inspect.

    Returns:
        True when the character equals '[', False for anything else.
    """
    opens_class = char == "["
    return opens_class

handle(text, position) #

Extracts the entire character class from the text.

Parameters:

Name Type Description Default
text str

The full text being processed.

required
position int

Current position in the text.

required

Returns:

Type Description
Tuple[str, int]

A tuple containing: - The regex string for the character class. - The new position in the text after processing.

Source code in src/scaffold_kit/utils/pattern_processor.py
def handle(self, text: str, position: int) -> Tuple[str, int]:
    """Extracts the entire character class from the text.

    A '!' or '^' directly after the opening bracket marks a negated class;
    either one is emitted as '^', because '!' is an ordinary literal inside
    a regex character class. A ']' that directly follows the opening bracket
    (or the negation marker) is taken as a member of the class rather than
    as its end.

    Args:
        text: The full text being processed.
        position: Index of the opening '[' in the text.

    Returns:
        A tuple containing:
            - The regex string for the character class, or an escaped '['
              when the class is never closed.
            - The new position in the text after processing.
    """
    length = len(text)
    cursor = position + 1  # Skip opening '['.

    # Detect a negation marker and remember where the members start.
    negated = cursor < length and text[cursor] in ("!", "^")
    if negated:
        cursor += 1
    members_start = cursor

    # A ']' in first position belongs to the class.
    if cursor < length and text[cursor] == "]":
        cursor += 1

    closing = text.find("]", cursor)
    if closing == -1:
        # No closing bracket found, treat the '[' as a literal.
        return re.escape("["), position + 1

    opener = "[^" if negated else "["
    return f"{opener}{text[members_start:closing]}]", closing + 1

CharacterHandler #

Bases: ABC

Abstract base class for character handlers.

Character handlers define the logic for converting a specific type of pattern character into its regex equivalent.

Source code in src/scaffold_kit/utils/pattern_processor.py
class CharacterHandler(ABC):
    """Interface implemented by every glob character handler.

    A handler answers two questions: whether it is responsible for a given
    pattern character, and what regex text replaces that character.
    """

    @abstractmethod
    def can_handle(self, char: str) -> bool:
        """Reports whether this handler is responsible for the character.

        Args:
            char: The single character to inspect.

        Returns:
            True if this handler should process the character, False
            otherwise.
        """

    @abstractmethod
    def handle(self, text: str, position: int) -> Tuple[str, int]:
        """Produces the replacement for the character at the position.

        Args:
            text: The full text being processed.
            position: Index in the text of the character to convert.

        Returns:
            A tuple containing:
                - The replacement string for the character(s).
                - The position in the text at which processing resumes.
        """

can_handle(char) abstractmethod #

Checks if this handler can process the given character.

Parameters:

Name Type Description Default
char str

The single character to check.

required

Returns:

Type Description
bool

True if the handler can process the character, False otherwise.

Source code in src/scaffold_kit/utils/pattern_processor.py
@abstractmethod
def can_handle(self, char: str) -> bool:
    """Reports whether this handler is responsible for the character.

    Args:
        char: The single character to inspect.

    Returns:
        True if this handler should process the character, False otherwise.
    """

handle(text, position) abstractmethod #

Handles the character at the given position.

Parameters:

Name Type Description Default
text str

The full text being processed.

required
position int

Current position in the text.

required

Returns:

Type Description
Tuple[str, int]

A tuple containing: - The replacement string for the character(s). - The new position in the text after processing.

Source code in src/scaffold_kit/utils/pattern_processor.py
@abstractmethod
def handle(self, text: str, position: int) -> Tuple[str, int]:
    """Produces the replacement for the character at the position.

    Args:
        text: The full text being processed.
        position: Index in the text of the character to convert.

    Returns:
        A tuple containing:
            - The replacement string for the character(s).
            - The position in the text at which processing resumes.
    """

GlobProcessor #

Processes glob patterns using the strategy pattern.

This class iterates through a glob string, applying the appropriate CharacterHandler to each character to build a regex string part.

Source code in src/scaffold_kit/utils/pattern_processor.py
class GlobProcessor:
    """Translates one glob path segment into a regex fragment.

    Every character of the segment is dispatched to the first registered
    CharacterHandler that accepts it, and the fragments the handlers return
    are concatenated in order.
    """

    def __init__(self):
        """Registers the character handlers in priority order.

        Lookup stops at the first handler that accepts a character, so
        LiteralCharHandler, which accepts everything, has to be the final
        entry.
        """
        handler_types = (
            WildcardHandler,
            SingleCharHandler,
            CharacterClassHandler,
            LiteralCharHandler,  # Catch-all - keep it last.
        )
        self.handlers = [handler_type() for handler_type in handler_types]

    def convert_glob_part(self, part: str) -> str:
        """Builds the regex fragment for a single glob segment.

        Args:
            part: One segment of a glob pattern, i.e. the text between two
                slashes (e.g., 'path', '*', '**').

        Returns:
            The regex fragment equivalent to the segment.
        """
        # A segment that is exactly '**' is the recursive wildcard.
        if part == "**":
            return ".*"

        fragments = []
        cursor = 0
        end = len(part)

        while cursor < end:
            # Each handler reports where scanning should resume, which lets
            # a handler consume more than one character.
            handler = self._find_handler(part[cursor])
            fragment, cursor = handler.handle(part, cursor)
            fragments.append(fragment)

        return "".join(fragments)

    def _find_handler(self, char: str) -> CharacterHandler:
        """Selects the first registered handler that accepts the character.

        Args:
            char: The single character that needs a handler.

        Returns:
            The first `CharacterHandler` whose `can_handle` returns a truthy
            value for the character.

        Raises:
            RuntimeError: If no registered handler accepts the character.
                Not expected while LiteralCharHandler is registered.
        """
        chosen = next(
            (
                candidate
                for candidate in self.handlers
                if candidate.can_handle(char)
            ),
            None,
        )
        if chosen is None:
            # Only reachable when the catch-all handler is missing.
            raise RuntimeError(f"No handler found for character: {char}")
        return chosen

__init__() #

Initializes the GlobProcessor with a list of handlers.

Note that the order of the handlers is crucial. More specific handlers (e.g., wildcards, character classes) must come before the generic fallback handler (LiteralCharHandler).

Source code in src/scaffold_kit/utils/pattern_processor.py
def __init__(self):
    """Registers the character handlers in priority order.

    Lookup stops at the first handler that accepts a character, so
    LiteralCharHandler, which accepts everything, has to be the final entry.
    """
    handler_types = (
        WildcardHandler,
        SingleCharHandler,
        CharacterClassHandler,
        LiteralCharHandler,  # Catch-all - keep it last.
    )
    self.handlers = [handler_type() for handler_type in handler_types]

convert_glob_part(part) #

Converts a single glob part to regex using character handlers.

Parameters:

Name Type Description Default
part str

A single string part of a glob pattern (e.g., ‘path’, ‘*’, ‘**’).

required

Returns:

Type Description
str

The regex equivalent of the glob part.

Source code in src/scaffold_kit/utils/pattern_processor.py
def convert_glob_part(self, part: str) -> str:
    """Builds the regex fragment for a single glob segment.

    Args:
        part: One segment of a glob pattern, i.e. the text between two
            slashes (e.g., 'path', '*', '**').

    Returns:
        The regex fragment equivalent to the segment.
    """
    # A segment that is exactly '**' is the recursive wildcard.
    if part == "**":
        return ".*"

    fragments = []
    cursor = 0
    end = len(part)

    while cursor < end:
        # Each handler reports where scanning should resume, which lets a
        # handler consume more than one character.
        handler = self._find_handler(part[cursor])
        fragment, cursor = handler.handle(part, cursor)
        fragments.append(fragment)

    return "".join(fragments)

LiteralCharHandler #

Bases: CharacterHandler

Handles literal characters (default handler).

Converts a literal character to a regex-escaped string.

Source code in src/scaffold_kit/utils/pattern_processor.py
class LiteralCharHandler(CharacterHandler):
    """Catch-all handler for characters with no glob meaning.

    Emits the character regex-escaped, so it matches itself literally.
    """

    def can_handle(self, char: str) -> bool:
        """Accepts every character.

        As the fallback handler it never turns a character down.

        Args:
            char: The single character to inspect (not examined).

        Returns:
            Always True.
        """
        return True

    def handle(self, text: str, position: int) -> Tuple[str, int]:
        """Regex-escapes the character at the given position.

        Args:
            text: The full text being processed.
            position: Index in the text of the character to escape.

        Returns:
            A tuple of the escaped character and the following position.
        """
        literal = text[position]
        return re.escape(literal), position + 1

can_handle(char) #

Checks if this is the fallback handler.

This is the fallback handler, so it can handle any character.

Parameters:

Name Type Description Default
char str

The single character to check.

required

Returns:

Type Description
bool

True.

Source code in src/scaffold_kit/utils/pattern_processor.py
def can_handle(self, char: str) -> bool:
    """Accepts every character.

    As the fallback handler it never turns a character down.

    Args:
        char: The single character to inspect (not examined).

    Returns:
        Always True.
    """
    return True

handle(text, position) #

Escapes a single literal character for regex.

Parameters:

Name Type Description Default
text str

The full text being processed.

required
position int

Current position in the text.

required

Returns:

Type Description
Tuple[str, int]

A tuple of the escaped character and the new position.

Source code in src/scaffold_kit/utils/pattern_processor.py
def handle(self, text: str, position: int) -> Tuple[str, int]:
    """Regex-escapes the character at the given position.

    Args:
        text: The full text being processed.
        position: Index in the text of the character to escape.

    Returns:
        A tuple of the escaped character and the following position.
    """
    literal = text[position]
    return re.escape(literal), position + 1

PatternProcessor #

Main class for converting glob patterns to regex.

This class orchestrates the entire conversion process, handling normalization, splitting, and joining of the regex parts.

Source code in src/scaffold_kit/utils/pattern_processor.py
class PatternProcessor:
    """Main class for converting glob patterns to regex.

    This class orchestrates the entire conversion process, handling
    normalization, splitting, and joining of the regex parts. Recursive
    wildcards ('**') are joined so that they always span whole path
    segments.
    """

    def __init__(self):
        """Initializes the processor with a GlobProcessor instance."""
        self.glob_processor = GlobProcessor()

    def pattern_to_regex(self, pattern: str) -> str:
        """Converts a .gitignore-style glob pattern to a regex.

        Args:
            pattern: The glob pattern string to convert.

        Returns:
            The complete, anchored regular expression string.
        """
        # 1. Normalize the pattern.
        normalized_pattern = self._normalize_pattern(pattern)

        # 2. Split into parts and convert each part.
        parts = normalized_pattern.split("/")
        regex_parts = [
            self.glob_processor.convert_glob_part(part) for part in parts
        ]

        # 3. Join the parts with appropriate separators.
        joined_regex = self._join_regex_parts(regex_parts)

        # 4. Add anchors.
        return f"^{joined_regex}$"

    def _normalize_pattern(self, pattern: str) -> str:
        """Applies initial pattern transformations.

        Args:
            pattern: The glob pattern string.

        Returns:
            The normalized pattern string.
        """
        # If a pattern has no slashes, it is treated as if it were
        # preceded by '**/'.
        if "/" not in pattern:
            pattern = f"**/{pattern}"

        # If a pattern starts with a slash, it is anchored to the project
        # root; the slash itself is not part of the matched path.
        if pattern.startswith("/"):
            pattern = pattern[1:]

        return pattern

    def _join_regex_parts(self, regex_parts: list[str]) -> str:
        """Joins regex parts with appropriate separators.

        Ordinary parts are separated by '/'. A recursive-wildcard part
        ('.*') is expanded so it can only stand for complete path segments:

        - Before another part it becomes '(?:.*/)?', i.e. zero or more
          leading directories, each followed by its slash.
        - As the final part it stays '.*', i.e. everything below the
          preceding directory.

        Gluing '.*' straight onto its neighbours would let '**/foo' match
        'barfoo' and 'a/**/b' match 'ab'.

        Args:
            regex_parts: A list of regex strings to join.

        Returns:
            The joined regex string, or an empty string for an empty list.
        """
        recursive = ".*"
        last_index = len(regex_parts) - 1
        pieces = []

        for index, part in enumerate(regex_parts):
            # A non-final recursive part already ends with its own slash,
            # so a separator is only needed after an ordinary part.
            if index > 0 and regex_parts[index - 1] != recursive:
                pieces.append("/")

            if part != recursive:
                pieces.append(part)
            elif index == last_index:
                pieces.append(".*")
            else:
                pieces.append("(?:.*/)?")

        return "".join(pieces)

__init__() #

Initializes the processor with a GlobProcessor instance.

Source code in src/scaffold_kit/utils/pattern_processor.py
def __init__(self):
    """Creates the GlobProcessor used to convert each pattern part."""
    part_converter = GlobProcessor()
    self.glob_processor = part_converter

pattern_to_regex(pattern) #

Converts a .gitignore-style glob pattern to a regex.

Parameters:

Name Type Description Default
pattern str

The glob pattern string to convert.

required

Returns:

Type Description
str

The complete, anchored regular expression string.

Source code in src/scaffold_kit/utils/pattern_processor.py
def pattern_to_regex(self, pattern: str) -> str:
    """Turns a .gitignore-style glob pattern into an anchored regex.

    Args:
        pattern: The glob pattern string to convert.

    Returns:
        The regular expression string, wrapped in '^' and '$'.
    """
    # Normalize first, then work on the slash-separated segments.
    segments = self._normalize_pattern(pattern).split("/")

    converted = []
    for segment in segments:
        converted.append(self.glob_processor.convert_glob_part(segment))

    # Reassemble the segments and anchor the result at both ends.
    body = self._join_regex_parts(converted)
    return f"^{body}$"

SingleCharHandler #

Bases: CharacterHandler

Handles ‘?’ single character wildcards.

Converts a single ‘?’ glob character into its regex equivalent.

Source code in src/scaffold_kit/utils/pattern_processor.py
class SingleCharHandler(CharacterHandler):
    """Translates the '?' glob wildcard.

    Each '?' becomes a regex that matches one character other than '/'.
    """

    def can_handle(self, char: str) -> bool:
        """Reports whether the character is the '?' wildcard.

        Args:
            char: The single character to inspect.

        Returns:
            True when the character equals '?', False for anything else.
        """
        is_question_mark = char == "?"
        return is_question_mark

    def handle(self, text: str, position: int) -> Tuple[str, int]:
        """Replaces '?' with '[^/]'.

        Args:
            text: The full text being processed (not examined).
            position: Index of the '?' in the text.

        Returns:
            A tuple of the replacement regex and the following position.
        """
        next_position = position + 1
        return "[^/]", next_position

can_handle(char) #

Checks if the character is a ‘?’.

Parameters:

Name Type Description Default
char str

The single character to check.

required

Returns:

Type Description
bool

True if the character is a single-char wildcard, False otherwise.

Source code in src/scaffold_kit/utils/pattern_processor.py
def can_handle(self, char: str) -> bool:
    """Reports whether the character is the '?' wildcard.

    Args:
        char: The single character to inspect.

    Returns:
        True when the character equals '?', False for anything else.
    """
    is_question_mark = char == "?"
    return is_question_mark

handle(text, position) #

Converts ‘?’ to ‘[^/]’.

Parameters:

Name Type Description Default
text str

The full text being processed.

required
position int

Current position in the text.

required

Returns:

Type Description
Tuple[str, int]

A tuple of the replacement regex and the new position.

Source code in src/scaffold_kit/utils/pattern_processor.py
def handle(self, text: str, position: int) -> Tuple[str, int]:
    """Replaces '?' with '[^/]'.

    Args:
        text: The full text being processed (not examined).
        position: Index of the '?' in the text.

    Returns:
        A tuple of the replacement regex and the following position.
    """
    next_position = position + 1
    return "[^/]", next_position

WildcardHandler #

Bases: CharacterHandler

Handles ‘*’ wildcard characters.

Converts a single ‘*’ glob character into its regex equivalent.

Source code in src/scaffold_kit/utils/pattern_processor.py
class WildcardHandler(CharacterHandler):
    """Translates the '*' glob wildcard.

    Each '*' becomes a regex that matches any run of characters other than
    '/', including an empty run.
    """

    def can_handle(self, char: str) -> bool:
        """Reports whether the character is the '*' wildcard.

        Args:
            char: The single character to inspect.

        Returns:
            True when the character equals '*', False for anything else.
        """
        is_star = char == "*"
        return is_star

    def handle(self, text: str, position: int) -> Tuple[str, int]:
        """Replaces '*' with '[^/]*'.

        Args:
            text: The full text being processed (not examined).
            position: Index of the '*' in the text.

        Returns:
            A tuple of the replacement regex and the following position.
        """
        next_position = position + 1
        return "[^/]*", next_position

can_handle(char) #

Checks if the character is a ‘*’.

Parameters:

Name Type Description Default
char str

The single character to check.

required

Returns:

Type Description
bool

True if the character is a wildcard, False otherwise.

Source code in src/scaffold_kit/utils/pattern_processor.py
def can_handle(self, char: str) -> bool:
    """Reports whether the character is the '*' wildcard.

    Args:
        char: The single character to inspect.

    Returns:
        True when the character equals '*', False for anything else.
    """
    is_star = char == "*"
    return is_star

handle(text, position) #

Converts ‘*’ to ‘[^/]*’.

Parameters:

Name Type Description Default
text str

The full text being processed.

required
position int

Current position in the text.

required

Returns:

Type Description
Tuple[str, int]

A tuple of the replacement regex and the new position.

Source code in src/scaffold_kit/utils/pattern_processor.py
def handle(self, text: str, position: int) -> Tuple[str, int]:
    """Replaces '*' with '[^/]*'.

    Args:
        text: The full text being processed (not examined).
        position: Index of the '*' in the text.

    Returns:
        A tuple of the replacement regex and the following position.
    """
    next_position = position + 1
    return "[^/]*", next_position