# Source code for crosstl.backend.GLSL.OpenglLexer

"""Lexer for importing GLSL source into CrossGL Translator."""

import re
from typing import Iterator, Tuple, List, Optional, Dict

from .preprocessor import GLSLPreprocessor

# Token types that the lexer matches but never hands to the caller
SKIP_TOKENS = {
    "WHITESPACE",
    "COMMENT_SINGLE",
    "COMMENT_MULTI",
}

# Building blocks for numeric literals.
HEX_NUMBER = r"0[xX][0-9a-fA-F]+"
# Digits with a decimal point on either side, plus an optional exponent
DECIMAL_FLOAT = r"(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?"
# Digits followed by a mandatory exponent (no decimal point)
DECIMAL_EXP = r"\d+[eE][+-]?\d+"
DECIMAL_INT = r"\d+"

# The alternatives are tried left to right, so the hex and float forms are
# attempted before the plain integer form; an optional u/U suffix may follow.
# NOTE(review): no f/F/lf/LF suffix is accepted here, so "1.0f" matches only
# "1.0" -- confirm the parser copes with the trailing identifier.
NUMBER_PATTERN = "(?:{})(?:[uU])?".format(
    "|".join((HEX_NUMBER, DECIMAL_FLOAT, DECIMAL_EXP, DECIMAL_INT))
)

# (token type, regex source) pairs. The table is order-sensitive: a longer
# operator must appear before any shorter operator that is a prefix of it.
TOKENS = (
    # Comments and line breaks
    ("COMMENT_SINGLE", r"//[^\n]*"),
    ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"),
    ("NEWLINE", r"\n+"),
    # Three-character compound assignments
    ("ASSIGN_SHIFT_LEFT", r"<<="),
    ("ASSIGN_SHIFT_RIGHT", r">>="),
    # Two-character operators
    ("INCREMENT", r"\+\+"),
    ("DECREMENT", r"--"),
    ("PLUS_EQUALS", r"\+="),
    ("MINUS_EQUALS", r"-="),
    ("MULTIPLY_EQUALS", r"\*="),
    ("DIVIDE_EQUALS", r"/="),
    ("MOD_EQUALS", r"%="),
    ("ASSIGN_AND", r"&="),
    ("ASSIGN_OR", r"\|="),
    ("ASSIGN_XOR", r"\^="),
    ("SHIFT_LEFT", r"<<"),
    ("SHIFT_RIGHT", r">>"),
    ("LESS_EQUAL", r"<="),
    ("GREATER_EQUAL", r">="),
    ("EQUAL", r"=="),
    ("NOT_EQUAL", r"!="),
    ("LOGICAL_AND", r"&&"),
    ("LOGICAL_OR", r"\|\|"),
    # Single-character logical / bitwise operators and plain assignment
    ("LOGICAL_NOT", r"!"),
    ("BITWISE_NOT", r"~"),
    ("BITWISE_XOR", r"\^"),
    ("BITWISE_OR", r"\|"),
    ("BITWISE_AND", r"&"),
    ("EQUALS", r"="),
    # Punctuation
    ("HASH", r"#"),
    ("LBRACE", r"\{"),
    ("RBRACE", r"\}"),
    ("LPAREN", r"\("),
    ("RPAREN", r"\)"),
    ("LBRACKET", r"\["),
    ("RBRACKET", r"\]"),
    ("SEMICOLON", r";"),
    ("COMMA", r","),
    ("COLON", r":"),
    ("QUESTION", r"\?"),
    # NUMBER sits ahead of DOT so a literal such as ".5" is one number token
    ("NUMBER", NUMBER_PATTERN),
    ("DOT", r"\."),
    # Single-character arithmetic and comparison operators
    ("PLUS", r"\+"),
    ("MINUS", r"-"),
    ("MULTIPLY", r"\*"),
    ("DIVIDE", r"/"),
    ("MOD", r"%"),
    ("LESS_THAN", r"<"),
    ("GREATER_THAN", r">"),
    # Quoted literals, names, and horizontal whitespace
    ("STRING", r'"([^"\\]|\\.)*"'),
    ("CHAR_LITERAL", r"'(?:[^'\\]|\\.)'"),
    ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"),
    ("WHITESPACE", r"[ \t\r\f\v]+"),
)

# Reserved word -> token type. Every vector type maps to "VECTOR" and every
# matrix type to "MATRIX"; each remaining word maps to its own upper-cased
# spelling. The groups below are laid out so the resulting insertion order is
# the same as a flat, hand-written table.
KEYWORDS = {
    # Declaration keywords, qualifiers, precision words and scalar types
    **{
        word: word.upper()
        for word in (
            "struct", "uniform", "const",
            "flat", "smooth", "noperspective", "centroid", "sample", "patch",
            "invariant", "precise",
            "in", "out", "inout", "layout", "attribute", "varying",
            "buffer", "shared",
            "readonly", "writeonly", "coherent", "volatile", "restrict",
            "precision", "lowp", "mediump", "highp",
            "void", "bool", "int", "uint", "float", "double",
        )
    },
    # vec2..vec4 and the i/u/b/d prefixed variants
    **{
        f"{prefix}vec{size}": "VECTOR"
        for prefix in ("", "i", "u", "b", "d")
        for size in (2, 3, 4)
    },
    # mat2..mat4 followed by every matNxM combination
    **{f"mat{size}": "MATRIX" for size in (2, 3, 4)},
    **{
        f"mat{first}x{second}": "MATRIX"
        for first in (2, 3, 4)
        for second in (2, 3, 4)
    },
    # Un-prefixed sampler types
    **{
        word: word.upper()
        for word in (
            "sampler2D", "sampler3D", "samplerCube",
            "sampler1D", "sampler1DArray", "sampler1DShadow",
            "sampler1DArrayShadow",
            "sampler2DArray", "sampler2DArrayShadow",
            "samplerCubeArray", "samplerCubeArrayShadow",
            "sampler2DShadow", "sampler2DRect", "sampler2DRectShadow",
            "samplerBuffer", "samplerCubeShadow",
            "sampler2DMS", "sampler2DMSArray",
        )
    },
    # i/u samplers and all image families share one list of shape suffixes
    **{
        f"{family}{shape}": f"{family}{shape}".upper()
        for family in ("isampler", "usampler", "image", "iimage", "uimage")
        for shape in (
            "1D", "2D", "3D", "Cube",
            "1DArray", "2DArray", "CubeArray",
            "2DRect", "Buffer", "2DMS", "2DMSArray",
        )
    },
    # Remaining opaque/subroutine words, control flow and boolean literals
    **{
        word: word.upper()
        for word in (
            "atomic_uint", "subroutine",
            "if", "else", "for", "while", "do",
            "switch", "case", "default",
            "break", "continue", "return", "discard",
            "true", "false",
        )
    },
}



class GLSLLexer:
    """Tokenize GLSL source for the OpenGL parser.

    The lexer scans ``code`` from left to right. At each offset it tries the
    compiled ``TOKENS`` patterns in table order and takes the first one that
    matches. Identifiers listed in ``KEYWORDS`` are re-tagged with their
    keyword token type, and token types in ``SKIP_TOKENS`` are dropped.
    """

    def __init__(
        self,
        code: str,
        preprocess: bool = True,
        include_paths: Optional[List[str]] = None,
        defines: Optional[Dict[str, str]] = None,
        strict_preprocessor: bool = True,
        max_expansion_depth: int = 64,
        file_path: Optional[str] = None,
    ):
        """Initialize the lexer and optionally run the GLSL preprocessor.

        Args:
            code: GLSL source text to tokenize.
            preprocess: When true, ``code`` is first run through
                ``GLSLPreprocessor`` and the preprocessor's output is lexed
                instead of the raw text.
            include_paths: Forwarded to the preprocessor unchanged.
            defines: Forwarded to the preprocessor unchanged.
            strict_preprocessor: Forwarded to the preprocessor as ``strict``.
            max_expansion_depth: Forwarded to the preprocessor unchanged.
            file_path: Forwarded to ``GLSLPreprocessor.preprocess``.
        """
        if preprocess:
            preprocessor = GLSLPreprocessor(
                include_paths=include_paths,
                defines=defines,
                strict=strict_preprocessor,
                max_expansion_depth=max_expansion_depth,
            )
            code = preprocessor.preprocess(code, file_path=file_path)
        # Keep table order: _next_token returns the first pattern that matches.
        self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS]
        self.code = code
        self._length = len(code)

    def tokenize(self) -> List[Tuple[str, str]]:
        """Return the full token stream as ``(token_type, text)`` tuples.

        Raises:
            SyntaxError: Propagated from :meth:`token_generator`.
        """
        return list(self.token_generator())

    def token_generator(self) -> Iterator[Tuple[str, str]]:
        """Yield GLSL tokens while skipping whitespace and comments.

        Yields:
            ``(token_type, text)`` pairs for every token whose type is not in
            ``SKIP_TOKENS``, followed by a final ``("EOF", "")`` pair.

        Raises:
            SyntaxError: If a ``/*`` has no closing ``*/``, or if no token
                pattern matches at the current offset. Because this is a
                generator, the error surfaces during iteration.
        """
        pos = 0
        while pos < self._length:
            # The block-comment pattern needs a closing "*/"; without this
            # check an unterminated comment would be lexed as ordinary tokens.
            if self.code.startswith("/*", pos) and self.code.find("*/", pos + 2) == -1:
                line_num, col_num = self._line_and_column(pos)
                raise SyntaxError(
                    f"Unterminated block comment at line {line_num}, column {col_num}"
                )

            token = self._next_token(pos)
            if token is None:
                line_num, col_num = self._line_and_column(pos)
                # Up to 20 characters on either side of the offending offset
                context = self.code[max(0, pos - 20) : min(self._length, pos + 20)]
                raise SyntaxError(
                    f"Illegal character '{self.code[pos]}' at line {line_num}, column {col_num}\n"
                    f"Context: ...{context}..."
                )

            new_pos, token_type, text = token

            # Reserved words are matched as identifiers and promoted here.
            if token_type == "IDENTIFIER":
                token_type = KEYWORDS.get(text, token_type)

            if token_type not in SKIP_TOKENS:
                yield (token_type, text)

            pos = new_pos

        yield ("EOF", "")

    def _line_and_column(self, pos: int) -> Tuple[int, int]:
        """Return the 1-based ``(line, column)`` of offset ``pos`` in the code."""
        line_num = self.code.count("\n", 0, pos) + 1
        # rfind gives -1 when no newline precedes pos, so columns on the
        # first line are 1-based just like those on later lines.
        col_num = pos - self.code.rfind("\n", 0, pos)
        return line_num, col_num

    def _next_token(self, pos: int) -> Optional[Tuple[int, str, str]]:
        """Match the next token at ``pos`` and return its end offset.

        Returns:
            ``(end_offset, token_type, text)`` for the first pattern that
            matches at ``pos``, or ``None`` when no pattern matches.
        """
        for token_type, pattern in self._token_patterns:
            match = pattern.match(self.code, pos)
            if match:
                return match.end(0), token_type, match.group(0)
        return None

    @classmethod
    def from_file(
        cls,
        filepath: str,
        preprocess: bool = True,
        include_paths: Optional[List[str]] = None,
        defines: Optional[Dict[str, str]] = None,
        strict_preprocessor: bool = True,
        max_expansion_depth: int = 64,
    ) -> "GLSLLexer":
        """Create a lexer instance from a GLSL source file.

        The file is read as UTF-8 and ``filepath`` is passed on as the
        lexer's ``file_path``; all other arguments are forwarded to the
        constructor unchanged.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # Read first so the file handle is closed before preprocessing runs.
        with open(filepath, "r", encoding="utf-8") as f:
            source = f.read()
        return cls(
            source,
            preprocess=preprocess,
            include_paths=include_paths,
            defines=defines,
            strict_preprocessor=strict_preprocessor,
            max_expansion_depth=max_expansion_depth,
            file_path=filepath,
        )