"""Lexer for importing HIP source into CrossGL Translator."""
import re
from typing import List
[docs]
class Token:
    """One lexeme of HIP source: its kind, its text, and where it was found."""

    def __init__(self, token_type: str, value: str, line: int = 1, column: int = 1):
        """Record the token kind and matched text plus its line/column position.

        ``line`` and ``column`` both default to 1 when the caller does not
        supply a location.
        """
        self.type, self.value = token_type, value
        self.line, self.column = line, column

    def __repr__(self):
        """Return ``Token(<type>, '<value>')``; the location is not included."""
        rendered = f"Token({self.type}, '{self.value}')"
        return rendered
# Token types that are matched but never handed to the parser.
SKIP_TOKENS = {"WHITESPACE", "COMMENT_SINGLE", "COMMENT_MULTI"}

# HIP token definitions as (token type, regex) pairs.
# Order matters: the first pattern that matches at a position wins, so longer
# or more specific patterns must appear before shorter or more general ones.
# NOTE: the names "FLOAT" and "CHAR" are each used twice on purpose-preserved
# grounds -- once for the type keyword and once for the literal -- so this
# table must stay a sequence of pairs and cannot be turned into a dict.
TOKENS = (
    # Comments
    ("COMMENT_SINGLE", r"//.*"),
    ("COMMENT_MULTI", r"/\*[\s\S]*?\*/"),
    # HIP execution configuration (must come before operators)
    ("KERNEL_LAUNCH_START", r"<<<"),
    ("KERNEL_LAUNCH_END", r">>>"),
    # HIP keywords and qualifiers
    ("__GLOBAL__", r"\b__global__\b"),
    ("__DEVICE__", r"\b__device__\b"),
    ("__HOST__", r"\b__host__\b"),
    ("__SHARED__", r"\b__shared__\b"),
    ("__CONSTANT__", r"\b__constant__\b"),
    ("__RESTRICT__", r"\b__restrict__\b"),
    ("__MANAGED__", r"\b__managed__\b"),
    ("__NOINLINE__", r"\b__noinline__\b"),
    ("__FORCEINLINE__", r"\b__forceinline__\b"),
    # HIP built-in variables
    ("THREADIDX", r"\bthreadIdx\b"),
    ("BLOCKIDX", r"\bblockIdx\b"),
    ("GRIDDIM", r"\bgridDim\b"),
    ("BLOCKDIM", r"\bblockDim\b"),
    ("WARPSIZE", r"\bwarpSize\b"),
    # The word boundaries wrap the whole name so that every component
    # (_x, _y, _z) is anchored on both sides, not just the first alternative.
    ("HIPTHREADIDX", r"\bhipThreadIdx_[xyz]\b"),
    ("HIPBLOCKIDX", r"\bhipBlockIdx_[xyz]\b"),
    ("HIPBLOCKDIM", r"\bhipBlockDim_[xyz]\b"),
    ("HIPGRIDDIM", r"\bhipGridDim_[xyz]\b"),
    # HIP built-in functions (alternatives grouped so both \b apply to each)
    ("SYNCTHREADS", r"\b(?:__syncthreads|hipDeviceSynchronize)\b"),
    ("SYNCWARP", r"\b__syncwarp\b"),
    ("ATOMICADD", r"\b(?:atomicAdd|hipAtomicAdd)\b"),
    ("ATOMICSUB", r"\b(?:atomicSub|hipAtomicSub)\b"),
    ("ATOMICMAX", r"\b(?:atomicMax|hipAtomicMax)\b"),
    ("ATOMICMIN", r"\b(?:atomicMin|hipAtomicMin)\b"),
    ("ATOMICEXCH", r"\b(?:atomicExch|hipAtomicExch)\b"),
    ("ATOMICCAS", r"\b(?:atomicCAS|hipAtomicCAS)\b"),
    # HIP error handling
    ("HIPERROR", r"\bhipError_t\b"),
    ("HIPSUCCESS", r"\bhipSuccess\b"),
    # Standard C/C++ keywords
    ("TYPEDEF", r"\btypedef\b"),
    ("STRUCT", r"\bstruct\b"),
    ("UNION", r"\bunion\b"),
    ("ENUM", r"\benum\b"),
    ("CLASS", r"\bclass\b"),
    ("NAMESPACE", r"\bnamespace\b"),
    ("TEMPLATE", r"\btemplate\b"),
    ("TYPENAME", r"\btypename\b"),
    ("EXTERN", r"\bextern\b"),
    ("STATIC", r"\bstatic\b"),
    ("INLINE", r"\binline\b"),
    ("CONST", r"\bconst\b"),
    ("VOLATILE", r"\bvolatile\b"),
    ("MUTABLE", r"\bmutable\b"),
    ("VIRTUAL", r"\bvirtual\b"),
    ("PUBLIC", r"\bpublic\b"),
    ("PRIVATE", r"\bprivate\b"),
    ("PROTECTED", r"\bprotected\b"),
    # Control flow
    ("IF", r"\bif\b"),
    ("ELSE", r"\belse\b"),
    ("FOR", r"\bfor\b"),
    ("WHILE", r"\bwhile\b"),
    ("DO", r"\bdo\b"),
    ("SWITCH", r"\bswitch\b"),
    ("CASE", r"\bcase\b"),
    ("DEFAULT", r"\bdefault\b"),
    ("BREAK", r"\bbreak\b"),
    ("CONTINUE", r"\bcontinue\b"),
    ("RETURN", r"\breturn\b"),
    ("GOTO", r"\bgoto\b"),
    # HIP and C++ types
    ("VOID", r"\bvoid\b"),
    ("BOOL", r"\bbool\b"),
    ("CHAR", r"\bchar\b"),
    ("SHORT", r"\bshort\b"),
    ("INT", r"\bint\b"),
    ("LONG", r"\blong\b"),
    ("FLOAT", r"\bfloat\b"),
    ("DOUBLE", r"\bdouble\b"),
    ("SIGNED", r"\bsigned\b"),
    ("UNSIGNED", r"\bunsigned\b"),
    ("SIZE_T", r"\bsize_t\b"),
    # HIP vector types
    ("FLOAT2", r"\bfloat2\b"),
    ("FLOAT3", r"\bfloat3\b"),
    ("FLOAT4", r"\bfloat4\b"),
    ("DOUBLE2", r"\bdouble2\b"),
    ("DOUBLE3", r"\bdouble3\b"),
    ("DOUBLE4", r"\bdouble4\b"),
    ("INT2", r"\bint2\b"),
    ("INT3", r"\bint3\b"),
    ("INT4", r"\bint4\b"),
    ("UINT2", r"\buint2\b"),
    ("UINT3", r"\buint3\b"),
    ("UINT4", r"\buint4\b"),
    ("CHAR2", r"\bchar2\b"),
    ("CHAR3", r"\bchar3\b"),
    ("CHAR4", r"\bchar4\b"),
    ("UCHAR2", r"\buchar2\b"),
    ("UCHAR3", r"\buchar3\b"),
    ("UCHAR4", r"\buchar4\b"),
    ("SHORT2", r"\bshort2\b"),
    ("SHORT3", r"\bshort3\b"),
    ("SHORT4", r"\bshort4\b"),
    ("USHORT2", r"\bushort2\b"),
    ("USHORT3", r"\bushort3\b"),
    ("USHORT4", r"\bushort4\b"),
    ("LONG2", r"\blong2\b"),
    ("LONG3", r"\blong3\b"),
    ("LONG4", r"\blong4\b"),
    ("ULONG2", r"\bulong2\b"),
    ("ULONG3", r"\bulong3\b"),
    ("ULONG4", r"\bulong4\b"),
    ("LONGLONG2", r"\blonglong2\b"),
    ("LONGLONG3", r"\blonglong3\b"),
    ("LONGLONG4", r"\blonglong4\b"),
    ("ULONGLONG2", r"\bulonglong2\b"),
    ("ULONGLONG3", r"\bulonglong3\b"),
    ("ULONGLONG4", r"\bulonglong4\b"),
    # HIP texture types
    ("TEXTURE", r"\btexture\b"),
    ("SURFACE", r"\bsurface\b"),
    ("HIPARRAY", r"\bhipArray\b"),
    ("HIPARRAYT", r"\bhipArray_t\b"),
    # Boolean and null literals
    ("TRUE", r"\btrue\b"),
    ("FALSE", r"\bfalse\b"),
    ("NULL", r"\bNULL\b"),
    ("NULLPTR", r"\bnullptr\b"),
    # Identifiers and literals (must come after keywords)
    ("IDENTIFIER", r"[a-zA-Z_][a-zA-Z0-9_]*"),
    (
        # Floating literal (shares the name "FLOAT" with the type keyword).
        # The bare "digits + suffix" form only accepts f/F/d/D: a plain
        # integer followed by l/L (e.g. ``10L``) is a long integer and must
        # fall through to INTEGER below.
        "FLOAT",
        r"(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?[fFdDlL]*|\d+[eE][+-]?\d+[fFdDlL]*|\d+[fFdD]",
    ),
    ("INTEGER", r"(?:0[xX][0-9a-fA-F]+|0[bB][01]+|\d+)[lLuU]*"),
    ("STRING", r'"([^"\\]|\\.)*"'),
    # Character literal (shares the name "CHAR" with the type keyword).
    # One or more characters/escapes so hex and octal escapes such as
    # '\x41' or '\123' are accepted, not only single-character bodies.
    ("CHAR", r"'([^'\\]|\\.)+'"),
    # Preprocessor
    ("HASH", r"#"),
    # Operators (multi-character first)
    ("SCOPE", r"::"),
    ("LSHIFT_ASSIGN", r"<<="),
    ("RSHIFT_ASSIGN", r">>="),
    ("LSHIFT", r"<<"),
    ("RSHIFT", r">>"),
    ("PLUS_ASSIGN", r"\+="),
    ("MINUS_ASSIGN", r"-="),
    ("STAR_ASSIGN", r"\*="),
    ("SLASH_ASSIGN", r"/="),
    ("PERCENT_ASSIGN", r"%="),
    ("AND_ASSIGN", r"&="),
    ("OR_ASSIGN", r"\|="),
    ("XOR_ASSIGN", r"\^="),
    ("AND", r"&&"),
    ("OR", r"\|\|"),
    ("EQ", r"=="),
    ("NE", r"!="),
    ("LE", r"<="),
    ("GE", r">="),
    ("INCREMENT", r"\+\+"),
    ("DECREMENT", r"--"),
    ("ARROW", r"->"),
    ("DOT", r"\."),
    # Single character operators
    ("PLUS", r"\+"),
    ("MINUS", r"-"),
    ("STAR", r"\*"),
    ("SLASH", r"/"),
    ("PERCENT", r"%"),
    ("ASSIGN", r"="),
    ("LT", r"<"),
    ("GT", r">"),
    ("NOT", r"!"),
    ("AMPERSAND", r"&"),
    ("PIPE", r"\|"),
    ("XOR", r"\^"),
    ("TILDE", r"~"),
    ("QUESTION", r"\?"),
    ("COLON", r":"),
    # Delimiters
    ("LBRACE", r"\{"),
    ("RBRACE", r"\}"),
    ("LPAREN", r"\("),
    ("RPAREN", r"\)"),
    ("LBRACKET", r"\["),
    ("RBRACKET", r"\]"),
    ("SEMICOLON", r";"),
    ("COMMA", r","),
    # Whitespace and newlines (must be last)
    ("NEWLINE", r"\n"),
    ("WHITESPACE", r"[ \t\r]+"),
)
# Reserved-word lookup: identifier text -> token type.
# Each token type is just the upper-cased spelling of the word itself, so the
# table is derived from the list of reserved spellings.
KEYWORDS = {
    word: word.upper()
    for word in (
        # HIP qualifiers
        "__global__",
        "__device__",
        "__host__",
        "__shared__",
        "__constant__",
        "__restrict__",
        "__managed__",
        "__noinline__",
        "__forceinline__",
        # HIP built-in variables
        "threadIdx",
        "blockIdx",
        "gridDim",
        "blockDim",
        "warpSize",
    )
}
[docs]
class HipLexer:
    """Tokenize HIP source for the HIP backend parser.

    The lexer walks the source left to right, trying each ``TOKENS`` pattern
    in table order at the current offset and taking the first one that
    matches. Tokens whose type is in ``SKIP_TOKENS`` are consumed but not
    emitted; everything else (including ``NEWLINE``) becomes a ``Token``
    carrying the 1-based line and column where it starts.
    """

    def __init__(self, code: str):
        """Initialize the lexer with raw HIP source text.

        Args:
            code: The complete HIP source to tokenize.
        """
        # Compile every pattern once per lexer; list order is match priority.
        self._token_patterns = [(name, re.compile(pattern)) for name, pattern in TOKENS]
        self.code = code
        self._length = len(code)
        self.reserved_keywords = KEYWORDS
        self.line = 1
        self.column = 1

    def tokenize(self) -> List[Token]:
        """Return the full HIP token stream with source locations.

        Returns:
            The non-skipped tokens in source order.

        Raises:
            SyntaxError: If no token pattern matches at some offset.
        """
        tokens = []
        pos = 0
        # Start location tracking afresh so calling tokenize() more than once
        # on the same lexer reports the same positions every time.
        self.line = 1
        self.column = 1
        while pos < self._length:
            token = self._next_token(pos)
            if token is None:
                raise SyntaxError(
                    f"Illegal character '{self.code[pos]}' at position {pos}"
                )
            new_pos, token_type, text = token
            # Identifiers that spell a reserved word take that word's type.
            if token_type == "IDENTIFIER" and text in self.reserved_keywords:
                token_type = self.reserved_keywords[text]
            if token_type not in SKIP_TOKENS:
                tokens.append(Token(token_type, text, self.line, self.column))
            # Advance the location past this token. Any token may contain
            # newlines (block comments, strings), not only NEWLINE itself, so
            # count them instead of keying on the token type.
            newline_count = text.count("\n")
            if newline_count:
                self.line += newline_count
                # Column of the character right after the token: one more
                # than the number of characters following the last newline.
                self.column = len(text) - text.rfind("\n")
            else:
                self.column += len(text)
            pos = new_pos
        return tokens

    def _next_token(self, pos: int):
        """Match the next token at ``pos`` and return its end offset.

        Returns:
            ``(end_offset, token_type, text)`` for the first pattern that
            matches at ``pos``, or ``None`` when nothing matches.
        """
        for token_type, pattern in self._token_patterns:
            match = pattern.match(self.code, pos)
            if match:
                text = match.group(0)
                return match.end(), token_type, text
        return None
# For compatibility with existing test expectations
[docs]
def parse_hip_code(code: str):
    """Lex and parse HIP source text in one call.

    The source is tokenized with ``HipLexer``, the tokens are handed to
    ``HipParser``, and whatever ``HipParser.parse()`` produces is returned.
    """
    # Imported here rather than at module top, as in the original code.
    from .HipParser import HipParser

    token_stream = HipLexer(code).tokenize()
    return HipParser(token_stream).parse()