tilelang.autotuner.param module#

The auto-tune parameters.

class tilelang.autotuner.param.AutotuneResult(latency: float, config: dict, ref_latency: float, libcode: str, func: Callable, kernel: Callable)#

Bases: object

Results from auto-tuning process.

latency#

Best achieved execution latency.

Type:

float

config#

Configuration that produced the best result.

Type:

dict

ref_latency#

Reference implementation latency.

Type:

float

libcode#

Generated library code.

Type:

str

func#

Optimized function.

Type:

Callable

kernel#

Compiled kernel function.

Type:

Callable

config: dict#
func: Callable#
kernel: Callable#
latency: float#
libcode: str#
classmethod load_from_disk(path: Path, compile_args: CompileArgs) AutotuneResult#
ref_latency: float#
save_to_disk(path: Path)#
class tilelang.autotuner.param.CompileArgs(out_idx: Union[List[int], int] = -1, execution_backend: Literal['dlpack', 'ctypes', 'cython'] = 'cython', target: Literal['auto', 'cuda', 'hip'] = 'auto', target_host: Optional[Union[str, Target]] = None, verbose: bool = False, pass_configs: Optional[Dict[str, Any]] = None)#

Bases: object

Compile arguments for the auto-tuner. Detailed description can be found in tilelang.jit.compile.

out_idx#

List of output tensor indices (default: -1).

Type:

Union[List[int], int]

execution_backend#

Execution backend to use for kernel execution (default: “cython”).

Type:

Literal[‘dlpack’, ‘ctypes’, ‘cython’]

target#

Compilation target, either as a string or a TVM Target object (default: “auto”).

Type:

Literal[‘auto’, ‘cuda’, ‘hip’]

target_host#

Target host for cross-compilation (default: None).

Type:

Union[str, tvm.target.target.Target]

verbose#

Whether to enable verbose output (default: False).

Type:

bool

pass_configs#

Additional keyword arguments to pass to the Compiler PassContext.

Type:

Optional[Dict[str, Any]]

Available options

“tir.disable_vectorize”: bool, default: False

“tl.disable_tma_lower”: bool, default: False

“tl.disable_warp_specialized”: bool, default: False

“tl.config_index_bitwidth”: int, default: None

“tl.disable_dynamic_tail_split”: bool, default: False

“tl.dynamic_vectorize_size_bits”: int, default: 128

“tl.disable_safe_memory_legalize”: bool, default: False

compile_program(program: PrimFunc)#
execution_backend: Literal['dlpack', 'ctypes', 'cython'] = 'cython'#
out_idx: Union[List[int], int] = -1#
pass_configs: Optional[Dict[str, Any]] = None#
target: Literal['auto', 'cuda', 'hip'] = 'auto'#
target_host: Optional[Union[str, Target]] = None#
verbose: bool = False#
class tilelang.autotuner.param.ProfileArgs(warmup: int = 25, rep: int = 100, timeout: int = 30, supply_type: TensorSupplyType = TensorSupplyType.Auto, ref_prog: Optional[Callable] = None, supply_prog: Optional[Callable] = None, rtol: float = 0.01, atol: float = 0.01, max_mismatched_ratio: float = 0.01, skip_check: bool = False, manual_check_prog: Optional[Callable] = None, cache_input_tensors: bool = True)#

Bases: object

Profile arguments for the auto-tuner.

warmup#

Number of warmup iterations.

Type:

int

rep#

Number of repetitions for timing.

Type:

int

timeout#

Maximum time per configuration.

Type:

int

supply_type#

Type of tensor supply mechanism.

Type:

tilelang.utils.tensor.TensorSupplyType

ref_prog#

Reference program for correctness validation.

Type:

Callable

supply_prog#

Supply program for input tensors.

Type:

Callable

rtol#

Relative tolerance for correctness validation against the reference program (default: 1e-2).

Type:

float

atol#

Absolute tolerance for correctness validation against the reference program (default: 1e-2).

Type:

float

max_mismatched_ratio#

Maximum allowed ratio of mismatched elements during validation (default: 0.01).

Type:

float

skip_check#

Whether to skip the correctness check (default: False).

Type:

bool

manual_check_prog#

Optional user-supplied program for manual correctness checking (default: None).

Type:

Callable

cache_input_tensors#

Whether to cache and reuse input tensors across trials (default: True).

Type:

bool

atol: float = 0.01#
cache_input_tensors: bool = True#
manual_check_prog: Optional[Callable] = None#
max_mismatched_ratio: float = 0.01#
ref_prog: Optional[Callable] = None#
rep: int = 100#
rtol: float = 0.01#
skip_check: bool = False#
supply_prog: Optional[Callable] = None#
supply_type: TensorSupplyType = TensorSupplyType.Auto#
timeout: int = 30#
warmup: int = 25#