tilelang.language package#
Subpackages#
- tilelang.language.tir package
- Submodules
- tilelang.language.tir.entry module
- tilelang.language.tir.ir module
- tilelang.language.tir.op module
TVMBackendAllocWorkspace()
TVMBackendFreeWorkspace()
abs()
acos()
acosh()
address_of()
all()
any()
anylist_getitem()
anylist_resetitem()
anylist_setitem_call_cpacked()
anylist_setitem_call_packed()
asin()
asinh()
assume()
atan()
atan2()
atanh()
bitwise_and()
bitwise_not()
bitwise_or()
bitwise_xor()
call_cpacked()
call_cpacked_lowered()
call_extern()
call_intrin()
call_llvm_intrin()
call_llvm_pure_intrin()
call_packed()
call_packed_lowered()
call_pure_extern()
call_tir()
ceil()
ceildiv()
clz()
comm_reducer()
copysign()
cos()
cosh()
create_barriers()
div()
end_profile_intrinsic()
erf()
exp()
exp10()
exp2()
floor()
floordiv()
floormod()
fmod()
hypot()
if_then_else()
indexdiv()
indexmod()
infinity()
isfinite()
isinf()
isnan()
isnullptr()
ldexp()
likely()
log()
log10()
log1p()
log2()
lookup_param()
max_value()
min_value()
mma_fill()
mma_store()
nearbyint()
nextafter()
popcount()
pow()
pow_of_int()
power()
ptx_arrive_barrier()
ptx_arrive_barrier_expect_tx()
ptx_commit_group()
ptx_cp_async()
ptx_cp_async_barrier()
ptx_cp_async_bulk()
ptx_init_barrier_thread_count()
ptx_ldmatrix()
ptx_mma()
ptx_mma_sp()
ptx_wait_barrier()
ptx_wait_group()
q_multiply_shift()
q_multiply_shift_per_axis()
reinterpret()
ret()
round()
rsqrt()
shift_left()
shift_right()
sigmoid()
sin()
sinh()
sqrt()
start_profile_intrinsic()
tan()
tanh()
trace()
trunc()
truncdiv()
truncmod()
tvm_access_ptr()
tvm_bmma_sync()
tvm_check_return()
tvm_fill_fragment()
tvm_load_matrix_sync()
tvm_mfma()
tvm_mfma_store()
tvm_mma_sync()
tvm_rdna_wmma()
tvm_rdna_wmma_store()
tvm_stack_alloca()
tvm_stack_make_array()
tvm_stack_make_shape()
tvm_storage_sync()
tvm_store_matrix_sync()
tvm_struct_get()
tvm_struct_set()
tvm_thread_allreduce()
tvm_thread_invariant()
tvm_throw_last_error()
tvm_tuple()
tvm_warp_activemask()
tvm_warp_shuffle()
tvm_warp_shuffle_down()
tvm_warp_shuffle_up()
type_annotation()
undef()
vectorcombine()
vectorhigh()
vectorlow()
vscale()
- Module contents
- Submodules
Submodules#
- tilelang.language.allocate module
- tilelang.language.builtin module
barrier_arrive()
barrier_wait()
create_list_of_mbarrier()
create_tma_descriptor()
dec_max_nreg()
fence_proxy_async()
get_mbarrier()
inc_max_nreg()
mbarrier_arrive()
mbarrier_expect_tx()
mbarrier_wait_parity()
no_set_max_nreg()
set_max_nreg()
shfl_down()
shfl_up()
shfl_xor()
sync_global()
sync_thread_partial()
sync_threads()
tma_load()
tma_store_arrive()
tma_store_wait()
wait_wgmma()
- tilelang.language.copy module
- tilelang.language.customize module
- tilelang.language.fill module
- tilelang.language.frame module
- tilelang.language.gemm module
- tilelang.language.kernel module
FrameStack
Kernel()
KernelLaunchFrame
KernelLaunchFrame.Current()
KernelLaunchFrame.blocks
KernelLaunchFrame.get_block_binding()
KernelLaunchFrame.get_block_bindings()
KernelLaunchFrame.get_block_extent()
KernelLaunchFrame.get_block_extents()
KernelLaunchFrame.get_num_threads()
KernelLaunchFrame.get_thread_binding()
KernelLaunchFrame.get_thread_bindings()
KernelLaunchFrame.get_thread_extent()
KernelLaunchFrame.get_thread_extents()
KernelLaunchFrame.num_threads
KernelLaunchFrame.threads
get_block_binding()
get_block_bindings()
get_block_extent()
get_block_extents()
get_thread_binding()
get_thread_bindings()
get_thread_extent()
get_thread_extents()
- tilelang.language.logical module
- tilelang.language.memscope module
- tilelang.language.parallel module
- tilelang.language.pipeline module
- tilelang.language.print module
- tilelang.language.proxy module
- tilelang.language.reduce module
- tilelang.language.warpgroup module
Module contents#
The language interface for tl programs.
- tilelang.language.annotate_l2_hit_ratio(l2_hit_ratio_map: Dict)#
Annotate the L2 hit ratio of the buffer. For a detailed explanation, please refer to: https://docs.nvidia.com/cuda/cuda-c-programming-guide/#l2-policy-for-persisting-accesses
- Parameters:
l2_hit_ratio_map (dict) – a dictionary of buffer to L2 hit ratio value
Example
# 0.5 is the hit ratio
T.annotate_l2_hit_ratio({A: 0.5})
- tilelang.language.annotate_layout(layout_map: Dict)#
Annotate the layout of the buffer
- Parameters:
layout_map (Dict) – a dictionary of buffer to layout
- Returns:
a block attribute
- Return type:
block_attr
Example
@T.prim_func
def main(
        A: T.Tensor((M, N), dtype),
        B: T.Tensor((M, N), dtype),
):
    # Initialize Kernel Context
    with T.Kernel(T.ceildiv(N, block_N), T.ceildiv(M, block_M), threads=128) as (bx, by):
        A_shared = T.alloc_shared((block_M, block_N), dtype)

        T.annotate_layout({A_shared: layout})
        for i, j in T.Parallel(block_M, block_N):
            A_shared[i, j] = A[by * block_M + i, bx * block_N + j]

        for i, j in T.Parallel(block_M, block_N):
            B[by * block_M + i, bx * block_N + j] = A_shared[i, j]

return main
- tilelang.language.annotate_padding(padding_map: Dict)#
Annotate the padding of the buffer
- Parameters:
padding_map (dict) – a dictionary of buffer to padding value
- Returns:
a block attribute
- Return type:
block_attr
Example
@T.prim_func
def main(
        A: T.Tensor((M, N), dtype),
        B: T.Tensor((M, N), dtype),
):
    # Initialize Kernel Context
    with T.Kernel(T.ceildiv(N, block_N), T.ceildiv(M, block_M), threads=128) as (bx, by):
        A_shared = T.alloc_shared((block_M, block_N), dtype)

        T.annotate_padding({A_shared: pad_value})
        for i, j in T.Parallel(block_M, block_N):
            A_shared[i, j] = A[by * block_M + i - 10, bx * block_N + j]

        for i, j in T.Parallel(block_M, block_N):
            B[by * block_M + i, bx * block_N + j] = A_shared[i, j]

return main
- tilelang.language.import_source(source: Optional[str] = None)#
- tilelang.language.symbolic(name: str, dtype: str = 'int32')#
- tilelang.language.use_swizzle(panel_size: int, order: str = 'row', enable: boolean = True)#