Index A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | Z A A (tilelang.primitives.gemm.base.GemmBaseParams attribute) abs() (in module tilelang.language.tir.op) accum_dtype (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA property) acos() (in module tilelang.language.tir.op) acosh() (in module tilelang.language.tir.op) adapt_torch2tvm() (in module tilelang.utils.tensor) adapter (tilelang.jit.kernel.JITKernel attribute), [1] (tilelang.profiler.Profiler attribute), [1] add_next() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) add_prev() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) add_tag() (tilelang.carver.roller.node.Node method) address_of() (in module tilelang.language.tir.op) all() (in module tilelang.language.tir.op) all_of() (in module tilelang.language.logical) alloc_fragment() (in module tilelang.language.allocate) alloc_local() (in module tilelang.language.allocate) alloc_shared() (in module tilelang.language.allocate) alloc_var() (in module tilelang.language.allocate) analysis() (tilelang.tools.Analyzer.Analyzer class method) AnalysisResult (class in tilelang.tools.Analyzer) analyze() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) Analyzer (class in tilelang.tools.Analyzer) annotate_layout() (in module tilelang.language) AnnotateDeviceRegions() (in module tilelang.transform) any() (in module tilelang.language.tir.op) any_of() (in module tilelang.language.logical) anylist_getitem() (in module tilelang.language.tir.op) anylist_resetitem() (in module tilelang.language.tir.op) anylist_setitem_call_cpacked() (in module tilelang.language.tir.op) anylist_setitem_call_packed() (in module tilelang.language.tir.op) apply_simplify() (in module tilelang.transform.simplify) arch (tilelang.carver.roller.policy.default.DefaultPolicy attribute) (tilelang.carver.template.base.BaseTemplate property) array_reduce() (in module tilelang.utils.language) artifact (tilelang.jit.kernel.JITKernel attribute), [1] asin() (in module tilelang.language.tir.op) asinh() (in module tilelang.language.tir.op) assert_allclose() (tilelang.profiler.Profiler method) assert_consistent() (tilelang.profiler.Profiler method) assign_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) assign_device_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_host_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_optimized_module() (tilelang.jit.adapter.wrapper.TLWrapper method) assign_pass_configs() (tilelang.jit.adapter.wrapper.TLWrapper method) assume() (in module tilelang.language.tir.op) asyncEngineCount (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) atan() (in module tilelang.language.tir.op) atan2() (in module tilelang.language.tir.op) atanh() (in module tilelang.language.tir.op) atol (tilelang.autotuner.CompileArgs attribute), [1] (tilelang.autotuner.JITContext attribute), [1] atomic_add() (in module tilelang.language.customize) atomic_addx2() (in module tilelang.language.customize) Auto (tilelang.utils.tensor.TensorSupplyType attribute) auto_infer_current_arch() (in module tilelang.carver.arch) auto_inline_consumer_chain() (in module tilelang.carver.matmul_analysis) auto_inline_consumers() (in module tilelang.carver.matmul_analysis) auto_inline_producers() (in module tilelang.carver.matmul_analysis) autotune() (in module tilelang.autotuner) AutoTuner (class in tilelang.autotuner) AutotuneResult (class in tilelang.autotuner) ax (tilelang.carver.roller.hint.Stride property) B B (tilelang.primitives.gemm.base.GemmBaseParams attribute) backend (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) bandwidth_GBps (tilelang.tools.Analyzer.AnalysisResult attribute) BaseKernelAdapter (class in tilelang.jit.adapter.base) BaseTemplate (class in tilelang.carver.template.base) BaseTensorProxy (class in tilelang.language.proxy) BaseWrapper (class in tilelang.jit.adapter.wrapper) batch_size (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) BestFit (class in tilelang.carver.roller.bestfit) bitwise_and() (in module tilelang.language.tir.op) bitwise_not() (in module tilelang.language.tir.op) bitwise_or() (in module tilelang.language.tir.op) bitwise_xor() (in module tilelang.language.tir.op) Block (class in tilelang.carver.roller.bestfit) block_col_warps (tilelang.primitives.gemm.base.GemmBaseParams attribute) block_reduction_depth (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) block_row_warps (tilelang.primitives.gemm.base.GemmBaseParams attribute) block_rv (tilelang.carver.analysis.BlockInfo attribute) BlockAnalyzer (class in tilelang.carver.roller.node) BlockInfo (class in tilelang.carver.analysis) blocks (tilelang.language.kernel.KernelLaunchFrame property) buffer_device_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) buffer_dtype_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) buffer_load_to_tile_region() (in module tilelang.language.copy) buffer_region_to_tile_region() (in module tilelang.language.copy) buffer_to_tile_region() (in module tilelang.language.copy) BufferProxy (class in tilelang.language.proxy) C C (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.primitives.gemm.base.GemmBaseParams attribute) c2d_im2col() (in module tilelang.language.copy) cache_input_tensors (tilelang.autotuner.CompileArgs attribute), [1] (tilelang.autotuner.JITContext attribute), [1] cached() (in module tilelang.cache) (tilelang.cache.kernel_cache.KernelCache method) calculate() (tilelang.tools.Analyzer.Analyzer method) call_cpacked() (in module tilelang.language.tir.op) call_cpacked_lowered() (in module tilelang.language.tir.op) call_extern() (in module tilelang.language.tir.op) call_intrin() (in module tilelang.language.tir.op) call_llvm_intrin() (in module tilelang.language.tir.op) call_llvm_pure_intrin() (in module tilelang.language.tir.op) call_packed() (in module tilelang.language.tir.op) call_packed_lowered() (in module tilelang.language.tir.op) CALL_PREFIX (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) call_pure_extern() (in module tilelang.language.tir.op) call_tir() (in module tilelang.language.tir.op) callback_libdevice_path() (in module tilelang.contrib.nvcc) canMapHostMemory (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) canon_target_host() (in module tilelang.engine.lower) cdiv() (in module tilelang.math) CDNA (class in tilelang.carver.arch.cdna) ceil() (in module tilelang.language.tir.op) ceildiv() (in module tilelang.language.tir.op) check_cuda_availability() (in module tilelang.utils.target) check_func_with_dynamic() (in module tilelang.carver.analysis) check_hip_availability() (in module tilelang.utils.target) check_sm_version() (in module tilelang.carver.arch.cuda) check_tensor_list_compatibility() (in module tilelang.autotuner) check_tile_shape_isvalid() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) chunk (tilelang.primitives.gemm.base.GemmBaseParams attribute) clamp() (in module tilelang.language.customize) class_attributes (tilelang.carver.template.conv.ConvTemplate property) (tilelang.carver.template.elementwise.ElementwiseTemplate property) (tilelang.carver.template.flashattention.FlashAttentionTemplate property) (tilelang.carver.template.gemv.GEMVTemplate property) (tilelang.carver.template.general_reduce.GeneralReductionTemplate property) (tilelang.carver.template.matmul.MatmulTemplate property) (tilelang.primitives.gemm.base.GemmBaseParams property) clear() (in module tilelang.language.fill) clear_cache() (in module tilelang.cache) (tilelang.cache.kernel_cache.KernelCache method) clockRate (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) ClusterPlanning() (in module tilelang.transform) clz() (in module tilelang.language.tir.op) coalesced_factor() (in module tilelang.carver.roller.policy.common) coalesced_tensor_shape() (in module tilelang.carver.roller.policy.common) collect_block_iter_vars_used_in_access_region() (in module tilelang.carver.analysis) collect_vars_from_expr() (in module tilelang.carver.matmul_analysis) collect_vars_used_in_prim_expr() (in module tilelang.carver.analysis) comm_reducer() (in module tilelang.language.tir.op) compile() (in module tilelang.jit) compile_cuda() (in module tilelang.contrib.nvcc) compile_hip() (in module tilelang.contrib.hipcc) compile_lib() (tilelang.jit.adapter.libgen.LibraryGenerator method) CompileArgs (class in tilelang.autotuner) CompiledArtifact (class in tilelang.engine.param) complete_config() (tilelang.carver.roller.hint.Hint method) compute_elements_from_shape() (tilelang.carver.roller.hint.Stride method) compute_node_stride_map() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) compute_strides_from_shape() (tilelang.carver.roller.hint.Stride method) compute_tile_dict() (tilelang.carver.roller.policy.default.DefaultPolicy method) compute_warp_partition() (tilelang.primitives.gemm.base.GemmWarpPolicy method) compute_workload_per_item() (tilelang.carver.roller.policy.default.DefaultPolicy method) computeMode (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) concurrentKernels (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) condense_rep_var() (tilelang.layout.fragment.Fragment method) config (tilelang.autotuner.AutotuneResult attribute), [1] ConfigIndexBitwidth() (in module tilelang.transform) construct_dependency_target() (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) convert_func() (in module tilelang.contrib.dlpack) ConvTemplate (class in tilelang.carver.template.conv) copy() (in module tilelang.language.copy) copysign() (in module tilelang.language.tir.op) cos() (in module tilelang.language.tir.op) cosh() (in module tilelang.language.tir.op) CPU (class in tilelang.carver.arch.cpu) create_barriers() (in module tilelang.language.tir.op) create_call_func() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) create_dispatch_func() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) create_executable() (in module tilelang.contrib.cc) create_shared() (in module tilelang.contrib.cc) create_staticlib() (in module tilelang.contrib.cc) CreateListofMBarrierOp() (in module tilelang.language.builtin) CreateTMADescriptorOp() (in module tilelang.language.builtin) cross_compiler() (in module tilelang.contrib.cc) CtypesKernelAdapter (class in tilelang.jit.adapter.ctypes.adapter) CUDA (class in tilelang.carver.arch.cuda) cudaDeviceProp (class in tilelang.carver.arch.driver.cuda_driver) Current() (tilelang.language.frame.LetFrame class method) (tilelang.language.kernel.KernelLaunchFrame class method) CythonKernelAdapter (class in tilelang.jit.adapter.cython.adapter) CythonKernelWrapper (class in tilelang.jit.adapter.cython.cython_wrapper) D D (tilelang.carver.template.conv.ConvTemplate attribute), [1] deduplicate() (tilelang.carver.roller.shape_inference.tir.TensorDepNode method) default_align (tilelang.language.proxy.BaseTensorProxy attribute) default_offset_factor (tilelang.language.proxy.BaseTensorProxy attribute) default_scope (tilelang.language.proxy.BaseTensorProxy attribute) (tilelang.language.proxy.FragmentBufferProxy attribute) (tilelang.language.proxy.LocalBufferProxy attribute) (tilelang.language.proxy.SharedBufferProxy attribute) DefaultPolicy (class in tilelang.carver.roller.policy.default) DependencyAnalysis (class in tilelang.carver.roller.shape_inference.tir) deprecated() (in module tilelang.utils.deprecated) detect_dominant_read() (in module tilelang.carver.analysis) detect_iter_traits() (in module tilelang.carver.matmul_analysis) determine_profiler() (tilelang.profiler.Profiler method) determine_target() (in module tilelang.utils.target) device_codegen() (in module tilelang.engine.lower) device_codegen_without_compile() (in module tilelang.engine.lower) device_mod (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) deviceOverlap (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) dfs_smem_tile() (tilelang.carver.roller.policy.default.DefaultPolicy method) disable_cache() (in module tilelang.env) div() (in module tilelang.language.tir.op) do_bench() (in module tilelang.profiler.bench) (tilelang.profiler.Profiler method) dom (tilelang.carver.analysis.IterInfo property) dom() (tilelang.carver.analysis.BlockInfo method) dom_kind() (tilelang.carver.analysis.BlockInfo method) dp4a() (in module tilelang.language.customize) dst_id (tilelang.carver.roller.node.Edge attribute) dst_node (tilelang.carver.roller.node.Edge attribute) dtype (tilelang.carver.template.elementwise.ElementwiseTemplate attribute), [1] (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) (tilelang.engine.param.KernelParam attribute) dtype_abbrv (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) dynamic_symbolic_map (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) E ECCEnabled (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) Edge (class in tilelang.carver.roller.node) ElementwiseTemplate (class in tilelang.carver.template.elementwise) emit() (tilelang.TqdmLoggingHandler method) emit_config() (tilelang.carver.roller.policy.default.DefaultPolicy method) enable_cache() (in module tilelang.env) end_profile_intrinsic() (in module tilelang.language.tir.op) equivalent_function() (tilelang.carver.template.base.BaseTemplate method) erf() (in module tilelang.language.tir.op) estimated_time (tilelang.tools.Analyzer.AnalysisResult attribute), [1] exp() (in module tilelang.language.tir.op) exp10() (in module tilelang.language.tir.op) exp2() (in module tilelang.language.tir.op) expected_bandwidth_GBps (tilelang.tools.Analyzer.AnalysisResult attribute) expected_tflops (tilelang.tools.Analyzer.AnalysisResult attribute) export_library() (tilelang.jit.kernel.JITKernel method) extent (tilelang.carver.matmul_analysis.IterTrait attribute) extent_wrapper() (tilelang.carver.roller.node.PrimFuncNode method) extrac_params() (in module tilelang.engine.lower) extract_thread_binding() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) F F (tilelang.carver.template.conv.ConvTemplate attribute), [1] factorize() (in module tilelang.carver.roller.policy.common) FenceProxyAsyncOp() (in module tilelang.language.builtin) fill() (in module tilelang.language.fill) find_arg_idx_from_buffer_chain() (in module tilelang.carver.matmul_analysis) find_cuda_path() (in module tilelang.contrib.nvcc) find_first_similar_buffer() (in module tilelang.carver.matmul_analysis) find_first_similar_region() (in module tilelang.carver.matmul_analysis) find_last_producer_from_buffer() (in module tilelang.carver.matmul_analysis) find_lld() (in module tilelang.contrib.rocm) find_path_from_source() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) find_rocm_path() (in module tilelang.contrib.rocm) find_topo_sort() (in module tilelang.carver.roller.node) find_topo_sort_priority() (in module tilelang.carver.roller.node) find_var_from_func() (in module tilelang.carver.analysis) FlashAttentionTemplate (class in tilelang.carver.template.flashattention) FlattenBuffer() (in module tilelang.transform) floor() (in module tilelang.language.tir.op) floordiv() (in module tilelang.language.tir.op) floormod() (in module tilelang.language.tir.op) fmod() (in module tilelang.language.tir.op) footprint() (tilelang.carver.roller.node.PrimFuncNode method) forward() (tilelang.jit.adapter.cython.cython_wrapper.CythonKernelWrapper method) Fragment (class in tilelang.layout.fragment) FragmentBufferProxy (class in tilelang.language.proxy) FrameStack (class in tilelang.language.frame) (class in tilelang.language.kernel) free() (tilelang.carver.roller.bestfit.BestFit method) from_buffer() (tilelang.engine.param.KernelParam class method) from_database() (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter class method) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter class method) (tilelang.jit.kernel.JITKernel class method) from_dict() (tilelang.carver.roller.hint.Hint class method) from_kernel() (tilelang.autotuner.AutoTuner class method) from_output_nodes() (tilelang.carver.roller.policy.default.DefaultPolicy class method) from_prim_func() (tilelang.carver.roller.policy.default.DefaultPolicy class method) from_ptr() (tilelang.language.proxy.BaseTensorProxy method) (tilelang.language.proxy.BufferProxy method) from_tilelang_function() (tilelang.jit.kernel.JITKernel class method) from_var() (tilelang.engine.param.KernelParam class method) from_warp_partition() (tilelang.primitives.gemm.base.GemmWarpPolicy class method) FrontendLegalize() (in module tilelang.transform) FullCol (tilelang.primitives.gemm.base.GemmWarpPolicy attribute) FullRow (tilelang.primitives.gemm.base.GemmWarpPolicy attribute) func (tilelang.autotuner.AutotuneResult attribute), [1] (tilelang.carver.roller.policy.default.DefaultPolicy attribute) (tilelang.jit.adapter.base.BaseKernelAdapter attribute) (tilelang.profiler.Profiler property) G gemm() (in module tilelang.language.gemm) (in module tilelang.primitives.gemm) gemm_rrr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) gemm_rsr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) gemm_srr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) gemm_ssr() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) GemmBaseParams (class in tilelang.primitives.gemm.base) GemmPrimitiveMMA (class in tilelang.primitives.gemm.gemm_mma) GemmWarpPolicy (class in tilelang.primitives.gemm.base) GEMVTemplate (class in tilelang.carver.template.gemv) GeneralReductionTemplate (class in tilelang.carver.template.general_reduce) generate_tma_descriptor_args() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) get_all_factors() (in module tilelang.carver.roller.policy.common) get_analyzer_by_tir() (in module tilelang.carver.roller.shape_inference.tir) get_annotated_mod() (in module tilelang.jit.adapter.utils) get_arch() (in module tilelang.carver.arch) get_available_cpu_count() (in module tilelang.autotuner) get_avaliable_tensorintrin_shapes() (tilelang.carver.arch.arch_base.TileDevice method) (tilelang.carver.arch.cuda.CUDA method) get_base_tile() (tilelang.carver.roller.policy.default.DefaultPolicy method) get_block() (in module tilelang.carver.common_schedules) get_block_binding() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_bindings() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_block_extent() (tilelang.language.kernel.KernelLaunchFrame method) get_block_info() (tilelang.carver.roller.node.BlockAnalyzer method) get_block_name() (tilelang.carver.roller.node.BlockAnalyzer method) get_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) get_buffer_dtype() (tilelang.carver.roller.node.PrimFuncNode method) get_buffer_elems() (in module tilelang.utils.language) get_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) get_cache_dir() (in module tilelang.jit.adapter.cython.adapter) get_cached_lib() (in module tilelang.jit.adapter.cython.adapter) get_cc() (in module tilelang.contrib.cc) get_coalesced_veclen() (in module tilelang.carver.analysis) get_code() (tilelang.carver.roller.rasterization.NoRasterization method) (tilelang.carver.roller.rasterization.Rasterization method) (tilelang.carver.roller.rasterization.Rasterization2DColumn method) (tilelang.carver.roller.rasterization.Rasterization2DRow method) get_consumer_blocks() (tilelang.carver.roller.node.BlockAnalyzer method) get_cplus_compiler() (in module tilelang.contrib.cc) get_cpu_init_func() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) get_cuda_device_properties() (in module tilelang.carver.arch.driver.cuda_driver) get_cuda_version() (in module tilelang.contrib.nvcc) get_cython_compiler() (in module tilelang.jit.adapter.cython.adapter) get_dequantize_block() (in module tilelang.carver.matmul_analysis) get_device_attribute() (in module tilelang.carver.arch.driver.cuda_driver) get_device_call() (in module tilelang.engine.lower) get_device_function() (tilelang.carver.roller.rasterization.Rasterization2DColumn method) get_device_name() (in module tilelang.carver.arch.driver.cuda_driver) get_dtype() (tilelang.carver.roller.node.Node method) get_dynamic_symbolic_set() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) get_forward_vars() (tilelang.layout.layout.Layout method) get_global_symbol_section_map() (in module tilelang.contrib.cc) get_hardware_aware_configs() (tilelang.carver.template.base.BaseTemplate method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) get_host_call() (in module tilelang.engine.lower) get_host_source() (tilelang.jit.kernel.JITKernel method) get_in_out_dtypes() (in module tilelang.carver.matmul_analysis) get_index_map() (in module tilelang.carver.matmul_analysis) get_init_func() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) get_input_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) (tilelang.carver.roller.node.PrimFuncNode method) get_input_exprs() (tilelang.carver.roller.shape_inference.common.InputShapeInference method) (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) get_input_shape() (tilelang.layout.layout.Layout method) get_ir() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.OutputNode method) (tilelang.carver.roller.node.PlaceHolderNode method) get_kernel_source() (tilelang.jit.adapter.base.BaseKernelAdapter method) (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter method) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter method) (tilelang.jit.kernel.JITKernel method) get_ladder_stage3_map() (in module tilelang.carver.matmul_analysis) get_ldmatrix_index_map() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) get_ldmatrix_offset() (in module tilelang.intrinsics.utils) get_let_value() (in module tilelang.language.frame) get_lib_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) get_max_dynamic_shared_size_bytes() (in module tilelang.carver.arch.driver.cuda_driver) get_max_shared_memory_per_block() (in module tilelang.carver.analysis) get_max_threads_per_block() (in module tilelang.carver.analysis) get_mma_micro_size() (in module tilelang.intrinsics.utils) get_node_reduce_step_candidates() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) get_num_threads() (tilelang.language.kernel.KernelLaunchFrame method) get_opt_shape() (tilelang.carver.roller.node.PrimFuncNode method) get_or_create_node() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) get_output_blocks() (in module tilelang.carver.common_schedules) get_output_buffers() (tilelang.carver.roller.node.BlockAnalyzer method) get_output_shape() (tilelang.layout.layout.Layout method) get_producer_blocks() (tilelang.carver.roller.node.BlockAnalyzer method) get_profiler() (tilelang.jit.kernel.JITKernel method) get_propagate_map() (in module tilelang.carver.matmul_analysis) get_rasterization_code() (in module tilelang.carver.utils) get_reduce_axis() (tilelang.carver.roller.node.BlockAnalyzer method) get_reduce_inputs_dtype() (tilelang.carver.roller.node.PrimFuncNode method) get_reduction_blocks() (in module tilelang.carver.analysis) get_roller_hints_from_func() (in module tilelang.carver.utils) get_roller_hints_from_output_nodes() (in module tilelang.carver.utils) get_root_block() (in module tilelang.carver.analysis) get_rstep() (tilelang.carver.roller.hint.TileDict method) get_shape() (tilelang.carver.roller.node.Node method) get_shared_memory_per_block() (in module tilelang.carver.arch.driver.cuda_driver) get_source_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) get_space_dim() (tilelang.carver.roller.node.PrimFuncNode method) get_spatial_axis() (tilelang.carver.roller.node.BlockAnalyzer method) get_store_index_map() (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) get_stream_type() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) (tilelang.jit.adapter.wrapper.TLHIPSourceWrapper method) get_swizzle_layout() (in module tilelang.intrinsics.mma_layout) get_tag() (tilelang.carver.roller.node.Node method) get_target_by_dump_machine() (in module tilelang.contrib.cc) get_tensor_supply() (in module tilelang.utils.tensor) get_tensorized_func_and_tags() (in module tilelang.carver.matmul_analysis) get_thread_binding() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_bindings() (in module tilelang.language.kernel) (tilelang.language.kernel.KernelLaunchFrame method) get_thread_extent() (tilelang.language.kernel.KernelLaunchFrame method) get_thread_size() (tilelang.layout.fragment.Fragment method) get_tile() (tilelang.carver.roller.hint.TileDict method) get_value() (tilelang.language.frame.FrameStack method) (tilelang.language.frame.LetFrame static method) get_warp_size() (tilelang.primitives.gemm.base.GemmBaseParams method) GetMBarrierOp() (in module tilelang.language.builtin) globalL1CacheSupported (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) grid() (in module tilelang.language.tir.ir) H H (tilelang.carver.template.conv.ConvTemplate attribute), [1] has_arch() (tilelang.carver.template.base.BaseTemplate method) has_device_kernel_launch() (in module tilelang.engine.lower) has_let_value() (in module tilelang.language.frame) has_mma_support() (in module tilelang.carver.arch.cuda) has_value() (tilelang.language.frame.FrameStack method) (tilelang.language.frame.LetFrame static method) have_cudagraph() (in module tilelang.contrib.nvcc) have_fp16() (in module tilelang.contrib.nvcc) have_int8() (in module tilelang.contrib.nvcc) have_matrixcore() (in module tilelang.contrib.rocm) have_tensorcore() (in module tilelang.contrib.nvcc) head_dim (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) Hint (class in tilelang.carver.roller.hint) host_codegen() (in module tilelang.engine.lower) host_mod (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) host_source (tilelang.jit.kernel.JITKernel property) hypot() (in module tilelang.language.tir.op) I if_then_else() (in module tilelang.language.tir.op) IfStmtBinding() (in module tilelang.transform) import_source() (in module tilelang.language) in_dtype (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA property) index (tilelang.layout.layout.Layout property) index_to_coordinates() (in module tilelang.intrinsics.utils) indexdiv() (in module tilelang.language.tir.op) indexmod() (in module tilelang.language.tir.op) infer() (tilelang.carver.roller.shape_inference.common.InputShapeInference method) (tilelang.carver.roller.shape_inference.tir.InputShapeInference method) infer_block_partition() (tilelang.primitives.gemm.base.GemmBaseParams method) infer_node_smem_usage() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) infer_tensorcore_axis() (tilelang.carver.roller.node.PrimFuncNode method) infinity() (in module tilelang.language.tir.op) INIT_FUNC (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) initialize_function() (tilelang.carver.template.base.BaseTemplate method) (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) InjectFenceProxy() (in module tilelang.transform) InjectPTXAsyncCopy() (in module tilelang.transform) InjectSoftwarePipeline() (in module tilelang.transform) inline_transpose_block() (in module tilelang.carver.matmul_analysis) inputs (tilelang.carver.roller.node.Node property) InputShapeInference (class in tilelang.carver.roller.shape_inference.common) (class in tilelang.carver.roller.shape_inference.tir) INT4TensorCoreIntrinEmitter (class in tilelang.intrinsics.mma_macro_generator) INT4TensorCoreIntrinEmitterWithLadderTransform (class in tilelang.intrinsics.mma_macro_generator) Integer (tilelang.utils.tensor.TensorSupplyType attribute) integrated (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) inter_transform_a (tilelang.carver.roller.hint.IntrinInfo property) inter_transform_b (tilelang.carver.roller.hint.IntrinInfo property) InterWarpTransform (tilelang.common.transform_kind.TransformKind attribute) IntraWarpTransform (tilelang.common.transform_kind.TransformKind attribute) IntrinInfo (class in tilelang.carver.roller.hint) inverse() (tilelang.layout.layout.Layout method) invoke() (tilelang.primitives.gemm.gemm_mma.GemmPrimitiveMMA method) ir_module (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) ir_pass() (tilelang.tools.Analyzer.Analyzer method) is_ada_arch() (in module tilelang.carver.arch.cuda) is_ampere_arch() (in module tilelang.carver.arch.cuda) (tilelang.carver.template.base.BaseTemplate method) is_boolean() (tilelang.engine.param.KernelParam method) is_broadcast_epilogue() (in module tilelang.carver.analysis) is_cache_enabled() (in module tilelang.env) is_causal (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) is_cdna_arch() (in module tilelang.carver.arch.cdna) (tilelang.carver.template.base.BaseTemplate method) is_cpu_arch() (in module tilelang.carver.arch.cpu) is_cpu_device_backend() (in module tilelang.engine.lower) is_cpu_target() (in module tilelang.jit.adapter.utils) is_cuda_arch() (in module tilelang.carver.arch.cuda) is_cuda_target() (in module tilelang.jit.adapter.utils) is_device_call() (in module tilelang.engine.lower) is_device_call_c_device() (in module tilelang.engine.lower) is_dynamic (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) is_elementwise() (tilelang.carver.analysis.BlockInfo method) is_float8() (tilelang.engine.param.KernelParam method) is_fragment() (in module tilelang.utils.language) is_full_col() (tilelang.primitives.gemm.base.GemmWarpPolicy method) is_full_row() (tilelang.primitives.gemm.base.GemmWarpPolicy method) is_gemm() (tilelang.carver.analysis.BlockInfo method) is_gemv() (tilelang.carver.analysis.BlockInfo method) is_global() (in module tilelang.utils.language) is_hip_target() (in module tilelang.jit.adapter.utils) is_hopper_arch() (in module tilelang.carver.arch.cuda) is_identity_block() (in module tilelang.carver.matmul_analysis) is_identity_or_transpose_block() (in module tilelang.carver.matmul_analysis) is_injective() (tilelang.carver.analysis.BlockInfo method) is_input_8bit() (tilelang.carver.roller.hint.IntrinInfo method) is_inter_warp_transform() (tilelang.common.transform_kind.TransformKind method) is_intra_warp_transform() (tilelang.common.transform_kind.TransformKind method) is_ld_matrix_transform() (tilelang.common.transform_kind.TransformKind method) is_local() (in module tilelang.utils.language) is_m_first (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) is_non_transform() (tilelang.common.transform_kind.TransformKind method) is_output() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.OutputNode method) is_placeholder() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.PlaceHolderNode method) is_reduction() (tilelang.carver.analysis.BlockInfo method) is_scalar() (tilelang.engine.param.KernelParam method) is_shared() (in module tilelang.utils.language) is_shared_dynamic() (in module tilelang.utils.language) is_square() (tilelang.primitives.gemm.base.GemmWarpPolicy method) is_tensorcore_supported_precision() (in module tilelang.carver.arch.cuda) is_tma_descriptor_arg() (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) is_transpose_block() (in module tilelang.carver.matmul_analysis) is_unsigned() (tilelang.engine.param.KernelParam method) is_valid() (tilelang.carver.roller.hint.Stride method) is_volta_arch() (in module tilelang.carver.arch.cuda) (tilelang.carver.template.base.BaseTemplate method) isfinite() (in module tilelang.language.tir.op) isinf() (in module tilelang.language.tir.op) isMultiGpuBoard (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) isnan() (in module tilelang.language.tir.op) isnullptr() (in module tilelang.language.tir.op) IterInfo (class in tilelang.carver.analysis) IterKind (class in tilelang.carver.matmul_analysis) iters (tilelang.carver.analysis.BlockInfo attribute) IterTrait (class in tilelang.carver.matmul_analysis) J jit() (in module tilelang.autotuner) (in module tilelang.jit) JITContext (class in tilelang.autotuner) JITKernel (class in tilelang.jit.kernel) K K (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] k_pack (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.primitives.gemm.base.GemmBaseParams attribute) kernel (tilelang.autotuner.AutotuneResult attribute), [1] (tilelang.autotuner.JITContext attribute), [1] Kernel() (in module tilelang.language.kernel) kernel_global_source (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) kernel_source (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.kernel.JITKernel property) KernelCache (class in tilelang.cache.kernel_cache) kernelExecTimeoutEnabled (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) KernelLaunchFrame (class in tilelang.language.kernel) KernelParam (class in tilelang.engine.param) kind (tilelang.carver.analysis.IterInfo attribute) (tilelang.carver.matmul_analysis.IterTrait attribute) kIter_I (tilelang.carver.matmul_analysis.IterKind attribute) kIter_J (tilelang.carver.matmul_analysis.IterKind attribute) kIter_K (tilelang.carver.matmul_analysis.IterKind attribute) kIter_S (tilelang.carver.matmul_analysis.IterKind attribute) kIter_T (tilelang.carver.matmul_analysis.IterKind attribute) L l2CacheSize (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) latency (tilelang.autotuner.AutotuneResult attribute), [1] Layout (class in tilelang.layout.layout) layout_propagate_chain() (in module tilelang.carver.matmul_analysis) LayoutInference() (in module tilelang.transform) ldexp() (in module tilelang.language.tir.op) ldmatrix_16x32_to_shared_16x32_layout_a() (in module tilelang.intrinsics.mma_layout) ldmatrix_16x32_to_shared_16x32_layout_b() (in module tilelang.intrinsics.mma_layout) ldmatrix_32x16_to_shared_16x32_layout_a() (in module tilelang.intrinsics.mma_layout) ldmatrix_32x16_to_shared_16x32_layout_b() (in module tilelang.intrinsics.mma_layout) ldmatrix_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) ldmatrix_a() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) ldmatrix_b() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) ldmatrix_trans_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) LDMatrixTransform (tilelang.common.transform_kind.TransformKind attribute) LegalizeSafeMemoryAccess() (in module tilelang.transform) LegalizeVectorizedLoop() (in module tilelang.transform) LetFrame (class in tilelang.language.frame) lib (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) lib_code (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) libcode (tilelang.autotuner.AutotuneResult attribute), [1] libpath (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) LibraryGenerator (class in tilelang.jit.adapter.libgen) likely() (in module tilelang.language.tir.op) load_lib() (tilelang.jit.adapter.libgen.LibraryGenerator method) LocalBufferProxy (class in tilelang.language.proxy) localL1CacheSupported (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) log() (in module tilelang.language.tir.op) log10() (in module tilelang.language.tir.op) log1p() (in module tilelang.language.tir.op) log2() (in module tilelang.language.tir.op) lookup_param() (in module tilelang.language.tir.op) loop_rv (tilelang.carver.analysis.IterInfo attribute) LoopVectorizeDynamic() (in module tilelang.transform) lower() (in module tilelang.engine.lower) LowerAndLegalize() (in module tilelang.engine.phase) LowerDeviceStorageAccessInfo() (in module tilelang.transform) LowerHopperIntrin() (in module tilelang.transform) LowerTileOp() (in module tilelang.transform) luid (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) luidDeviceNodeMask (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) M M (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] M_DIM (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) macro() (in module tilelang.language.tir.entry) main() (in module tilelang.testing) major (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) make_iter_fusion_index_map() (in module tilelang.carver.matmul_analysis) make_mfma_swizzle_layout() (in module tilelang.intrinsics.mfma_layout) make_mma_load_layout() (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) make_mma_store_layout() (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) make_mma_swizzle_layout() (in module tilelang.intrinsics.mma_layout) make_reverse() (tilelang.carver.roller.shape_inference.tir.Statement method) make_swizzled_layout() (in module tilelang.layout.fragment) (in module tilelang.layout.swizzle) make_tensor() (in module tilelang.language.proxy) MakePackedAPI() (in module tilelang.transform) malloc() (tilelang.carver.roller.bestfit.BestFit method) managedMemory (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) map_forward_index() (tilelang.layout.layout.Layout method) map_forward_thread() (tilelang.layout.fragment.Fragment method) map_torch_type() (in module tilelang.utils.tensor) match_declare_kernel() (in module tilelang.jit.adapter.utils) match_declare_kernel_cpu() (in module tilelang.jit.adapter.utils) match_global_kernel() (in module tilelang.jit.adapter.utils) MatmulTemplate (class in tilelang.carver.template.matmul) MatrixCoreIntrinEmitter (class in tilelang.intrinsics.mfma_macro_generator) max_mismatched_ratio (tilelang.autotuner.CompileArgs attribute), [1] (tilelang.autotuner.JITContext attribute), [1] max_value() (in module tilelang.language.tir.op) maxGridSize (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxSurface1D (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxSurface1DLayered (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxSurface2D (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxSurface2DLayered (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxSurface3D (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxSurfaceCubemap (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxSurfaceCubemapLayered (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture1D (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture1DLayered (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture1DLinear (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture1DMipmap (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture2D (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture2DGather (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture2DLayered (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture2DLinear (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture2DMipmap (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture3D (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTexture3DAlt (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTextureCubemap (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxTextureCubemapLayered (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxThreadsDim (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxThreadsPerBlock (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) maxThreadsPerMultiProcessor (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) MBarrierExpectTX() (in module tilelang.language.builtin) MBarrierWaitParity() (in module tilelang.language.builtin) memoryBusWidth (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) memoryClockRate (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) memPitch (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) merge() (tilelang.carver.roller.bestfit.Block method) MergeIfStmt() (in module tilelang.transform) mfma() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) mfma_store_index_map() (in module tilelang.intrinsics.utils) min_value() (in module tilelang.language.tir.op) minor (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) mma() (tilelang.intrinsics.mma_macro_generator.INT4TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.INT4TensorCoreIntrinEmitterWithLadderTransform method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitterWithLadderTransform method) mma_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) mma_fill() (in module tilelang.language.tir.op) mma_store() (in module tilelang.language.tir.op) mma_store_32x8_to_shared_16x16_layout() (in module tilelang.intrinsics.mma_layout) mma_store_index_map() (in module tilelang.intrinsics.utils) module tilelang tilelang.autotuner tilelang.cache tilelang.cache.kernel_cache tilelang.carver tilelang.carver.analysis tilelang.carver.arch tilelang.carver.arch.arch_base tilelang.carver.arch.cdna tilelang.carver.arch.cpu tilelang.carver.arch.cuda tilelang.carver.arch.driver tilelang.carver.arch.driver.cuda_driver tilelang.carver.common_schedules tilelang.carver.matmul_analysis tilelang.carver.roller tilelang.carver.roller.bestfit tilelang.carver.roller.hint tilelang.carver.roller.node tilelang.carver.roller.policy tilelang.carver.roller.policy.common tilelang.carver.roller.policy.default tilelang.carver.roller.policy.tensorcore tilelang.carver.roller.rasterization tilelang.carver.roller.shape_inference tilelang.carver.roller.shape_inference.common tilelang.carver.roller.shape_inference.tir tilelang.carver.template tilelang.carver.template.base tilelang.carver.template.conv tilelang.carver.template.elementwise tilelang.carver.template.flashattention tilelang.carver.template.gemv tilelang.carver.template.general_reduce tilelang.carver.template.matmul tilelang.carver.utils tilelang.common tilelang.common.transform_kind tilelang.contrib tilelang.contrib.cc tilelang.contrib.dlpack tilelang.contrib.hipcc tilelang.contrib.nvcc tilelang.contrib.rocm tilelang.engine tilelang.engine.callback tilelang.engine.lower tilelang.engine.param tilelang.engine.phase tilelang.env tilelang.intrinsics tilelang.intrinsics.mfma_layout tilelang.intrinsics.mfma_macro_generator tilelang.intrinsics.mma_layout tilelang.intrinsics.mma_macro_generator tilelang.intrinsics.utils tilelang.jit tilelang.jit.adapter tilelang.jit.adapter.base tilelang.jit.adapter.ctypes tilelang.jit.adapter.ctypes.adapter tilelang.jit.adapter.cython tilelang.jit.adapter.cython.adapter tilelang.jit.adapter.cython.cython_wrapper tilelang.jit.adapter.dlpack tilelang.jit.adapter.libgen tilelang.jit.adapter.utils tilelang.jit.adapter.wrapper tilelang.jit.env tilelang.jit.kernel tilelang.language tilelang.language.allocate tilelang.language.builtin tilelang.language.copy tilelang.language.customize tilelang.language.fill tilelang.language.frame tilelang.language.gemm tilelang.language.kernel tilelang.language.logical tilelang.language.memscope tilelang.language.parallel tilelang.language.pipeline tilelang.language.print tilelang.language.proxy tilelang.language.reduce tilelang.language.tir tilelang.language.tir.entry tilelang.language.tir.ir tilelang.language.tir.op tilelang.layout tilelang.layout.fragment tilelang.layout.layout tilelang.layout.swizzle tilelang.math tilelang.primitives tilelang.primitives.gemm tilelang.primitives.gemm.base tilelang.primitives.gemm.gemm_mma tilelang.profiler tilelang.profiler.bench tilelang.testing tilelang.tools tilelang.tools.Analyzer tilelang.tools.plot_layout tilelang.transform tilelang.transform.simplify tilelang.utils tilelang.utils.deprecated tilelang.utils.language tilelang.utils.target tilelang.utils.tensor multiGpuBoardGroupID (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) multiProcessorCount (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) MultiVersionBuffer() (in module tilelang.transform) N N (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] N_DIM (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) name (tilelang.carver.analysis.BlockInfo attribute) (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) nearbyint() (in module tilelang.language.tir.op) next_power_of_2() (in module tilelang.math) nextafter() (in module tilelang.language.tir.op) Node (class in tilelang.carver.roller.node) nodes (tilelang.carver.roller.policy.default.DefaultPolicy attribute) NonTransform (tilelang.common.transform_kind.TransformKind attribute) NoRasterization (class in tilelang.carver.roller.rasterization) Normal (tilelang.utils.tensor.TensorSupplyType attribute) normalize_prim_func() (in module tilelang.carver.analysis) normalize_to_matmul() (in module tilelang.carver.matmul_analysis) NoSetMaxNReg() (in module tilelang.language.builtin) num_heads (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) num_outputs() (tilelang.carver.roller.node.Node method) num_threads (tilelang.language.kernel.KernelLaunchFrame property) O One (tilelang.utils.tensor.TensorSupplyType attribute) OptimizeForTarget() (in module tilelang.engine.phase) out_dtype (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] out_idx (tilelang.autotuner.CompileArgs attribute), [1], [2] (tilelang.autotuner.JITContext attribute), [1] (tilelang.jit.kernel.JITKernel property) output_nodes (tilelang.carver.template.base.BaseTemplate property) OutputNode (class in tilelang.carver.roller.node) outputs (tilelang.carver.roller.node.Node property) P P (tilelang.carver.template.conv.ConvTemplate attribute), [1] panel_width (tilelang.carver.roller.rasterization.Rasterization property) panel_width_ (tilelang.carver.roller.rasterization.Rasterization attribute) Parallel() (in module tilelang.language.parallel) parallel() (in module tilelang.language.tir.ir) param_dtypes (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) param_shapes (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) params (tilelang.engine.param.CompiledArtifact attribute) (tilelang.jit.kernel.JITKernel property) (tilelang.profiler.Profiler attribute), [1] params_as_dict() (tilelang.carver.template.conv.ConvTemplate method) (tilelang.carver.template.elementwise.ElementwiseTemplate method) (tilelang.carver.template.flashattention.FlashAttentionTemplate method) (tilelang.carver.template.gemv.GEMVTemplate method) (tilelang.carver.template.general_reduce.GeneralReductionTemplate method) (tilelang.carver.template.matmul.MatmulTemplate method) (tilelang.primitives.gemm.base.GemmBaseParams method) parse_compute_version() (in module tilelang.contrib.nvcc) (in module tilelang.contrib.rocm) parse_source_information() (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) pass_configs (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) pciBusID (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) pciDeviceID (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) pciDomainID (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) persistingL2CacheMaxSize (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) pipeline_stage (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) Pipelined() (in module tilelang.language.pipeline) PipelinePlanning() (in module tilelang.transform) PlaceHolderNode (class in tilelang.carver.roller.node) plan_rasterization() (tilelang.carver.roller.policy.default.DefaultPolicy method) (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy method) plot_layout() (in module tilelang.tools.plot_layout) policy (tilelang.primitives.gemm.base.GemmBaseParams attribute) pop() (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) popcount() (in module tilelang.language.tir.op) pow() (in module tilelang.language.tir.op) power() (in module tilelang.language.tir.op) pre_order_traverse() (in module tilelang.carver.roller.node) prim_func (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper property) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper property) prim_func() (in module tilelang.language.tir.entry) PrimFuncNode (class in tilelang.carver.roller.node) print() (in module tilelang.language.print) print_dependencies() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) Profiler (class in tilelang.profiler) propagate() (tilelang.carver.roller.node.PrimFuncNode method) propagate_inputs() (tilelang.carver.roller.node.PrimFuncNode method) propagate_inputs_on_reduction() (tilelang.carver.roller.node.PrimFuncNode method) propagate_outputs() (tilelang.carver.roller.node.PrimFuncNode method) propagate_reduction_inputs() (tilelang.carver.roller.node.PrimFuncNode method) ptr() (in module tilelang.language.proxy) ptr_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) ptx_arrive_barrier() (in module tilelang.language.tir.op) ptx_arrive_barrier_expect_tx() (in module tilelang.language.tir.op) ptx_commit_group() (in module tilelang.language.tir.op) ptx_cp_async() (in module tilelang.language.tir.op) ptx_cp_async_barrier() (in module tilelang.language.tir.op) ptx_cp_async_bulk() (in module tilelang.language.tir.op) ptx_init_barrier_thread_count() (in module tilelang.language.tir.op) ptx_ldmatrix() (in module tilelang.language.tir.op) ptx_mma() (in module tilelang.language.tir.op) ptx_mma_sp() (in module tilelang.language.tir.op) ptx_wait_barrier() (in module tilelang.language.tir.op) ptx_wait_group() (in module tilelang.language.tir.op) push() (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) Q q_multiply_shift() (in module tilelang.language.tir.op) q_multiply_shift_per_axis() (in module tilelang.language.tir.op) R Randn (tilelang.utils.tensor.TensorSupplyType attribute) Rasterization (class in tilelang.carver.roller.rasterization) Rasterization2DColumn (class in tilelang.carver.roller.rasterization) Rasterization2DRow (class in tilelang.carver.roller.rasterization) raxis_order (tilelang.carver.roller.hint.Hint property) recommend_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) recommend_hints() (tilelang.carver.template.base.BaseTemplate method) reduce() (in module tilelang.language.reduce) reduce_absmax() (in module tilelang.language.reduce) reduce_abssum() (in module tilelang.language.reduce) reduce_max() (in module tilelang.language.reduce) reduce_min() (in module tilelang.language.reduce) reduce_sum() (in module tilelang.language.reduce) ref_latency (tilelang.autotuner.AutotuneResult attribute), [1] ref_prog (tilelang.autotuner.CompileArgs attribute), [1], [2] (tilelang.autotuner.JITContext attribute), [1] region() (in module tilelang.language.copy) region_exist_in_list() (in module tilelang.carver.roller.shape_inference.tir) register_cuda_postproc() (in module tilelang.engine.callback) register_cuda_postproc_callback() (in module tilelang.engine.callback) register_hip_postproc() (in module tilelang.engine.callback) register_hip_postproc_callback() (in module tilelang.engine.callback) regsPerBlock (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) regsPerMultiprocessor (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) reinterpret() (in module tilelang.language.tir.op) remove_lib() (tilelang.jit.adapter.libgen.LibraryGenerator method) repeat() (tilelang.layout.fragment.Fragment method) replicate() (tilelang.layout.fragment.Fragment method) reserved (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) reserved1 (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) reserved2 (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) reshape() (in module tilelang.language.customize) result_idx (tilelang.profiler.Profiler attribute), [1] ret() (in module tilelang.language.tir.op) retrieve_func_from_module() (in module tilelang.carver.utils) (in module tilelang.utils.language) RewriteWgmmaSync() (in module tilelang.transform) rocm_link() (in module tilelang.contrib.rocm) round() (in module tilelang.language.tir.op) rsqrt() (in module tilelang.language.tir.op) rt_mod (tilelang.engine.param.CompiledArtifact attribute) rtol (tilelang.autotuner.CompileArgs attribute), [1] (tilelang.autotuner.JITContext attribute), [1] run() (tilelang.autotuner.AutoTuner method) run_once() (tilelang.jit.kernel.JITKernel method) (tilelang.profiler.Profiler method) S S (tilelang.carver.template.conv.ConvTemplate attribute), [1] score_block_size() (tilelang.carver.roller.policy.default.DefaultPolicy method) seq_kv_length (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) seq_length (tilelang.carver.template.flashattention.FlashAttentionTemplate attribute) serial() (in module tilelang.language.tir.ir) set_buffer_device_map() (tilelang.jit.adapter.cython.cython_wrapper.CythonKernelWrapper method) set_buffer_dtype_map() (tilelang.jit.adapter.cython.cython_wrapper.CythonKernelWrapper method) set_compile_args() (tilelang.autotuner.AutoTuner method) set_dtype() (tilelang.carver.roller.node.Node method) (tilelang.carver.roller.node.PrimFuncNode method) set_dynamic_symbolic_map() (tilelang.jit.adapter.cython.cython_wrapper.CythonKernelWrapper method) set_function() (tilelang.carver.template.base.BaseTemplate method) set_inputs() (tilelang.carver.roller.node.Node method) set_lib_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) set_log_level() (in module tilelang) set_output_nodes() (tilelang.carver.template.base.BaseTemplate method) set_outputs() (tilelang.carver.roller.node.Node method) set_ptr_map() (tilelang.jit.adapter.cython.cython_wrapper.CythonKernelWrapper method) set_random_seed() (in module tilelang.testing) set_shape() (tilelang.carver.roller.node.Node method) set_src_path() (tilelang.jit.adapter.libgen.LibraryGenerator method) set_static_shape_map() (tilelang.jit.adapter.cython.cython_wrapper.CythonKernelWrapper method) set_tag() (tilelang.carver.roller.node.Node method) SetMaxNReg() (in module tilelang.language.builtin) shape (tilelang.carver.template.elementwise.ElementwiseTemplate attribute), [1] (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) (tilelang.engine.param.KernelParam attribute) shared_16x16_to_ldmatrix_64x4_layout() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_A() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_B() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_local_64x4_layout_C() (in module tilelang.intrinsics.mfma_layout) shared_16x16_to_mma_32x8_layout() (in module tilelang.intrinsics.mma_layout) (in module tilelang.intrinsics.utils) shared_16x16_to_mma_32x8_layout_rs() (in module tilelang.intrinsics.mma_layout) shared_16x16_to_mma_32x8_layout_sr() (in module tilelang.intrinsics.mma_layout) shared_16x16_to_mma_32x8_layout_trans() (in module tilelang.intrinsics.mma_layout) shared_16x16_to_mma_32x8_smoothlayout() (in module tilelang.intrinsics.mma_layout) shared_16x32_to_local_64x8_layout_A() (in module tilelang.intrinsics.mfma_layout) shared_16x32_to_local_64x8_layout_B() (in module tilelang.intrinsics.mfma_layout) shared_16x32_to_mma_32x16_layout() (in module tilelang.intrinsics.mma_layout) (in module tilelang.intrinsics.utils) shared_16x32_to_mma_32x16_smoothlayout() (in module tilelang.intrinsics.mma_layout) shared_16x4_to_local_64x1_layout_A() (in module tilelang.intrinsics.mfma_layout) shared_32x16_to_mma_32x16_layout() (in module tilelang.intrinsics.mma_layout) (in module tilelang.intrinsics.utils) shared_32x16_to_mma_32x16_smoothlayout() (in module tilelang.intrinsics.mma_layout) shared_4x16_to_local_64x1_layout_B() (in module tilelang.intrinsics.mfma_layout) SharedBufferProxy (class in tilelang.language.proxy) sharedMemPerBlock (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) sharedMemPerMultiprocessor (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) shift_left() (in module tilelang.language.tir.op) shift_right() (in module tilelang.language.tir.op) sigmoid() (in module tilelang.language.tir.op) Simplify() (in module tilelang.transform.simplify) simplify_prim_func() (in module tilelang.transform.simplify) sin() (in module tilelang.language.tir.op) sinh() (in module tilelang.language.tir.op) size() (tilelang.carver.roller.bestfit.Block method) (tilelang.language.kernel.FrameStack method) skip_check (tilelang.autotuner.CompileArgs attribute), [1] (tilelang.autotuner.JITContext attribute), [1] smooth_a (tilelang.carver.roller.hint.IntrinInfo property) smooth_b (tilelang.carver.roller.hint.IntrinInfo property) sqrt() (in module tilelang.language.tir.op) Square (tilelang.primitives.gemm.base.GemmWarpPolicy attribute) src_id (tilelang.carver.roller.node.Edge attribute) src_node (tilelang.carver.roller.node.Edge attribute) srcpath (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter property) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter property) (tilelang.jit.adapter.libgen.LibraryGenerator attribute) start_profile_intrinsic() (in module tilelang.language.tir.op) Statement (class in tilelang.carver.roller.shape_inference.common) (class in tilelang.carver.roller.shape_inference.tir) static_shape_map (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) step (tilelang.carver.roller.hint.Hint property) stmatrix() (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter method) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter method) streamPrioritiesSupported (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) Stride (class in tilelang.carver.roller.hint) stride (tilelang.carver.roller.hint.Stride property) structure (tilelang.carver.template.general_reduce.GeneralReductionTemplate attribute) supply_prog (tilelang.autotuner.CompileArgs attribute), [1], [2] (tilelang.autotuner.JITContext attribute), [1] supply_type (tilelang.autotuner.CompileArgs attribute), [1], [2] (tilelang.autotuner.JITContext attribute), [1] (tilelang.profiler.Profiler attribute), [1] surfaceAlignment (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) symbolic() (in module tilelang.language) T tags (tilelang.carver.roller.policy.default.DefaultPolicy attribute) tan() (in module tilelang.language.tir.op) tanh() (in module tilelang.language.tir.op) target (tilelang.autotuner.CompileArgs attribute), [1] (tilelang.autotuner.JITContext attribute), [1] (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) (tilelang.jit.adapter.wrapper.TLWrapper attribute) tccDriver (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) tensorcore_legalization() (tilelang.carver.roller.hint.Hint method) TensorCoreExtraConfig (class in tilelang.carver.roller.hint) TensorCoreIntrinEmitter (class in tilelang.intrinsics.mma_macro_generator) TensorCoreIntrinEmitterWithLadderTransform (class in tilelang.intrinsics.mma_macro_generator) TensorCorePolicy (class in tilelang.carver.roller.policy.tensorcore) TensorDepNode (class in tilelang.carver.roller.shape_inference.tir) TensorInstruction (class in tilelang.carver.arch.cuda) TensorProxy (class in tilelang.language.proxy) TensorSupplyType (class in tilelang.utils.tensor) textureAlignment (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) texturePitchAlignment (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) tflops (tilelang.tools.Analyzer.AnalysisResult attribute) thread (tilelang.layout.fragment.Fragment property) thread_binding() (in module tilelang.language.tir.ir) thread_id_shared_access_64x1_to_16x4_layout_A() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x1_to_4x16_layout_B() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_A() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_B() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_C_m_n() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x4_to_16x16_layout_C_n_m() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x8_to_16x32_layout_A() (in module tilelang.intrinsics.mfma_layout) thread_id_shared_access_64x8_to_16x32_layout_B() (in module tilelang.intrinsics.mfma_layout) ThreadPartialSync() (in module tilelang.transform) threads (tilelang.language.kernel.KernelLaunchFrame property) ThreadSync() (in module tilelang.transform) TileDevice (class in tilelang.carver.arch.arch_base) TileDict (class in tilelang.carver.roller.hint) tilelang module tilelang.autotuner module tilelang.cache module tilelang.cache.kernel_cache module tilelang.carver module tilelang.carver.analysis module tilelang.carver.arch module tilelang.carver.arch.arch_base module tilelang.carver.arch.cdna module tilelang.carver.arch.cpu module tilelang.carver.arch.cuda module tilelang.carver.arch.driver module tilelang.carver.arch.driver.cuda_driver module tilelang.carver.common_schedules module tilelang.carver.matmul_analysis module tilelang.carver.roller module tilelang.carver.roller.bestfit module tilelang.carver.roller.hint module tilelang.carver.roller.node module tilelang.carver.roller.policy module tilelang.carver.roller.policy.common module tilelang.carver.roller.policy.default module tilelang.carver.roller.policy.tensorcore module tilelang.carver.roller.rasterization module tilelang.carver.roller.shape_inference module tilelang.carver.roller.shape_inference.common module tilelang.carver.roller.shape_inference.tir module tilelang.carver.template module tilelang.carver.template.base module tilelang.carver.template.conv module tilelang.carver.template.elementwise module tilelang.carver.template.flashattention module tilelang.carver.template.gemv module tilelang.carver.template.general_reduce module tilelang.carver.template.matmul module tilelang.carver.utils module tilelang.common module tilelang.common.transform_kind module tilelang.contrib module tilelang.contrib.cc module tilelang.contrib.dlpack module tilelang.contrib.hipcc module tilelang.contrib.nvcc module tilelang.contrib.rocm module tilelang.engine module tilelang.engine.callback module tilelang.engine.lower module tilelang.engine.param module tilelang.engine.phase module tilelang.env module tilelang.intrinsics module tilelang.intrinsics.mfma_layout module tilelang.intrinsics.mfma_macro_generator module tilelang.intrinsics.mma_layout module tilelang.intrinsics.mma_macro_generator module tilelang.intrinsics.utils module tilelang.jit module tilelang.jit.adapter module tilelang.jit.adapter.base module tilelang.jit.adapter.ctypes module tilelang.jit.adapter.ctypes.adapter module tilelang.jit.adapter.cython module tilelang.jit.adapter.cython.adapter module tilelang.jit.adapter.cython.cython_wrapper module tilelang.jit.adapter.dlpack module tilelang.jit.adapter.libgen module tilelang.jit.adapter.utils module tilelang.jit.adapter.wrapper module tilelang.jit.env module tilelang.jit.kernel module tilelang.language module tilelang.language.allocate module tilelang.language.builtin module tilelang.language.copy module tilelang.language.customize module tilelang.language.fill module tilelang.language.frame module tilelang.language.gemm module tilelang.language.kernel module tilelang.language.logical module tilelang.language.memscope module tilelang.language.parallel module tilelang.language.pipeline module tilelang.language.print module tilelang.language.proxy module tilelang.language.reduce module tilelang.language.tir module tilelang.language.tir.entry module tilelang.language.tir.ir module tilelang.language.tir.op module tilelang.layout module tilelang.layout.fragment module tilelang.layout.layout module tilelang.layout.swizzle module tilelang.math module tilelang.primitives module tilelang.primitives.gemm module tilelang.primitives.gemm.base module tilelang.primitives.gemm.gemm_mma module tilelang.profiler module tilelang.profiler.bench module tilelang.testing module tilelang.tools module tilelang.tools.Analyzer module tilelang.tools.plot_layout module tilelang.transform module tilelang.transform.simplify module tilelang.utils module tilelang.utils.deprecated module tilelang.utils.language module tilelang.utils.target module tilelang.utils.tensor module TLCPUSourceWrapper (class in tilelang.jit.adapter.wrapper) TLCUDASourceWrapper (class in tilelang.jit.adapter.wrapper) TLHIPSourceWrapper (class in tilelang.jit.adapter.wrapper) TLWrapper (class in tilelang.jit.adapter.wrapper) TMALoadOp() (in module tilelang.language.builtin) TMAStoreArrive() (in module tilelang.language.builtin) TMAStoreWait() (in module tilelang.language.builtin) to_dict() (tilelang.carver.roller.hint.Hint method) to_prime_factors() (tilelang.primitives.gemm.base.GemmWarpPolicy static method) to_pytorch_func() (in module tilelang.contrib.dlpack) top() (tilelang.language.frame.FrameStack method) (tilelang.language.kernel.FrameStack method) topo_order() (in module tilelang.carver.roller.node) torch_assert_close() (in module tilelang.utils.tensor) torch_function (tilelang.jit.kernel.JITKernel attribute), [1] TorchDLPackKernelAdapter (class in tilelang.jit.adapter.dlpack) total_flops (tilelang.tools.Analyzer.AnalysisResult attribute) total_global_bytes (tilelang.tools.Analyzer.AnalysisResult attribute), [1] totalConstMem (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) totalGlobalMem (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) TqdmLoggingHandler (class in tilelang) trace() (in module tilelang.language.tir.op) trans_A (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] trans_B (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] TransformKind (class in tilelang.common.transform_kind) transpose_A (tilelang.primitives.gemm.base.GemmBaseParams attribute) transpose_B (tilelang.primitives.gemm.base.GemmBaseParams attribute) traverse_dependencies() (tilelang.carver.roller.shape_inference.tir.DependencyAnalysis method) trunc() (in module tilelang.language.tir.op) truncdiv() (in module tilelang.language.tir.op) truncmod() (in module tilelang.language.tir.op) try_inline() (in module tilelang.carver.common_schedules) try_inline_contiguous_spatial() (in module tilelang.carver.common_schedules) tvm_access_ptr() (in module tilelang.language.tir.op) tvm_bmma_sync() (in module tilelang.language.tir.op) tvm_check_return() (in module tilelang.language.tir.op) tvm_fill_fragment() (in module tilelang.language.tir.op) tvm_load_matrix_sync() (in module tilelang.language.tir.op) tvm_mfma() (in module tilelang.language.tir.op) tvm_mfma_store() (in module tilelang.language.tir.op) tvm_mma_sync() (in module tilelang.language.tir.op) tvm_rdna_wmma() (in module tilelang.language.tir.op) tvm_rdna_wmma_store() (in module tilelang.language.tir.op) tvm_stack_alloca() (in module tilelang.language.tir.op) tvm_stack_make_array() (in module tilelang.language.tir.op) tvm_stack_make_shape() (in module tilelang.language.tir.op) tvm_storage_sync() (in module tilelang.language.tir.op) tvm_store_matrix_sync() (in module tilelang.language.tir.op) tvm_struct_get() (in module tilelang.language.tir.op) tvm_struct_set() (in module tilelang.language.tir.op) tvm_thread_allreduce() (in module tilelang.language.tir.op) tvm_thread_invariant() (in module tilelang.language.tir.op) tvm_throw_last_error() (in module tilelang.language.tir.op) tvm_tuple() (in module tilelang.language.tir.op) tvm_warp_activemask() (in module tilelang.language.tir.op) tvm_warp_shuffle() (in module tilelang.language.tir.op) tvm_warp_shuffle_down() (in module tilelang.language.tir.op) tvm_warp_shuffle_up() (in module tilelang.language.tir.op) TVMBackendAllocWorkspace() (in module tilelang.language.tir.op) TVMBackendFreeWorkspace() (in module tilelang.language.tir.op) type_annotation() (in module tilelang.language.tir.op) U undef() (in module tilelang.language.tir.op) unifiedAddressing (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) Uniform (tilelang.utils.tensor.TensorSupplyType attribute) unroll() (in module tilelang.language.tir.ir) update_lib_code() (tilelang.jit.adapter.libgen.LibraryGenerator method) (tilelang.jit.adapter.wrapper.TLCPUSourceWrapper method) (tilelang.jit.adapter.wrapper.TLCUDASourceWrapper method) update_tags() (tilelang.carver.roller.node.Node method) use_async_copy (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) use_swizzle() (in module tilelang.language) uuid (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) V var (tilelang.carver.analysis.IterInfo attribute) vectorcombine() (in module tilelang.language.tir.op) vectorhigh() (in module tilelang.language.tir.op) vectorized() (in module tilelang.language.tir.ir) VectorizeLoop() (in module tilelang.transform) vectorlow() (in module tilelang.language.tir.op) view() (in module tilelang.language.customize) vscale() (in module tilelang.language.tir.op) W W (tilelang.carver.template.conv.ConvTemplate attribute), [1] WaitWgmma() (in module tilelang.language.builtin) walk_indice() (in module tilelang.carver.roller.shape_inference.tir) warp_col_tiles (tilelang.primitives.gemm.base.GemmBaseParams attribute) warp_row_tiles (tilelang.primitives.gemm.base.GemmBaseParams attribute) WARP_SIZE (tilelang.intrinsics.mfma_macro_generator.MatrixCoreIntrinEmitter attribute) (tilelang.intrinsics.mma_macro_generator.TensorCoreIntrinEmitter attribute) warpSize (tilelang.carver.arch.driver.cuda_driver.cudaDeviceProp attribute) WarpSpecialized() (in module tilelang.transform) WarpSpecializedPipeline() (in module tilelang.transform) with_arch() (tilelang.carver.template.base.BaseTemplate method) with_bias (tilelang.carver.template.conv.ConvTemplate attribute), [1] (tilelang.carver.template.gemv.GEMVTemplate attribute) (tilelang.carver.template.matmul.MatmulTemplate attribute), [1] with_default_adapter() (tilelang.profiler.Profiler method) wmma_k (tilelang.carver.roller.policy.tensorcore.TensorCorePolicy attribute) wrap() (tilelang.jit.adapter.wrapper.BaseWrapper method) (tilelang.jit.adapter.wrapper.TLWrapper method) wrapped_source (tilelang.jit.adapter.ctypes.adapter.CtypesKernelAdapter attribute) (tilelang.jit.adapter.cython.adapter.CythonKernelAdapter attribute) Z Zero (tilelang.utils.tensor.TensorSupplyType attribute)