kernels-community
/

flash-mla

Model card Files Files and versions

flash-mla / torch-ext /flash_mla /__init__.py

drbh

fix: remove unused trailing param

d76b04d 8 months ago

history blame contribute delete

725 Bytes

	import torch

	from ._ops import ops


	def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
	    """Call the compiled ``get_mla_metadata`` op and return its result.

	    The three arguments are forwarded positionally, in the order given,
	    and whatever the op produces is handed back unmodified.

	    Args:
	        seqlens_k: Tensor passed through as the op's first argument
	            (presumably per-sequence key lengths -- confirm against
	            the op schema).
	        s_q: Integer passed through as the op's second argument.
	        h_kv: Integer passed through as the op's third argument.

	    Returns:
	        The value returned by ``ops.get_mla_metadata``.
	    """
	    metadata = ops.get_mla_metadata(seqlens_k, s_q, h_kv)
	    return metadata


	def mha_fwd_kvcache_mla(
	    q: torch.Tensor,
	    kcache: torch.Tensor,
	    vcache_: torch.Tensor,
	    head_size_v: int,
	    seqlens_k: torch.Tensor,
	    block_table: torch.Tensor,
	    softmax_scale: float,
	    is_causal_: bool,
	    tile_scheduler_metadata: torch.Tensor,
	    num_splits: torch.Tensor,
	) -> torch.Tensor:
	    """Call the compiled ``mha_fwd_kvcache_mla`` op and return its result.

	    This is a pure pass-through: all ten arguments are forwarded
	    positionally in declaration order, with no validation or conversion
	    performed on the Python side.

	    Args:
	        q: Tensor forwarded as the op's first argument.
	        kcache: Tensor forwarded as the op's second argument.
	        vcache_: Tensor forwarded as the op's third argument.
	        head_size_v: Integer forwarded as the op's fourth argument.
	        seqlens_k: Tensor forwarded as the op's fifth argument.
	        block_table: Tensor forwarded as the op's sixth argument.
	        softmax_scale: Float forwarded as the op's seventh argument.
	        is_causal_: Boolean forwarded as the op's eighth argument.
	        tile_scheduler_metadata: Tensor forwarded as the op's ninth
	            argument (presumably produced by ``get_mla_metadata`` --
	            confirm with callers).
	        num_splits: Tensor forwarded as the op's tenth argument.

	    Returns:
	        The value returned by ``ops.mha_fwd_kvcache_mla``.
	    """
	    # NOTE(review): the annotation promises a single tensor; confirm that
	    # the registered op schema in ``._ops`` really returns one tensor.
	    result = ops.mha_fwd_kvcache_mla(
	        q, kcache, vcache_, head_size_v,
	        seqlens_k, block_table,
	        softmax_scale, is_causal_,
	        tile_scheduler_metadata, num_splits,
	    )
	    return result