Skip to content

vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe

_supports_current_device

_supports_current_device() -> bool

Supports only Blackwell-family GPUs.

Source code in vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
def _supports_current_device() -> bool:
    """Return True only on Blackwell-family CUDA GPUs."""
    platform = current_platform
    if not platform.is_cuda():
        return False
    # Capability family 100 corresponds to the Blackwell generation.
    return platform.is_device_capability_family(100)

_supports_no_act_and_mul

_supports_no_act_and_mul() -> bool

BF16 kernels do not support non-gated MoE

Source code in vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
def _supports_no_act_and_mul() -> bool:
    """BF16 kernels do not support non-gated MoE"""
    return False

_supports_parallel_config

_supports_parallel_config(
    moe_parallel_config: FusedMoEParallelConfig,
) -> bool

The TRTLLM kernel does not support EPLB.

Source code in vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
def _supports_parallel_config(moe_parallel_config: FusedMoEParallelConfig) -> bool:
    """Return True unless EPLB is enabled; the TRTLLM kernel does not support EPLB."""
    eplb_enabled = moe_parallel_config.enable_eplb
    return not eplb_enabled

is_supported_config_trtllm_bf16

is_supported_config_trtllm_bf16(
    moe_config: FusedMoEConfig,
    activation_format: FusedMoEActivationFormat,
) -> tuple[bool, str | None]

This method mirrors mk.FusedMoEPermuteExpertsUnpermute.is_supported_config for BF16 unquantized kernels.

Source code in vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py
def is_supported_config_trtllm_bf16(
    moe_config: FusedMoEConfig,
    activation_format: mk.FusedMoEActivationFormat,
) -> tuple[bool, str | None]:
    """
    This method mirrors mk.FusedMoEPermuteExpertsUnpermute.is_supported_config
    for BF16 unquantized kernels.

    Returns (True, None) when the config is supported, otherwise
    (False, human-readable reason).
    """

    def _unsupported(reason: str) -> tuple[bool, str]:
        return False, f"kernel does not support {reason}"

    # Guard clauses: each check short-circuits with the first failing reason.
    if not _supports_current_device():
        return _unsupported(f"current device {current_platform.device_name}")
    if not (moe_config.is_act_and_mul or _supports_no_act_and_mul()):
        return _unsupported("no act_and_mul MLP layer")
    if not _supports_activation(moe_config.activation):
        return _unsupported(f"{moe_config.activation} activation")
    if not _supports_parallel_config(moe_config.moe_parallel_config):
        return _unsupported(f"parallel config {moe_config.moe_parallel_config}")
    if not _supports_routing_method_bf16(moe_config.routing_method):
        return _unsupported(f"routing method {moe_config.routing_method}")
    if activation_format != mk.FusedMoEActivationFormat.Standard:
        return _unsupported(f"activation format {activation_format}")

    return True, None