Skip to content

vllm.multimodal.media.base

MediaIO

Bases: ABC, Generic[_T]

Configuration values can be provided by the user either via --media-io-kwargs or via the runtime API field "media_io_kwargs". Implementations should validate these values and handle errors properly.

Source code in vllm/multimodal/media/base.py
class MediaIO(ABC, Generic[_T]):
    """Configuration values can be user-provided either by --media-io-kwargs or
    by the runtime API field "media_io_kwargs". Ensure proper validation and
    error handling.
    """

    @classmethod
    def merge_kwargs(
        cls,
        default_kwargs: dict[str, Any] | None,
        runtime_kwargs: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Merge config-level kwargs and request-level kwargs.

        By default this performs a shallow merge where runtime kwargs override
        keys in default kwargs. Subclasses may override to apply modality-
        specific behavior.
        """
        merged = dict(default_kwargs or {})
        if runtime_kwargs:
            merged.update(runtime_kwargs)
        return merged

    @abstractmethod
    def load_bytes(self, data: bytes) -> _T:
        raise NotImplementedError

    @abstractmethod
    def load_base64(self, media_type: str, data: str) -> _T:
        """
        List of media types:
        https://www.iana.org/assignments/media-types/media-types.xhtml
        """
        raise NotImplementedError

    @abstractmethod
    def load_file(self, filepath: Path) -> _T:
        raise NotImplementedError

load_base64 abstractmethod

load_base64(media_type: str, data: str) -> _T

List of media types: https://www.iana.org/assignments/media-types/media-types.xhtml

Source code in vllm/multimodal/media/base.py
@abstractmethod
def load_base64(self, media_type: str, data: str) -> _T:
    """
    Abstract hook that concrete subclasses must override; this base
    version only raises NotImplementedError.

    NOTE(review): judging by the names, `data` is presumably a
    base64-encoded payload and `media_type` its media type string --
    confirm against the concrete implementations.

    List of media types:
    https://www.iana.org/assignments/media-types/media-types.xhtml
    """
    raise NotImplementedError

merge_kwargs classmethod

merge_kwargs(
    default_kwargs: dict[str, Any] | None,
    runtime_kwargs: dict[str, Any] | None,
) -> dict[str, Any]

Merge config-level kwargs and request-level kwargs.

By default this performs a shallow merge where runtime kwargs override keys in default kwargs. Subclasses may override to apply modality-specific behavior.

Source code in vllm/multimodal/media/base.py
@classmethod
def merge_kwargs(
    cls,
    default_kwargs: dict[str, Any] | None,
    runtime_kwargs: dict[str, Any] | None,
) -> dict[str, Any]:
    """Merge config-level kwargs and request-level kwargs.

    By default this performs a shallow merge where runtime kwargs override
    keys in default kwargs. Subclasses may override to apply modality-
    specific behavior.
    """
    merged = dict(default_kwargs or {})
    if runtime_kwargs:
        merged.update(runtime_kwargs)
    return merged

MediaWithBytes dataclass

Bases: Generic[_T]

Wrapper that couples a media object with its original encoded bytes.

This ensures the raw bytes and media object remain synchronized, preventing cache corruption from in-place modifications.

The wrapper delegates attribute access to the underlying media object, making it behave transparently like the wrapped type (e.g., PIL.Image).

NOTE: Currently, this wrapper is used only for the image modality.

Source code in vllm/multimodal/media/base.py
@dataclass
class MediaWithBytes(Generic[_T]):
    """
    Wrapper that couples a media object with its original encoded bytes.

    This ensures the raw bytes and media object remain synchronized,
    preventing cache corruption from in-place modifications.

    The wrapper delegates attribute access to the underlying media object,
    making it behave transparently like the wrapped type (e.g., PIL.Image).

    NOTE: Currently, this wrapper is used only for the image modality.
    """

    # The decoded media object that attribute access is delegated to.
    media: _T
    # The encoded bytes the media came from; left out of the generated repr.
    original_bytes: bytes = field(repr=False)

    def __array__(self, *args, **kwargs) -> np.ndarray:
        """Allow np.array(obj) to return np.array(obj.media)."""
        return np.array(self.media, *args, **kwargs)

    def __getstate__(self):
        """Return a shallow copy of the instance ``__dict__`` as the state."""
        return self.__dict__.copy()

    def __setstate__(self, state: dict[str, Any]):
        """Restore instance attributes from a state dict."""
        self.__dict__.update(state)

    def __getattr__(self, name: str):
        """Delegate attribute access to the underlying media object.

        Python calls this only after normal lookup has failed. If the
        missing attribute is ``media`` itself (e.g. on an instance created
        without ``__init__`` whose state has not been restored yet), there
        is nothing to delegate to.

        Raises:
            AttributeError: if ``media`` is not set on this instance, or if
                the wrapped media object lacks ``name``.
        """
        # Without this guard, reading ``self.media`` below would re-enter
        # ``__getattr__("media")`` forever and surface as RecursionError,
        # which also breaks ``hasattr()`` / ``getattr(obj, name, default)``.
        if name == "media":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self.media, name)

__array__

__array__(*args, **kwargs) -> ndarray

Allow np.array(obj) to return np.array(obj.media).

Source code in vllm/multimodal/media/base.py
def __array__(self, *args, **kwargs) -> np.ndarray:
    """Convert via the wrapped media so np.array(obj) equals np.array(obj.media).

    Any positional and keyword arguments are forwarded to ``np.array``
    unchanged.
    """
    wrapped = self.media
    return np.array(wrapped, *args, **kwargs)

__getattr__

__getattr__(name: str)

Delegate attribute access to the underlying media object.

Source code in vllm/multimodal/media/base.py
def __getattr__(self, name: str):
    """Delegate attribute access to the underlying media object.

    Python calls this only after normal lookup has failed. If the missing
    attribute is ``media`` itself (e.g. on an instance created without
    ``__init__`` whose state has not been restored yet), there is nothing
    to delegate to.

    Raises:
        AttributeError: if ``media`` is not set on this instance, or if the
            wrapped media object lacks ``name``.
    """
    # Without this guard, reading ``self.media`` below would re-enter
    # ``__getattr__("media")`` forever and surface as RecursionError,
    # which also breaks ``hasattr()`` / ``getattr(obj, name, default)``.
    if name == "media":
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {name!r}"
        )
    return getattr(self.media, name)