vllm.entrypoints.score_utils

ScoreContentPartParam module-attribute

ScoreContentPartParam: TypeAlias = Union[
    ChatCompletionContentPartImageParam,
    ChatCompletionContentPartImageEmbedsParam,
]
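
For example, an image referenced by URL can be passed as a content part following the OpenAI-style content-part dict shape (a sketch; the URL is illustrative):

image_part: ScoreContentPartParam = {
    "type": "image_url",
    "image_url": {"url": "https://example.com/dog.jpg"},
}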

ScoreMultiModalParam

Bases: TypedDict

A specialized parameter type for scoring multimodal content

Reasons why we don't reuse CustomChatCompletionMessageParam directly:

1. Score tasks don't need the 'role' field (user/assistant/system) that chat completions require.
2. Including chat-specific fields would confuse users about their purpose in scoring.
3. This is a more focused interface that only exposes what's needed for scoring.

Source code in vllm/entrypoints/score_utils.py
class ScoreMultiModalParam(TypedDict, total=False):
    """
    A specialized parameter type for scoring multimodal content

    Reasons why we don't reuse `CustomChatCompletionMessageParam` directly:
    1. Score tasks don't need the 'role' field (user/assistant/system) that's required in chat completions
    2. Including chat-specific fields would confuse users about their purpose in scoring
    3. This is a more focused interface that only exposes what's needed for scoring
    """ # noqa: E501
    content: Required[list[ScoreContentPartParam]]
    """The multimodal contents"""

content instance-attribute

The multimodal contents
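
A minimal sketch of constructing this parameter, reusing the image_url content-part shape shown above (the URL is illustrative):

param: ScoreMultiModalParam = {
    "content": [
        {
            "type": "image_url",
            "image_url": {"url": "https://example.com/dog.jpg"},
        },
    ],
}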

_cosine_similarity

_cosine_similarity(
    tokenizer: Union[
        PreTrainedTokenizer, PreTrainedTokenizerFast
    ],
    embed_1: list[PoolingRequestOutput],
    embed_2: list[PoolingRequestOutput],
) -> list[PoolingRequestOutput]
Source code in vllm/entrypoints/score_utils.py
def _cosine_similarity(
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    embed_1: list[PoolingRequestOutput],
    embed_2: list[PoolingRequestOutput],
) -> list[PoolingRequestOutput]:

    scorer = CosineSimilarity(0)
    scores: list[PoolingRequestOutput] = []

    for emb_1, emb_2 in zip(embed_1, embed_2):
        pair_score = scorer(emb_1.outputs.data, emb_2.outputs.data)

        padding = []
        if (pad_token_id := getattr(tokenizer, "pad_token_id",
                                    None)) is not None:
            padding = [pad_token_id]

        tokens = emb_1.prompt_token_ids + padding + emb_2.prompt_token_ids

        scores.append(
            PoolingRequestOutput(
                request_id=f"{emb_1.request_id}_{emb_2.request_id}",
                outputs=pair_score,
                prompt_token_ids=tokens,
                finished=True))

    return scores
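
The function zips the two embedding lists elementwise and scores each pair. A minimal sketch of the underlying computation, assuming the scorer is torch.nn.CosineSimilarity (imported above as CosineSimilarity); the vectors are illustrative:

import torch
from torch.nn import CosineSimilarity

scorer = CosineSimilarity(dim=0)
emb_1 = torch.tensor([0.1, 0.9, 0.3])
emb_2 = torch.tensor([0.2, 0.8, 0.4])
print(float(scorer(emb_1, emb_2)))  # scalar cosine similarity in [-1, 1]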

_parse_score_content

_parse_score_content(
    data: Union[str, ScoreContentPartParam],
    mm_tracker: BaseMultiModalItemTracker,
) -> Optional[_ContentPart]
Source code in vllm/entrypoints/score_utils.py
def _parse_score_content(
    data: Union[str, ScoreContentPartParam],
    mm_tracker: BaseMultiModalItemTracker,
) -> Optional[_ContentPart]:

    if isinstance(data, str):
        data = ChatCompletionContentPartTextParam(type="text", text=data)

    mm_parser = mm_tracker.create_parser()

    parse_res = _parse_chat_message_content_part(
        data,
        mm_parser,
        wrap_dicts=False,
        interleave_strings=False,
    )

    if parse_res:
        return parse_res

    mm_placeholder_storage = mm_parser.mm_placeholder_storage()

    if len(mm_placeholder_storage) != 1 or len(
            next(iter(mm_placeholder_storage.values()))) != 1:
        raise ValueError("Only one multi-modal item is supported")

    return next(iter(mm_placeholder_storage.values()))[0]

_validate_score_input_lens

_validate_score_input_lens(
    data_1: Union[list[str], list[ScoreContentPartParam]],
    data_2: Union[list[str], list[ScoreContentPartParam]],
)
Source code in vllm/entrypoints/score_utils.py
def _validate_score_input_lens(
    data_1: Union[list[str], list[ScoreContentPartParam]],
    data_2: Union[list[str], list[ScoreContentPartParam]],
):
    len_1 = len(data_1)
    len_2 = len(data_2)

    if len_1 > 1 and len_1 != len_2:
        raise ValueError("Input lengths must be either 1:1, 1:N or N:N")
    if len_1 == 0:
        raise ValueError("At least one text element must be given")
    if len_2 == 0:
        raise ValueError("At least one text_pair element must be given")

apply_score_template

apply_score_template(
    model_config: ModelConfig, prompt_1: str, prompt_2: str
) -> str
Source code in vllm/entrypoints/score_utils.py
def apply_score_template(
    model_config: ModelConfig,
    prompt_1: str,
    prompt_2: str,
) -> str:
    # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
    from vllm.model_executor.model_loader import get_model_cls

    model = get_model_cls(model_config)
    if supports_score_template(model):
        full_prompt = model.get_score_template(prompt_1, prompt_2)
        if full_prompt is None:
            raise ValueError("Get empty score template from model")
        return full_prompt

    raise ValueError(
        f"Unsupported model architecture: {model_config.architecture}")

get_score_prompt

get_score_prompt(
    model_config: ModelConfig,
    tokenizer: AnyTokenizer,
    tokenization_kwargs: dict[str, Any],
    data_1: Union[str, ScoreContentPartParam],
    data_2: Union[str, ScoreContentPartParam],
) -> tuple[str, TokensPrompt]
Source code in vllm/entrypoints/score_utils.py
def get_score_prompt(
    model_config: ModelConfig,
    tokenizer: AnyTokenizer,
    tokenization_kwargs: dict[str, Any],
    data_1: Union[str, ScoreContentPartParam],
    data_2: Union[str, ScoreContentPartParam],
) -> tuple[str, TokensPrompt]:
    prompt_1, prompt_2, mm_data = parse_score_data(
        data_1,
        data_2,
        model_config,
        tokenizer,
    )

    full_prompt = apply_score_template(model_config, prompt_1, prompt_2)

    prompt_inputs = tokenizer(full_prompt, **tokenization_kwargs)

    engine_prompt = TokensPrompt(prompt_token_ids=prompt_inputs["input_ids"])

    post_process_tokens(model_config, engine_prompt)

    if mm_data is not None:
        engine_prompt["multi_modal_data"] = mm_data
    return full_prompt, engine_prompt
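
The tokenization step can be mirrored with a plain Hugging Face tokenizer; TokensPrompt is a TypedDict, so a regular dict sketches its shape. The model name and tokenization kwargs below are illustrative:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
full_prompt = "Query: capital of France? Document: Paris is the capital."
prompt_inputs = tokenizer(full_prompt, truncation=True, max_length=512)
engine_prompt = {"prompt_token_ids": prompt_inputs["input_ids"]}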

parse_score_data

parse_score_data(
    data_1: Union[str, ScoreContentPartParam],
    data_2: Union[str, ScoreContentPartParam],
    model_config: ModelConfig,
    tokenizer: AnyTokenizer,
) -> tuple[str, str, Optional[MultiModalDataDict]]
Source code in vllm/entrypoints/score_utils.py
def parse_score_data(
    data_1: Union[str, ScoreContentPartParam],
    data_2: Union[str, ScoreContentPartParam],
    model_config: ModelConfig,
    tokenizer: AnyTokenizer,
) -> tuple[str, str, Optional[MultiModalDataDict]]:
    mm_tracker = MultiModalItemTracker(model_config, tokenizer)

    content_1 = _parse_score_content(data_1, mm_tracker)

    content_2 = _parse_score_content(data_2, mm_tracker)

    def ensure_str(content: Optional[_ContentPart]) -> str:
        if isinstance(content, str):
            return content
        raise ValueError(
            f"Only string content is supported, but got {content}.")

    prompt_1 = ensure_str(content_1)
    prompt_2 = ensure_str(content_2)

    return prompt_1, prompt_2, mm_tracker.all_mm_data()

post_process_tokens

post_process_tokens(
    model_config: ModelConfig, prompt: TokensPrompt
) -> None

Perform architecture-specific manipulations on the input tokens.

Note

This is an in-place operation.

Source code in vllm/entrypoints/score_utils.py
def post_process_tokens(
    model_config: ModelConfig,
    prompt: TokensPrompt,
) -> None:
    """
    Perform architecture-specific manipulations on the input tokens.

    Note:
        This is an in-place operation.
    """
    # NOTE(Simon): lazy import to avoid bringing in all dependencies (e.g. gguf)
    from vllm.model_executor.model_loader import get_model_cls

    model = get_model_cls(model_config)
    if supports_score_template(model):
        model.post_process_tokens(prompt)
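
A minimal sketch of the kind of in-place edit this performs; the helper name and appended token id are illustrative, not what any particular architecture does:

def _append_eos(prompt: dict, eos_token_id: int = 2) -> None:
    # Mutates the prompt in place, mirroring post_process_tokens' contract.
    prompt["prompt_token_ids"].append(eos_token_id)

engine_prompt = {"prompt_token_ids": [101, 2054, 102]}
_append_eos(engine_prompt)
print(engine_prompt["prompt_token_ids"])  # [101, 2054, 102, 2]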