Source code for zvec_db.rerankers.utils.base_utils

"""Common utilities for rerankers in zvec-db.

This module provides shared functionality used across multiple reranker
implementations, including score extraction, document text extraction,
and other common operations.
"""

from __future__ import annotations

from typing import Optional

from zvec.model.doc import Doc

# Sentinel value to distinguish "not provided" from "explicitly None".
# It is a unique, module-private object, so it should be compared by identity
# (``value is _UNSET``), never by equality.
# NOTE(review): not referenced by the functions visible in this module;
# presumably used by other code in the module or package -- confirm before removing.
_UNSET = object()


def extract_score(doc: Doc) -> float:
    """Extract score from a document, handling various numeric types.

    The raw ``doc.score`` value is converted with ``float()``. Any value that
    cannot be converted falls back to 0.0 instead of raising, so callers can
    always treat the result as a number.

    Args:
        doc (Doc): Document with a ``score`` attribute.

    Returns:
        float: Score as a float, or 0.0 if the score is None or invalid
        (wrong type, unparsable string, or an integer too large to be
        represented as a float).

    Example:
        >>> doc = Doc(id="1", score=0.8)
        >>> extract_score(doc)
        0.8
        >>> doc_no_score = Doc(id="2", score=None)
        >>> extract_score(doc_no_score)
        0.0
    """
    score = doc.score
    if score is None:
        return 0.0
    try:
        return float(score)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. a list).
        # ValueError: string that does not parse as a number.
        # OverflowError: integer outside the float range (e.g. 10**400).
        return 0.0
def extract_field_score(doc: Doc, field_name: str) -> float:
    """Extract score from a specific document field.

    The value stored under ``field_name`` in ``doc.fields`` is converted with
    ``float()``. A missing ``fields`` attribute, an empty/None ``fields``
    mapping, a missing key, a None value, or a value that cannot be converted
    all yield 0.0 instead of raising.

    Args:
        doc (Doc): Document with fields attribute.
        field_name (str): Name of the field to extract score from.

    Returns:
        float: Field score as a float, or 0.0 if the field is missing or
        non-numeric (including integers too large to be represented as a
        float).

    Example:
        >>> doc = Doc(id="1", fields={"title_score": 0.9, "content_score": 0.7})
        >>> extract_field_score(doc, "title_score")
        0.9
        >>> extract_field_score(doc, "missing_field")
        0.0
    """
    # getattr keeps this usable with objects that carry no ``fields`` at all.
    fields = getattr(doc, "fields", None)
    if not fields:
        return 0.0
    field_value = fields.get(field_name)
    if field_value is None:
        return 0.0
    try:
        return float(field_value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. a list).
        # ValueError: string that does not parse as a number.
        # OverflowError: integer outside the float range (e.g. 10**400).
        return 0.0
def get_document_text(doc: Doc, rerank_field: Optional[str] = None) -> str:
    """Extract document text for scoring or embedding.

    This function attempts to extract text content from a document using the
    following strategy:

    1. If ``rerank_field`` is specified and the document has that field with
       a non-None value, use it.
    2. Otherwise, try common field names: "content", "text", "body",
       "passage" (first one present with a non-None value wins).
    3. If no field matches, concatenate all non-None field values, separated
       by single spaces.
    4. As a last resort, return the document ID as a string.

    A field whose value is None is treated as missing at every step, so the
    literal text ``"None"`` is never returned as document content.

    Args:
        doc (Doc): Document to extract text from.
        rerank_field (Optional[str]): Specific field name to use. If None
            (or empty), uses the fallback strategy. Defaults to None.

    Returns:
        str: Extracted document text.

    Example:
        >>> doc = Doc(id="1", fields={"content": "Hello world", "title": "Test"})
        >>> get_document_text(doc)
        'Hello world'
        >>> get_document_text(doc, rerank_field="title")
        'Test'
    """
    common_fields = ("content", "text", "body", "passage")
    # The caller's preferred field is tried first, then the common names.
    candidates = ((rerank_field,) if rerank_field else ()) + common_fields

    for field_name in candidates:
        if doc.has_field(field_name):
            value = doc.field(field_name)
            # A None value carries no text; keep looking rather than
            # returning the string "None".
            if value is not None:
                return str(value)

    # Fallback: use all fields concatenated (skipping None values).
    fields = getattr(doc, "fields", None)
    if fields:
        parts = [str(value) for value in fields.values() if value is not None]
        if parts:
            return " ".join(parts)

    return str(doc.id)