# Source code for zvec_db.rerankers.utils.base_utils
"""Common utilities for rerankers in zvec-db.
This module provides shared functionality used across multiple reranker
implementations, including score extraction, document text extraction,
and other common operations.
"""
from __future__ import annotations

import math
from typing import Optional

from zvec.model.doc import Doc
# Sentinel value to distinguish "not provided" from "explicitly None".
# It is a bare ``object()`` instance, so compare against it with ``is``.
# NOTE(review): nothing in the visible part of this module references it --
# confirm it is still used elsewhere before relying on or removing it.
_UNSET = object()
# [docs]
def extract_score(doc: Doc) -> float:
    """Extract the score from a document as a plain ``float``.

    Args:
        doc (Doc): Document with a ``score`` attribute.

    Returns:
        float: ``float(doc.score)``, or 0.0 if the score is None, cannot be
            converted to a float (wrong type, unparsable string, or an
            integer too large for a float), or is NaN.

    Example:
        >>> doc = Doc(id="1", score=0.8)
        >>> extract_score(doc)
        0.8
        >>> doc_no_score = Doc(id="2", score=None)
        >>> extract_score(doc_no_score)
        0.0
    """
    raw_score = doc.score
    if raw_score is None:
        return 0.0
    try:
        score = float(raw_score)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers ints too large for a float (e.g. 10 ** 400),
        # which float() rejects instead of returning infinity.
        return 0.0
    # NaN compares false against every value, so letting it through would
    # make any later sort or max over scores unreliable. Infinities are
    # well-ordered and are returned unchanged.
    return 0.0 if math.isnan(score) else score
# [docs]
def extract_field_score(doc: Doc, field_name: str) -> float:
    """Extract a score stored in one of a document's fields.

    Args:
        doc (Doc): Document with a ``fields`` mapping attribute.
        field_name (str): Name of the field to extract the score from.

    Returns:
        float: The field value as a float, or 0.0 if the document has no
            fields, the field is missing or None, the value cannot be
            converted to a float (wrong type, unparsable string, or an
            integer too large for a float), or the value is NaN.

    Example:
        >>> doc = Doc(id="1", fields={"title_score": 0.9, "content_score": 0.7})
        >>> extract_field_score(doc, "title_score")
        0.9
        >>> extract_field_score(doc, "missing_field")
        0.0
    """
    # getattr with a default tolerates objects that have no ``fields``
    # attribute at all; a None or empty mapping is treated the same way.
    fields = getattr(doc, "fields", None)
    if not fields:
        return 0.0
    raw_value = fields.get(field_name)
    if raw_value is None:
        return 0.0
    try:
        value = float(raw_value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers ints too large for a float (e.g. 10 ** 400),
        # which float() rejects instead of returning infinity.
        return 0.0
    # NaN compares false against every value, so letting it through would
    # make any later sort or max over scores unreliable.
    return 0.0 if math.isnan(value) else value
# [docs]
def get_document_text(doc: Doc, rerank_field: Optional[str] = None) -> str:
    """Extract document text for scoring or embedding.

    The text is chosen with the following strategy:

    1. If ``rerank_field`` is specified and the document has that field with
       a non-None value, use it.
    2. Otherwise, try common field names in order: "content", "text",
       "body", "passage", using the first one with a non-None value.
    3. If no field matches, concatenate all non-None field values,
       separated by single spaces.
    4. As a last resort, return the document ID as a string.

    Fields whose value is None are skipped at every step so that the literal
    text ``"None"`` is never returned as document content.

    Args:
        doc (Doc): Document to extract text from.
        rerank_field (Optional[str]): Specific field name to use. If None,
            uses the fallback strategy. Defaults to None.

    Returns:
        str: Extracted document text.

    Example:
        >>> doc = Doc(id="1", fields={"content": "Hello world", "title": "Test"})
        >>> get_document_text(doc)
        'Hello world'
        >>> get_document_text(doc, rerank_field="title")
        'Test'
    """
    common_fields = ("content", "text", "body", "passage")
    # The caller's preferred field, when given, is simply tried first.
    candidates = (rerank_field, *common_fields) if rerank_field else common_fields
    for field_name in candidates:
        if not doc.has_field(field_name):
            continue
        value = doc.field(field_name)
        # A present-but-None field carries no text; keep looking instead of
        # returning the string "None".
        if value is not None:
            return str(value)

    # Fallback: use all non-None fields concatenated.
    if doc.fields:
        parts = [str(value) for value in doc.fields.values() if value is not None]
        if parts:
            return " ".join(parts)

    return str(doc.id)