Source code for scitex_pd._get_unique

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Timestamp: "2025-09-18 18:42:11 (ywatanabe)"
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/pd/_get_unique.py
# ----------------------------------------
from __future__ import annotations

import os

__FILE__ = __file__
__DIR__ = os.path.dirname(__FILE__)
# ----------------------------------------

"""
Extract unique values from DataFrame columns.
"""

from typing import Any, Optional

import pandas as pd


[docs] def get_unique( df: pd.DataFrame, column: str, default: Optional[Any] = None, raise_on_multiple: bool = False, ) -> Any: """Get value from column if it contains a unique value. Args: df: DataFrame to extract from column: Column name to check default: Default value if column doesn't exist or has multiple unique values raise_on_multiple: If True, raise ValueError when multiple unique values exist Returns: The unique value if exactly one exists, otherwise default value Examples: >>> import pandas as pd >>> df = pd.DataFrame({'patient_id': ['P01', 'P01', 'P01']}) >>> get_unique(df, 'patient_id') 'P01' >>> df = pd.DataFrame({'patient_id': ['P01', 'P02']}) >>> get_unique(df, 'patient_id', default='Unknown') 'Unknown' >>> # Raise error on multiple values >>> get_unique(df, 'patient_id', raise_on_multiple=True) ValueError: Column 'patient_id' has 2 unique values: ['P01', 'P02'] """ if column not in df.columns: if raise_on_multiple: raise KeyError(f"Column '{column}' not found in DataFrame") return default unique_values = df[column].unique() if len(unique_values) == 1: return unique_values[0] if len(unique_values) > 1 and raise_on_multiple: raise ValueError( f"Column '{column}' has {len(unique_values)} unique values: " f"{list(unique_values[:5])}" ) return default
if __name__ == "__main__": # Test the function import pandas as pd # Test case 1: Unique value df1 = pd.DataFrame({"patient_id": ["P01", "P01", "P01"]}) assert get_unique(df1, "patient_id") == "P01" print("✓ Test 1 passed: Unique value extracted") # Test case 2: Multiple values with default df2 = pd.DataFrame({"patient_id": ["P01", "P02"]}) assert get_unique(df2, "patient_id", default="Unknown") == "Unknown" print("✓ Test 2 passed: Default returned for multiple values") # Test case 3: Missing column assert get_unique(df1, "missing_col", default="N/A") == "N/A" print("✓ Test 3 passed: Default returned for missing column") # Test case 4: Raise on multiple raised = False try: get_unique(df2, "patient_id", raise_on_multiple=True) except ValueError as e: raised = True assert "has 2 unique values" in str(e) assert raised, "Should have raised ValueError" print("✓ Test 4 passed: ValueError raised for multiple values") print("\nAll tests passed!") # EOF