Source code for scitex_pd._melt_cols

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Time-stamp: "2024-10-05 23:04:16 (ywatanabe)"
# /home/ywatanabe/proj/_scitex_repo_openhands/src/scitex/pd/_melt_cols.py


#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Time-stamp: "2024-10-05 23:03:39 (ywatanabe)"
# /home/ywatanabe/proj/_scitex_repo_openhands/src/scitex/pd/_melt_cols.py

from typing import List, Optional

import pandas as pd


def melt_cols(
    df: pd.DataFrame, cols: List[str], id_columns: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Melt specified columns while preserving links to other data in a DataFrame.

    Each row of the input is tagged with a temporary positional key, the
    requested columns are melted against that key, and the identifier
    columns are joined back on it. The temporary key is removed before
    returning, and the input DataFrame is not modified.

    Example
    -------
    >>> data = pd.DataFrame({
    ...     'id': [1, 2, 3],
    ...     'name': ['Alice', 'Bob', 'Charlie'],
    ...     'score_1': [85, 90, 78],
    ...     'score_2': [92, 88, 95]
    ... })
    >>> melted = melt_cols(data, cols=['score_1', 'score_2'])
    >>> print(melted)
      variable  value  id     name
    0  score_1     85   1    Alice
    1  score_1     90   2      Bob
    2  score_1     78   3  Charlie
    3  score_2     92   1    Alice
    4  score_2     88   2      Bob
    5  score_2     95   3  Charlie

    Parameters
    ----------
    df : pd.DataFrame
        Input DataFrame
    cols : List[str]
        Columns to be melted (any sequence of column labels is accepted)
    id_columns : Optional[List[str]], default None
        Columns to preserve as identifiers. If None, all columns not in
        'cols' are used.

    Returns
    -------
    pd.DataFrame
        Melted DataFrame. The melted name/value columns come first
        ('variable' and 'value', or 'melted_value' when 'value' is one of
        the melted columns), followed by the identifier columns.

    Raises
    ------
    ValueError
        If cols are not present in the DataFrame
    """
    missing_melt = set(cols) - set(df.columns)
    if missing_melt:
        raise ValueError(f"Columns not found in DataFrame: {missing_melt}")

    # Normalise to real lists so tuples / Index objects can be concatenated
    # with the temporary key column below.
    melt_columns = list(cols)
    if id_columns is None:
        id_columns = [col for col in df.columns if col not in melt_columns]
    else:
        id_columns = list(id_columns)

    # Pick a temporary join-key name that cannot clash with a user column;
    # a fixed name would overwrite (and duplicate) an existing column of the
    # same name and break the merge.
    row_key = "global_index"
    while row_key in df.columns:
        row_key = f"_{row_key}"

    # reset_index returns a new frame, so the caller's DataFrame is untouched.
    df_copy = df.reset_index(drop=True)
    df_copy[row_key] = df_copy.index

    # Use a different value_name if "value" is one of the columns being melted
    # (melt refuses a value_name that matches an existing column).
    value_name = "value" if "value" not in melt_columns else "melted_value"

    melted_df = df_copy[melt_columns + [row_key]].melt(
        id_vars=[row_key], value_name=value_name
    )

    if id_columns:
        # Re-attach the identifier columns via the temporary positional key.
        melted_df = melted_df.merge(
            df_copy[id_columns + [row_key]], on=row_key
        )

    # The key is an implementation detail and is never part of the result.
    return melted_df.drop(row_key, axis=1)