Source code for ds_resource_plugin_py_lib.common.serde.serialize.pandas
"""
**File:** ``pandas.py``
**Region:** ``ds_resource_plugin_py_lib/common/serde/serialize``
Description
-----------
Serialize a pandas DataFrame into a value.
Example
-------
.. code-block:: python
import pandas as pd
from ds_resource_plugin_py_lib.common.resource.dataset.storage_format import DatasetStorageFormatType
from ds_resource_plugin_py_lib.common.serde.serialize.pandas import PandasSerializer
df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
serializer = PandasSerializer(format=DatasetStorageFormatType.CSV)
csv_text = serializer(df, index=False)
"""
from dataclasses import dataclass, field
from typing import Any
import pandas as pd
from ds_common_logger_py_lib import Logger
from ...resource.dataset.storage_format import DatasetStorageFormatType
from .base import DataSerializer
logger = Logger.get_logger(__name__, package=True)
[docs]
@dataclass(kw_only=True)
class PandasSerializer(DataSerializer):
format: DatasetStorageFormatType
kwargs: dict[str, Any] = field(default_factory=dict)
[docs]
def __call__(self, obj: Any, **_kwargs: Any) -> Any:
"""
Serialize a pandas DataFrame into a value.
Args:
obj: The pandas DataFrame to serialize.
**kwargs: Additional keyword arguments.
Returns:
A value.
"""
logger.debug(f"PandasSerializer __call__ with format: {self.format} and args: {self.kwargs}")
if not isinstance(obj, pd.DataFrame):
raise TypeError(f"Expected pd.DataFrame, got {type(obj)}")
value = obj
default_float_format = "%.2f"
def _ensure_float_format() -> None:
if "float_format" not in self.kwargs:
self.kwargs["float_format"] = default_float_format
if self.format == DatasetStorageFormatType.CSV:
_ensure_float_format()
return value.to_csv(**self.kwargs)
elif self.format == DatasetStorageFormatType.PARQUET:
return value.to_parquet(**self.kwargs)
elif self.format in (
DatasetStorageFormatType.JSON,
DatasetStorageFormatType.SEMI_STRUCTURED_JSON,
):
return value.to_json(**self.kwargs)
elif self.format == DatasetStorageFormatType.EXCEL:
_ensure_float_format()
return value.to_excel(**self.kwargs)
elif self.format == DatasetStorageFormatType.XML:
return value.to_xml(**self.kwargs)
else:
raise ValueError(f"Unsupported format: {self.format}")