# Source code for ds_provider_sdworx_py_lib.serde.employment_update

"""
**File:** ``employment_update.py``
**Region:** ``ds_provider_sdworx_py_lib/serde``

Description
-----------
Serializer for SD Worx EmploymentUpdateRequest XML format.
"""

from __future__ import annotations

from datetime import date, datetime
from html import escape
from typing import Any

import pandas as pd
from ds_common_logger_py_lib import Logger
from ds_resource_plugin_py_lib.common.serde.serialize.base import DataSerializer

# Module-level logger for this module, created via ``Logger.get_logger``.
logger = Logger.get_logger(__name__, package=True)

# Default columns that become direct children of each EmploymentUpdateRequest
# element. Used when the serializer is constructed without (or with an empty)
# ``request_level_columns`` argument. This is a set, so it carries no ordering.
DEFAULT_REQUEST_LEVEL_COLUMNS: set[str] = {"CompanyNo", "SSN", "PersonCompanyEmploymentNo"}

# Default metadata columns used for dates. They are excluded from the
# FieldValue elements. Used when the serializer is constructed without (or
# with an empty) ``metadata_columns`` argument.
DEFAULT_METADATA_COLUMNS: set[str] = {"ValidFrom", "ValidUntil"}


class EmploymentUpdateSerializer(DataSerializer):
    """Serialize DataFrame to SD Worx EmploymentUpdateRequest XML format.

    Expected columns after renaming:

    - ``CompanyNo``, ``SSN``, ``PersonCompanyEmploymentNo``: request-level columns
    - ``ValidFrom``, ``ValidUntil``: date metadata columns
    - All other columns: become ``FieldValue`` elements

    Missing values (``None``, ``NaN``, ``NaT``, ``pd.NA``) and empty strings are
    treated alike everywhere: request-level elements and ``FieldValue`` elements
    are omitted, ``ValidFrom`` is emitted with empty content and ``ValidUntil``
    as a self-closing element.

    Example
    -------
    .. code-block:: python

        import pandas as pd
        from ds_provider_sdworx_py_lib.serde import EmploymentUpdateSerializer

        serializer = EmploymentUpdateSerializer()
        df = pd.DataFrame([{
            "CompanyNo": "001",
            "SSN": "12345678901",
            "PersonCompanyEmploymentNo": "E001",
            "ValidFrom": "2024-01-01",
            "ValidUntil": "2024-12-31",
            "Salary": "50000",
        }])
        xml_bytes = serializer(df)
    """

    # Emission order for the well-known request-level elements. The columns are
    # configured as a set, whose iteration order for strings can differ between
    # interpreter runs; a fixed order keeps the generated XML reproducible.
    # NOTE(review): this mirrors the order in which the default columns are
    # listed in this module - confirm it matches the SD Worx schema order.
    _REQUEST_LEVEL_ORDER: tuple[str, ...] = ("CompanyNo", "SSN", "PersonCompanyEmploymentNo")

    def __init__(
        self,
        *,
        request_level_columns: set[str] | None = None,
        metadata_columns: set[str] | None = None,
    ) -> None:
        """Configure which columns are request-level and which are metadata.

        Args:
            request_level_columns: Columns emitted as direct children of
                ``EmploymentUpdateRequest``. ``None`` or an empty set selects
                the module defaults.
            metadata_columns: Columns excluded from the ``FieldValue``
                elements. ``None`` or an empty set selects the module defaults.
        """
        # Copy so that mutating an instance attribute can never alter the
        # module-level defaults (or the caller's own set).
        self.request_level_columns = set(request_level_columns or DEFAULT_REQUEST_LEVEL_COLUMNS)
        self.metadata_columns = set(metadata_columns or DEFAULT_METADATA_COLUMNS)

    @staticmethod
    def _is_blank(value: Any) -> bool:
        """Return True when value is missing (None/NaN/NaT/NA) or an empty string."""
        # pd.isna is checked first so that pd.NA never reaches the ``==``
        # comparison, whose result would be ambiguous in a boolean context.
        return bool(pd.isna(value) or value == "")

    def _escape_xml(self, value: Any) -> str:
        """Escape XML special characters in value."""
        return escape(str(value), quote=True)

    def _format_date(self, value: Any) -> str:
        """Return value as escaped text for a ``Format="yyyy-MM-dd"`` element.

        Missing values yield an empty string. ``datetime``/``Timestamp`` values
        are reduced to their date part and ``date`` values are rendered in ISO
        form, so the text matches the declared format. Any other value (for
        example an already formatted string) is escaped unchanged.
        """
        if self._is_blank(value):
            return ""
        if isinstance(value, datetime):
            value = value.date()
        if isinstance(value, date):
            return value.isoformat()
        return self._escape_xml(value)

    def _ordered_request_columns(self) -> list[str]:
        """Return the configured request-level columns in a deterministic order.

        Well-known columns come first in their fixed order; any additional
        configured columns follow, sorted by name.
        """
        configured = self.request_level_columns
        known = [col for col in self._REQUEST_LEVEL_ORDER if col in configured]
        extra = sorted((col for col in configured if col not in self._REQUEST_LEVEL_ORDER), key=str)
        return known + extra

    def _build_xml_element(self, row: pd.Series[Any]) -> list[str]:
        """Build EmploymentUpdateRequest XML element for a single row.

        Args:
            row: One DataFrame row; the index holds the column names.

        Returns:
            The XML lines of one ``EmploymentUpdateRequest`` element, without
            trailing newlines.
        """
        lines: list[str] = ["\t<EmploymentUpdateRequest>"]

        # Request-level columns; absent or blank ones are omitted entirely.
        for col in self._ordered_request_columns():
            value = row.get(col)
            if not self._is_blank(value):
                lines.append(f"\t\t<{col}>{self._escape_xml(value)}</{col}>")

        # Fields container
        lines.append("\t\t<Fields>")

        # The validity dates are identical for every FieldValue of the row, so
        # their XML lines are built once up front.
        valid_from = self._format_date(row.get("ValidFrom"))
        valid_until = self._format_date(row.get("ValidUntil"))
        valid_from_line = f'\t\t\t\t<ValidFrom Format="yyyy-MM-dd">{valid_from}</ValidFrom>'
        if valid_until:
            valid_until_line = f'\t\t\t\t<ValidUntil Format="yyyy-MM-dd">{valid_until}</ValidUntil>'
        else:
            valid_until_line = '\t\t\t\t<ValidUntil Format="yyyy-MM-dd"/>'
        # Modified carries the same date as ValidFrom.
        modified_line = f'\t\t\t\t<Modified Format="yyyy-MM-dd">{valid_from}</Modified>'

        # FieldValue for each remaining, non-blank data column
        skip_cols = self.request_level_columns | self.metadata_columns
        for col_key, value in row.items():
            col = str(col_key)
            if col in skip_cols or self._is_blank(value):
                continue
            lines.extend(
                (
                    f'\t\t\t<FieldValue Name="{self._escape_xml(col)}">',
                    valid_from_line,
                    valid_until_line,
                    f"\t\t\t\t<Value>{self._escape_xml(value)}</Value>",
                    modified_line,
                    "\t\t\t\t<ModifiedBy/>",
                    "\t\t\t</FieldValue>",
                )
            )

        lines.append("\t\t</Fields>")
        lines.append("\t</EmploymentUpdateRequest>")
        return lines

    def __call__(self, obj: pd.DataFrame, **_kwargs: Any) -> bytes:
        """Serialize DataFrame to EmploymentUpdateRequest XML bytes.

        Each row becomes an ``EmploymentUpdateRequest`` element within a single
        ``EmploymentUpdateRequests`` root document.

        Args:
            obj: DataFrame with columns to serialize.
            **_kwargs: Additional keyword arguments (ignored).

        Returns:
            XML content as bytes (UTF-8 encoded).
        """
        lines = [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<EmploymentUpdateRequests xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
            ' xmlns:xsd="http://www.w3.org/2001/XMLSchema">',
        ]

        for _, row in obj.iterrows():
            lines.extend(self._build_xml_element(row))

        lines.append("</EmploymentUpdateRequests>")

        logger.info("Serialized %d rows to EmploymentUpdateRequest XML", len(obj))
        return "\n".join(lines).encode("utf-8")