# Source code for ds_provider_sdworx_py_lib.serde.employment_update
"""
**File:** ``employment_update.py``
**Region:** ``ds_provider_sdworx_py_lib/serde``
Description
-----------
Serializer for SD Worx EmploymentUpdateRequest XML format.
"""
from __future__ import annotations
from html import escape
from typing import Any
import pandas as pd
from ds_common_logger_py_lib import Logger
from ds_resource_plugin_py_lib.common.serde.serialize.base import DataSerializer
# Module-level logger, obtained with this module's ``__name__``.
logger = Logger.get_logger(__name__, package=True)
# Default columns that become direct children of EmploymentUpdateRequest.
# NOTE: this is a set, so it carries no ordering information of its own.
DEFAULT_REQUEST_LEVEL_COLUMNS: set[str] = {"CompanyNo", "SSN", "PersonCompanyEmploymentNo"}
# Default metadata columns used for dates (excluded from FieldValues).
DEFAULT_METADATA_COLUMNS: set[str] = {"ValidFrom", "ValidUntil"}
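# [docs] -- appears to be a documentation-link marker left over from the rendered source listing; not code.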
class EmploymentUpdateSerializer(DataSerializer):
    """Serialize DataFrame to SD Worx EmploymentUpdateRequest XML format.

    Expected columns after renaming:

    - ``CompanyNo``, ``SSN``, ``PersonCompanyEmploymentNo``: request-level columns
    - ``ValidFrom``, ``ValidUntil``: date metadata columns
    - All other columns: become ``FieldValue`` elements

    Request-level elements are written in a fixed order (the three names
    above first, in that order, then any additional configured names sorted
    alphabetically) so the output for a given DataFrame is reproducible.

    Date metadata values that are missing (``None``, ``NaN``, ``NaT``,
    ``pd.NA`` or an empty string) are written as empty text; values exposing
    ``strftime`` (e.g. ``pd.Timestamp``, ``datetime.date``) are formatted as
    ``yyyy-MM-dd`` to match the ``Format`` attribute on the element.

    Example
    -------
    .. code-block:: python

        import pandas as pd
        from ds_provider_sdworx_py_lib.serde import EmploymentUpdateSerializer

        serializer = EmploymentUpdateSerializer()
        df = pd.DataFrame([{
            "CompanyNo": "001",
            "SSN": "12345678901",
            "PersonCompanyEmploymentNo": "E001",
            "ValidFrom": "2024-01-01",
            "ValidUntil": "2024-12-31",
            "Salary": "50000",
        }])
        xml_bytes = serializer(df)
    """

    # Emission order for the well-known request-level elements. The configured
    # columns are stored as a set, whose iteration order is not stable between
    # interpreter runs, so the order has to be imposed explicitly.
    _REQUEST_LEVEL_ORDER: tuple[str, ...] = ("CompanyNo", "SSN", "PersonCompanyEmploymentNo")

    def __init__(
        self,
        *,
        request_level_columns: set[str] | None = None,
        metadata_columns: set[str] | None = None,
    ) -> None:
        """Configure which columns are request-level and which are metadata.

        Args:
            request_level_columns: Columns written as direct children of
                ``EmploymentUpdateRequest``. ``None`` or an empty set falls
                back to ``DEFAULT_REQUEST_LEVEL_COLUMNS``.
            metadata_columns: Columns excluded from the ``FieldValue`` list.
                ``None`` or an empty set falls back to
                ``DEFAULT_METADATA_COLUMNS``.
        """
        self.request_level_columns = request_level_columns or DEFAULT_REQUEST_LEVEL_COLUMNS
        self.metadata_columns = metadata_columns or DEFAULT_METADATA_COLUMNS

    def _escape_xml(self, value: Any) -> str:
        """Return ``str(value)`` with ``&``, ``<``, ``>`` and both quote characters escaped."""
        return escape(str(value), quote=True)

    @staticmethod
    def _is_blank(value: Any) -> bool:
        """Return True when ``value`` is missing (per ``pd.isna``) or an empty string."""
        # pd.isna is checked first so that pd.NA never reaches the ``==``
        # comparison, whose result would be ambiguous in a boolean context.
        return bool(pd.isna(value)) or value == ""

    def _format_date(self, value: Any) -> str:
        """Render a date metadata value as escaped ``yyyy-MM-dd`` text.

        Returns an empty string for missing values so callers can test the
        result for truthiness.
        """
        if self._is_blank(value):
            return ""
        # Datetime-like objects stringify with a time component by default;
        # the target elements declare Format="yyyy-MM-dd".
        strftime = getattr(value, "strftime", None)
        if callable(strftime):
            value = strftime("%Y-%m-%d")
        return self._escape_xml(value)

    def _ordered_request_columns(self) -> list[str]:
        """Return the configured request-level columns in a deterministic order."""
        configured = set(self.request_level_columns)
        known = [name for name in self._REQUEST_LEVEL_ORDER if name in configured]
        extras = sorted(configured.difference(self._REQUEST_LEVEL_ORDER), key=str)
        return known + extras

    def _build_xml_element(self, row: pd.Series[Any]) -> list[str]:
        """Build EmploymentUpdateRequest XML element for a single row.

        Args:
            row: One DataFrame row, indexed by column name.

        Returns:
            The XML lines (tab-indented, without trailing newlines) for the
            ``EmploymentUpdateRequest`` element describing ``row``.
        """
        lines: list[str] = ["\t<EmploymentUpdateRequest>"]

        # Request-level columns; blank/missing values are omitted entirely.
        for col in self._ordered_request_columns():
            value = row.get(col)
            if not self._is_blank(value):
                lines.append(f"\t\t<{col}>{self._escape_xml(value)}</{col}>")

        # Fields container
        lines.append("\t\t<Fields>")

        # The date metadata is identical for every FieldValue of this row,
        # so it is rendered once up front.
        valid_from = self._format_date(row.get("ValidFrom"))
        valid_until = self._format_date(row.get("ValidUntil"))
        if valid_until:
            valid_until_line = f'\t\t\t\t<ValidUntil Format="yyyy-MM-dd">{valid_until}</ValidUntil>'
        else:
            valid_until_line = '\t\t\t\t<ValidUntil Format="yyyy-MM-dd"/>'

        # FieldValue for each remaining, non-blank data column.
        skip_cols = set(self.request_level_columns) | set(self.metadata_columns)
        for col_key, value in row.items():
            col = str(col_key)
            if col in skip_cols or self._is_blank(value):
                continue
            escaped_col = self._escape_xml(col)
            escaped_value = self._escape_xml(value)
            lines.extend(
                [
                    f'\t\t\t<FieldValue Name="{escaped_col}">',
                    f'\t\t\t\t<ValidFrom Format="yyyy-MM-dd">{valid_from}</ValidFrom>',
                    valid_until_line,
                    f"\t\t\t\t<Value>{escaped_value}</Value>",
                    # Modified reuses the ValidFrom date.
                    f'\t\t\t\t<Modified Format="yyyy-MM-dd">{valid_from}</Modified>',
                    "\t\t\t\t<ModifiedBy/>",
                    "\t\t\t</FieldValue>",
                ]
            )

        lines.append("\t\t</Fields>")
        lines.append("\t</EmploymentUpdateRequest>")
        return lines

    def __call__(self, obj: pd.DataFrame, **_kwargs: Any) -> bytes:
        """Serialize DataFrame to EmploymentUpdateRequest XML bytes.

        Each row becomes an ``EmploymentUpdateRequest`` element within a single
        ``EmploymentUpdateRequests`` root document.

        Args:
            obj: DataFrame with columns to serialize.
            **_kwargs: Additional keyword arguments (ignored).

        Returns:
            XML content as bytes (UTF-8 encoded).
        """
        lines = [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<EmploymentUpdateRequests xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
            ' xmlns:xsd="http://www.w3.org/2001/XMLSchema">',
        ]
        for _, row in obj.iterrows():
            lines.extend(self._build_xml_element(row))
        lines.append("</EmploymentUpdateRequests>")
        logger.info("Serialized %d rows to EmploymentUpdateRequest XML", len(obj))
        return "\n".join(lines).encode("utf-8")