Source code for spss_converter.write

from typing import Union, Optional, Any
from os import PathLike
import tempfile

from io import BytesIO
import yaml
import simplejson as json
import pyreadstat
from validator_collection import validators, checkers
from spss_converter.Metadata import Metadata
import pandas

DataFrame = pandas.DataFrame


[docs]def from_dataframe(df: DataFrame, target: Optional[Union['PathLike[Any]', BytesIO]] = None, metadata: Optional[Metadata] = None, compress: bool = False): """Create an SPSS dataset from a `Pandas <https://pandas.pydata.org/>`_ :class:`DataFrame <pandas:DataFrame>`. :param df: The :class:`DataFrame` to serialize to an SPSS dataset. :type df: :class:`pandas.DataFrame <pandas:DataFrame>` :param target: The target to which the SPSS dataset should be written. Accepts either a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`. If :obj:`None <python:None>` will return a :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to :obj:`None <python:None>`. :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` / :obj:`None <python:None>` :param metadata: The :class:`Metadata` associated with the dataset. If :obj:`None <python:None>`, will attempt to derive it form ``df``. Defaults to :obj:`None <python:None>`. :type metadata: :class:`Metadata` / :obj:`None <python:None>` :param compress: If ``True``, will return data in the compressed ZSAV format. If ``False``, will return data in the standards SAV format. Defaults to ``False``. :type compress: :class:`bool <python:bool>` :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if ``target`` is :obj:`None <python:None>` or not a filename, otherwise :obj:`None <python:None>` :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>` :raises ValueError: if ``df`` is not a :class:`pandas.DataFrame <pandas:DataFrame>` :raises ValueError: if ``metadata`` is not a :class:`Metadata` """ if not checkers.is_type(df, 'DataFrame'): raise ValueError(f'df must be a pandas.DataFrame. Was: {df.__class__.__name__}') if metadata and not checkers.is_type(metadata, ('Metadata', 'metadata_container', 'dict')): raise ValueError(f'metadata must be a Metadata instance or compatible object. ' f'Was: {metadata.__class__.__name__}') elif metadata and checkers.is_type(metadata, 'metadata_container'): metadata = Metadata.from_pyreadstat(metadata) elif metadata and checkers.is_type(metadata, 'dict'): metadata = Metadata.from_dict(metadata) is_file = False if target and checkers.is_pathlike(target): is_file = True elif target: target = validators.bytesIO(target, allow_empty = False) if metadata: as_pyreadstat = metadata.to_pyreadstat() else: as_pyreadstat = None if target and is_file: with open(target, 'wb') as target_file: if as_pyreadstat: pyreadstat.write_sav( df = df, dst_path = target_file, file_label = as_pyreadstat.file_label, column_labels = as_pyreadstat.column_labels, compress = compress, note = as_pyreadstat.notes, variable_value_labels = as_pyreadstat.variable_value_labels, missing_ranges = as_pyreadstat.missing_ranges, variable_display_width = as_pyreadstat.variable_display_width, variable_measure = as_pyreadstat.variable_measure ) else: pyreadstat.write_sav(df = df, dst_path = target_file, compress = compress) else: with tempfile.NamedTemporaryFile() as temp_file: if as_pyreadstat: pyreadstat.write_sav( df = df, dst_path = temp_file.name, file_label = as_pyreadstat.file_label, column_labels = as_pyreadstat.column_labels, compress = compress, note = as_pyreadstat.notes, variable_value_labels = as_pyreadstat.variable_value_labels, missing_ranges = as_pyreadstat.missing_ranges, variable_display_width = as_pyreadstat.variable_display_width, variable_measure = as_pyreadstat.variable_measure ) else: pyreadstat.write_sav(df = df, dst_path = temp_file.name, compress = compress) if target: target.write(temp_file.read()) else: target = BytesIO(temp_file.read()) return target
[docs]def from_csv(as_csv: Union[str, 'PathLike[Any]', BytesIO], target: Optional[Union['PathLike[Any]', BytesIO]] = None, compress: bool = False, delimiter = '|', **kwargs): """Convert a CSV file into an SPSS dataset. .. tip:: If you pass any additional keyword arguments, those keyword arguments will be passed onto the :func:`pandas.read_csv() <pandas:pandas.read_csv>` function. :param as_csv: The CSV data that you wish to convert into an SPSS dataset. :type as_csv: :class:`str <python:str>` / File-location / :class:`BytesIO <python:io.BytesIO>` :param target: The target to which the SPSS dataset should be written. Accepts either a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`. If :obj:`None <python:None>` will return a :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to :obj:`None <python:None>`. :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` / :obj:`None <python:None>` :param compress: If ``True``, will return data in the compressed ZSAV format. If ``False``, will return data in the standards SAV format. Defaults to ``False``. :type compress: :class:`bool <python:bool>` :param delimiter: The delimiter used between columns. Defaults to ``|``. :type delimiter: :class:`str <python:str>` :param kwargs: Additional keyword arguments which will be passed onto the :func:`pandas.read_csv() <pandas:pandas.read_csv>` function. :type kwargs: :class:`dict <python:dict>` :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if ``target`` is :obj:`None <python:None>` or not a filename, otherwise :obj:`None <python:None>` :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>` """ df = pandas.read_csv(as_csv, delimiter = delimiter, **kwargs) if 'Unnamed: 0' in df: df = df.drop(['Unnamed: 0'], axis = 1) result = from_dataframe(df, target = target, compress = compress) return result
[docs]def from_dict(as_dict: dict, target: Optional[Union['PathLike[Any]', BytesIO]] = None, compress: bool = False, **kwargs): """Convert a :class:`dict <python:dict>` object into an SPSS dataset. .. tip:: If you pass any additional keyword arguments, those keyword arguments will be passed onto the :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>` method. :param as_dict: The :class:`dict <python:dict>` data that you wish to convert into an SPSS dataset. :type as_dict: :class:`dict <python:dict>` :param target: The target to which the SPSS dataset should be written. Accepts either a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`. If :obj:`None <python:None>` will return a :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to :obj:`None <python:None>`. :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` / :obj:`None <python:None>` :param compress: If ``True``, will return data in the compressed ZSAV format. If ``False``, will return data in the standards SAV format. Defaults to ``False``. :type compress: :class:`bool <python:bool>` :param kwargs: Additional keyword arguments which will be passed onto the :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>` method. :type kwargs: :class:`dict <python:dict>` :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if ``target`` is :obj:`None <python:None>` or not a filename, otherwise :obj:`None <python:None>` :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>` """ df = DataFrame.from_dict(as_dict, **kwargs) result = from_dataframe(df, target = target, compress = compress) return result
[docs]def from_json(as_json: Union[str, 'PathLike[Any]', BytesIO], target: Optional[Union['PathLike[Any]', BytesIO]] = None, compress: bool = False, **kwargs): """Convert JSON data into an SPSS dataset. .. tip:: If you pass any additional keyword arguments, those keyword arguments will be passed onto the :func:`pandas.read_json() <pandas:pandas.read_json>` function. :param as_json: The JSON data that you wish to convert into an SPSS dataset. :type as_json: :class:`str <python:str>` / File-location / :class:`BytesIO <python:io.BytesIO>` :param target: The target to which the SPSS dataset should be written. Accepts either a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`. If :obj:`None <python:None>` will return a :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to :obj:`None <python:None>`. :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` / :obj:`None <python:None>` :param compress: If ``True``, will return data in the compressed ZSAV format. If ``False``, will return data in the standards SAV format. Defaults to ``False``. :type compress: :class:`bool <python:bool>` :param kwargs: Additional keyword arguments which will be passed onto the :func:`pandas.read_json() <pandas:pandas.read_json>` function. :type kwargs: :class:`dict <python:dict>` :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if ``target`` is :obj:`None <python:None>` or not a filename, otherwise :obj:`None <python:None>` :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>` """ df = pandas.read_json(as_json, **kwargs) result = from_dataframe(df, target = target, compress = compress) return result
[docs]def from_yaml(as_yaml: Union[str, 'PathLike[Any]', BytesIO], target: Optional[Union['PathLike[Any]', BytesIO]] = None, compress: bool = False, **kwargs): """Convert YAML data into an SPSS dataset. .. tip:: If you pass any additional keyword arguments, those keyword arguments will be passed onto the :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>` method. :param as_yaml: The YAML data that you wish to convert into an SPSS dataset. :type as_yaml: :class:`str <python:str>` / File-location / :class:`BytesIO <python:io.BytesIO>` :param target: The target to which the SPSS dataset should be written. Accepts either a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`. If :obj:`None <python:None>` will return a :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to :obj:`None <python:None>`. :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` / :obj:`None <python:None>` :param compress: If ``True``, will return data in the compressed ZSAV format. If ``False``, will return data in the standards SAV format. Defaults to ``False``. :type compress: :class:`bool <python:bool>` :param kwargs: Additional keyword arguments which will be passed onto the :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>` method. :type kwargs: :class:`dict <python:dict>` :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if ``target`` is :obj:`None <python:None>` or not a filename, otherwise :obj:`None <python:None>` :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>` """ if checkers.is_file(as_yaml) or checkers.is_bytesIO(as_yaml): file_path = as_yaml with open(file_path, 'rb') as yaml_file: as_dict = yaml.safe_load(yaml_file) else: as_yaml = validators.string(as_yaml, allow_empty = False) as_dict = yaml.safe_load(as_yaml) as_json = json.dumps(as_dict) return from_json(as_json, target = target, compress = compress, **kwargs)
[docs]def from_excel(as_excel, target: Optional[Union['PathLike[Any]', BytesIO]] = None, compress: bool = False, **kwargs): """Convert Excel data into an SPSS dataset. .. tip:: If you pass any additional keyword arguments, those keyword arguments will be passed onto the :func:`pandas.read_excel() <pandas:pandas.read_excel>` function. :param as_excel: The Excel data that you wish to convert into an SPSS dataset. :type as_excel: :class:`str <python:str>` / File-location / :class:`BytesIO <python:io.BytesIO>` / :class:`bytes <python:bytes>` / :class:`ExcelFile <pandas.ExcelFile>` :param target: The target to which the SPSS dataset should be written. Accepts either a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`. If :obj:`None <python:None>` will return a :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to :obj:`None <python:None>`. :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` / :obj:`None <python:None>` :param compress: If ``True``, will return data in the compressed ZSAV format. If ``False``, will return data in the standards SAV format. Defaults to ``False``. :type compress: :class:`bool <python:bool>` :param kwargs: Additional keyword arguments which will be passed onto the :func:`pandas.read_excel() <pandas:pandas.read_excel>` function. :type kwargs: :class:`dict <python:dict>` :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if ``target`` is :obj:`None <python:None>` or not a filename, otherwise :obj:`None <python:None>` :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>` """ df = pandas.read_excel(as_excel, **kwargs) if 'Unnamed: 0' in df: df = df.drop(['Unnamed: 0'], axis = 1) result = from_dataframe(df, target = target, compress = compress) return result
[docs]def apply_metadata(df: DataFrame, metadata: Union[Metadata, dict, pyreadstat.metadata_container], as_category: bool = True): """Updates the :class:`DataFrame <pandas:DataFrame>` ``df`` based on the ``metadata``. :param df: The :class:`DataFrame <pandas:pandas.DataFrame>` to update. :type df: :class:`pandas.DataFrame <pandas:pandas.DataFrame>` :param metadata: The :class:`Metadata` to apply to ``df``. :type metadata: :class:`Metadata`, :class:`pyreadstat.metadata_container`, or compatible :class:`dict <python:dict>` :param as_category: if ``True``, will variables with formats will be transformed into categories in the :class:`DataFrame <pandas:pandas.DataFrame>`. Defaults to ``True``. :type as_category: :class:`bool <python:bool>` :returns: A copy of ``df`` updated to reflect ``metadata``. :rtype: :class:`DataFrame <pandas:pandas.DataFrame>` """ if not checkers.is_type(df, 'DataFrame'): raise ValueError(f'df must be a pandas.DataFrame. Was: {df.__class__.__name__}') if not checkers.is_type(metadata, ('Metadata', 'metadata_container', 'dict')): raise ValueError(f'metadata must be a Metadata instance or compatible object. ' f'Was: {metadata.__class__.__name__}') elif checkers.is_type(metadata, 'metadata_container'): metadata = Metadata.from_pyreadstat(metadata) elif checkers.is_type(metadata, 'dict'): metadata = Metadata.from_dict(metadata) as_pyreadstat = metadata.to_pyreadstat() return pyreadstat.set_value_labels(df, metadata = as_pyreadstat.value_labels, formats_as_category = as_category)