from typing import Union, Optional, Any
from os import PathLike
import tempfile
from io import BytesIO
import yaml
import simplejson as json
import pyreadstat
from validator_collection import validators, checkers
from spss_converter.Metadata import Metadata
import pandas
# Module-level alias so annotations and ``DataFrame.from_dict()`` calls can
# refer to the pandas class without the ``pandas.`` prefix.
DataFrame = pandas.DataFrame
def _write_sav(df, dst_path, as_pyreadstat, compress):
    """Write ``df`` to ``dst_path`` via pyreadstat, applying metadata when given.

    :param df: The :class:`DataFrame <pandas:DataFrame>` to serialize.
    :param dst_path: Filesystem path of the SAV/ZSAV file to create.
    :param as_pyreadstat: Pyreadstat-style metadata object (as produced by
      ``Metadata.to_pyreadstat()``) or :obj:`None <python:None>`.
    :param compress: Forwarded to ``pyreadstat.write_sav()`` as ``compress``.
    """
    if as_pyreadstat:
        pyreadstat.write_sav(
            df = df,
            dst_path = dst_path,
            file_label = as_pyreadstat.file_label,
            column_labels = as_pyreadstat.column_labels,
            compress = compress,
            note = as_pyreadstat.notes,
            variable_value_labels = as_pyreadstat.variable_value_labels,
            missing_ranges = as_pyreadstat.missing_ranges,
            variable_display_width = as_pyreadstat.variable_display_width,
            variable_measure = as_pyreadstat.variable_measure
        )
    else:
        pyreadstat.write_sav(df = df,
                             dst_path = dst_path,
                             compress = compress)


def from_dataframe(df: DataFrame,
                   target: Optional[Union['PathLike[Any]', BytesIO]] = None,
                   metadata: Optional[Metadata] = None,
                   compress: bool = False):
    """Create an SPSS dataset from a `Pandas <https://pandas.pydata.org/>`_
    :class:`DataFrame <pandas:DataFrame>`.

    :param df: The :class:`DataFrame` to serialize to an SPSS dataset.
    :type df: :class:`pandas.DataFrame <pandas:DataFrame>`

    :param target: The target to which the SPSS dataset should be written. Accepts either
      a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or
      :obj:`None <python:None>`. If :obj:`None <python:None>` will return a
      :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to
      :obj:`None <python:None>`.
    :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` /
      :obj:`None <python:None>`

    :param metadata: The :class:`Metadata` associated with the dataset. A pyreadstat
      ``metadata_container`` or a :class:`dict <python:dict>` is also accepted and is
      converted to :class:`Metadata` first. If :obj:`None <python:None>`, the dataset
      is written without explicit metadata. Defaults to :obj:`None <python:None>`.
    :type metadata: :class:`Metadata` / :obj:`None <python:None>`

    :param compress: If ``True``, will return data in the compressed ZSAV format. If
      ``False``, will return data in the standard SAV format. Defaults to ``False``.
    :type compress: :class:`bool <python:bool>`

    :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if
      ``target`` is :obj:`None <python:None>` or not a filename, otherwise
      :obj:`None <python:None>`
    :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>`

    :raises ValueError: if ``df`` is not a :class:`pandas.DataFrame <pandas:DataFrame>`
    :raises ValueError: if ``metadata`` is not a :class:`Metadata` (or a compatible
      ``metadata_container`` / :class:`dict <python:dict>`)

    """
    # Local import: the module only imports ``PathLike`` from ``os``.
    import os

    if not checkers.is_type(df, 'DataFrame'):
        raise ValueError(f'df must be a pandas.DataFrame. Was: {df.__class__.__name__}')

    if metadata and not checkers.is_type(metadata, ('Metadata',
                                                    'metadata_container',
                                                    'dict')):
        raise ValueError(f'metadata must be a Metadata instance or compatible object. '
                         f'Was: {metadata.__class__.__name__}')
    elif metadata and checkers.is_type(metadata, 'metadata_container'):
        metadata = Metadata.from_pyreadstat(metadata)
    elif metadata and checkers.is_type(metadata, 'dict'):
        metadata = Metadata.from_dict(metadata)

    is_file = False
    if target and checkers.is_pathlike(target):
        is_file = True
    elif target:
        # Anything that is not path-like must be usable as a BytesIO buffer.
        target = validators.bytesIO(target, allow_empty = False)

    as_pyreadstat = metadata.to_pyreadstat() if metadata else None

    if target and is_file:
        # pyreadstat creates the file itself from a path; handing it an open
        # file object (as was done previously) is not a supported ``dst_path``.
        _write_sav(df, target, as_pyreadstat, compress)
        return None

    # pyreadstat can only write to a filesystem path, so stage the dataset in a
    # temporary directory and read the bytes back. A directory is used rather
    # than ``NamedTemporaryFile`` because an open named temporary file cannot be
    # reopened by name on Windows.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, 'dataset.sav')
        _write_sav(df, temp_path, as_pyreadstat, compress)
        with open(temp_path, 'rb') as temp_file:
            payload = temp_file.read()

    if target:
        target.write(payload)
    else:
        target = BytesIO(payload)

    return target
def from_csv(as_csv: Union[str, 'PathLike[Any]', BytesIO],
             target: Optional[Union['PathLike[Any]', BytesIO]] = None,
             compress: bool = False,
             delimiter = '|',
             **kwargs):
    """Read CSV data with pandas and serialize it as an SPSS dataset.

    .. tip::

      Any additional keyword arguments are forwarded unchanged to
      :func:`pandas.read_csv() <pandas:pandas.read_csv>`.

    :param as_csv: The CSV data to convert.
    :type as_csv: :class:`str <python:str>` / File-location /
      :class:`BytesIO <python:io.BytesIO>`

    :param target: Where the SPSS dataset should be written: a filename/path, a
      :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`.
      Passed through to :func:`from_dataframe`. Defaults to
      :obj:`None <python:None>`.
    :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` /
      :obj:`None <python:None>`

    :param compress: If ``True``, produce the compressed ZSAV format, otherwise
      the standard SAV format. Defaults to ``False``.
    :type compress: :class:`bool <python:bool>`

    :param delimiter: The delimiter used between columns. Defaults to ``|``.
    :type delimiter: :class:`str <python:str>`

    :param kwargs: Additional keyword arguments for
      :func:`pandas.read_csv() <pandas:pandas.read_csv>`.
    :type kwargs: :class:`dict <python:dict>`

    :returns: Whatever :func:`from_dataframe` returns for the given ``target``.
    :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>`

    """
    frame = pandas.read_csv(as_csv, delimiter = delimiter, **kwargs)

    # Discard the unnamed leading column if the parsed frame contains one.
    unnamed = 'Unnamed: 0'
    if unnamed in frame:
        frame = frame.drop(columns = [unnamed])

    return from_dataframe(frame, target = target, compress = compress)
def from_dict(as_dict: dict,
              target: Optional[Union['PathLike[Any]', BytesIO]] = None,
              compress: bool = False,
              **kwargs):
    """Build a DataFrame from a :class:`dict <python:dict>` and serialize it as an
    SPSS dataset.

    .. tip::

      Any additional keyword arguments are forwarded unchanged to
      :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>`.

    :param as_dict: The :class:`dict <python:dict>` data to convert.
    :type as_dict: :class:`dict <python:dict>`

    :param target: Where the SPSS dataset should be written: a filename/path, a
      :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`.
      Passed through to :func:`from_dataframe`. Defaults to
      :obj:`None <python:None>`.
    :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` /
      :obj:`None <python:None>`

    :param compress: If ``True``, produce the compressed ZSAV format, otherwise
      the standard SAV format. Defaults to ``False``.
    :type compress: :class:`bool <python:bool>`

    :param kwargs: Additional keyword arguments for
      :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>`.
    :type kwargs: :class:`dict <python:dict>`

    :returns: Whatever :func:`from_dataframe` returns for the given ``target``.
    :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>`

    """
    return from_dataframe(DataFrame.from_dict(as_dict, **kwargs),
                          target = target,
                          compress = compress)
def from_json(as_json: Union[str, 'PathLike[Any]', BytesIO],
              target: Optional[Union['PathLike[Any]', BytesIO]] = None,
              compress: bool = False,
              **kwargs):
    """Read JSON data with pandas and serialize it as an SPSS dataset.

    .. tip::

      Any additional keyword arguments are forwarded unchanged to
      :func:`pandas.read_json() <pandas:pandas.read_json>`.

    :param as_json: The JSON data to convert.
    :type as_json: :class:`str <python:str>` / File-location /
      :class:`BytesIO <python:io.BytesIO>`

    :param target: Where the SPSS dataset should be written: a filename/path, a
      :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`.
      Passed through to :func:`from_dataframe`. Defaults to
      :obj:`None <python:None>`.
    :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` /
      :obj:`None <python:None>`

    :param compress: If ``True``, produce the compressed ZSAV format, otherwise
      the standard SAV format. Defaults to ``False``.
    :type compress: :class:`bool <python:bool>`

    :param kwargs: Additional keyword arguments for
      :func:`pandas.read_json() <pandas:pandas.read_json>`.
    :type kwargs: :class:`dict <python:dict>`

    :returns: Whatever :func:`from_dataframe` returns for the given ``target``.
    :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>`

    """
    frame = pandas.read_json(as_json, **kwargs)
    return from_dataframe(frame, target = target, compress = compress)
def from_yaml(as_yaml: Union[str, 'PathLike[Any]', BytesIO],
              target: Optional[Union['PathLike[Any]', BytesIO]] = None,
              compress: bool = False,
              **kwargs):
    """Convert YAML data into an SPSS dataset.

    The YAML is parsed with ``yaml.safe_load()``, re-serialized as JSON and then
    handed to :func:`from_json`.

    .. tip::

      If you pass any additional keyword arguments, those keyword arguments will be
      forwarded to :func:`from_json`.

    :param as_yaml: The YAML data that you wish to convert into an SPSS dataset. May be
      a YAML string, the location of a YAML file, or a
      :class:`BytesIO <python:io.BytesIO>` containing YAML.
    :type as_yaml: :class:`str <python:str>` / File-location /
      :class:`BytesIO <python:io.BytesIO>`

    :param target: The target to which the SPSS dataset should be written. Accepts either
      a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or
      :obj:`None <python:None>`. If :obj:`None <python:None>` will return a
      :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to
      :obj:`None <python:None>`.
    :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` /
      :obj:`None <python:None>`

    :param compress: If ``True``, will return data in the compressed ZSAV format. If
      ``False``, will return data in the standard SAV format. Defaults to ``False``.
    :type compress: :class:`bool <python:bool>`

    :param kwargs: Additional keyword arguments which will be passed onto
      :func:`from_json`.
    :type kwargs: :class:`dict <python:dict>`

    :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if
      ``target`` is :obj:`None <python:None>` or not a filename, otherwise
      :obj:`None <python:None>`
    :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>`

    """
    if checkers.is_bytesIO(as_yaml):
        # An in-memory buffer cannot be passed to ``open()``; ``safe_load``
        # accepts a readable stream, so parse it directly.
        as_dict = yaml.safe_load(as_yaml)
    elif checkers.is_file(as_yaml):
        with open(as_yaml, 'rb') as yaml_file:
            as_dict = yaml.safe_load(yaml_file)
    else:
        as_yaml = validators.string(as_yaml, allow_empty = False)
        as_dict = yaml.safe_load(as_yaml)

    as_json = json.dumps(as_dict)

    # Wrap the JSON text in a buffer: passing literal JSON strings to
    # ``pandas.read_json()`` is deprecated in recent pandas releases.
    json_buffer = BytesIO(as_json.encode('utf-8'))

    return from_json(json_buffer,
                     target = target,
                     compress = compress,
                     **kwargs)
def from_excel(as_excel,
               target: Optional[Union['PathLike[Any]', BytesIO]] = None,
               compress: bool = False,
               **kwargs):
    """Read Excel data with pandas and serialize it as an SPSS dataset.

    .. tip::

      Any additional keyword arguments are forwarded unchanged to
      :func:`pandas.read_excel() <pandas:pandas.read_excel>`.

    :param as_excel: The Excel data to convert.
    :type as_excel: :class:`str <python:str>` / File-location /
      :class:`BytesIO <python:io.BytesIO>` / :class:`bytes <python:bytes>` /
      :class:`ExcelFile <pandas.ExcelFile>`

    :param target: Where the SPSS dataset should be written: a filename/path, a
      :class:`BytesIO <python:io.BytesIO>` object, or :obj:`None <python:None>`.
      Passed through to :func:`from_dataframe`. Defaults to
      :obj:`None <python:None>`.
    :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` /
      :obj:`None <python:None>`

    :param compress: If ``True``, produce the compressed ZSAV format, otherwise
      the standard SAV format. Defaults to ``False``.
    :type compress: :class:`bool <python:bool>`

    :param kwargs: Additional keyword arguments for
      :func:`pandas.read_excel() <pandas:pandas.read_excel>`.
    :type kwargs: :class:`dict <python:dict>`

    :returns: Whatever :func:`from_dataframe` returns for the given ``target``.
    :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>`

    """
    frame = pandas.read_excel(as_excel, **kwargs)

    # Discard the unnamed leading column if the parsed frame contains one.
    unnamed = 'Unnamed: 0'
    if unnamed in frame:
        frame = frame.drop(columns = [unnamed])

    return from_dataframe(frame, target = target, compress = compress)