"""Read write HDF."""
import h5py
import pandas
import numpy
from typing import Type, Union, List
def hdf2mat(filepath: str, dtype: Type = float) -> pandas.DataFrame:
    """Read an hdf file generated by the hdfrw.R mat2hdf function into a data frame.

    The file is expected to hold a ``matrix`` dataset and, optionally,
    ``colnames`` and ``rownames`` datasets holding the labels.

    Note that due to how python and R handle data differently, ``colnames`` become
    the index and ``rownames`` become the columns, and the matrix is also tacitly
    transposed.

    :param filepath: path of hdf file
    :param dtype: type of data; default is float
    :return: a pandas data frame
    """
    def _as_text(labels):
        # Labels normally come back as bytes, but tolerate values that are
        # already text instead of failing on a missing ``decode``.
        return [x.decode() if isinstance(x, bytes) else str(x) for x in labels]

    with h5py.File(filepath, 'r') as f:
        # ``[:]`` loads the whole dataset into memory, so the frame stays
        # usable after the file is closed.
        df = pandas.DataFrame(f['matrix'][:], dtype=dtype, copy=False)
        if 'colnames' in f:
            df.index = _as_text(f['colnames'][:])
        if 'rownames' in f:
            df.columns = _as_text(f['rownames'][:])
    return df
def mat2hdf(data: Union[pandas.DataFrame, numpy.ndarray, List[str]], filepath: str) -> None:
    """Write data to an hdf file which can be read by the hdfrw.R hdf2mat function.

    A data frame is written as a ``matrix`` dataset plus ``colnames`` (taken from
    the index) and ``rownames`` (taken from the columns). A numpy array or a list
    of strings is written as the ``matrix`` dataset only. Labels and list items
    are stored ASCII-encoded.

    :param data: data frame, numpy array or list of strings to be written
    :param filepath: path of hdf file to be written
    :return: None
    :raises TypeError: if ``data`` is not one of the supported types
    """
    # Build every dataset before opening the file: mode 'w' creates/truncates
    # the target, so validation or encoding failures must happen first to avoid
    # leaving an empty or clobbered file behind.
    if isinstance(data, pandas.DataFrame):
        datasets = {
            'matrix': data.values,
            # str() lets non-string labels (e.g. a default integer index) be written.
            'colnames': [str(x).encode('ASCII') for x in data.index.tolist()],
            'rownames': [str(x).encode('ASCII') for x in data.columns.tolist()],
        }
    elif isinstance(data, numpy.ndarray):
        datasets = {'matrix': data}
    elif isinstance(data, list):
        datasets = {'matrix': [x.encode('ASCII') for x in data]}
    else:
        raise TypeError("only pandas.DataFrame, numpy.ndarray and list of str are supported.")

    with h5py.File(filepath, 'w') as f:
        for name, value in datasets.items():
            f[name] = value