Source code for mcglm.dependencies

"""
An extension of MCGLM library to provide three options for matrix linear 
predictor: mc_id, mc_ma, and mc_mixed.
"""
import numpy as np
import itertools

from patsy import dmatrix
from mcglm.utils import diagonal
from itertools import combinations



[docs]
def mc_id(data=None):
    """
    mc_id method retrieves a numpy diagonal matrix with data length of the
    original matrix
    """
    size = data.shape[0]
    return diagonal(size, np.ones(size))




[docs]
def mc_ma(id=None, time=None, data=None, order=1):
    """
    mc_ma method retrieves the Z components for matrix linear predictor
    associated with Autoregressive models(Feller, W. (1957). An introduction
    to probability theory and its applications / William Feller.
    Wiley New York, 2nd ed. edition.).

    To ilustrate, in a three-row example, a MA(1) produce the following
    dependence matrix:

    [[0, 1, 0],
    [1, 0, 1],
    [0, 1, 0]]

    A MA(2) would produce:

    [[0, 0, 1],
    [0, 0, 0],
    [1, 0, 0]]
    """

    def diagonal_position(indexes=None, k=1):
        new_indexes = tuple((indexes[0][:-k] + k, indexes[1][:-k]))
        return new_indexes

    try:
        data = data.sort_values(by=[id, time], ascending=True)
    except Exception as e:
        print("Parameter data must be a pandas DataFrame")
        raise e

    min_time = sorted(np.unique(data[time]))[:order]
    id = data[id].values
    time = data[time].values

    sample_size = len(id)
    ma = np.zeros((sample_size, sample_size))

    ma_vector = np.ones(sample_size - order)
    index_time = sorted(
        list(itertools.chain(*[np.where(time == value)[0] for value in min_time]))
    )

    for index in index_time[order:]:
        ma_vector[index - order] = 0

    indexes_pos = np.diag_indices(sample_size)
    indexes = diagonal_position(indexes_pos, order)

    ma[indexes] = ma_vector

    return ma + ma.T




[docs]
def mc_mixed(data=None, formula=None):
    """
    mc_mixed retrieves the components for matrix linear predictor associated
    with mixed models(Demidenko E (2013). Mixed Models: Theory and
    Applications with R. John Wiley & Sons. doi:10.1002/0471728438.).
    """
    design_matrix = dmatrix(formula, data=data, return_type="dataframe")
    val_columns = design_matrix.columns.tolist()

    # Find two-points
    positions = list()
    for column in val_columns:
        if ":" in column:
            positions.append(1)
        else:
            positions.append(0)
    positions = np.array(positions)

    design_matrix = design_matrix.values

    all_indexes = [(i, i) for i in set(positions)]

    if len(all_indexes) > 1:
        all_indexes = all_indexes + list(itertools.combinations(list(range(2)), 2))

    matrices = list()
    for tc in all_indexes:
        # matrix1 = np.repeat(design_matrix[:, tc[0]], size, axis=0).reshape(size, size).T
        # matrix2 = np.repeat(design_matrix[:, tc[1]], size, axis=0).reshape(size, size).T
        matrix1 = design_matrix[:, np.where(positions == tc[0])[0]]
        matrix2 = design_matrix[:, np.where(positions == tc[1])[0]]

        if tc[0] == tc[1]:
            matrices.append(np.matmul(matrix1, matrix2.T))
        else:
            matrices.append(
                np.matmul(matrix1, matrix2.T) + np.matmul(matrix2, matrix1.T)
            )

    return matrices