# Source code for skillmodels.process_model

import numpy as np
import pandas as pd
from pandas import DataFrame

import skillmodels.transition_functions as tf
from skillmodels.check_model import check_model


def process_model(model_dict):
    """Check, clean, extend and transform the model specs.

    Check the completeness, consistency and validity of the model specifications.

    Set default values and extend the model specification where necessary.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`

    Returns:
        dict: nested dictionary of model specs. It has the following entries:
        - dimensions (dict): Dimensional information like n_states, n_periods,
        n_controls, n_mixtures. See :ref:`dimensions`.
        - labels (dict): Dict of lists with labels for the model quantities like
        factors, periods, controls, stagemap and stages. See :ref:`labels`
        - anchoring (dict): Information about anchoring. See :ref:`anchoring`
        - estimation_options (dict): Tuning parameters for the estimation, i.e.
        the user supplied options merged into the defaults. See :ref:`options`.
        - transition_functions (tuple): Tuple of tuples with one entry per name in
        labels["transition_names"]. Each inner tuple has the following two
        entries: (name_of_transition_function, callable).
        - update_info (pandas.DataFrame): DataFrame with one row per Kalman update
        needed in the likelihood function. See :ref:`update_info`.
        - normalizations (dict): Nested dictionary with information on normalized factor
        loadings and intercepts for each factor. See :ref:`normalizations`.

    """
    dims = get_dimensions(model_dict)
    labels = _get_labels(model_dict, dims)
    anchoring = _process_anchoring(model_dict)

    # check_model runs on the raw specification plus the derived dims, labels and
    # anchoring info before any of the remaining entries are constructed.
    check_model(model_dict, labels, dims, anchoring)

    return {
        "dimensions": dims,
        "labels": labels,
        "anchoring": anchoring,
        "estimation_options": _process_estimation_options(model_dict),
        "transition_functions": _get_transition_functions(labels["transition_names"]),
        "update_info": _get_update_info(model_dict, dims, labels, anchoring),
        "normalizations": _process_normalizations(model_dict, dims, labels),
    }


def get_dimensions(model_dict):
    """Extract the dimensions of the model.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`

    Returns:
        dict: Dimensional information like n_states, n_periods, n_controls,
            n_mixtures. See :ref:`dimensions`.

    Raises:
        KeyError: If ``model_dict`` has no "factors" entry or a factor has no
            "measurements" entry.
        ValueError: If ``model_dict["factors"]`` is empty (there is no longest
            measurement list to take the number of periods from).

    """
    factor_specs = model_dict["factors"]

    # The number of periods is the length of the longest measurement list.
    all_n_periods = [len(spec["measurements"]) for spec in factor_specs.values()]

    # "estimation_options" is optional here, exactly as it is in
    # _process_estimation_options; a missing entry means one mixture component.
    estimation_options = model_dict.get("estimation_options", {})

    return {
        "n_states": len(factor_specs),
        "n_periods": max(all_n_periods),
        # plus 1 for the constant
        "n_controls": len(model_dict.get("controls", [])) + 1,
        "n_mixtures": estimation_options.get("n_mixtures", 1),
    }


def _get_labels(model_dict, dimensions):
    """Collect the labels of the model quantities in one dictionary.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`
        dimensions (dict): Dimensional information like n_states, n_periods, n_controls,
            n_mixtures. See :ref:`dimensions`.

    Returns:
        dict: Dict of lists with labels for the model quantities like
        factors, periods, controls, stagemap and stages, plus the names of the
        transition functions (one per factor, in the order of the sorted factor
        names). See :ref:`labels`

    """
    n_periods = dimensions["n_periods"]

    # Without a user supplied stagemap there is one stage per transition,
    # i.e. n_periods - 1 stages.
    stagemap = model_dict.get("stagemap", list(range(n_periods - 1)))
    factor_names = sorted(model_dict["factors"])

    return {
        "factors": factor_names,
        # the constant always comes first among the controls
        "controls": ["constant", *sorted(model_dict.get("controls", []))],
        "periods": list(range(n_periods)),
        "stagemap": stagemap,
        "stages": sorted(np.unique(stagemap)),
        "transition_names": [
            model_dict["factors"][name]["transition_function"]
            for name in factor_names
        ],
    }


def _process_estimation_options(model_dict):
    """Merge the user supplied estimation options into the defaults.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`

    Returns:
        dict: Tuning parameters for the estimation. See :ref:`options`.

    """
    user_options = model_dict.get("estimation_options", {})

    options = {
        "sigma_points_scale": 2,
        "robust_bounds": True,
        "bounds_distance": 1e-3,
        "clipping_lower_bound": -1e250,
        "clipping_upper_bound": None,
        "clipping_lower_hardness": 1,
        "clipping_upper_hardness": 1,
    }
    # user supplied values take precedence over the defaults
    options.update(user_options)

    if options["robust_bounds"]:
        return options

    # Without robust bounds the distance is forced to zero, overriding both the
    # default and any user supplied value.
    options["bounds_distance"] = 0
    return options


def _process_anchoring(model_dict):
    """Build the anchoring information from the model specification.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`

    Returns:
        dict: Dictionary with information about anchoring. See :ref:`anchoring`

    """
    # Defaults describe a model without anchoring.
    info = {
        "anchoring": False,
        "outcomes": {},
        "factors": [],
        "free_controls": False,
        "free_constant": False,
        "free_loadings": False,
        "ignore_constant_when_anchoring": False,
    }

    if "anchoring" not in model_dict:
        return info

    info.update(model_dict["anchoring"])
    # These two entries are always derived and overwrite whatever the user
    # specification contained under the same keys.
    info["anchoring"] = True
    info["factors"] = sorted(info["outcomes"].keys())
    return info


def _get_transition_functions(transition_names):
    """Pair each transition function name with the callable of that name.

    The callables are looked up as attributes of the transition_functions
    module (imported as ``tf``).

    Args:
        transition_names (list): Names of transition functions for each factor.

    Returns:
        tuple: Tuple with one inner tuple per entry of transition_names. Each
            inner tuple has the following two entries:
            (name_of_transition_function, callable).

    """
    name_and_function = [(name, getattr(tf, name)) for name in transition_names]
    return tuple(name_and_function)


def _get_update_info(model_dict, dimensions, labels, anchoring_info):
    """Build the DataFrame that describes every Kalman update.

    The result is indexed by (period, variable). It has one boolean column per
    factor that marks the factors a variable belongs to, and a "purpose" column
    that is either "measurement" or "anchoring".

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`
        dimensions (dict): Dimensional information like n_states, n_periods, n_controls,
            n_mixtures. See :ref:`dimensions`.
        labels (dict): Dict of lists with labels for the model quantities like
            factors, periods, controls, stagemap and stages. See :ref:`labels`
        anchoring_info (dict): Information about anchoring. See :ref:`anchoring`

    Returns:
        pandas.DataFrame: DataFrame with one row per Kalman update needed in
            the likelihood function. See :ref:`update_info`.

    """
    factors = labels["factors"]
    n_periods = dimensions["n_periods"]

    # Start from an empty frame; rows are added by label assignment below.
    empty_index = pd.MultiIndex(
        levels=[[], []], codes=[[], []], names=["period", "variable"]
    )
    update_info = DataFrame(index=empty_index, columns=factors + ["purpose"])

    # Pad the measurement lists so that every factor has an entry per period.
    padded_measurements = {
        factor: fill_list(
            model_dict["factors"][factor]["measurements"], [], n_periods
        )
        for factor in factors
    }

    for period in labels["periods"]:
        # measurement rows of this period come first ...
        for factor in factors:
            for meas in padded_measurements[factor][period]:
                row = (period, meas)
                update_info.loc[row, factor] = True
                update_info.loc[row, "purpose"] = "measurement"
        # ... followed by one anchoring row per anchored factor
        for factor in anchoring_info["factors"]:
            outcome = anchoring_info["outcomes"][factor]
            row = (period, f"{outcome}_{factor}")
            update_info.loc[row, factor] = True
            update_info.loc[row, "purpose"] = "anchoring"

    # Cells that were never set mean "variable does not belong to this factor".
    update_info.fillna(False, inplace=True)
    return update_info


def _process_normalizations(model_dict, dimensions, labels):
    """Bring the normalizations of loadings and intercepts into a regular shape.

    For every factor and for both normalization types ("loadings" and
    "intercepts") the user supplied list is padded with empty dictionaries up to
    n_periods entries. Missing specifications are treated as empty lists.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`
        dimensions (dict): Dimensional information like n_states, n_periods, n_controls,
            n_mixtures. See :ref:`dimensions`.
        labels (dict): Dict of lists with labels for the model quantities like
            factors, periods, controls, stagemap and stages. See :ref:`labels`

    Returns:
        normalizations (dict): Nested dictionary with information on normalized factor
            loadings and intercepts for each factor. See :ref:`normalizations`.

    """
    processed = {}
    for factor in labels["factors"]:
        user_spec = model_dict["factors"][factor].get("normalizations", {})
        processed[factor] = {
            norm_type: fill_list(
                user_spec.get(norm_type, []), {}, dimensions["n_periods"]
            )
            for norm_type in ("loadings", "intercepts")
        }
    return processed


def fill_list(short_list, fill_value, length):
    """Extend a list to specified length by filling it with the fill_value.

    The input is never modified; a new list is returned.

    Args:
        short_list (iterable): The entries to start from. Any iterable is
            accepted, it is copied into a new list first.
        fill_value: Object that is appended until the result has ``length``
            entries. The very same object is repeated, it is not copied.
        length (int): Desired length of the result.

    Returns:
        list: New list of length ``length`` that starts with the entries of
            ``short_list``.

    Raises:
        AssertionError: If ``short_list`` has more than ``length`` entries.

    Examples:
    >>> fill_list(["a"], "b", 3)
    ['a', 'b', 'b']

    """
    res = list(short_list)
    # Measure the copy, so inputs without a length (e.g. iterators) work too.
    n_missing = length - len(res)
    # Raised explicitly instead of via ``assert`` so that the check is not
    # stripped under ``python -O``; the exception type is kept for callers.
    if n_missing < 0:
        raise AssertionError("short_list has to be shorter than length.")
    res.extend([fill_value] * n_missing)
    return res


def get_period_measurements(update_info, period):
    """List the index labels stored under ``period`` in ``update_info``.

    Args:
        update_info (pandas.DataFrame): Frame whose index is looked up by
            ``period``. Presumably the (period, variable) indexed frame built by
            ``_get_update_info`` -- verify against callers.
        period: Label to look up in the index of ``update_info``.

    Returns:
        list: The index of ``update_info.loc[period]`` as a list, or an empty
            list if ``period`` is not contained in the index.

    """
    if period not in update_info.index:
        return []
    return list(update_info.loc[period].index)
# Quick search
#
# Source code for skillmodels.process_model