Source code for skillmodels.process_model

import numpy as np
import pandas as pd
from pandas import DataFrame

import skillmodels.transition_functions as tf
from skillmodels.check_model import check_model


def process_model(model_dict):
    """Check, clean, extend and transform the model specs.

    The dimensions, labels and anchoring information are derived first and
    passed, together with the raw specification, to ``check_model``. The
    remaining entries of the result are only built after that call.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`

    Returns:
        dict: nested dictionary of model specs. It has the following entries:

        - dimensions (dict): Dimensional information like n_states, n_periods,
          n_controls, n_mixtures. See :ref:`dimensions`.
        - labels (dict): Dict of lists with labels for the model quantities
          like factors, periods, controls, stagemap and stages.
          See :ref:`labels`
        - anchoring (dict): Information about anchoring. See :ref:`anchoring`
        - estimation_options (dict): Tuning parameters for the estimation,
          i.e. the defaults updated with the user supplied options.
        - transition_functions (tuple): Tuple of tuples with one entry per
          name in ``labels["transition_names"]`` (one per factor). Each inner
          tuple is (name_of_transition_function, callable).
        - update_info (pandas.DataFrame): DataFrame with one row per Kalman
          update needed in the likelihood function. See :ref:`update_info`.
        - normalizations (dict): Nested dictionary with information on
          normalized factor loadings and intercepts for each factor.
          See :ref:`normalizations`.

    """
    dimensions = get_dimensions(model_dict)
    label_info = _get_labels(model_dict, dimensions)
    anchoring_info = _process_anchoring(model_dict)

    # Validate before the more involved quantities below are constructed.
    check_model(model_dict, label_info, dimensions, anchoring_info)

    processed = {}
    processed["dimensions"] = dimensions
    processed["labels"] = label_info
    processed["anchoring"] = anchoring_info
    processed["estimation_options"] = _process_estimation_options(model_dict)
    processed["transition_functions"] = _get_transition_functions(
        label_info["transition_names"]
    )
    processed["update_info"] = _get_update_info(
        model_dict, dimensions, label_info, anchoring_info
    )
    processed["normalizations"] = _process_normalizations(
        model_dict, dimensions, label_info
    )
    return processed
def get_dimensions(model_dict):
    """Extract the dimensions of the model.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`.
            Must contain a non-empty "factors" dict in which every factor has
            a "measurements" list with one entry per period. The keys
            "controls" and "estimation_options" are optional.

    Returns:
        dict: Dimensional information with the entries n_states, n_periods,
        n_controls and n_mixtures. See :ref:`dimensions`.

    Raises:
        KeyError: If "factors" or a factor's "measurements" entry is missing.
        ValueError: If "factors" is empty (no period count can be derived).

    """
    factors = model_dict["factors"]
    all_n_periods = [len(spec["measurements"]) for spec in factors.values()]

    # "estimation_options" is optional everywhere else in this module, so a
    # missing entry must fall back to the default instead of raising KeyError.
    estimation_options = model_dict.get("estimation_options", {})

    dims = {
        "n_states": len(factors),
        # The factor with the longest measurement list determines the number
        # of periods; shorter lists are padded elsewhere.
        "n_periods": max(all_n_periods),
        # plus 1 for the constant
        "n_controls": len(model_dict.get("controls", [])) + 1,
        "n_mixtures": estimation_options.get("n_mixtures", 1),
    }
    return dims
def _get_labels(model_dict, dimensions):
    """Collect the labels of the model quantities.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`
        dimensions (dict): Dimensional information like n_states, n_periods,
            n_controls, n_mixtures. See :ref:`dimensions`.

    Returns:
        dict: Dict of lists with labels for the model quantities like
        factors, periods, controls, stagemap and stages. It also holds the
        transition function name of each factor under "transition_names",
        ordered like the sorted factors. See :ref:`labels`

    """
    n_periods = dimensions["n_periods"]

    # Without a user supplied stagemap each of the n_periods - 1 entries gets
    # its own stage.
    if "stagemap" in model_dict:
        stagemap = model_dict["stagemap"]
    else:
        stagemap = list(range(n_periods - 1))

    factor_names = sorted(model_dict["factors"])

    labels = {
        "factors": factor_names,
        "controls": ["constant"] + sorted(model_dict.get("controls", [])),
        "periods": list(range(n_periods)),
        "stagemap": stagemap,
        "stages": sorted(np.unique(stagemap)),
    }
    labels["transition_names"] = [
        model_dict["factors"][name]["transition_function"] for name in factor_names
    ]
    return labels


def _process_estimation_options(model_dict):
    """Merge user supplied estimation options into the defaults.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`

    Returns:
        dict: Tuning parameters for the estimation. User supplied entries
        take precedence over the defaults and "bounds_distance" is set to 0
        whenever "robust_bounds" is falsy. See :ref:`options`.

    """
    defaults = {
        "sigma_points_scale": 2,
        "robust_bounds": True,
        "bounds_distance": 1e-3,
        "clipping_lower_bound": -1e250,
        "clipping_upper_bound": None,
        "clipping_lower_hardness": 1,
        "clipping_upper_hardness": 1,
    }
    options = {**defaults, **model_dict.get("estimation_options", {})}

    if not options["robust_bounds"]:
        options["bounds_distance"] = 0

    return options


def _process_anchoring(model_dict):
    """Process the specification that governs how latent factors are anchored.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`

    Returns:
        dict: Dictionary with information about anchoring. Without an
        "anchoring" entry in model_dict the defaults (anchoring switched off)
        are returned unchanged. See :ref:`anchoring`

    """
    anchoring_info = {
        "anchoring": False,
        "outcomes": {},
        "factors": [],
        "free_controls": False,
        "free_constant": False,
        "free_loadings": False,
        "ignore_constant_when_anchoring": False,
    }

    if "anchoring" not in model_dict:
        return anchoring_info

    anchoring_info.update(model_dict["anchoring"])
    # The flag and the factor list are always derived here, so user supplied
    # values for these two keys are overwritten.
    anchoring_info["anchoring"] = True
    anchoring_info["factors"] = sorted(anchoring_info["outcomes"].keys())
    return anchoring_info


def _get_transition_functions(transition_names):
    """Collect the transition functions in a nested tuple.

    Args:
        transition_names (list): Names of transition functions for each
            factor. Each name is looked up as an attribute of the
            transition_functions module.

    Returns:
        tuple: Tuple with one inner tuple per entry of transition_names. Each
        inner tuple has the following two entries:
        (name_of_transition_function, callable).

    """
    pairs = []
    for func_name in transition_names:
        func = getattr(tf, func_name)
        pairs.append((func_name, func))
    return tuple(pairs)


def _get_update_info(model_dict, dimensions, labels, anchoring_info):
    """Construct a DataFrame with information on each Kalman update.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`
        dimensions (dict): Dimensional information like n_states, n_periods,
            n_controls, n_mixtures. See :ref:`dimensions`.
        labels (dict): Dict of lists with labels for the model quantities like
            factors, periods, controls, stagemap and stages. See :ref:`labels`
        anchoring_info (dict): Information about anchoring.
            See :ref:`anchoring`

    Returns:
        pandas.DataFrame: DataFrame with one row per Kalman update needed in
        the likelihood function. The index has the levels "period" and
        "variable"; there is one column per factor plus a "purpose" column
        that is either "measurement" or "anchoring". See :ref:`update_info`.

    """
    n_periods = dimensions["n_periods"]
    factor_names = labels["factors"]

    empty_index = pd.MultiIndex(
        levels=[[], []], codes=[[], []], names=["period", "variable"]
    )
    update_info = DataFrame(index=empty_index, columns=factor_names + ["purpose"])

    # Pad the measurement lists so that every factor has an entry per period.
    meas_by_factor = {
        name: fill_list(model_dict["factors"][name]["measurements"], [], n_periods)
        for name in factor_names
    }

    # Rows are created on first assignment, so the row order follows the
    # order of these loops.
    for period in labels["periods"]:
        for name in factor_names:
            for meas in meas_by_factor[name][period]:
                row = (period, meas)
                update_info.loc[row, name] = True
                update_info.loc[row, "purpose"] = "measurement"

        for factor in anchoring_info["factors"]:
            outcome = anchoring_info["outcomes"][factor]
            row = (period, f"{outcome}_{factor}")
            update_info.loc[row, factor] = True
            update_info.loc[row, "purpose"] = "anchoring"

    # Cells that were never set mean "this factor is not measured here".
    update_info.fillna(False, inplace=True)
    return update_info


def _process_normalizations(model_dict, dimensions, labels):
    """Process the normalizations of intercepts and factor loadings.

    Args:
        model_dict (dict): The model specification. See: :ref:`model_specs`
        dimensions (dict): Dimensional information like n_states, n_periods,
            n_controls, n_mixtures. See :ref:`dimensions`.
        labels (dict): Dict of lists with labels for the model quantities like
            factors, periods, controls, stagemap and stages. See :ref:`labels`

    Returns:
        normalizations (dict): Nested dictionary with information on
        normalized factor loadings and intercepts for each factor. Missing
        periods are padded with empty dicts. See :ref:`normalizations`.

    """
    n_periods = dimensions["n_periods"]

    processed = {}
    for name in labels["factors"]:
        user_norms = model_dict["factors"][name].get("normalizations", {})
        processed[name] = {
            norm_type: fill_list(user_norms.get(norm_type, []), {}, n_periods)
            for norm_type in ("loadings", "intercepts")
        }
    return processed
def fill_list(short_list, fill_value, length):
    """Extend a list to specified length by filling it with the fill_value.

    The input is copied first, so it is never modified. Any iterable is
    accepted, including ones without ``len`` such as generators.

    Args:
        short_list (iterable): The entries to start with.
        fill_value: The value appended for each missing position. The same
            object is appended every time; it is not copied.
        length (int): The desired length of the result.

    Returns:
        list: New list of the requested length.

    Raises:
        AssertionError: If short_list has more than ``length`` entries.

    Examples:
        >>> fill_list(["a"], "b", 3)
        ['a', 'b', 'b']

    """
    res = list(short_list)
    # Measure the copy, not the input: the input may not support len().
    diff = length - len(res)
    if diff < 0:
        # Raised explicitly (instead of an assert statement) so the check is
        # not stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError("short_list has to be shorter than length.")
    res += [fill_value] * diff
    return res
def get_period_measurements(update_info, period):
    """Return the row labels of update_info that belong to one period.

    Args:
        update_info (pandas.DataFrame): Frame whose index is looked up by
            period. Presumably the frame built by ``_get_update_info`` with
            the index levels "period" and "variable" -- verify against the
            caller.
        period: The period to select.

    Returns:
        list: The index labels of ``update_info.loc[period]``, or an empty
        list if period does not occur in the index.

    """
    if period not in update_info.index:
        return []
    return list(update_info.loc[period].index)