# Source code for aplusml.config

"""
Python Pydantic models for defining an APLUS simulation configuration.
"""
import os
from typing import Any, Dict, List, Literal, Optional, Union, get_args
import pandas as pd
from pydantic import BaseModel

# Distribution names accepted by `ConfigVariable.distribution`.
VALID_DISTRIBUTION_TYPES = Literal["bernoulli", "exponential", "binomial", "normal", "poisson", "uniform"]
# Kinds of variable accepted by `ConfigVariable.type`.
VALID_VARIABLE_TYPES = Literal["scalar", "resource", "property", "simulation"]
# Roles a state may have, accepted by `ConfigState.type`.
VALID_STATE_TYPES = Literal["start", "end", "intermediate"]
# IDs that `ConfigVariable.is_valid` accepts for a variable whose `type` is 'simulation'.
VALID_SIMULATION_VARIABLE_IDS = Literal["time_left_in_sim", "time_already_in_sim", "sim_current_timestep"]

class ConfigPatientSortPreferenceProperty(BaseModel):
    """
    Patient sort preference property section of config.

    Describes which property orders patients when a finite resource has to be
    allocated among them.

    :param str variable: Name of the property to sort patients by (it must be
        defined in the `variables` config section).
    :param bool is_ascending: True sorts in ascending order, False in
        descending order. Default: True
    """
    variable: str
    is_ascending: bool = True
class ConfigMetadata(BaseModel):
    """
    Metadata section of config.

    :param Optional[str] name: Name of the simulation.
    :param Optional[str] path_to_properties: Path to CSV file where each row is a patient,
        each column is a property. This is optional -- if you don't have a patient property
        CSV, you can leave this as `None`.
        Note: Only properties explicitly enumerated in the 'variables' config section will be imported.
    :param Optional[str] properties_col_for_patient_id: Column name in the CSV that contains
        unique patient IDs. Requires `path_to_properties` to be set.
    :param Optional[ConfigPatientSortPreferenceProperty] patient_sort_preference_property:
        Property to sort patients by when prioritizing allocation of a finite resource.
    """
    name: Optional[str] = None
    path_to_properties: Optional[str] = None
    properties_col_for_patient_id: Optional[str] = None
    patient_sort_preference_property: Optional[ConfigPatientSortPreferenceProperty] = None

    def is_valid(self) -> bool:
        """Return TRUE if the ConfigMetadata is valid, FALSE otherwise.

        Checks, in order:

        1. `properties_col_for_patient_id` is only set together with `path_to_properties`.
        2. `path_to_properties`, when set, exists on disk.
        3. `properties_col_for_patient_id`, when set, is a column of that CSV.

        Each failed check prints an ``ERROR - ...`` message before returning False.

        :return: True when every check passes, False otherwise.
        :rtype: bool
        """
        csv_path = self.path_to_properties
        id_column = self.properties_col_for_patient_id

        if id_column is not None and csv_path is None:
            print("ERROR - `path_to_properties` must be specified if `properties_col_for_patient_id` is specified")
            return False

        # Without a CSV there is nothing further to check.
        if csv_path is None:
            return True

        if not os.path.exists(csv_path):
            print(f"ERROR - `path_to_properties` (path='{csv_path}') does not exist")
            return False

        if id_column is not None:
            # Only the header row is needed to look up a column name, so avoid
            # parsing every patient row of a potentially large file.
            header = pd.read_csv(csv_path, nrows=0)
            if id_column not in header.columns:
                print(f"ERROR - `properties_col_for_patient_id` (col_name='{id_column}') not found in {csv_path}")
                return False

        return True
class ConfigVariable(BaseModel):
    """
    Variable section of config -- i.e. a dictionary mapping variable IDs to variables.

    :param str type: Type of variable. Must be one of: 'scalar', 'resource', 'property', 'simulation'.

        * `scalar`: A scalar value that is shared across all patients. It can be used to model
          things like the sensitivity of a screening test, the prevalence of a disease, etc.
          If set, then ``value`` must be specified.
        * `resource`: A finite resource that is shared across all patients. It can be decremented,
          incremented, and reset by the simulation. It can be used to model things like hospital
          beds, lab capacity, etc. If set, then ``init_amount``, ``max_amount``, ``refill_amount``,
          and ``refill_duration`` must be specified.
        * `property`: A property that is unique to each patient (i.e. each patient may have a
          different value for this property). It can be used to model things like the age of a
          patient, the gender of a patient, etc. It is sourced from ``column`` (loaded from a CSV
          file), ``value`` (a constant), or ``distribution`` (randomly sampled); `is_valid`
          rejects a property that sets more than one of these.
        * `simulation`: A simulation variable that is set and tracked automatically by APLUS.
          The variable's ID must be one of: 'time_left_in_sim', 'time_already_in_sim',
          'sim_current_timestep'.

    :param Optional[Union[int, float, bool, str, list, dict, set]] value: Scalar value.
        Must be a valid Python type. Use '!!set' tag for sets.
    :param Optional[int] init_amount: Initial amount of the resource.
    :param Optional[int] max_amount: Maximum amount of resource allowed.
    :param Optional[int] refill_amount: Amount added per refill.
    :param Optional[int] refill_duration: Time interval between refills.
    :param Optional[str] column: If this property is loaded from a CSV file, use the `column`
        parameter to specify the column name (e.g. 'y' or 'y_hat_dl'). Each row of the CSV will
        be a patient, and the value of this property for each patient will be the value of the
        column in the CSV file.
    :param Optional[VALID_DISTRIBUTION_TYPES] distribution: If randomly sampled, specify the
        distribution type.
    :param Optional[Union[int, float]] mean: Mean value for distribution.
    :param Optional[Union[int, float]] std: Standard deviation.
    :param Optional[Union[int, float]] start: Minimum value.
    :param Optional[Union[int, float]] end: Maximum value.
    """
    type: VALID_VARIABLE_TYPES = "scalar"
    # Scalar value (also usable as a constant for a 'property')
    value: Optional[Union[int, float, bool, str, list, dict, set]] = None
    # Resource settings
    init_amount: Optional[int] = None
    max_amount: Optional[int] = None
    refill_amount: Optional[int] = None
    refill_duration: Optional[int] = None
    # Property loaded from a CSV column
    column: Optional[str] = None
    # Property sampled from a distribution
    distribution: Optional[VALID_DISTRIBUTION_TYPES] = None
    mean: Optional[Union[int, float]] = None
    std: Optional[Union[int, float]] = None
    start: Optional[Union[int, float]] = None
    end: Optional[Union[int, float]] = None

    def is_valid(self, id: str) -> bool:
        """Return TRUE if the ConfigVariable is valid, FALSE otherwise.

        The checks depend on `type`; the first failing check prints an
        ``ERROR - ...`` message and returns False.

        :param str id: ID of this variable in the `variables` section. It is used in
            error messages and, for 'simulation' variables, is itself validated.
        :return: True when the variable passes the checks for its type.
        :rtype: bool
        """
        kind = self.type

        if kind == 'scalar':
            # Shared across all patients, so a concrete value is mandatory.
            if self.value is None:
                print(f"ERROR - `value` must be specified if `type` is 'scalar' for variable '{id}'")
                return False
            return True

        if kind == 'resource':
            # Every capacity/refill setting is required; report the first missing one.
            for required_field in ('init_amount', 'max_amount', 'refill_amount', 'refill_duration'):
                if getattr(self, required_field) is None:
                    print(f"ERROR - `{required_field}` must be specified if `type` is 'resource' for variable '{id}'")
                    return False
            return True

        if kind == 'property':
            # A per-patient property may come from at most one source.
            sources_set = sum(
                source is not None
                for source in (self.column, self.value, self.distribution)
            )
            if sources_set > 1:
                print(f"ERROR - Can only have one of ('column', 'value', 'distribution') keys for variable '{id}'")
                return False
            return True

        if kind == 'simulation':
            # Simulation variables are identified by ID, which must be a pre-defined one.
            if id not in get_args(VALID_SIMULATION_VARIABLE_IDS):
                print(f"ERROR - Invalid simulation variable name for variable '{id}'. Must be one of: {VALID_SIMULATION_VARIABLE_IDS}")
                return False
            return True

        return True
class ConfigUtility(BaseModel):
    """
    Utility within a State or Transition.

    :param value: Amount of utility. If str, it's evaluated as a Python expression.
    :param if_: A Python expression. If it evaluates to TRUE, then this utility's `value`
        is applied. Note: These 'if' statements are not mutually exclusive (i.e. if multiple
        conditions within the same State evaluate to TRUE, then they will simply be summed together)
    :param unit: Measurement unit. Default: ''.
    """
    value: Optional[Union[int, float, str]] = None
    if_: Optional[Union[str, bool]] = None
    unit: str = ''

    def __repr__(self) -> str:
        """Return a compact ``Utility(...)`` summary of the three fields."""
        amount, condition, unit = self.value, self.if_, self.unit
        return f"Utility(value={amount}, if_={condition}, unit={unit})"

    def is_valid(self, state_id: str) -> bool:
        """Return TRUE if the ConfigUtility is valid, FALSE otherwise.

        No checks are performed at present, so this always returns True.

        :param str state_id: ID of the state this utility belongs to (currently unused).
        """
        return True
class ConfigTransition(BaseModel):
    """
    Transition within a State.

    The transitions of a state are expected to follow one of these patterns
    (none of them is enforced by this class -- `is_valid` only checks `duration`):

    * All transitions have an 'if' condition (where if the last transition doesn't have an
      'if', it defaults to always TRUE)
    * All transitions have a 'prob' condition (where if the last transition doesn't have a
      'prob', it defaults to = 1 - (sum of other probs))
    * The first set of transitions have an 'if' condition, but the second set have a 'prob'

    :param str dest: ID of the destination state.
    :param Optional[str] label: Human-readable label for the transition. Default: "".
    :param Optional[Union[str, bool]] if_: A Python expression. If it evaluates to TRUE, then
        the transition is taken.
    :param Optional[Union[str, float, int]] prob: Probability of the transition.
    :param int duration: Number of timesteps to wait before transitions are evaluated. Default: 0
    :param Union[str, int, float, bool, List[ConfigUtility]] utilities: If str, float, or bool,
        it's evaluated as a Python expression. Default: [].
    :param Dict[str, float] resource_deltas: Changes to resource levels from taking this
        transition. Default: {}. [key] = name of a resource defined in `variables`.
        [value] = how much to change each resource level AS SOON AS this transition is taken
    """
    dest: str
    label: Optional[str] = ""
    if_: Optional[Union[str, bool]] = None
    prob: Optional[Union[str, float, int]] = None
    duration: int = 0
    utilities: Union[str, int, float, bool, List[ConfigUtility]] = []
    resource_deltas: Dict[str, float] = {}

    def is_valid(self, state_id: str) -> bool:
        """Return TRUE if the ConfigTransition is valid, FALSE otherwise.

        A transition is rejected (with a printed ``ERROR - ...`` message) when its
        `duration` is negative.

        :param str state_id: ID of the state that owns this transition; used in the
            error message.
        """
        if self.duration >= 0:
            return True
        print(f"ERROR - Transition for state='{state_id}' must have a non-negative duration, but has duration={self.duration}")
        return False
class ConfigState(BaseModel):
    """
    State section of config -- i.e. a dictionary mapping State IDs to States.

    :param str type: Whether the state is a start, end, or intermediate state within the
        workflow. Default: "intermediate".
    :param Optional[str] label: Human-readable label for the state. The field defaults to
        `None` in this model; any fallback to the state's key happens outside this class.
    :param List[ConfigTransition] transitions: List of possible state transitions.
    :param int duration: Number of timesteps to wait before transitions are evaluated. Default: 0
    :param Union[str, int, float, bool, List[ConfigUtility]] utilities: If str, float, or bool,
        it's evaluated as a Python expression. Default: [].
    :param Dict[str, float] resource_deltas: Changes to resource levels from entering this
        state. Default: {}. [key] = name of a resource defined in `variables`.
        [value] = how much to change each resource level AS SOON AS this state is hit
    """
    type: VALID_STATE_TYPES = "intermediate"
    label: Optional[str] = None
    transitions: List[ConfigTransition] = []
    duration: int = 0
    utilities: Union[str, int, float, bool, List[ConfigUtility]] = []
    resource_deltas: Dict[str, float] = {}

    def is_valid(self, id: str) -> bool:
        """Return TRUE if the ConfigState is valid, FALSE otherwise.

        Verifies that `type` is one of the allowed state types and that `duration`
        is not negative, printing an ``ERROR - ...`` message for the first failure.

        :param str id: ID of this state in the `states` section; used in error messages.
        """
        allowed_types = get_args(VALID_STATE_TYPES)
        if self.type not in allowed_types:
            print(f"ERROR - Invalid state type. Must be one of: {VALID_STATE_TYPES}")
            return False

        duration_ok = self.duration >= 0
        if not duration_ok:
            print(f"ERROR - State='{id}' must have a non-negative duration, but has duration={self.duration}")
        return duration_ok
class Config(BaseModel):
    """Specification for an APLUS simulation.

    `metadata` is required; `variables` and `states` default to empty dictionaries.
    Each value is a Pydantic model, as defined in this API.

    Instead of specifying this Config object directly, you can use the
    :meth:`~aplusml.sim.Simulation.create_from_yaml` method to load a YAML file that
    follows the schema in :doc:`/api/config`.

    Use via:

    .. code-block:: python

        config = Config(
            metadata=ConfigMetadata(...),
            variables={"my_variable": ConfigVariable(...)},
            states={"my_state": ConfigState(...)},
        )
        simulation = aplusml.Simulation.create_from_config(config)

    Args:
        metadata (ConfigMetadata): Metadata section of config.
        variables (Dict[str, ConfigVariable]): Variables section of config.
        states (Dict[str, ConfigState]): States section of config.
    """
    metadata: ConfigMetadata
    variables: Dict[str, ConfigVariable] = {}  # [key] = variable id, [value] = variable value
    states: Dict[str, ConfigState] = {}  # [key] = state id, [value] = state value

    def is_valid(self) -> bool:
        """Return TRUE if the Config is valid, FALSE otherwise.

        Validation runs in three stages and stops at the first failure, printing an
        ``ERROR - ...`` message that explains it:

        1. Metadata: type check plus `ConfigMetadata.is_valid`.
        2. Variables: type check plus `ConfigVariable.is_valid` for each variable;
           'time_left_in_sim' additionally requires a 'total_duration_in_sim' variable;
           the metadata's sort preference must name a 'property' variable or one of the
           patient attributes 'start_timestep' / 'id'.
        3. States: type check plus `ConfigState.is_valid` for each state; every
           `resource_deltas` key (on states and transitions) must be a declared variable;
           utilities and transitions must be valid; 'start' and 'intermediate' states
           need at least one transition while 'end' states must have none.

        Variable and state IDs are dictionary keys, so they are unique by construction
        and need no separate duplicate check.

        :return: True when all stages pass, False otherwise.
        :rtype: bool
        """
        #
        # Metadata
        metadata = self.metadata
        if not isinstance(metadata, ConfigMetadata):
            print(f"ERROR - Metadata must be of type `ConfigMetadata`, but is of type {type(metadata)}")
            return False
        if not metadata.is_valid():
            print(f"ERROR - Metadata is invalid. Metadata: {metadata}")
            return False

        #
        # Variables
        variables = self.variables
        for v_id, v in variables.items():
            # Check type
            if not isinstance(v, ConfigVariable):
                print(f"ERROR - Variable '{v_id}' must be of type `ConfigVariable`, but is of type {type(v)}")
                return False
            # Check internal validity
            if not v.is_valid(v_id):
                print(f"ERROR - Variable '{v_id}' is invalid")
                return False
            # 'time_left_in_sim' can't be calculated without 'total_duration_in_sim'
            if v.type == 'simulation' and v_id == 'time_left_in_sim' and 'total_duration_in_sim' not in variables:
                print("ERROR - A variable with the ID 'total_duration_in_sim' is required to use the simulation variable 'time_left_in_sim'")
                return False

        # Ensure 'patient_sort_preference_property' is an actual property
        sort_preference = metadata.patient_sort_preference_property
        if sort_preference:
            # The field normally holds a ConfigPatientSortPreferenceProperty model, which
            # exposes `variable` as an attribute (it has no dict-style `.get`). A plain
            # dict is still tolerated in case validation was bypassed.
            if isinstance(sort_preference, dict):
                sort_variable = sort_preference.get('variable')
            else:
                sort_variable = sort_preference.variable
            matched_variable = variables.get(sort_variable)
            is_property = matched_variable is not None and matched_variable.type == 'property'
            if not is_property and sort_variable not in ('start_timestep', 'id'):
                print("ERROR - The 'variable' key in metadata's 'patient_sort_preference_property' must be the name of a variable with the type 'property' or must be an attribute of the 'Patient' class")
                return False

        #
        # States
        for s_id, s in self.states.items():
            if not isinstance(s, ConfigState):
                print(f"ERROR - State '{s_id}' must be of type `ConfigState`, but is of type {type(s)}")
                return False
            # Check internal validity
            if not s.is_valid(s_id):
                print(f"ERROR - State '{s_id}' is invalid")
                return False
            # Ensure that all variables in resource_deltas are in the 'variables' section of the YAML
            for v_id in s.resource_deltas:
                if v_id not in variables:
                    print(f"ERROR - The variable {v_id} is used in a state's 'resource_deltas', but isn't listed in the 'variables' section")
                    return False
            # Utilities (only the list form holds ConfigUtility objects to validate)
            if isinstance(s.utilities, list):
                for u in s.utilities:
                    if not u.is_valid(s_id):
                        print(f"ERROR - Utility '{u}' is invalid")
                        return False
            # Transitions
            transitions = s.transitions
            for t in transitions:
                if not isinstance(t, ConfigTransition):
                    print(f"ERROR - Transition '{t}' must be of type `ConfigTransition`, but is of type {type(t)}")
                    return False
                if not t.is_valid(s_id):
                    print(f"ERROR - Transition '{t}' is invalid")
                    return False
                # Ensure that all variables in resource_deltas are in the 'variables' section of the YAML
                for v_id in t.resource_deltas:
                    if v_id not in variables:
                        print(f"ERROR - The variable {v_id} is used in a transition's 'resource_deltas', but isn't listed in the 'variables' section")
                        return False
                # Utilities
                if isinstance(t.utilities, list):
                    for u in t.utilities:
                        if not u.is_valid(s_id):
                            print(f"ERROR - Utility '{u}' is invalid")
                            return False
            # Enforce correct # of transitions for start/intermediate/end states
            type_ = s.type
            if type_ in ('start', 'intermediate') and len(transitions) == 0:
                print(f"ERROR - state '{s_id}' must have at 1+ transitions because it has type = '{type_}'")
                return False
            if type_ == 'end' and len(transitions) > 0:
                print(f"ERROR - state '{s_id}' must have exactly 0 transitions because it has type = 'end'")
                return False

        return True
if __name__ == "__main__":
    # Small demo: build a Config backed by a patient-property CSV, with two
    # column-sourced properties and a start/end state pair. The object is only
    # constructed here; it is not validated or run.
    config = Config(
        metadata=ConfigMetadata(
            name="Hello World Workflow",
            path_to_properties="patient_properties.csv",
            properties_col_for_patient_id="patient_id",
        ),
        variables={
            # Each property is read from the CSV column of the same name.
            property_name: ConfigVariable(type="property", column=property_name)
            for property_name in ("patient_property_1", "patient_property_2")
        },
        states={
            "start": ConfigState(type="start"),
            "end": ConfigState(type="end"),
        },
    )