"""
Python Pydantic models for defining an APLUS simulation configuration.
"""
import os
from typing import Any, Dict, List, Literal, Optional, Union, get_args
import pandas as pd
from pydantic import BaseModel
# Closed sets of string values accepted by the config models below.
# Each is a `typing.Literal`, so it can be used directly as a field annotation
# and its members can be listed at runtime with `typing.get_args(...)`.

# Distribution names accepted by `ConfigVariable.distribution`
VALID_DISTRIBUTION_TYPES = Literal["bernoulli", "exponential", "binomial", "normal", "poisson", "uniform"]
# Kinds of variable accepted by `ConfigVariable.type`
VALID_VARIABLE_TYPES = Literal["scalar", "resource", "property", "simulation"]
# Roles a state can play in the workflow, accepted by `ConfigState.type`
VALID_STATE_TYPES = Literal["start", "end", "intermediate"]
# The only variable IDs allowed for variables whose `type` is 'simulation'
VALID_SIMULATION_VARIABLE_IDS = Literal["time_left_in_sim", "time_already_in_sim", "sim_current_timestep"]
class ConfigPatientSortPreferenceProperty(BaseModel):
    """
    Patient sort preference property section of config.

    Describes how patients are ordered when a finite resource has to be
    allocated among them.

    :param str variable: Name of a property (must be defined in the `variables` config section) used as the sort key when prioritizing patients for a finite resource
    :param bool is_ascending: Sort direction. True = ascending order, False = descending. Default: True
    """

    # Name of the property to sort patients by
    variable: str
    # Sort direction; ascending unless explicitly disabled
    is_ascending: bool = True
class ConfigVariable(BaseModel):
    """
    A single entry of the `variables` config section (that section is a dictionary mapping variable IDs to variables).

    :param str type: Type of variable. Must be one of: 'scalar', 'resource', 'property', 'simulation'. Default: 'scalar'.

        * `scalar`: A scalar value that is shared across all patients. It can be used to model things like the sensitivity of a screening test, the prevalence of a disease, etc. If set, then ``value`` must be specified.
        * `resource`: A finite resource that is shared across all patients. It can be decremented, incremented, and reset by the simulation. It can be used to model things like hospital beds, lab capacity, etc. If set, then ``init_amount``, ``max_amount``, ``refill_amount``, and ``refill_duration`` must be specified.
        * `property`: A property that is unique to each patient (i.e. each patient may have a different value for this property). It can be used to model things like the age of a patient, the gender of a patient, etc. Specify ``column`` (if loaded from a CSV file), ``value`` (if specified as a constant), or ``distribution`` (if randomly sampled).
        * `simulation`: A simulation variable that is set and tracked automatically by APLUS. Its ID must be one of: 'time_left_in_sim', 'time_already_in_sim', 'sim_current_timestep'.

    :param Optional[Union[int, float, bool, str, list, dict, set]] value: Scalar value. Must be a valid Python type. Use '!!set' tag for sets.
    :param Optional[int] init_amount: Initial amount of the resource.
    :param Optional[int] max_amount: Maximum amount of resource allowed.
    :param Optional[int] refill_amount: Amount added per refill.
    :param Optional[int] refill_duration: Time interval between refills.
    :param Optional[str] column: If this property is loaded from a CSV file, use the `column` parameter to specify the column name (e.g. 'y' or 'y_hat_dl'). Each row of the CSV will be a patient, and the value of this property for each patient will be the value of the column in the CSV file.
    :param Optional[VALID_DISTRIBUTION_TYPES] distribution: If randomly sampled, specify the distribution type.
    :param Optional[Union[int, float]] mean: Mean value for distribution.
    :param Optional[Union[int, float]] std: Standard deviation.
    :param Optional[Union[int, float]] start: Minimum value.
    :param Optional[Union[int, float]] end: Maximum value.
    """

    type: VALID_VARIABLE_TYPES = "scalar"
    # Constant value (required for 'scalar'; one of the options for 'property')
    value: Optional[Union[int, float, bool, str, list, dict, set]] = None
    # Settings required for 'resource' variables
    init_amount: Optional[int] = None
    max_amount: Optional[int] = None
    refill_amount: Optional[int] = None
    refill_duration: Optional[int] = None
    # CSV column name (one of the options for 'property')
    column: Optional[str] = None
    # Random sampling (one of the options for 'property') and its parameters
    distribution: Optional[VALID_DISTRIBUTION_TYPES] = None
    mean: Optional[Union[int, float]] = None
    std: Optional[Union[int, float]] = None
    start: Optional[Union[int, float]] = None
    end: Optional[Union[int, float]] = None

    def is_valid(self, id: str) -> bool:
        """Return TRUE if the ConfigVariable is valid, FALSE otherwise.

        The first problem found is printed as an ``ERROR - ...`` line.

        :param str id: The ID this variable is registered under in the `variables` section. Used in error messages, and checked against the allowed simulation variable IDs when `type` is 'simulation'.
        """
        kind = self.type

        if kind == 'scalar':
            # Shared across all patients, so a concrete value is mandatory
            if self.value is None:
                print(f"ERROR - `value` must be specified if `type` is 'scalar' for variable '{id}'")
                return False
            return True

        if kind == 'resource':
            # A finite shared resource needs all of its bookkeeping amounts;
            # they are checked in this fixed order and the first gap is reported
            for required in ('init_amount', 'max_amount', 'refill_amount', 'refill_duration'):
                if getattr(self, required) is None:
                    print(f"ERROR - `{required}` must be specified if `type` is 'resource' for variable '{id}'")
                    return False
            return True

        if kind == 'property':
            # Per-patient value: the possible sources are mutually exclusive.
            # NOTE: only "more than one source" is rejected here; a property
            # with no source at all passes this check.
            n_sources = sum(
                source is not None
                for source in (self.column, self.value, self.distribution)
            )
            if n_sources > 1:
                print(f"ERROR - Can only have one of ('column', 'value', 'distribution') keys for variable '{id}'")
                return False
            return True

        if kind == 'simulation':
            # Tracked automatically by APLUS, so only the pre-defined IDs are allowed
            if id not in get_args(VALID_SIMULATION_VARIABLE_IDS):
                print(f"ERROR - Invalid simulation variable name for variable '{id}'. Must be one of: {VALID_SIMULATION_VARIABLE_IDS}")
                return False

        return True
class ConfigUtility(BaseModel):
    """
    A utility attached to a State or a Transition.

    :param value: The utility's value. If str, it's evaluated as a Python expression.
    :param if_: A Python expression. If it evaluates to TRUE, then this utility's `value` is applied. Note: These 'if' statements are not mutually exclusive (i.e. if multiple conditions within the same State evaluate to TRUE, then they will simply be summed together)
    :param unit: Measurement unit. Default: ''.
    """

    value: Optional[Union[int, float, str]] = None
    if_: Optional[Union[str, bool]] = None
    unit: str = ''

    def __repr__(self):
        # Compact one-line form, used e.g. when a utility is interpolated into an error message
        return "Utility(value={}, if_={}, unit={})".format(self.value, self.if_, self.unit)

    def is_valid(self, state_id: str) -> bool:
        """Return TRUE if the ConfigUtility is valid, FALSE otherwise.

        No checks are performed at the moment, so this always returns TRUE.

        :param str state_id: ID of the state this utility belongs to (currently unused).
        """
        return True
class ConfigTransition(BaseModel):
    """
    One outgoing transition of a State.

    The transitions of a state can be conditioned in one of these ways:

    * All transitions have an 'if' condition (where if the last transition doesn't have an 'if', it defaults to always TRUE)
    * All transitions have a 'prob' condition (where if the last transition doesn't have a 'prob', it defaults to = 1 - (sum of other probs))
    * The first set of transitions have an 'if' condition, but the second set have a 'prob'

    :param str dest: ID of the destination state.
    :param Optional[str] label: Human-readable label for the transition. Default: "".
    :param Optional[Union[str, bool]] if_: A Python expression. If it evaluates to TRUE, then the transition is taken.
    :param Optional[Union[str, float, int]] prob: Probability of the transition.
    :param int duration: Number of timesteps to wait before transitions are evaluated. Default: 0
    :param Union[str, int, float, bool, List[ConfigUtility]] utilities: If str, float, or bool, it's evaluated as a Python expression. Default: [].
    :param Dict[str, float] resource_deltas: Changes to resource levels from taking this transition. Default: {}. [key] = name of a resource defined in `variables`. [value] = how much to change each resource level AS SOON AS this transition is taken
    """

    dest: str
    label: Optional[str] = ""
    if_: Optional[Union[str, bool]] = None
    prob: Optional[Union[str, float, int]] = None
    duration: int = 0
    utilities: Union[str, int, float, bool, List[ConfigUtility]] = []
    resource_deltas: Dict[str, float] = {}

    def is_valid(self, state_id: str) -> bool:
        """Return TRUE if the ConfigTransition is valid, FALSE otherwise.

        The only check is that `duration` is not negative; a violation is
        printed as an ``ERROR - ...`` line.

        :param str state_id: ID of the state that owns this transition (used in the error message).
        """
        if self.duration >= 0:
            return True
        print(f"ERROR - Transition for state='{state_id}' must have a non-negative duration, but has duration={self.duration}")
        return False
class ConfigState(BaseModel):
    """
    A single entry of the `states` config section (that section is a dictionary mapping State IDs to States).

    :param str type: Whether the state is a start, end, or intermediate state within the workflow. Default: "intermediate".
    :param Optional[str] label: Human-readable label for the state. Default: value of `key`.
    :param List[ConfigTransition] transitions: List of possible state transitions.
    :param int duration: Number of timesteps to wait before transitions are evaluated. Default: 0
    :param Union[str, int, float, bool, List[ConfigUtility]] utilities: If str, float, or bool, it's evaluated as a Python expression. Default: [].
    :param Dict[str, float] resource_deltas: Changes to resource levels from entering this state. Default: {}. [key] = name of a resource defined in `variables`. [value] = how much to change each resource level AS SOON AS this state is hit
    """

    type: VALID_STATE_TYPES = "intermediate"
    # Stored as None when omitted; the documented fallback to the state's key
    # is not applied by this model
    label: Optional[str] = None
    transitions: List[ConfigTransition] = []
    duration: int = 0
    utilities: Union[str, int, float, bool, List[ConfigUtility]] = []
    resource_deltas: Dict[str, float] = {}

    def is_valid(self, id: str) -> bool:
        """Return TRUE if the ConfigState is valid, FALSE otherwise.

        Checks that `type` is one of the allowed state types and that
        `duration` is not negative. The first problem found is printed as an
        ``ERROR - ...`` line.

        :param str id: The ID this state is registered under in the `states` section (used in error messages).
        """
        allowed_types = get_args(VALID_STATE_TYPES)
        if self.type not in allowed_types:
            print(f"ERROR - Invalid state type. Must be one of: {VALID_STATE_TYPES}")
            return False
        if self.duration < 0:
            print(f"ERROR - State='{id}' must have a non-negative duration, but has duration={self.duration}")
            return False
        return True
class Config(BaseModel):
    """Specification for an APLUS simulation.

    A config has three sections -- metadata, variables, and states. `metadata`
    must always be given; `variables` and `states` default to empty
    dictionaries. Each section is built from the Pydantic models defined in
    this API.

    Instead of specifying this Config object directly, you can use the :class:`~aplusml.sim.Simulation.create_from_yaml` method to load a YAML file that follows the schema in :doc:`/api/config`.

    Use via:

    .. code-block:: python

        config = Config(
            metadata=ConfigMetadata(...),
            variables={"my_variable": ConfigVariable(...)},
            states={"my_state": ConfigState(...)},
        )
        simulation = aplusml.Simulation.create_from_config(config)

    Args:
        metadata (ConfigMetadata): Metadata section of config.
        variables (Dict[str, ConfigVariable]): Variables section of config.
        states (Dict[str, ConfigState]): States section of config.
    """

    metadata: ConfigMetadata
    variables: Dict[str, ConfigVariable] = {}  # [key] = variable id, [value] = variable value
    states: Dict[str, ConfigState] = {}  # [key] = state id, [value] = state value

    def is_valid(self) -> bool:
        """Return TRUE if the Config is valid, FALSE otherwise.

        Sections are checked in order -- metadata, variables, the patient sort
        preference, then states -- and checking stops at the first problem,
        which is printed as one or more ``ERROR - ...`` lines.

        IDs are not checked for repeats: `variables` and `states` are
        dictionaries, so their keys are unique by construction.
        """
        return (
            self._metadata_is_valid()
            and self._variables_are_valid()
            and self._sort_preference_is_valid()
            and self._states_are_valid()
        )

    def _metadata_is_valid(self) -> bool:
        """Check that `metadata` is a `ConfigMetadata` that passes its own validation."""
        metadata = self.metadata
        if not isinstance(metadata, ConfigMetadata):
            print(f"ERROR - Metadata must be of type `ConfigMetadata`, but is of type {type(metadata)}")
            return False
        if not metadata.is_valid():
            print(f"ERROR - Metadata is invalid. Metadata: {metadata}")
            return False
        return True

    def _variables_are_valid(self) -> bool:
        """Check every variable's type, internal validity, and cross-variable requirements."""
        for v_id, v in self.variables.items():
            # Check type
            if not isinstance(v, ConfigVariable):
                print(f"ERROR - Variable '{v_id}' must be of type `ConfigVariable`, but is of type {type(v)}")
                return False
            # Check internal validity
            if not v.is_valid(v_id):
                print(f"ERROR - Variable '{v_id}' is invalid")
                return False
            # 'time_left_in_sim' can't be calculated without 'total_duration_in_sim'
            if (
                v.type == 'simulation'
                and v_id == 'time_left_in_sim'
                and 'total_duration_in_sim' not in self.variables
            ):
                print("ERROR - A variable with the ID 'total_duration_in_sim' is required to use the simulation variable 'time_left_in_sim'")
                return False
        return True

    def _sort_preference_is_valid(self) -> bool:
        """Check that metadata's 'patient_sort_preference_property' names a usable sort key.

        The key must be the ID of a variable of type 'property', or one of the
        built-in names 'start_timestep' / 'id'. A missing (falsy) preference
        is accepted.
        """
        preference: Any = self.metadata.patient_sort_preference_property
        if not preference:
            return True
        # The section may be a plain mapping or a Pydantic model exposing a
        # `variable` attribute (models have no `.get`), so support both.
        if isinstance(preference, dict):
            sort_variable = preference.get('variable')
        else:
            sort_variable = getattr(preference, 'variable', None)
        names_a_property = any(
            val.type == 'property' and key == sort_variable
            for key, val in self.variables.items()
        )
        if not names_a_property and sort_variable not in ['start_timestep', 'id']:
            print("ERROR - The 'variable' key in metadata's 'patient_sort_preference_property' must be the name of a variable with the type 'property' or must be an attribute of the 'Patient' class")
            return False
        return True

    def _resource_deltas_are_defined(self, resource_deltas: Dict[str, float], owner: str) -> bool:
        """Check that every key of `resource_deltas` is an ID in the `variables` section.

        `owner` is 'state' or 'transition' and only affects the error message.
        """
        for v_id in resource_deltas:
            if v_id not in self.variables:
                print(f"ERROR - The variable {v_id} is used in a {owner}'s 'resource_deltas', but isn't listed in the 'variables' section")
                return False
        return True

    @staticmethod
    def _utilities_are_valid(utilities: Union[str, int, float, bool, List[ConfigUtility]], state_id: str) -> bool:
        """Check each utility when `utilities` is a list; expression/scalar forms are not inspected."""
        if isinstance(utilities, list):
            for u in utilities:
                if not u.is_valid(state_id):
                    print(f"ERROR - Utility '{u}' is invalid")
                    return False
        return True

    def _states_are_valid(self) -> bool:
        """Check every state, its transitions, and the transition count its type requires."""
        for s_id, s in self.states.items():
            if not isinstance(s, ConfigState):
                print(f"ERROR - State '{s_id}' must be of type `ConfigState`, but is of type {type(s)}")
                return False
            # Check internal validity
            if not s.is_valid(s_id):
                print(f"ERROR - State '{s_id}' is invalid")
                return False
            # State-level resource deltas and utilities
            if not self._resource_deltas_are_defined(s.resource_deltas, 'state'):
                return False
            if not self._utilities_are_valid(s.utilities, s_id):
                return False
            # Transitions
            transitions: List[ConfigTransition] = s.transitions
            for t in transitions:
                if not isinstance(t, ConfigTransition):
                    print(f"ERROR - Transition '{t}' must be of type `ConfigTransition`, but is of type {type(t)}")
                    return False
                if not t.is_valid(s_id):
                    print(f"ERROR - Transition '{t}' is invalid")
                    return False
                if not self._resource_deltas_are_defined(t.resource_deltas, 'transition'):
                    return False
                if not self._utilities_are_valid(t.utilities, s_id):
                    return False
            # Enforce correct # of transitions for start/intermediate/end states
            if s.type in ('start', 'intermediate') and len(transitions) == 0:
                print(f"ERROR - state '{s_id}' must have at 1+ transitions because it has type = '{s.type}'")
                return False
            if s.type == 'end' and len(transitions) > 0:
                print(f"ERROR - state '{s_id}' must have exactly 0 transitions because it has type = 'end'")
                return False
        return True
if __name__ == "__main__":
    # Minimal example: two per-patient properties read from CSV columns and a
    # workflow made of one start state and one end state.
    config = Config(
        metadata=ConfigMetadata(
            name="Hello World Workflow",
            path_to_properties="patient_properties.csv",
            properties_col_for_patient_id="patient_id",
        ),
        # Each property variable shares its ID with the CSV column it is read from
        variables={
            column_name: ConfigVariable(type="property", column=column_name)
            for column_name in ("patient_property_1", "patient_property_2")
        },
        # Each state is keyed by its own type
        states={
            state_type: ConfigState(type=state_type)
            for state_type in ("start", "end")
        },
    )