import itertools
from typing import Any, List, Mapping, Tuple
from gymnasium import ActionWrapper, Env, ObservationWrapper, RewardWrapper, spaces, Wrapper
import numpy as np
import pandas as pd
try:
from ray.rllib.env import MultiAgentEnv
except (ModuleNotFoundError, ImportError):
from gymnasium import Env as MultiAgentEnv
from citylearn.citylearn import CityLearnEnv
from citylearn.building import Building
class ClippedObservationWrapper(ObservationWrapper):
"""Wrapper for observations min-max and periodic normalization.
Observations are clipped to be within the observation space limits.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv) -> None:
super().__init__(env)
self.env: CityLearnEnv
def observation(self, observations: List[List[float]]) -> List[List[float]]:
"""Returns normalized observations."""
for i, (o, s) in enumerate(zip(observations, self.observation_space)):
for j, (o_, l, u) in enumerate(zip(o, s.low, s.high)):
observations[i][j] = min(max(o_, l), u)
return observations
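# Illustrative usage sketch (doctest-style comments only, not executed). The dataset
# schema name below is an assumption; substitute any schema available in your installation.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import ClippedObservationWrapper
#     >>> env = ClippedObservationWrapper(CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True))
#     >>> observations, _ = env.reset()
#     >>> # every value is clipped to the [low, high] limits of its observation space dimension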
class NormalizedObservationWrapper(ObservationWrapper):
"""Wrapper for observations min-max and periodic normalization.
Temporal observations including `hour`, `day_type` and `month` are periodically normalized using sine/cosine
transformations and then all observations are min-max normalized between 0 and 1.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv) -> None:
super().__init__(env)
self.env: CityLearnEnv
@property
def shared_observations(self) -> List[str]:
"""Names of common observations across all buildings i.e. observations that have the same value irrespective of the building.
Includes extra three observations added during cyclic transformation of :code:`hour`, :code:`day_type` and :code:`month`.
"""
shared_observations = []
periodic_observation_names = list(Building.get_periodic_observation_metadata().keys())
for o in self.env.shared_observations:
if o in periodic_observation_names:
shared_observations += [f'{o}_cos', f'{o}_sin']
else:
shared_observations.append(o)
return shared_observations
@property
def observation_names(self) -> List[List[str]]:
"""Names of returned observations.
        Includes the extra observations added during the cyclic transformation of :code:`hour`, :code:`day_type` and :code:`month`.
Notes
-----
        If `central_agent` is True, a list containing one sublist of all building observation names is returned in the same order as `buildings`.
        The `shared_observations` names are only included in the first building's observation names. If `central_agent` is False, a list of sublists
        is returned where each sublist contains a single building's observation names and the sublists are in the same order as `buildings`.
"""
if self.env.unwrapped.central_agent:
observation_names = []
for i, b in enumerate(self.env.unwrapped.buildings):
for k, _ in b.observations(normalize=True, periodic_normalization=True).items():
if i == 0 or k not in self.shared_observations or k not in observation_names:
observation_names.append(k)
else:
pass
observation_names = [observation_names]
else:
observation_names = [list(b.observations(normalize=True, periodic_normalization=True).keys()) for b in self.env.buildings]
return observation_names
@property
def observation_space(self) -> List[spaces.Box]:
"""Returns observation space for normalized observations."""
low_limit = []
high_limit = []
if self.env.unwrapped.central_agent:
shared_observations = []
for i, b in enumerate(self.env.unwrapped.buildings):
s = b.estimate_observation_space(normalize=True)
o = b.observations(normalize=True, periodic_normalization=True)
for k, lv, hv in zip(o, s.low, s.high):
if i == 0 or k not in self.shared_observations or k not in shared_observations:
low_limit.append(lv)
high_limit.append(hv)
else:
pass
if k in self.shared_observations and k not in shared_observations:
shared_observations.append(k)
else:
pass
observation_space = [spaces.Box(low=np.array(low_limit), high=np.array(high_limit), dtype=np.float32)]
else:
observation_space = [b.estimate_observation_space(normalize=True) for b in self.env.unwrapped.buildings]
return observation_space
def observation(self, observations: List[List[float]]) -> List[List[float]]:
"""Returns normalized observations."""
if self.env.unwrapped.central_agent:
norm_observations = []
shared_observations = []
for i, b in enumerate(self.env.unwrapped.buildings):
for k, v in b.observations(normalize=True, periodic_normalization=True).items():
if i==0 or k not in self.shared_observations or k not in shared_observations:
norm_observations.append(v)
else:
pass
if k in self.shared_observations and k not in shared_observations:
shared_observations.append(k)
else:
pass
norm_observations = [norm_observations]
else:
norm_observations = [list(b.observations(normalize=True, periodic_normalization=True).values()) for b in self.env.unwrapped.buildings]
return norm_observations
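# Illustrative usage sketch (doctest-style comments only). Wrapped observations are
# min-max scaled to approximately [0, 1] and periodic observations are reported as
# *_sin/*_cos pairs. The schema name and the 'hour' observation being active are assumptions.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import NormalizedObservationWrapper
#     >>> env = NormalizedObservationWrapper(CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True))
#     >>> observations, _ = env.reset()
#     >>> names = env.observation_names[0]  # includes e.g. 'hour_cos' and 'hour_sin'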
class NormalizedActionWrapper(ActionWrapper):
"""Wrapper for action min-max normalization.
    All actions are min-max normalized between 0 and 1.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv) -> None:
super().__init__(env)
self.env: CityLearnEnv
@property
def action_space(self) -> List[spaces.Box]:
"""Returns action space for normalized actions."""
low_limit = []
high_limit = []
if self.env.unwrapped.central_agent:
for b in self.env.unwrapped.buildings:
low_limit += [0.0]*b.action_space.low.size
high_limit += [1.0]*b.action_space.high.size
action_space = [spaces.Box(low=np.array(low_limit), high=np.array(high_limit), dtype=np.float32)]
else:
action_space = [spaces.Box(
low=np.array([0.0]*b.action_space.low.size),
high=np.array([1.0]*b.action_space.high.size),
dtype=np.float32)
for b in self.env.unwrapped.buildings]
return action_space
def action(self, actions: List[float]) -> List[List[float]]:
"""Returns denormalized actions."""
transformed_actions = []
for i, s in enumerate(self.env.unwrapped.action_space):
transformed_actions_ = []
for j, (l, h) in enumerate(zip(s.low, s.high)):
a = actions[i][j]*(h - l) + l
transformed_actions_.append(a)
transformed_actions.append(transformed_actions_)
return transformed_actions
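# Illustrative usage sketch (doctest-style comments only). The agent acts in the
# normalized [0, 1] space and the wrapper maps actions back to the true limits before
# stepping. The schema name is an assumption.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import NormalizedActionWrapper
#     >>> env = NormalizedActionWrapper(CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True))
#     >>> observations, _ = env.reset()
#     >>> actions = [env.action_space[0].sample()]  # values in [0, 1]
#     >>> observations, reward, terminated, truncated, info = env.step(actions)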
class NormalizedSpaceWrapper(Wrapper):
"""Wrapper for normalized observation and action spaces.
Wraps `env` in :py:class:`citylearn.wrappers.NormalizedObservationWrapper` and :py:class:`citylearn.wrappers.NormalizedActionWrapper`.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
env = NormalizedObservationWrapper(env)
env = NormalizedActionWrapper(env)
super().__init__(env)
self.env: CityLearnEnv
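# Illustrative usage sketch (doctest-style comments only), equivalent to applying
# NormalizedObservationWrapper and NormalizedActionWrapper in sequence. The schema
# name is an assumption.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import NormalizedSpaceWrapper
#     >>> env = NormalizedSpaceWrapper(CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True))
#     >>> observations, _ = env.reset()
#     >>> actions = [s.sample() for s in env.action_space]  # one [0, 1] array per agent
#     >>> observations, reward, terminated, truncated, info = env.step(actions)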
class DiscreteObservationWrapper(ObservationWrapper):
"""Wrapper for observation space discretization.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
default_bin_size: int, default = 10
The default number of bins if `bin_sizes` is unspecified for any active building observation.
"""
def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None):
super().__init__(env)
self.env: CityLearnEnv
        assert bin_sizes is None or len(bin_sizes) == len(self.env.unwrapped.buildings), 'length of bin_sizes must equal number of buildings.'
self.bin_sizes = [{} for _ in self.env.unwrapped.buildings] if bin_sizes is None else bin_sizes
self.default_bin_size = 10 if default_bin_size is None else default_bin_size
self.bin_sizes = [
{o: s.get(o, self.default_bin_size) for o in b.active_observations}
for b, s in zip(self.env.unwrapped.buildings, self.bin_sizes)
]
@property
def observation_space(self) -> List[spaces.MultiDiscrete]:
"""Returns observation space for discretized observations."""
if self.env.unwrapped.central_agent:
bin_sizes = []
shared_observations = []
for i, b in enumerate(self.bin_sizes):
for k, v in b.items():
if i == 0 or k not in self.env.shared_observations or k not in shared_observations:
bin_sizes.append(v)
else:
pass
if k in self.env.shared_observations and k not in shared_observations:
shared_observations.append(k)
else:
pass
observation_space = [spaces.MultiDiscrete(bin_sizes)]
else:
observation_space = [spaces.MultiDiscrete(list(b.values())) for b in self.bin_sizes]
return observation_space
def observation(self, observations: List[List[float]]) -> np.ndarray:
"""Returns discretized observations."""
transformed_observations = []
for i, (cs, ds) in enumerate(zip(self.env.unwrapped.observation_space, self.observation_space)):
transformed_observations_ = []
for j, (ll, hl, b) in enumerate(zip(cs.low, cs.high, ds)):
o = np.digitize(observations[i][j], np.linspace(ll, hl, b.n), right=True)
transformed_observations_.append(o)
transformed_observations.append(transformed_observations_)
return transformed_observations
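# Illustrative usage sketch (doctest-style comments only). Bin sizes are given per
# building for named observations; any other active observation falls back to
# `default_bin_size`. The schema name and the 'hour' observation being active are assumptions.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import DiscreteObservationWrapper
#     >>> base_env = CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True)
#     >>> bin_sizes = [{'hour': 24} for _ in base_env.buildings]  # one mapping per building
#     >>> env = DiscreteObservationWrapper(base_env, bin_sizes=bin_sizes, default_bin_size=12)
#     >>> env.observation_space  # [MultiDiscrete(...)] since central_agent=True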
class DiscreteActionWrapper(ActionWrapper):
"""Wrapper for action space discretization.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
default_bin_size: int, default = 10
The default number of bins if `bin_sizes` is unspecified for any active building action.
"""
def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None):
super().__init__(env)
self.env: CityLearnEnv
        assert bin_sizes is None or len(bin_sizes) == len(self.env.unwrapped.buildings), 'length of bin_sizes must equal number of buildings.'
self.bin_sizes = [{} for _ in self.env.unwrapped.buildings] if bin_sizes is None else bin_sizes
self.default_bin_size = 10 if default_bin_size is None else default_bin_size
self.bin_sizes = [
{a: s.get(a, self.default_bin_size) for a in b.active_actions}
for b, s in zip(self.env.unwrapped.buildings, self.bin_sizes)
]
@property
def action_space(self) -> List[spaces.MultiDiscrete]:
"""Returns action space for discretized actions."""
if self.env.unwrapped.central_agent:
bin_sizes = []
for b in self.bin_sizes:
for _, v in b.items():
bin_sizes.append(v)
action_space = [spaces.MultiDiscrete(bin_sizes)]
else:
action_space = [spaces.MultiDiscrete(list(b.values())) for b in self.bin_sizes]
return action_space
def action(self, actions: List[float]) -> List[List[float]]:
"""Returns undiscretized actions."""
transformed_actions = []
for i, (cs, ds) in enumerate(zip(self.env.unwrapped.action_space, self.action_space)):
transformed_actions_ = []
for j, (ll, hl, b) in enumerate(zip(cs.low, cs.high, ds)):
a = np.linspace(ll, hl, b.n)[actions[i][j]]
transformed_actions_.append(a)
transformed_actions.append(transformed_actions_)
return transformed_actions
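# Illustrative usage sketch (doctest-style comments only). Sampled integer bin indices
# are mapped back to evenly spaced values within each action's limits before stepping.
# The schema name and the 'electrical_storage' action being active are assumptions.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import DiscreteActionWrapper
#     >>> base_env = CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True)
#     >>> bin_sizes = [{'electrical_storage': 11} for _ in base_env.buildings]
#     >>> env = DiscreteActionWrapper(base_env, bin_sizes=bin_sizes)
#     >>> actions = [env.action_space[0].sample()]  # integer bin indices
#     >>> observations, reward, terminated, truncated, info = env.step(actions)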
class DiscreteSpaceWrapper(Wrapper):
"""Wrapper for observation and action spaces discretization.
Wraps `env` in :py:class:`citylearn.wrappers.DiscreteObservationWrapper` and :py:class:`citylearn.wrappers.DiscreteActionWrapper`.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
    observation_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
    action_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
    default_observation_bin_size: int, default = 10
        The default number of bins if `observation_bin_sizes` is unspecified for any active building observation.
    default_action_bin_size: int, default = 10
        The default number of bins if `action_bin_sizes` is unspecified for any active building action.
"""
def __init__(self, env: CityLearnEnv, observation_bin_sizes: List[Mapping[str, int]] = None, action_bin_sizes: List[Mapping[str, int]] = None, default_observation_bin_size: int = None, default_action_bin_size: int = None):
env = DiscreteObservationWrapper(env, bin_sizes=observation_bin_sizes, default_bin_size=default_observation_bin_size)
env = DiscreteActionWrapper(env, bin_sizes=action_bin_sizes, default_bin_size=default_action_bin_size)
super().__init__(env)
self.env: CityLearnEnv
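# Illustrative usage sketch (doctest-style comments only): discretizes both spaces in
# one call. The schema name is an assumption; unspecified bin sizes fall back to the defaults.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import DiscreteSpaceWrapper
#     >>> env = DiscreteSpaceWrapper(
#     ...     CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True),
#     ...     default_observation_bin_size=12,
#     ...     default_action_bin_size=11,
#     ... )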
class TabularQLearningObservationWrapper(ObservationWrapper):
"""Observation wrapper for :py:class:`citylearn.agents.q_learning.TabularQLearning` agent.
Wraps `env` in :py:class:`citylearn.wrappers.DiscreteObservationWrapper`.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
default_bin_size: int, default = 10
The default number of bins if `bin_sizes` is unspecified for any active building observation.
"""
def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None) -> None:
env = DiscreteObservationWrapper(env, bin_sizes=bin_sizes, default_bin_size=default_bin_size)
super().__init__(env)
self.env: CityLearnEnv
self.combinations = self.set_combinations()
@property
def observation_space(self) -> List[spaces.Discrete]:
"""Returns observation space for discretized observations."""
observation_space = []
for c in self.combinations:
observation_space.append(spaces.Discrete(len(c) - 1))
return observation_space
def observation(self, observations: List[List[int]]) -> List[List[int]]:
"""Returns discretized observations."""
return [[c.index(tuple(o))] for o, c in zip(observations, self.combinations)]
def set_combinations(self) -> List[List[int]]:
"""Returns all combinations of discrete observations."""
combs_list = []
for s in self.env.observation_space:
options = [list(range(d.n + 1)) for d in s]
combs = list(itertools.product(*options))
combs_list.append(combs)
return combs_list
class TabularQLearningActionWrapper(ActionWrapper):
"""Action wrapper for :py:class:`citylearn.agents.q_learning.TabularQLearning` agent.
Wraps `env` in :py:class:`citylearn.wrappers.DiscreteActionWrapper`.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
default_bin_size: int, default = 10
The default number of bins if `bin_sizes` is unspecified for any active building action.
"""
def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None) -> None:
env = DiscreteActionWrapper(env, bin_sizes=bin_sizes, default_bin_size=default_bin_size)
super().__init__(env)
self.env: CityLearnEnv
self.combinations = self.set_combinations()
@property
def action_space(self) -> List[spaces.Discrete]:
"""Returns action space for discretized actions."""
action_space = []
for c in self.combinations:
action_space.append(spaces.Discrete(len(c)))
return action_space
def action(self, actions: List[float]) -> List[List[int]]:
"""Returns discretized actions."""
return [list(c[a[0]]) for a, c in zip(actions, self.combinations)]
def set_combinations(self) -> List[List[int]]:
"""Returns all combinations of discrete actions."""
combs_list = []
for s in self.env.action_space:
options = [list(range(d.n)) for d in s]
combs = list(itertools.product(*options))
combs_list.append(combs)
return combs_list
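# Worked sketch of the combination encoding above (comments only). Suppose one agent's
# discrete action space from DiscreteActionWrapper is MultiDiscrete([3, 2]); these
# numbers are hypothetical and chosen only to keep the product small.
#
#     >>> import itertools
#     >>> combinations = list(itertools.product(range(3), range(2)))
#     >>> len(combinations)  # the wrapper then exposes this agent's space as Discrete(6)
#     6
#     >>> list(combinations[4])  # action([[4]]) maps index 4 back to the bin indices [2, 0]
#     [2, 0]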
class TabularQLearningWrapper(Wrapper):
"""Wrapper for :py:class:`citylearn.agents.q_learning.TabularQLearning` agent.
Discretizes observation and action spaces.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
    observation_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
    action_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
    default_observation_bin_size: int, default = 10
        The default number of bins if `observation_bin_sizes` is unspecified for any active building observation.
    default_action_bin_size: int, default = 10
        The default number of bins if `action_bin_sizes` is unspecified for any active building action.
"""
def __init__(self, env: CityLearnEnv, observation_bin_sizes: List[Mapping[str, int]] = None, action_bin_sizes: List[Mapping[str, int]] = None, default_observation_bin_size: int = None, default_action_bin_size: int = None):
env = TabularQLearningObservationWrapper(env, bin_sizes=observation_bin_sizes, default_bin_size=default_observation_bin_size)
env = TabularQLearningActionWrapper(env, bin_sizes=action_bin_sizes, default_bin_size=default_action_bin_size)
super().__init__(env)
self.env: CityLearnEnv
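# Illustrative usage sketch (doctest-style comments only). The schema name is an
# assumption, and the agent's constructor signature and `learn(episodes=...)` call are
# assumptions about the citylearn.agents.q_learning.TabularQLearning agent referenced above.
#
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import TabularQLearningWrapper
#     >>> from citylearn.agents.q_learning import TabularQLearning
#     >>> env = TabularQLearningWrapper(
#     ...     CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True),
#     ...     default_observation_bin_size=12,
#     ...     default_action_bin_size=11,
#     ... )
#     >>> model = TabularQLearning(env)
#     >>> model.learn(episodes=10)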
class StableBaselines3ObservationWrapper(ObservationWrapper):
"""Observation wrapper for :code:`stable-baselines3` algorithms.
Wraps observations so that they are returned in a 1-dimensional numpy array.
This wrapper is only compatible when the environment is controlled by a central agent
i.e., :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = True.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
assert env.unwrapped.central_agent, 'StableBaselines3ObservationWrapper is compatible only when env.central_agent = True.'\
' First set env.central_agent = True to use this wrapper.'
super().__init__(env)
self.env: CityLearnEnv
@property
def observation_space(self) -> spaces.Box:
"""Returns single spaces Box object."""
return self.env.observation_space[0]
def observation(self, observations: List[List[float]]) -> np.ndarray:
"""Returns observations as 1-dimensional numpy array."""
return np.array(observations[0], dtype='float32')
class StableBaselines3ActionWrapper(ActionWrapper):
"""Action wrapper for :code:`stable-baselines3` algorithms.
    Wraps actions so that a flat action array from the agent is parsed into the structure expected by the environment.
This wrapper is only compatible when the environment is controlled by a central agent
i.e., :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = True.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
assert env.unwrapped.central_agent, 'StableBaselines3ActionWrapper is compatible only when env.central_agent = True.'\
' First set env.central_agent = True to use this wrapper.'
super().__init__(env)
self.env: CityLearnEnv
@property
def action_space(self) -> spaces.Box:
"""Returns single spaces Box object."""
return self.env.action_space[0]
def action(self, actions: List[float]) -> List[List[float]]:
"""Returns actions as 1-dimensional numpy array."""
return [actions]
class StableBaselines3RewardWrapper(RewardWrapper):
"""Reward wrapper for :code:`stable-baselines3` algorithms.
    Wraps rewards so that a single float value is returned.
This wrapper is only compatible when the environment is controlled by a central agent
i.e., :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = True.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
assert env.unwrapped.central_agent, 'StableBaselines3RewardWrapper is compatible only when env.central_agent = True.'\
' First set env.central_agent = True to use this wrapper.'
super().__init__(env)
self.env: CityLearnEnv
def reward(self, reward: List[float]) -> float:
"""Returns reward as float value."""
return reward[0]
class StableBaselines3Wrapper(Wrapper):
"""Wrapper for :code:`stable-baselines3` algorithms.
    Wraps observations so that they are returned as a 1-dimensional numpy array.
    Wraps actions so that a flat action array is parsed into the structure expected by the environment.
    Wraps rewards so that a single float value is returned.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
env = StableBaselines3ActionWrapper(env)
env = StableBaselines3RewardWrapper(env)
env = StableBaselines3ObservationWrapper(env)
super().__init__(env)
self.env: CityLearnEnv
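# Illustrative usage sketch (doctest-style comments only). stable-baselines3 must be
# installed separately; SAC is shown as one possible algorithm and the schema name is
# an assumption.
#
#     >>> from stable_baselines3 import SAC
#     >>> from citylearn.citylearn import CityLearnEnv
#     >>> from citylearn.wrappers import StableBaselines3Wrapper
#     >>> env = StableBaselines3Wrapper(CityLearnEnv('citylearn_challenge_2022_phase_1', central_agent=True))
#     >>> model = SAC('MlpPolicy', env)
#     >>> model.learn(total_timesteps=1000)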
class RLlibSingleAgentWrapper(StableBaselines3Wrapper):
"""Wrapper for :code:`RLlib` single-agent algorithms.
Uses the same wrapper as :code:`stable-baselines3` by wrapping
`env` in :py:class:`citylearn.wrappers.StableBaselines3Wrapper`.
Parameters
----------
env_config: Mapping[str, Any]
Dictionary providing initialization parameters for the environment.
Must contain `env_kwargs` as a key where `env_kwargs` is a `dict` used to
        initialize :py:class:`citylearn.citylearn.CityLearnEnv`. Thus, it must
        contain all positional arguments needed for initialization and may also
        contain any optional initialization arguments. `env_config` can also
        contain a `wrappers` key that is a list of :py:mod:`citylearn.wrappers`
        classes to wrap :py:class:`citylearn.citylearn.CityLearnEnv` with. Wrapping
        with :py:class:`citylearn.wrappers.ClippedObservationWrapper` is recommended
        to avoid the simulation terminating prematurely with an error due to
        out-of-bound observations relative to the observation space.
Notes
-----
    This wrapper is only compatible with an environment where
    :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` is `True` and will initialize the
    environment as such, overriding any value for `central_agent` in `env_kwargs`.
"""
def __init__(self, env_config: Mapping[str, Any]):
env_kwargs = env_config['env_kwargs']
env_kwargs['central_agent'] = True
assert 'schema' in env_kwargs, 'missing schema key in env_kwargs.'
env = CityLearnEnv(**env_kwargs)
wrappers = env_config.get('wrappers')
wrappers = [] if wrappers is None else wrappers
for w in wrappers:
env = w(env)
super().__init__(env)
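# Illustrative usage sketch (doctest-style comments only). The schema name is an
# assumption; wrapping with ClippedObservationWrapper follows the recommendation in the
# docstring above.
#
#     >>> from citylearn.wrappers import ClippedObservationWrapper, RLlibSingleAgentWrapper
#     >>> env_config = {
#     ...     'env_kwargs': {'schema': 'citylearn_challenge_2022_phase_1'},
#     ...     'wrappers': [ClippedObservationWrapper],
#     ... }
#     >>> env = RLlibSingleAgentWrapper(env_config)
#     >>> # typically the class itself and env_config are passed to an RLlib algorithm config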
class RLlibMultiAgentObservationWrapper(ObservationWrapper):
"""Observation wrapper for :code:`RLlib` multi-agent algorithms.
Wraps observation space and observations so that they are returned
as :py:class:`gymnasium.spaces.Dict` and `dict` objects respectively.
    The keys in these objects correspond to the agent IDs, i.e., the
    policy IDs in the multi-agent setup.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
assert not env.central_agent, 'RLlibMultiAgentObservationWrapper is'\
' compatible only when env.central_agent = False.'\
' First set env.central_agent = False to use this wrapper.'
super().__init__(env)
self.env: CityLearnEnv
@property
def observation_space(self) -> spaces.Dict:
"""Parses observation space into a :py:class:`gymnasium.spaces.Dict`."""
return spaces.Dict({f'agent_{i}': s for i, s in enumerate(self.env.observation_space)})
def observation(
self, observations: List[List[float]]
) -> Mapping[str, np.ndarray]:
"""Parses observation into a dictionary."""
return {f'agent_{i}': np.array(o, dtype='float32') for i, o in enumerate(observations)}
class RLlibMultiAgentActionWrapper(ActionWrapper):
"""Action wrapper for :code:`RLlib` multi-agent algorithms.
    Wraps the action space so that it is returned as a :py:class:`gymnasium.spaces.Dict`.
    The keys correspond to the agent IDs, i.e., the policy IDs in the multi-agent setup.
    Also converts agent actions from a `dict` to the data structure needed by
    :py:meth:`citylearn.citylearn.CityLearnEnv.step`.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
assert not env.central_agent, 'RLlibMultiAgentActionWrapper is'\
' compatible only when env.central_agent = False.'\
' First set env.central_agent = False to use this wrapper.'
super().__init__(env)
self.env: CityLearnEnv
@property
def action_space(self) -> spaces.Dict:
"""Parses action space into a :py:class:`gymnasium.spaces.Dict`."""
return spaces.Dict({f'agent_{i}': s for i, s in enumerate(self.env.action_space)})
def action(self, actions: Mapping[str, np.ndarray]) -> List[List[float]]:
"""Parses actions into data structure for :py:meth:`citylearn.citylearn.CityLearnEnv.step`."""
return [list(v) for v in actions.values()]
class RLlibMultiAgentRewardWrapper(RewardWrapper):
"""Action wrapper for :code:`RLlib` multi-agent algorithms.
Wraps action space so that it is returned as a `dict` mapping agent IDs to reward values.
Parameters
----------
env: CityLearnEnv
CityLearn environment.
"""
def __init__(self, env: CityLearnEnv):
assert not env.central_agent, 'RLlibMultiAgentRewardWrapper is'\
' compatible only when env.central_agent = False.'\
' First set env.central_agent = False to use this wrapper.'
super().__init__(env)
self.env: CityLearnEnv
def reward(self, reward: List[float]) -> Mapping[str, float]:
"""Parses reward into a `dict`."""
return {f'agent_{i}': r for i, r in enumerate(reward)}
class RLlibMultiAgentEnv(MultiAgentEnv):
"""Wrapper for :code:`RLlib` multi-agent algorithms.
    Converts observation and action spaces to :py:class:`gymnasium.spaces.Dict`.
    Also converts `observations`, `actions`, `rewards`, `terminated`, and `truncated`
    to dictionaries where necessary. The dictionary keys correspond to the agent IDs, i.e.,
    the policy IDs in the multi-agent setup. Agent IDs are accessible through the `_agent_ids` property.
The initialized environment is a :py:class:`ray.rllib.env.MultiAgentEnv` object
and has an `env` attribute that is :py:class:`citylearn.citylearn.CityLearnEnv` object.
Parameters
----------
env_config: Mapping[str, Any]
Dictionary providing initialization parameters for the environment.
Must contain `env_kwargs` as a key where `env_kwargs` is a `dict` used to
        initialize :py:class:`citylearn.citylearn.CityLearnEnv`. Thus, it must
        contain all positional arguments needed for initialization and may also
        contain any optional initialization arguments. `env_config` can also
        contain a `wrappers` key that is a list of :py:mod:`citylearn.wrappers`
        classes to wrap :py:class:`citylearn.citylearn.CityLearnEnv` with. Wrapping
        with :py:class:`citylearn.wrappers.ClippedObservationWrapper` is recommended
        to avoid the simulation terminating prematurely with an error due to
        out-of-bound observations relative to the observation space.
Notes
-----
    This wrapper is only compatible with an environment where
    :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` is `False` and will initialize the
    environment as such, overriding any value for `central_agent` in `env_kwargs`.
"""
def __init__(self, env_config: Mapping[str, Any]):
if MultiAgentEnv == Env:
            raise Exception('This functionality requires you to install RLlib. '\
'You can install RLlib from pip: pip install "ray[rllib]", '\
'or for more detailed instructions please visit https://docs.ray.io/en/latest/rllib/index.html.')
else:
pass
super().__init__()
env_kwargs = env_config['env_kwargs']
env_kwargs['central_agent'] = False
assert 'schema' in env_kwargs, 'missing schema key in env_kwargs.'
env = CityLearnEnv(**env_kwargs)
wrappers = env_config.get('wrappers')
wrappers = [] if wrappers is None else wrappers
for w in wrappers:
env = w(env)
env = RLlibMultiAgentActionWrapper(env)
env = RLlibMultiAgentObservationWrapper(env)
env = RLlibMultiAgentRewardWrapper(env)
self.env: CityLearnEnv = env
self._agent_ids = [f'agent_{i}' for i in range(len(self.buildings))]
self.observation_space: spaces.Dict = self.env.observation_space
self.action_space: spaces.Dict = self.env.action_space
@property
def time_step(self) -> int:
"""Convenience property for :py:meth:`citylearn.citylearn.CityLearnEnv.time_step`."""
return self.env.unwrapped.time_step
@property
def buildings(self) -> List[Building]:
"""Convenience property for :py:meth:`citylearn.citylearn.CityLearnEnv.buildings`."""
return self.env.unwrapped.buildings
@property
def terminated(self) -> bool:
"""Convenience property for :py:meth:`citylearn.citylearn.CityLearnEnv.terminated`."""
return self.env.unwrapped.terminated
def step(
self, action_dict: Mapping[str, np.ndarray]
) -> Tuple[Mapping[str, np.ndarray], Mapping[str, float], Mapping[str, bool], Mapping[str, bool], Mapping[str, dict]]:
"""Calls :py:meth:`citylearn.citylearn.CityLearnEnv.step` and parses returned values into dictionaries."""
observations, reward, terminated, truncated, info = self.env.step(action_dict)
terminated = {'__all__': terminated, **{a: terminated for a in self._agent_ids}}
truncated = {'__all__': truncated, **{a: truncated for a in self._agent_ids}}
info = {a: info for a in self._agent_ids}
return observations, reward, terminated, truncated, info
def evaluate(self, **kwargs) -> pd.DataFrame:
"""Convenience method for :py:meth:`citylearn.citylearn.CityLearnEnv.evaluate`."""
return self.env.unwrapped.evaluate(**kwargs)
def reset(self, *, seed: int = None, options: Mapping[str, Any] = None) -> Tuple[Mapping[str, np.ndarray], Mapping[str, dict]]:
"""Calls :py:meth:`citylearn.citylearn.CityLearnEnv.reset` and parses returned values into dictionaries."""
observations, info = self.env.reset(seed=seed, options=options)
info = {a: info for a in self._agent_ids}
return observations, info
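# Illustrative usage sketch (doctest-style comments only). The schema name is an
# assumption and RLlib must be installed for this class to be usable (see the check
# in __init__ above).
#
#     >>> from citylearn.wrappers import ClippedObservationWrapper, RLlibMultiAgentEnv
#     >>> env_config = {
#     ...     'env_kwargs': {'schema': 'citylearn_challenge_2022_phase_1'},
#     ...     'wrappers': [ClippedObservationWrapper],
#     ... }
#     >>> env = RLlibMultiAgentEnv(env_config)
#     >>> observations, info = env.reset()
#     >>> actions = {agent_id: space.sample() for agent_id, space in env.action_space.items()}
#     >>> observations, reward, terminated, truncated, info = env.step(actions)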