[docs]classAgent(Environment):r"""Base agent class. Parameters ---------- env : CityLearnEnv CityLearn environment. Other Parameters ---------------- **kwargs : dict Other keyword arguments used to initialize super class. """def__init__(self,env:CityLearnEnv,**kwargs:Any):self.env=envself.observation_names=self.env.observation_namesself.action_names=self.env.unwrapped.action_namesself.observation_space=self.env.observation_spaceself.action_space=self.env.action_spaceself.episode_time_steps=self.env.unwrapped.time_stepsself.building_metadata=self.env.unwrapped.get_metadata()['buildings']super().__init__(seconds_per_time_step=self.env.unwrapped.seconds_per_time_step,random_seed=self.env.unwrapped.random_seed,episode_tracker=self.env.unwrapped.episode_tracker,)self.reset()@propertydefenv(self)->CityLearnEnv:"""CityLearn environment."""returnself.__env@propertydefobservation_names(self)->List[List[str]]:"""Names of active observations that can be used to map observation values."""returnself.__observation_names@propertydefaction_names(self)->List[List[str]]:"""Names of active actions that can be used to map action values."""returnself.__action_names@propertydefobservation_space(self)->List[spaces.Box]:"""Format of valid observations."""returnself.__observation_space@propertydefaction_space(self)->List[spaces.Box]:"""Format of valid actions."""returnself.__action_space@propertydefepisode_time_steps(self)->int:returnself.__episode_time_steps@propertydefbuilding_metadata(self)->List[Mapping[str,Any]]:"""Building(s) metadata."""returnself.__building_metadata@propertydefaction_dimension(self)->List[int]:"""Number of returned actions."""return[s.shape[0]forsinself.action_space]@propertydefactions(self)->List[List[List[Any]]]:"""Action history/time series."""returnself.__actions@env.setterdefenv(self,env:CityLearnEnv):self.__env=env@observation_names.setterdefobservation_names(self,observation_names:List[List[str]]):self.__observation_names=observation_names@action_names.setterdefaction_names(self,action_names:List[List[str]]):self.__action_names=action_names@observation_space.setterdefobservation_space(self,observation_space:List[spaces.Box]):self.__observation_space=observation_space@action_space.setterdefaction_space(self,action_space:List[spaces.Box]):self.__action_space=action_space@episode_time_steps.setterdefepisode_time_steps(self,episode_time_steps:int):"""Number of time steps in one episode."""self.__episode_time_steps=episode_time_steps@building_metadata.setterdefbuilding_metadata(self,building_metadata:List[Mapping[str,Any]]):self.__building_metadata=building_metadata@actions.setterdefactions(self,actions:List[List[Any]]):foriinrange(len(self.action_space)):self.__actions[i][self.time_step]=actions[i]

    def learn(
        self, episodes: int = None, deterministic: bool = None,
        deterministic_finish: bool = None, logging_level: int = None
    ):
        """Train agent.

        Parameters
        ----------
        episodes: int, default: 1
            Number of training episodes >= 1.
        deterministic: bool, default: False
            Indicator to take deterministic actions i.e. strictly exploit the learned policy.
        deterministic_finish: bool, default: False
            Indicator to take deterministic actions in the final episode.
        logging_level: int, default: 30
            Logging level where increasing the number silences lower level information.
        """

        episodes = 1 if episodes is None else episodes
        deterministic_finish = False if deterministic_finish is None else deterministic_finish
        deterministic = False if deterministic is None else deterministic
        self.__set_logger(logging_level)

        for episode in range(episodes):
            deterministic = deterministic or (deterministic_finish and episode >= episodes - 1)
            observations, _ = self.env.reset()
            self.episode_time_steps = self.episode_tracker.episode_time_steps
            terminated = False
            time_step = 0
            rewards_list = []

            while not terminated:
                actions = self.predict(observations, deterministic=deterministic)

                # apply actions to citylearn_env
                next_observations, rewards, terminated, truncated, _ = self.env.step(actions)
                rewards_list.append(rewards)

                # update
                if not deterministic:
                    self.update(observations, actions, rewards, next_observations, terminated=terminated, truncated=truncated)

                observations = [o for o in next_observations]

                logging.debug(
                    f'Time step: {time_step + 1}/{self.episode_time_steps},'
                    f' Episode: {episode + 1}/{episodes},'
                    f' Actions: {actions},'
                    f' Rewards: {rewards}'
                )
                time_step += 1

            rewards = np.array(rewards_list, dtype='float')
            rewards_summary = {
                'min': rewards.min(axis=0),
                'max': rewards.max(axis=0),
                'sum': rewards.sum(axis=0),
                'mean': rewards.mean(axis=0),
            }
            logging.info(f'Completed episode: {episode + 1}/{episodes}, Reward: {rewards_summary}')

    def predict(self, observations: List[List[float]], deterministic: bool = None) -> List[List[float]]:
        """Provide actions for current time step.

        Return randomly sampled actions from `action_space`.

        Parameters
        ----------
        observations: List[List[float]]
            Environment observations.
        deterministic: bool, default: False
            Whether to return purely exploitative deterministic actions.

        Returns
        -------
        actions: List[List[float]]
            Action values.
        """

        actions = [list(s.sample()) for s in self.action_space]
        self.actions = actions
        self.next_time_step()

        return actions

    def __set_logger(self, logging_level: int = None):
        """Set logging level."""

        logging_level = 30 if logging_level is None else logging_level
        assert logging_level >= 0, 'logging_level must be >= 0'
        LOGGER.setLevel(logging_level)

    def update(self, *args, **kwargs):
        """Update replay buffer and networks.

        Notes
        -----
        This implementation does nothing; it is kept so that all agents expose the same API during simulation.
        """

        pass
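
# --- Usage sketch (editorial addition, not part of the library API) -----------------------
# Minimal, hedged example of how the `Agent` base class above is typically driven: wrap a
# `CityLearnEnv` instance, let `learn` run the reset -> predict -> step -> update loop, then
# inspect the recorded action history. The helper name `_example_agent_rollout` and the
# `schema` argument are illustrative placeholders; `schema` is assumed to point to a valid
# CityLearn schema path or named dataset available in your installation.
def _example_agent_rollout(schema) -> 'Agent':
    """Illustrative helper only; not part of the public API."""

    env = CityLearnEnv(schema)
    agent = Agent(env)

    # One non-deterministic episode of randomly sampled actions (the base-class behaviour
    # of `predict`), with WARNING-level (30) logging.
    agent.learn(episodes=1, logging_level=30)

    # `actions` is the per-building action history recorded by the `actions` setter at each
    # time step of the episode.
    logging.info(f'First time step actions: {[a[0] for a in agent.actions]}')

    return agent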

class BaselineAgent(Agent):
    r"""Agent class for business-as-usual simulation where the storage systems and heat pumps are not controlled.

    This agent will provide results for when there is no storage for load shifting and no heat pump partial load.
    The storage actions prescribed will be 0.0 and the heat pump will have no action, i.e. `None`, causing it to
    deliver the ideal load in the building time series files. To ensure that the environment does not expect
    non-zero and non-null actions, the buildings in the parsed `env` will be set to have no active actions.
    This means that you must initialize a new `env` if you want to simulate with a new agent type.

    This agent class is best used to establish a baseline simulation that can then be compared to RBC, RLC,
    or MPC control algorithms.

    Parameters
    ----------
    env : CityLearnEnv
        CityLearn environment.

    Other Parameters
    ----------------
    **kwargs : dict
        Other keyword arguments used to initialize super class.
    """

    def __init__(self, env: CityLearnEnv, **kwargs: Any):
        super().__init__(env, **kwargs)

    @Agent.env.setter
    def env(self, env: CityLearnEnv):
        Agent.env.fset(self, self.__deactivate_actions(env))

    def __deactivate_actions(self, env: CityLearnEnv) -> CityLearnEnv:
        for b in env.unwrapped.buildings:
            for a in b.action_metadata:
                b.action_metadata[a] = False

            b.action_space = b.estimate_action_space()
            b.observation_space = b.estimate_observation_space()

        return env
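
# --- Baseline usage sketch (editorial addition, not part of the library API) --------------
# Hedged example contrasting `BaselineAgent` with the random-action `Agent`: the baseline
# deactivates all building actions when `env` is assigned, so one episode of `learn` yields
# the business-as-usual result with no storage control and ideal heat pump loads. Per the
# class docstring, a fresh `CityLearnEnv` must be created for this agent; the helper name
# and the `schema` argument are illustrative placeholders.
def _example_baseline_simulation(schema) -> 'BaselineAgent':
    """Illustrative helper only; not part of the public API."""

    # A new environment is required because `BaselineAgent` mutates the buildings'
    # `action_metadata` when `env` is set.
    env = CityLearnEnv(schema)
    agent = BaselineAgent(env)

    # A single episode is enough since no policy is learned.
    agent.learn(episodes=1)

    return agent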