Source code for xuance.common.offline_util

import numpy as np
import gymnasium as gym
try:
    import d4rl
except ImportError:
    d4rl = object

def load_d4rl_dataset(dataset_name: str, max_episode_steps: int, obsnorm=False, rewnorm=True):
    """Load a D4RL Q-learning dataset and optionally normalize it.

    Args:
        dataset_name: Environment id passed to ``gym.make``. It is also
            matched by substring to pick the reward normalization scheme.
        max_episode_steps: Episode length cap. Used to split the reward
            stream into episodes and to rescale locomotion rewards.
        obsnorm: If True, standardize ``observations`` and
            ``next_observations`` with the per-dimension mean/std of
            ``observations`` (std is offset by ``eps=1e-3``).
        rewnorm: If True, rescale rewards. For names containing
            ``halfcheetah``, ``hopper`` or ``walker2d`` the rewards are
            divided by the episode-return range and multiplied by
            ``max_episode_steps``; for names containing ``antmaze`` the
            rewards are shifted by ``-1``. Other datasets are untouched.

    Returns:
        A tuple ``(dataset, state_mean, state_std)``. When ``obsnorm`` is
        False the statistics are the identity values ``0.0`` and ``1.0``.

    Raises:
        ImportError: If the optional ``d4rl`` package is not installed.
        ValueError: If the locomotion episode-return range is zero, which
            would make the reward scaling divide by zero.
    """
    # The module-level import falls back to `object` when d4rl is missing;
    # fail with an actionable message instead of an AttributeError.
    if d4rl is object:
        raise ImportError(
            "The 'd4rl' package is required to load D4RL datasets but is not installed."
        )

    # create environment
    env = gym.make(dataset_name)
    dataset = d4rl.qlearning_dataset(env)

    if obsnorm:
        state_mean, state_std = compute_mean_std(dataset["observations"], eps=1e-3)
        dataset["observations"] = normalize_states(
            dataset["observations"], state_mean, state_std
        )
        dataset["next_observations"] = normalize_states(
            dataset["next_observations"], state_mean, state_std
        )
    else:
        state_mean = 0.0
        state_std = 1.0

    if rewnorm:
        if any(s in dataset_name for s in ('halfcheetah', 'hopper', 'walker2d')):
            min_ret, max_ret = return_range(dataset, max_episode_steps)
            ret_span = max_ret - min_ret
            if ret_span == 0:
                # Dividing by zero would silently fill the rewards with inf/NaN.
                raise ValueError(
                    f"Cannot normalize rewards of '{dataset_name}': "
                    "all episodes have the same return."
                )
            # In-place ops keep the dataset's reward array and dtype.
            dataset['rewards'] /= ret_span
            dataset['rewards'] *= max_episode_steps
        elif 'antmaze' in dataset_name:
            dataset['rewards'] -= 1.

    return dataset, state_mean, state_std
def compute_mean_std(states: np.ndarray, eps: float):
    """Return the mean and the eps-offset standard deviation along axis 0.

    Args:
        states: Array whose first axis is reduced.
        eps: Constant added to the standard deviation.

    Returns:
        A tuple ``(mean, std)`` where ``std`` already includes ``eps``.
    """
    center = states.mean(axis=0)
    spread = states.std(axis=0)
    return center, spread + eps
def normalize_states(states: np.ndarray, mean: np.ndarray, std: np.ndarray):
    """Standardize ``states`` by subtracting ``mean`` and dividing by ``std``.

    Args:
        states: Values to normalize.
        mean: Offset subtracted from ``states``.
        std: Scale the centered values are divided by.

    Returns:
        The result of ``(states - mean) / std``.
    """
    centered = states - mean
    return centered / std
def return_range(dataset, max_episode_steps):
    """Return the minimum and maximum episode return found in a dataset.

    The reward stream is split into episodes: an episode ends when its
    ``terminals`` flag is truthy or when it reaches ``max_episode_steps``
    transitions. A trailing trajectory that ends neither way is treated as
    incomplete and does not contribute a return.

    Args:
        dataset: Mapping with parallel ``'rewards'`` and ``'terminals'``
            sequences.
        max_episode_steps: Maximum number of transitions per episode.

    Returns:
        A tuple ``(min_return, max_return)`` over all completed episodes.

    Raises:
        ValueError: If the dataset contains no completed episode.
    """
    returns = []
    ep_ret, ep_len = 0., 0
    steps_seen = 0
    for r, d in zip(dataset['rewards'], dataset['terminals']):
        ep_ret += float(r)
        ep_len += 1
        steps_seen += 1
        if d or ep_len == max_episode_steps:
            returns.append(ep_ret)
            ep_ret, ep_len = 0., 0

    # Steps of a trailing incomplete trajectory are still counted, so this
    # only fails when `zip` stopped early because 'terminals' is shorter
    # than 'rewards'.
    assert steps_seen == len(dataset['rewards'])

    if not returns:
        raise ValueError(
            "Dataset contains no completed episode: no terminal flag is set and "
            "fewer than max_episode_steps transitions are available."
        )
    return min(returns), max(returns)