Source code for xuance.environment.multi_agent_env.football

"""
Football Benchmarks:
    - 11_vs_11_stochastic: A full 90 minutes football game (medium difficulty)
    - 11_vs_11_easy_stochastic: A full 90 minutes football game (easy difficulty)
    - 11_vs_11_hard_stochastic: A full 90 minutes football game (hard difficulty)

Football Academy - with a total of 11 scenarios
    - academy_empty_goal_close: Our player starts inside the box with the ball, and needs to score against an empty goal.
    - academy_empty_goal: Our player starts in the middle of the field with the ball, and needs to score against an empty goal.
    - academy_run_to_score: Our player starts in the middle of the field with the ball, and needs to score against an empty goal. Five opponent players chase ours from behind.
    - academy_run_to_score_with_keeper: Our player starts in the middle of the field with the ball, and needs to score against a keeper. Five opponent players chase ours from behind.
    - academy_pass_and_shoot_with_keeper: Two of our players try to score from the edge of the box, one is on the side with the ball, and next to a defender. The other is at the center, unmarked, and facing the opponent keeper.
    - academy_run_pass_and_shoot_with_keeper: Two of our players try to score from the edge of the box, one is on the side with the ball, and unmarked. The other is at the center, next to a defender, and facing the opponent keeper.
    - academy_3_vs_1_with_keeper: Three of our players try to score from the edge of the box, one on each side, and the other at the center. Initially, the player at the center has the ball and is facing the defender. There is an opponent keeper.
    - academy_corner: Standard corner-kick situation, except that the corner taker can run with the ball from the corner.
    - academy_counterattack_easy: 4 versus 1 counter-attack with keeper; all the remaining players of both teams run back towards the ball.
    - academy_counterattack_hard: 4 versus 2 counter-attack with keeper; all the remaining players of both teams run back towards the ball.
    - academy_single_goal_versus_lazy: Full 11 versus 11 games, where the opponents cannot move but they can only intercept the ball if it is close enough to them. Our center back defender has the ball at first.
"""
import numpy as np
from gymnasium.spaces import Box
from xuance.environment import RawMultiAgentEnv
try:
    import gfootball.env as football_env
    from gfootball.env import _apply_output_wrappers
    from gfootball.env.football_env import FootballEnv
    from gfootball.env import config as gf_config
except ImportError:
    FootballEnv = object

# Short scenario aliases (the values expected in ``config.env_id``) mapped to
# the scenario names that are handed to gfootball as ``env_name`` / ``level``.
GFOOTBALL_ENV_ID = {
    # Match scenarios.
    "1v1": "1_vs_1_easy",
    "5v5": "5_vs_5",
    "11v11_competition": "11_vs_11_competition",
    "11v11_kaggle": "11_vs_11_kaggle",
    "11v11": "11_vs_11_stochastic",
    "11v11_easy": "11_vs_11_easy_stochastic",
    "11v11_hard": "11_vs_11_hard_stochastic",
    # Football Academy scenarios (described in the module docstring).
    "3v1": "academy_3_vs_1_with_keeper",
    "corner": "academy_corner",
    "ca_easy": "academy_counterattack_easy",
    "ca_hard": "academy_counterattack_hard",
    "eg": "academy_empty_goal",
    "eg_close": "academy_empty_goal_close",
    "psk": "academy_pass_and_shoot_with_keeper",
    "rpsk": "academy_run_pass_and_shoot_with_keeper",
    "rs": "academy_run_to_score",
    "rsk": "academy_run_to_score_with_keeper",
    "single_gvl": "academy_single_goal_versus_lazy",
}



[docs]
class football_raw_env(FootballEnv):
    """Football environment that owns a gfootball env built by ``create_environment``.

    Note that ``self.render`` is a plain bool attribute (set from
    ``config.test``), not a method.
    """

    def __init__(self, config):
        """Create the inner gfootball environment and initialise the base class.

        Args:
            config: attribute container. The attributes read here are
                ``env_id``, ``test``, ``num_agent``, ``num_adversary``,
                ``use_stacked_frames``, ``obs_type``, ``rewards_type``,
                ``videos_dir``, ``smm_width`` and ``smm_height``.

        Raises:
            KeyError: if ``config.env_id`` is not a key of ``GFOOTBALL_ENV_ID``.
        """
        # Settings that are fixed for every run.
        dump_goals = False
        dump_every = 1
        additional_players = None
        extra_options = {}

        self.env_id = GFOOTBALL_ENV_ID[config.env_id]

        # Test runs render, dump full episodes and record video; training
        # runs switch all three off.
        testing = bool(config.test)
        dump_episodes = testing
        self.render = testing
        record_video = testing
        self.n_agents = config.num_agent

        # Keep only the innermost environment of what gfootball builds.
        created = football_env.create_environment(
            env_name=self.env_id,
            stacked=config.use_stacked_frames,
            representation=config.obs_type,
            rewards=config.rewards_type,
            write_goal_dumps=dump_goals,
            write_full_episode_dumps=dump_episodes,
            render=self.render,
            write_video=record_video,
            dump_frequency=dump_every,
            logdir=config.videos_dir,
            extra_players=additional_players,
            number_of_left_players_agent_controls=config.num_agent,
            number_of_right_players_agent_controls=config.num_adversary,
            channel_dimensions=(config.smm_width, config.smm_height),
            other_config_options=extra_options
        )
        self.env = created.unwrapped

        scenario_config = gf_config.Config({'level': self.env_id}).ScenarioConfig()
        player_spec = 'agent:left_players=%d,right_players=%d'
        players = [player_spec % (config.num_agent, config.num_adversary)]

        # Enable MultiAgentToSingleAgent wrapper?
        if (scenario_config.control_all_players
                and config.num_agent in [0, 1]
                and config.num_adversary in [0, 1]):
            left_count = scenario_config.controllable_left_players if config.num_agent else 0
            right_count = scenario_config.controllable_right_players if config.num_adversary else 0
            players = [player_spec % (left_count, right_count)]

        if additional_players is not None:
            players.extend(additional_players)

        config_values = {
            'dump_full_episodes': dump_episodes,
            'dump_scores': dump_goals,
            'players': players,
            'level': self.env_id,
            'tracesdir': config.videos_dir,
            'write_video': record_video,
        }
        config_values.update(extra_options)
        super().__init__(gf_config.Config(config_values))


[docs]
    def reset(self):
        """Reset the inner environment.

        Returns:
            A ``(observation, info)`` pair, where ``observation`` is whatever
            the inner environment's ``reset`` returns and ``info`` is always an
            empty dict.
        """
        return self.env.reset(), {}



[docs]
    def step(self, action):
        """Advance the inner environment by one step.

        The rewards returned by the inner environment are summed into a single
        value, and that value is repeated ``self.n_agents`` times.

        Args:
            action: passed unchanged to the inner environment's ``step``.

        Returns:
            ``(obs, reward_n, terminated, truncated, info)``; ``truncated`` is
            always ``False`` here.
        """
        obs, reward, terminated, info = self.env.step(action)
        team_reward = np.sum(reward)
        reward_n = np.array([team_reward for _ in range(self.n_agents)])
        return obs, reward_n, terminated, False, info



[docs]
    def get_frame(self):
        """Return the latest frame, or an empty list when rendering is off.

        The frame is read from the ``"frame"`` entry of the inner
        environment's raw observation.
        """
        # Read the raw observation first, whether or not rendering is enabled.
        raw_obs = self.env._env._observation
        if self.render:
            return raw_obs["frame"]
        return []



[docs]
    def state(self):
        """Build a flat global state from the inner environment's raw observation.

        Entries are visited in the observation dict's iteration order:

        - ``"ball_owned_team"`` becomes a one-hot of length 3
          (``-1`` -> first slot, ``0`` -> second slot, anything else -> third).
        - ``"game_mode"`` becomes a one-hot of length 7, indexed by its value.
        - ``"frame"`` is skipped.
        - every other value is flattened and appended.

        Returns:
            A Python list of scalars.

        Raises:
            IndexError: if the ``"game_mode"`` value is outside the 7 slots.
        """
        def do_flatten(obj):
            """Return ``obj`` as a flat 1-D numpy array.

            ``np.asarray`` accepts lists, tuples, numpy arrays and bare
            scalars (int, float, bool), so no per-type dispatch is needed.
            """
            return np.asarray(obj).flatten()

        original_obs = self.env._env._observation
        state = []
        for k, v in original_obs.items():
            if k == "frame":
                # Frames are not part of the state vector.
                continue
            if k == "ball_owned_team":
                if v == -1:
                    state.extend([1, 0, 0])
                elif v == 0:
                    state.extend([0, 1, 0])
                else:
                    state.extend([0, 0, 1])
            elif k == "game_mode":
                game_mode = [0] * 7
                game_mode[v] = 1
                state.extend(game_mode)
            else:
                state.extend(do_flatten(v))
        return state





[docs]
class GFootball_Env(RawMultiAgentEnv):
    """The wrapper of original football environment.

    Args:
        config: the SimpleNamespace variable that contains attributes to create an original env.
    """
    def __init__(self, config):
        """Create the wrapped football environment and derive its spaces.

        Besides what ``football_raw_env`` reads, this uses ``config.rewards_type``,
        ``config.obs_type``, ``config.smm_width``, ``config.smm_height``,
        ``config.num_agent``, ``config.num_adversary``,
        ``config.use_stacked_frames`` and ``config.max_episode_steps``.
        """
        super(GFootball_Env, self).__init__()
        env = football_raw_env(config)
        # ``stacked`` is the frame-stacking switch, so it takes
        # ``config.use_stacked_frames`` (the same flag football_raw_env hands
        # to create_environment), not the adversary count.
        self.env = _apply_output_wrappers(env=env,
                                          rewards=config.rewards_type,
                                          representation=config.obs_type,
                                          channel_dimensions=(config.smm_width, config.smm_height),
                                          apply_single_agent_wrappers=(config.num_agent + config.num_adversary == 1),
                                          stacked=config.use_stacked_frames)
        self.num_agents = config.num_agent
        self.agents = [f'agent_{i}' for i in range(self.num_agents)]
        self.num_adversaries = config.num_adversary
        # Each agent gets a 1-D observation whose length is the last
        # dimension of the wrapped observation space.
        obs_shape_i = (self.env.observation_space.shape[-1], )
        self.observation_space = {k: Box(-np.inf, np.inf, obs_shape_i) for k in self.agents}
        try:
            self.action_space = {k: self.env.action_space[i] for i, k in enumerate(self.agents)}
        except (TypeError, IndexError, KeyError):
            # The wrapped action space cannot be indexed per agent, so every
            # agent shares the same space object.
            self.action_space = {k: self.env.action_space for k in self.agents}
        self.max_episode_steps = config.max_episode_steps
        self._episode_step = 0
        # The global state is only available after a reset; its shape fixes
        # the state space.
        self.env.reset()
        state_shape = self.state().shape
        self.state_space = Box(-np.inf, np.inf, state_shape)


[docs]
    def get_env_info(self):
        """Return a dict describing the environment's spaces, agents and step limit."""
        return dict(
            state_space=self.state_space,
            observation_space=self.observation_space,
            action_space=self.action_space,
            agents=self.agents,
            num_agents=self.num_agents,
            max_episode_steps=self.max_episode_steps,
            num_adversaries=self.num_adversaries,
        )



[docs]
    def close(self):
        """Shut down the wrapped environment by calling its ``close``."""
        wrapped = self.env
        wrapped.close()



[docs]
    def render(self, *config, **kwconfig):
        """Return the current frame from the wrapped environment.

        Positional and keyword arguments are accepted but not used.
        """
        frame = self.env.get_frame()
        return frame



[docs]
    def reset(self):
        """Reset the wrapped environment and the episode step counter.

        Returns:
            ``(obs_dict, info)``, where ``obs_dict`` maps each agent name to the
            observation at that agent's index.
        """
        obs, info = self.env.reset()
        obs_dict = {}
        for idx, agent in enumerate(self.agents):
            obs_dict[agent] = obs[idx]
        self._episode_step = 0
        return obs_dict, info



[docs]
    def step(self, actions):
        """Run one transition of the environment.

        Args:
            actions: the actions for all agents, indexable by agent name; each
                entry is converted with ``int``.

        Returns:
            ``(obs_dict, reward_dict, terminated_dict, truncated, info)``. The
            three dicts are keyed by agent name; ``truncated`` is a single bool
            that becomes ``True`` once the episode step counter reaches
            ``max_episode_steps``.
        """
        actions_list = [int(actions[agent]) for agent in self.agents]
        # The truncation flag reported by the wrapped env is discarded; it is
        # recomputed below from the local step counter.
        obs, reward, terminated, _, info = self.env.step(actions_list)

        obs_dict, reward_dict, terminated_dict = {}, {}, {}
        for idx, agent in enumerate(self.agents):
            obs_dict[agent] = obs[idx]
        for idx, agent in enumerate(self.agents):
            reward_dict[agent] = reward[idx]
        for agent in self.agents:
            terminated_dict[agent] = terminated

        self._episode_step += 1
        truncated = bool(self._episode_step >= self.max_episode_steps)
        return obs_dict, reward_dict, terminated_dict, truncated, info



[docs]
    def get_more_info(self, info):
        """Extend ``info`` in place with data from the unwrapped observation.

        Every entry of the first agent's observation is copied into ``info``.
        The ``"active"``, ``"designated"`` and ``"sticky_actions"`` entries are
        then replaced by arrays collected over the first ``num_agents``
        observations.

        Args:
            info: dict to update.

        Returns:
            The same ``info`` dict.
        """
        state = self.env.unwrapped.observation()
        info.update(state[0])
        per_agent = [state[i] for i in range(self.num_agents)]
        collectors = (
            ("active", np.array),
            ("designated", np.array),
            ("sticky_actions", np.stack),
        )
        for key, combine in collectors:
            info[key] = combine([agent_obs[key] for agent_obs in per_agent])
        return info



[docs]
    def state(self):
        """Return the global state as a numpy array.

        The state is fetched by calling ``state()`` on the object one level
        below the outer wrapper.
        """
        inner = self.env.env
        raw_state = inner.state()
        return np.array(raw_state)