Source code for training_ml_control.environments.cart

"""
Original code taken from:
https://github.com/Farama-Foundation/Gymnasium/blob/f26cbe13e9ac20d43486032b7e9dd4b8f2c563dc/gymnasium/envs/classic_control/cartpole.py

MIT License:
https://github.com/Farama-Foundation/Gymnasium/blob/f26cbe13e9ac20d43486032b7e9dd4b8f2c563dc/LICENSE
"""
import logging
import math

import gymnasium as gym
import numpy as np
from gymnasium import spaces
from gymnasium.envs.classic_control.continuous_mountain_car import (
    Continuous_MountainCarEnv,
)
from gymnasium.error import DependencyNotInstalled
from numpy.typing import NDArray

__all__ = ["CartEnv"]

logger = logging.getLogger(__name__)


class CartEnv(Continuous_MountainCarEnv):
    r"""The cart, or double-integrator, problem is based on a classic problem in
    control theory. It consists of a simple cart that can move without friction to
    the left or to the right:

    $$
    \begin{array}{ll}
    \ddot{q} &= u(t)\\
    y &= q(t)
    \end{array}
    $$

    where $q(t), u(t) \in \mathbb{R}$.

    This class is a modified version of the `Continuous_MountainCarEnv` environment
    from Gymnasium, flattened so that the track has constant height and the dynamics
    reduce to a pure double integrator. A brief usage sketch is included at the end
    of this module.
    """

    def __init__(
        self,
        render_mode: str | None = None,
        *,
        goal_velocity: float = 5,
        max_position: float = 200,
        max_speed: float = 10,
        max_force: float = 10,
        goal_position: float = 9.0,
    ):
        self.min_position = -max_position
        self.max_position = max_position
        self.min_speed = -max_speed
        self.max_speed = max_speed
        self.min_action = -max_force
        self.max_action = max_force
        if abs(goal_position) >= max_position:
            raise ValueError(
                "Goal position should be smaller in magnitude than max position."
            )
        self.goal_position = goal_position
        self.goal_velocity = goal_velocity
        self.dt = 1 / self.metadata["render_fps"]

        self.low_state = np.array(
            [self.min_position, self.min_speed], dtype=np.float32
        )
        self.high_state = np.array(
            [self.max_position, self.max_speed], dtype=np.float32
        )

        self.render_mode = render_mode

        self.screen_width = 600
        self.screen_height = 400
        self.screen = None
        self.clock = None
        self.isopen = True

        self.action_space = spaces.Box(
            low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=self.low_state, high=self.high_state, dtype=np.float32
        )
    def _height(self, xs: NDArray) -> NDArray:
        # Constant height
        return np.ones_like(xs) * 0.55
    def step(self, action: NDArray) -> tuple[NDArray, float, bool, bool, dict]:
        position = self.state[0]
        velocity = self.state[1]
        # Clip the applied force to the admissible range.
        force = min(max(action[0], self.min_action), self.max_action)

        # Semi-implicit Euler integration: update the velocity first, then use the
        # new velocity to update the position. Both are clipped to their bounds.
        velocity += force * self.dt
        if velocity > self.max_speed:
            velocity = self.max_speed
        if velocity < self.min_speed:
            velocity = self.min_speed
        position += velocity * self.dt
        if position > self.max_position:
            position = self.max_position
            velocity = 0
        if position < self.min_position:
            position = self.min_position
            velocity = 0

        # The episode terminates once the cart is at least as far out as the goal
        # position and moving at least as fast as the goal velocity (in magnitude).
        # Convert a possible numpy bool to a Python bool.
        terminated = bool(
            abs(position) >= abs(self.goal_position)
            and abs(velocity) >= self.goal_velocity
        )

        # Reward: a bonus for reaching the goal minus a quadratic penalty on the
        # control effort.
        reward = 0
        if terminated:
            reward = 100.0
        reward -= math.pow(action[0], 2) * 0.1

        self.state = np.array([position, velocity], dtype=np.float32)

        if self.render_mode == "human":
            self.render()
        return self.state, reward, terminated, False, {}
    def render(self):
        if self.render_mode is None:
            assert self.spec is not None
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError as e:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gymnasium[classic-control]`"
            ) from e

        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.screen_width, self.screen_height)
                )
            else:  # mode == "rgb_array"
                self.screen = pygame.Surface(
                    (self.screen_width, self.screen_height)
                )
        if self.clock is None:
            self.clock = pygame.time.Clock()

        world_width = self.max_position - self.min_position
        scale = self.screen_width / world_width
        carwidth = 40
        carheight = 20

        self.surf = pygame.Surface((self.screen_width, self.screen_height))
        self.surf.fill((255, 255, 255))

        pos = self.state[0]

        xs = np.linspace(self.min_position, self.max_position, 100)
        ys = self._height(xs)
        xys = list(zip((xs - self.min_position) * scale, ys * scale))

        pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0))

        clearance = 10

        l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
        coords = []
        for c in [(l, b), (l, t), (r, t), (r, b)]:
            c = pygame.math.Vector2(c)
            coords.append(
                (
                    c[0] + (pos - self.min_position) * scale,
                    c[1] + clearance + self._height(pos) * scale,
                )
            )

        gfxdraw.aapolygon(self.surf, coords, (0, 0, 0))
        gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0))

        for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
            c = pygame.math.Vector2(c)
            wheel = (
                int(c[0] + (pos - self.min_position) * scale),
                int(c[1] + clearance + self._height(pos) * scale),
            )

            gfxdraw.aacircle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )
            gfxdraw.filled_circle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )

        flagx = int((self.goal_position - self.min_position) * scale)
        flagy1 = int(self._height(self.goal_position) * scale)
        flagy2 = flagy1 + 50
        gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))

        gfxdraw.aapolygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )
        gfxdraw.filled_polygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )

        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )
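

# A minimal usage sketch (illustrative addition, not part of the original Gymnasium
# code): roll the double integrator forward under a constant force. Ignoring the
# speed/position clipping, each call to `step` applies the semi-implicit Euler update
#     v_{k+1} = v_k + dt * u_k,    q_{k+1} = q_k + dt * v_{k+1},
# i.e. x_{k+1} = A x_k + B u_k with A = [[1, dt], [0, 1]] and B = [[dt**2], [dt]].
# The initial state comes from the `reset` method inherited from
# `Continuous_MountainCarEnv` (position near -0.5, zero velocity).
if __name__ == "__main__":
    env = CartEnv(render_mode=None)
    observation, info = env.reset(seed=0)
    for _ in range(200):
        action = np.array([1.0], dtype=np.float32)  # constant push to the right
        observation, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            break
    logger.info("Final state (position, velocity): %s", observation)
    env.close()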