Source code for training_classical_control.control

from typing import Protocol

import numpy as np
from gymnasium import Env
from numpy.typing import NDArray

# Public API of this module: the names exported by ``from <module> import *``.
# The first two entries are Protocol classes; the rest are concrete controllers.
__all__ = [
    "FeedbackController",
    "Observer",
    "ConstantController",
    "RandomController",
    "ProportionalController",
    "FullStateFeedbackController",
    "PIDController",
]


class FeedbackController(Protocol):
    """Structural interface for objects that map an observation to an action.

    NOTE(review): the concrete controllers defined in this module expose
    ``act`` rather than ``control``, so they do not structurally satisfy this
    protocol as written -- confirm which method name is the intended one.
    """

    def control(self, observation: NDArray) -> NDArray:
        """Return the control action for ``observation``."""
        ...
class Observer(Protocol):
    """Structural interface for objects that map a measurement to an observation."""

    # NOTE(review): the parameter name ``measrument`` looks like a typo for
    # ``measurement``. It is kept as-is because parameter names are part of
    # the protocol's signature; rename only together with all implementers.
    def observe(self, measrument: NDArray) -> NDArray:
        """Return the observation derived from ``measrument``."""
        ...
class ConstantController:
    """Controller that returns the same stored action for every observation."""

    def __init__(self, u: "NDArray | None" = None) -> None:
        """Store the constant action.

        Args:
            u: Action returned by every call to :meth:`act`. When omitted, a
                fresh ``np.zeros(1)`` is created for this instance.
        """
        # A ``np.zeros(1)`` default in the signature is evaluated once and
        # shared by every instance; because ``act`` hands out ``self.u``
        # itself, an in-place edit through one controller would leak into all
        # others. Building the default here gives each instance its own array.
        self.u = np.zeros(1) if u is None else u

    def act(self, observation: NDArray) -> NDArray:
        """Return the stored action ``self.u``; ``observation`` is ignored."""
        return self.u
class RandomController:
    """Controller that samples its action from an environment's action space."""

    def __init__(self, env: Env) -> None:
        """Keep a reference to ``env.action_space`` for later sampling.

        Args:
            env: Environment whose ``action_space`` attribute is sampled by
                :meth:`act`.
        """
        self.action_space = env.action_space

    def act(self, observation: NDArray) -> NDArray:
        """Return ``self.action_space.sample()``; ``observation`` is ignored."""
        return self.action_space.sample()
class ProportionalController:
    """Proportional controller: gain ``K`` times element 2 of the observation."""

    def __init__(self, K: float = 10.0) -> None:
        """Store the proportional gain.

        Args:
            K: Gain multiplied with the selected observation element.
        """
        self.K = K

    def act(self, observation: NDArray) -> NDArray:
        """Return ``K * observation[[2]]`` as a length-1 array.

        Args:
            observation: Array with at least three elements. Index 2 is named
                ``theta`` in the code -- presumably an angle; confirm against
                the environment's observation layout.
        """
        # Indexing with the list ``[2]`` (not the integer ``2``) keeps the
        # array dimension, so the action has shape (1,) instead of being a
        # scalar.
        theta = observation[[2]]
        return self.K * theta
class FullStateFeedbackController:
    """State-feedback law ``kr * reference - K @ x`` on observation elements 2 and 3."""

    def __init__(self, K: NDArray, kr: float, reference: float) -> None:
        """Store the feedback parameters.

        Args:
            K: Feedback gain applied to ``observation[[2, 3]]`` via ``@``; its
                last dimension must therefore be 2.
            kr: Gain multiplied with ``reference``.
            reference: Reference value scaled by ``kr``.
        """
        self.K = K
        self.kr = kr
        self.reference = reference

    def act(self, observation: NDArray) -> NDArray:
        """Return the action ``kr * reference - K @ observation[[2, 3]]``.

        The result is always at least one-dimensional: with a 1-D ``K`` the
        matrix product is a scalar, which is promoted to a length-1 array so
        the return value matches the annotated ``NDArray`` type. Results that
        are already arrays (e.g. ``K`` of shape ``(1, 2)``) are unchanged.
        """
        state = observation[[2, 3]]
        action = self.kr * self.reference - self.K @ state
        return np.atleast_1d(action)
class PIDController:
    """Discrete PID controller acting on element 0 of the observation."""

    def __init__(
        self, Kp: float, Ki: float, Kd: float, reference: float, dt: float
    ) -> None:
        """Store the gains and reset the controller state.

        Args:
            Kp: Proportional gain.
            Ki: Integral gain.
            Kd: Derivative gain.
            reference: Target value compared against ``observation[0]``.
            dt: Time step used to scale the integral and derivative terms.

        Raises:
            ValueError: If ``dt`` is not strictly positive.
        """
        # ``dt`` is a divisor in the derivative term; reject zero, negative
        # and NaN values here instead of failing (or silently producing
        # nonsense) on the first call to ``act``.
        if not dt > 0:
            raise ValueError(f"dt must be positive, got {dt!r}")
        self.Kp = Kp
        self.Ki = Ki
        self.Kd = Kd
        self.reference = reference
        self.dt = dt
        # Running sum of raw errors; it is multiplied by ``dt`` when used.
        self.accumulated_error = 0.0
        # Error from the previous call; 0.0 before the first call, so the
        # first derivative term is computed against zero.
        self.previous_error = 0.0

    def act(self, observation: NDArray) -> NDArray:
        """Advance the controller by one step and return the action.

        Updates ``accumulated_error`` and ``previous_error`` as a side effect.

        Args:
            observation: Indexable whose element 0 is the controlled quantity.

        Returns:
            Array built from the single value
            ``Kp * e + Ki * sum(e) * dt + Kd * (e - e_prev) / dt``.
        """
        error = self.reference - observation[0]
        self.accumulated_error += error
        error_difference = error - self.previous_error
        self.previous_error = error

        proportional = self.Kp * error
        integral = self.Ki * self.accumulated_error * self.dt
        derivative = self.Kd * error_difference / self.dt
        force = proportional + integral + derivative
        return np.array([force])