Source code for training_classical_control.control
from typing import Protocol
import numpy as np
from gymnasium import Env
from numpy.typing import NDArray
# Names exported by ``from ... import *``: the two structural interfaces
# (FeedbackController, Observer) followed by the concrete controllers.
__all__ = [
    "FeedbackController",
    "Observer",
    "ConstantController",
    "RandomController",
    "ProportionalController",
    "FullStateFeedbackController",
    "PIDController",
]
[docs]
class FeedbackController(Protocol):
    """Structural interface for objects exposing ``control(observation)``.

    Any object with a matching ``control`` method satisfies this protocol;
    no inheritance is required.

    NOTE(review): the concrete controllers in this module implement ``act``
    rather than ``control`` -- confirm which method name callers rely on.
    """

    def control(self, observation: NDArray) -> NDArray:
        """Return the control output for ``observation``.

        The protocol itself provides no implementation.
        """
[docs]
class Observer(Protocol):
    """Structural interface for objects exposing ``observe(measrument)``.

    Any object with a matching ``observe`` method satisfies this protocol;
    no inheritance is required.
    """

    # NOTE(review): the parameter name ``measrument`` looks like a typo of
    # "measurement"; it is kept because it is part of the call signature.
    def observe(self, measrument: NDArray) -> NDArray:
        """Return the observation derived from ``measrument``.

        The protocol itself provides no implementation.
        """
[docs]
class ConstantController:
    """Controller that returns the same action regardless of the observation.

    Args:
        u: Action returned by every call to :meth:`act`. When omitted (or
            ``None``), a fresh ``np.zeros(1)`` is created for this instance.
    """

    # The annotation is quoted so the ``|`` union is never evaluated at
    # definition time, keeping the module importable on older interpreters.
    def __init__(self, u: "NDArray | None" = None) -> None:
        # A default written as ``u=np.zeros(1)`` would be evaluated once and
        # shared by every instance; mutating one controller's ``u`` in place
        # would then silently change all the others. Build it per instance.
        self.u = np.zeros(1) if u is None else u

    def act(self, observation: NDArray) -> NDArray:
        """Return the stored action ``u``; ``observation`` is ignored."""
        return self.u
[docs]
class RandomController:
    """Controller that draws each action by sampling an action space.

    Args:
        env: Environment whose ``action_space`` attribute is stored and
            sampled from on every call to :meth:`act`.
    """

    def __init__(self, env: Env) -> None:
        # Only the action space is kept; the environment itself is not stored.
        self.action_space = env.action_space

    def act(self, observation: NDArray) -> NDArray:
        """Return ``self.action_space.sample()``; ``observation`` is ignored."""
        sampled_action = self.action_space.sample()
        return sampled_action
[docs]
class ProportionalController:
    """Controller whose action is a gain times one observation component.

    Args:
        K: Gain multiplied with the observation entry at index 2.
    """

    def __init__(self, K: float = 10.0) -> None:
        self.K = K

    def act(self, observation: NDArray) -> NDArray:
        """Return ``K * observation[[2]]``.

        The list index ``[[2]]`` selects the entry at index 2 while keeping
        an array dimension, so a 1-D observation yields a length-1 array.
        """
        # presumably index 2 holds an angle (the original local was named
        # ``theta``) -- verify against the environment's observation layout.
        return self.K * observation[[2]]
[docs]
class FullStateFeedbackController:
    """Controller computing ``kr * reference - K @ x`` on a partial state.

    Args:
        K: Feedback gain applied (via ``@``) to observation entries 2 and 3.
        kr: Gain multiplied with the reference.
        reference: Reference value scaled by ``kr``.
    """

    def __init__(self, K: NDArray, kr: float, reference: float) -> None:
        self.reference = reference
        self.kr = kr
        self.K = K

    def act(self, observation: NDArray) -> NDArray:
        """Return ``kr * reference - K @ observation[[2, 3]]``.

        The result's shape follows ``K``: a ``(1, 2)`` gain gives a length-1
        array, whereas a 1-D gain of length 2 gives a scalar.
        """
        # Only entries 2 and 3 of the observation are fed back.
        fed_back_state = observation[[2, 3]]
        reference_term = self.kr * self.reference
        feedback_term = self.K @ fed_back_state
        return reference_term - feedback_term
[docs]
class PIDController:
    """Discrete PID controller acting on the first observation component.

    The error is ``reference - observation[0]``. The returned force is
    ``Kp * error + Ki * (sum of errors) * dt + Kd * (error - previous) / dt``.

    Args:
        Kp: Proportional gain.
        Ki: Integral gain.
        Kd: Derivative gain.
        reference: Target value for ``observation[0]``.
        dt: Time step between consecutive calls to :meth:`act`; must be
            strictly positive.

    Raises:
        ValueError: If ``dt`` is not strictly positive.
    """

    def __init__(
        self, Kp: float, Ki: float, Kd: float, reference: float, dt: float
    ) -> None:
        # ``dt`` divides the derivative term; zero would turn every force
        # into inf/nan. ``not dt > 0`` also rejects NaN.
        if not dt > 0:
            raise ValueError(f"dt must be strictly positive, got {dt!r}")
        self.Kp = Kp
        self.Ki = Ki
        self.Kd = Kd
        self.reference = reference
        self.dt = dt
        self.reset()

    def reset(self) -> None:
        """Clear the integral and derivative state.

        Call this before reusing the controller for a new run so that the
        error history of the previous run does not carry over.
        """
        self.accumulated_error = 0.0
        self.previous_error = 0.0

    def act(self, observation: NDArray) -> NDArray:
        """Update the internal error state and return the force.

        Args:
            observation: Indexable observation; only ``observation[0]`` is read.

        Returns:
            A length-1 array holding the computed force.
        """
        error = self.reference - observation[0]

        # Integral state is a plain running sum; it is scaled by dt below.
        self.accumulated_error += error

        # ``previous_error`` starts at 0.0, so on the first call after
        # construction or reset the derivative term sees the full error.
        error_difference = error - self.previous_error
        self.previous_error = error

        force = (
            self.Kp * error
            + self.Ki * self.accumulated_error * self.dt
            + self.Kd * error_difference / self.dt
        )
        return np.array([force])