# main.py
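"""Compare three controllers for a planar quadrotor tracking a reference trajectory.

The script simulates the same reference trajectory with (1) an LQR gain-scheduled
controller, (2) a manually tuned cascaded PD controller, and (3) a cascaded PD
controller whose gains are re-tuned online by a PPO policy, then reports ISE/ITSE
tracking metrics and plots the state, input and gain histories.
"""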
import numpy as np
import matplotlib
matplotlib.use('TkAgg')  # select an interactive backend before importing pyplot
import matplotlib.pyplot as plt
from stable_baselines3.ppo.ppo import PPO

from baseline.gain_scheduled import GainScheduled
from baseline.cascaded_PD import CascadedPD
from baseline.eval_performance import ISE, ITSE
from flying_sim.trajectory import Trajectory
from flying_sim.drone import Drone
from flying_sim.configs.config import Config


def main():
    config = Config()

    # Configure reference trajectory
    trajectory = Trajectory(config)
    tf, f_sref, f_uref = trajectory.interp_trajectory(load_file=config.trajectory_config.training_files[0])

    # Configure drones (one per controller)
    planar_quad_1 = Drone(config)
    planar_quad_2 = Drone(config)
    planar_quad_3 = Drone(config)

    # Configure controllers
    gain_scheduled = GainScheduled(config, planar_quad_1)
    PD_cascaded = CascadedPD(config, planar_quad_2)
    PD_cascaded_RL = CascadedPD(config, planar_quad_3)
    RL_control = PPO.load(config.training.model_dir + 'model-v0')

    # Logs for states, clipped controls and RL-scheduled gains
    states_1 = []
    states_2 = []
    states_3 = []
    control_1 = []
    control_2 = []
    control_3 = []
    actions = []

    dt = 0.01
    t = [0]
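
    # Simulate the 12 s reference with a fixed step of dt = 0.01 s; each drone is
    # integrated with RK4, and the PPO policy re-tunes the cascaded PD gains at
    # every step before that drone's control is computed.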
    while t[-1] < 12:
        # Time step for the gain-scheduled drone
        x_nom, u_nom = f_sref(t[-1]), f_uref(t[-1])
        control = gain_scheduled.policy(x_nom, u_nom)
        planar_quad_1.step_RK4(control, dt)
        control_1.append(planar_quad_1.clip_control(control, dt))

        # Time step for the cascaded-PD-controlled drone
        x_ref = f_sref(t[-1])
        control = PD_cascaded.policy(x_ref)
        planar_quad_2.step_RK4(control, dt)
        control_2.append(planar_quad_2.clip_control(control, dt))

        # Time step for the RL-controlled drone: the PPO policy outputs new PD gains
        obs = PD_cascaded_RL.return_errors(f_sref(t[-1])[0], f_sref(t[-1])[1])
        action = RL_control.predict(obs, deterministic=True)
        gains = PD_cascaded_RL.configure_gains(action[0])
        control = PD_cascaded_RL.policy(x_ref)
        planar_quad_3.step_RK4(control, dt)
        control_3.append(planar_quad_3.clip_control(control, dt))
        actions.append(gains)

        # Log states and time
        states_1.append(planar_quad_1.state.copy())
        states_2.append(planar_quad_2.state.copy())
        states_3.append(planar_quad_3.state.copy())
        t.append(t[-1] + dt)
    # Convert logged results to arrays and sample the reference on the same time grid
    t = np.array(t[:-1])
    states_1 = np.array(states_1)
    states_2 = np.array(states_2)
    states_3 = np.array(states_3)
    control_1 = np.array(control_1)
    control_2 = np.array(control_2)
    control_3 = np.array(control_3)
    actions = np.array(actions)
    x_ref = f_sref(t)
    u_ref = f_uref(t)
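
    # ISE = integral of the squared tracking error; ITSE = integral of the
    # time-weighted squared error (penalises errors that persist late in the
    # run), both as implemented in baseline.eval_performance.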
    # Evaluate tracking performance
    ISE_1 = ISE(states_1, x_ref, dt)
    ISE_2 = ISE(states_2, x_ref, dt)
    ISE_3 = ISE(states_3, x_ref, dt)
    ITSE_1 = ITSE(states_1, x_ref, dt)
    ITSE_2 = ITSE(states_2, x_ref, dt)
    ITSE_3 = ITSE(states_3, x_ref, dt)

    print('========================= Evaluation Metrics =========================')
    print(f'The gain-scheduled controller has an ISE of {ISE_1}')
    print(f'The cascaded PD controller has an ISE of {ISE_2}')
    print(f'The RL controller has an ISE of {ISE_3}')
    print(f'The gain-scheduled controller has an ITSE of {ITSE_1}')
    print(f'The cascaded PD controller has an ITSE of {ITSE_2}')
    print(f'The RL controller has an ITSE of {ITSE_3}')
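
    # NOTE: the gain-scheduled drone is simulated and evaluated above, but its
    # plot lines below are left commented out.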
    # State Trajectories
    plt.figure('State Trajectories')
    plt.plot(x_ref[:, 0], x_ref[:, 1], 'r-', label='Reference Trajectory')
    # plt.plot(states_1[:, 0], states_1[:, 1], 'g+', label='LQR Gain Scheduled Drone')
    plt.plot(states_2[:, 0], states_2[:, 1], 'b-', label='Baseline Controller')
    plt.plot(states_3[:, 0], states_3[:, 1], 'y-', label='RL Controller')
    plt.xlabel('x-position [m]')
    plt.ylabel('y-position [m]')
    plt.grid()
    plt.legend()
    # State Time Trajectories
    fig, (ax1, ax2) = plt.subplots(2, 1)
    ax1.plot(t, x_ref[:, 0], 'r-', label='Reference Trajectory')
    # ax1.plot(t, states_1[:, 0], 'g-', label='LQR Gain Scheduled Drone')
    ax1.plot(t, states_2[:, 0], 'b-', label='Baseline Controller')
    ax1.plot(t, states_3[:, 0], 'y-', label='RL Controller')
    ax1.set_ylabel('x-position [m]')
    ax1.grid()
    ax1.legend()
    ax2.plot(t, x_ref[:, 1], 'r-')
    # ax2.plot(t, states_1[:, 1], 'g-')
    ax2.plot(t, states_2[:, 1], 'b-')
    ax2.plot(t, states_3[:, 1], 'y-')
    ax2.set_ylabel('y-position [m]')
    ax2.set_xlabel('time [s]')
    ax2.grid()
    # Input Time Trajectories
    fig, (ax1, ax2) = plt.subplots(2, 1)
    ax1.plot(t, u_ref[:, 0], 'r-', label='Reference Input')
    # ax1.plot(t, control_1[:, 0], 'g-', label='LQR Gain Scheduled Drone')
    ax1.plot(t, control_2[:, 0], 'b-', label='Baseline Controller')
    ax1.plot(t, control_3[:, 0], 'y-', label='RL Controller')
    ax1.legend()
    ax2.plot(t, u_ref[:, 1], 'r-')
    # ax2.plot(t, control_1[:, 1], 'g-')
    ax2.plot(t, control_2[:, 1], 'b-')
    ax2.plot(t, control_3[:, 1], 'y-')
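
    # Each panel below compares the RL-scheduled gain (green) against the manually
    # tuned value (blue) and the lower/upper bounds from RL_scheduled_config (red).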
    # Gain Time Trajectories
    fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(2, 3)
    ax1.plot(t, config.RL_scheduled_config.lower_gains[0] * np.ones(len(t)), 'r-')
    ax1.text(0.5, config.RL_scheduled_config.lower_gains[0], 'Lower Limit', verticalalignment='bottom')
    ax1.plot(t, config.RL_scheduled_config.upper_gains[0] * np.ones(len(t)), 'r-')
    ax1.text(0.5, config.RL_scheduled_config.upper_gains[0], 'Upper Limit', verticalalignment='top')
    ax1.plot(t, config.cascaded_PD.Kp_x * np.ones(len(t)), 'b-', label='Manual gain')
    ax1.plot(t, actions[:, 0], 'g-', label='RL gain')
    ax1.set_title(r'$Kp_x$')
    ax1.set_ylabel('Value [-]')
    ax1.set_xlim([0, 12])
    ax1.grid()

    ax2.plot(t, config.RL_scheduled_config.lower_gains[1] * np.ones(len(t)), 'r-', label='Lower Limit')
    ax2.text(0.5, config.RL_scheduled_config.lower_gains[1], 'Lower Limit', verticalalignment='bottom')
    ax2.plot(t, config.RL_scheduled_config.upper_gains[1] * np.ones(len(t)), 'r-', label='Upper Limit')
    ax2.text(0.5, config.RL_scheduled_config.upper_gains[1], 'Upper Limit', verticalalignment='top')
    ax2.plot(t, config.cascaded_PD.Kp_vx * np.ones(len(t)), 'b-', label='Manual gain')
    ax2.plot(t, actions[:, 1], 'g-', label='RL gain')
    ax2.set_title(r'$Kp_{V_x}$')
    ax2.set_xlim([0, 12])
    ax2.grid()

    ax3.plot(t, config.RL_scheduled_config.lower_gains[2] * np.ones(len(t)), 'r-')
    ax3.text(0.5, config.RL_scheduled_config.lower_gains[2], 'Lower Limit', verticalalignment='bottom')
    ax3.plot(t, config.RL_scheduled_config.upper_gains[2] * np.ones(len(t)), 'r-')
    ax3.text(0.5, config.RL_scheduled_config.upper_gains[2], 'Upper Limit', verticalalignment='top')
    ax3.plot(t, config.cascaded_PD.Kp_theta * np.ones(len(t)), 'b-', label='Manual gain')
    ax3.plot(t, actions[:, 2], 'g-', label='RL gain')
    ax3.set_title(r'$Kp_{\theta}$')
    ax3.legend(loc='upper right', fontsize=8)
    ax3.set_xlim([0, 12])
    ax3.grid()

    ax4.plot(t, config.RL_scheduled_config.lower_gains[3] * np.ones(len(t)), 'r-', label='Lower Limit')
    ax4.text(0.5, config.RL_scheduled_config.lower_gains[3], 'Lower Limit', verticalalignment='bottom')
    ax4.plot(t, config.RL_scheduled_config.upper_gains[3] * np.ones(len(t)), 'r-', label='Upper Limit')
    ax4.text(0.5, config.RL_scheduled_config.upper_gains[3], 'Upper Limit', verticalalignment='top')
    ax4.plot(t, config.cascaded_PD.Kp_omega * np.ones(len(t)), 'b-', label='Manual gain')
    ax4.plot(t, actions[:, 3], 'g-', label='RL gain')
    ax4.set_title(r'$Kp_{\omega}$')
    ax4.set_xlabel('Time [s]')
    ax4.set_ylabel('Value [-]')
    ax4.set_xlim([0, 12])
    ax4.grid()

    ax5.plot(t, config.RL_scheduled_config.lower_gains[4] * np.ones(len(t)), 'r-', label='Lower Limit')
    ax5.text(0.5, config.RL_scheduled_config.lower_gains[4], 'Lower Limit', verticalalignment='bottom')
    ax5.plot(t, config.RL_scheduled_config.upper_gains[4] * np.ones(len(t)), 'r-', label='Upper Limit')
    ax5.text(0.5, config.RL_scheduled_config.upper_gains[4], 'Upper Limit', verticalalignment='top')
    ax5.plot(t, config.cascaded_PD.Kp_y * np.ones(len(t)), 'b-', label='Manual gain')
    ax5.plot(t, actions[:, 4], 'g-', label='RL gain')
    ax5.set_title(r'$Kp_y$')
    ax5.set_xlabel('Time [s]')
    ax5.set_xlim([0, 12])
    ax5.grid()

    ax6.plot(t, config.RL_scheduled_config.lower_gains[5] * np.ones(len(t)), 'r-', label='Lower Limit')
    ax6.text(0.5, config.RL_scheduled_config.lower_gains[5], 'Lower Limit', verticalalignment='bottom')
    ax6.plot(t, config.RL_scheduled_config.upper_gains[5] * np.ones(len(t)), 'r-', label='Upper Limit')
    ax6.text(0.5, config.RL_scheduled_config.upper_gains[5], 'Upper Limit', verticalalignment='top')
    ax6.plot(t, config.cascaded_PD.Kp_vy * np.ones(len(t)), 'b-', label='Manual gain')
    ax6.plot(t, actions[:, 5], 'g-', label='RL gain')
    ax6.set_title(r'$Kp_{V_y}$')
    ax6.set_xlabel('Time [s]')
    ax6.set_xlim([0, 12])
    ax6.grid()
    plt.tight_layout()
    plt.show()

    return 0


if __name__ == '__main__':
    main()