Commit 9153e61

Remove some algorithms
1 parent fcc132d commit 9153e61

9 files changed: +44 -423 lines


.gitignore

Lines changed: 10 additions & 12 deletions
@@ -3,18 +3,6 @@ __pycache__/
 *.py[cod]
 *$py.class
 
-.idea
-exp_*
-upload.py
-*.sh
-data
-dataset
-draw_*
-log
-old_logs
-figure
-images_data
-
 # C extensions
 *.so
 

@@ -99,3 +87,13 @@ ENV/
 
 # Rope project settings
 .ropeproject
+
+.idea
+data
+dataset
+log
+old_logs
+figure
+images_data
+mjkey.txt
+.DS_Store

deep_rl/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 from .agent import *
 from .component import *
-from .model import *
 from .network import *
 from .utils import *

deep_rl/agent/DDPG_agent.py

Lines changed: 13 additions & 10 deletions
@@ -32,41 +32,44 @@ def soft_update(self, target, src):
 
     def eval_step(self, state):
         self.config.state_normalizer.set_read_only()
-        state = np.stack([self.config.state_normalizer(state)])
+        state = self.config.state_normalizer(state)
         action = self.network(state)
         self.config.state_normalizer.unset_read_only()
-        return to_np(action).flatten()
+        return to_np(action)
 
     def step(self):
         config = self.config
         if self.state is None:
             self.random_process.reset_states()
             self.state = self.task.reset()
             self.state = config.state_normalizer(self.state)
-        action = self.network(np.stack([self.state]))
-        action = to_np(action).flatten()
+        action = self.network(self.state)
+        action = to_np(action)
         action += self.random_process.sample()
-        next_state, reward, done, info = self.task.step(action)
+        next_state, reward, done, _ = self.task.step(action)
         next_state = self.config.state_normalizer(next_state)
         self.episode_reward += reward
         reward = self.config.reward_normalizer(reward)
-        self.replay.feed([self.state, action, reward, next_state, int(done)])
-        if done:
-            next_state = None
+        self.replay.feed([self.state, action, reward, next_state, np.asarray(done, dtype=np.uint8)])
+        if done[0]:
             self.episode_rewards.append(self.episode_reward)
             self.episode_reward = 0
+            self.random_process.reset_states()
         self.state = next_state
         self.total_steps += 1
 
         if self.replay.size() >= config.min_memory_size:
             experiences = self.replay.sample()
             states, actions, rewards, next_states, terminals = experiences
+            states = states.squeeze(1)
+            actions = actions.squeeze(1)
+            rewards = tensor(rewards)
+            next_states = next_states.squeeze(1)
+            terminals = tensor(terminals)
 
             phi_next = self.target_network.feature(next_states)
             a_next = self.target_network.actor(phi_next)
             q_next = self.target_network.critic(phi_next, a_next)
-            terminals = tensor(terminals).unsqueeze(1)
-            rewards = tensor(rewards).unsqueeze(1)
             q_next = config.discount * q_next * (1 - terminals)
             q_next.add_(rewards)
             q_next = q_next.detach()
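The agent-side changes above drop the manual np.stack batching because the task is now vectorized: states and actions already carry a leading environment dimension, done arrives as an array (hence done[0] and the np.asarray cast when feeding the replay buffer), and sampled batches are squeezed back to plain 2-D tensors before the update. The computation at the end of the hunk is the standard DDPG bootstrap target; below is a minimal NumPy sketch of just that step, with all three inputs stubbed by random placeholders rather than the repository's actual tensors.

import numpy as np

# Minimal sketch of the DDPG target value:
#     y = r + gamma * (1 - terminal) * Q'(s', mu'(s'))
# The inputs below are random stand-ins for what the target network
# would produce from a sampled replay batch; shapes mirror the diff above.
discount = 0.99
batch_size = 32

rewards = np.random.randn(batch_size, 1).astype(np.float32)              # r
terminals = np.random.randint(0, 2, (batch_size, 1)).astype(np.float32)  # 0/1 done flags
q_next = np.random.randn(batch_size, 1).astype(np.float32)               # Q'(s', mu'(s'))

# Multiplying by (1 - terminals) zeroes the bootstrap at episode ends,
# which is why `done` is stored as a 0/1 uint8 array in the replay buffer.
q_target = rewards + discount * (1.0 - terminals) * q_next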

deep_rl/component/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -2,4 +2,6 @@
 from .replay import *
 from .task import *
 from .random_process import *
-from .bench import *
+from .bench import *
+
+from .envs import VecTask
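VecTask itself lives in deep_rl/component/envs.py, which is not part of this diff. Judging from the agent changes above, it wraps a single environment behind a vectorized interface, so reset and step return arrays with a leading env dimension of size 1 (matching done[0] in DDPG_agent.py). The sketch below is only a guess at that minimal contract, with every name other than VecTask hypothetical; the real class may look quite different.

import numpy as np
import gym

# Hypothetical single-env vectorized wrapper, sketching the interface
# the updated DDPG agent appears to rely on.
class SingleEnvVecTaskSketch:
    def __init__(self, env_id):
        self.env = gym.make(env_id)

    def reset(self):
        # Stack to shape (1, *obs_shape): a leading env dimension.
        return np.stack([self.env.reset()])

    def step(self, actions):
        # `actions` has shape (1, *action_shape); unwrap for the raw env.
        state, reward, done, info = self.env.step(actions[0])
        if done:
            state = self.env.reset()
        # Return arrays so the agent can index done[0] and feed the
        # transition straight into the replay buffer.
        return np.stack([state]), np.asarray([reward]), np.asarray([done]), [info]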

deep_rl/model/__init__.py

Lines changed: 0 additions & 2 deletions
This file was deleted.

deep_rl/model/action_conditional_video_prediction.py

Lines changed: 0 additions & 193 deletions
This file was deleted.

deep_rl/model/dataset.py

Lines changed: 0 additions & 81 deletions
This file was deleted.
