Commit 9153e61

Remove some algorithms
1 parent fcc132d commit 9153e61

9 files changed: +44 -423 lines


.gitignore

Lines changed: 10 additions & 12 deletions
@@ -3,18 +3,6 @@ __pycache__/
 *.py[cod]
 *$py.class
 
-.idea
-exp_*
-upload.py
-*.sh
-data
-dataset
-draw_*
-log
-old_logs
-figure
-images_data
-
 # C extensions
 *.so
 

@@ -99,3 +87,13 @@ ENV/
 
 # Rope project settings
 .ropeproject
+
+.idea
+data
+dataset
+log
+old_logs
+figure
+images_data
+mjkey.txt
+.DS_Store

deep_rl/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -1,5 +1,4 @@
 from .agent import *
 from .component import *
-from .model import *
 from .network import *
 from .utils import *

deep_rl/agent/DDPG_agent.py

Lines changed: 13 additions & 10 deletions
@@ -32,41 +32,44 @@ def soft_update(self, target, src):
 
     def eval_step(self, state):
         self.config.state_normalizer.set_read_only()
-        state = np.stack([self.config.state_normalizer(state)])
+        state = self.config.state_normalizer(state)
         action = self.network(state)
         self.config.state_normalizer.unset_read_only()
-        return to_np(action).flatten()
+        return to_np(action)
 
     def step(self):
         config = self.config
         if self.state is None:
             self.random_process.reset_states()
             self.state = self.task.reset()
             self.state = config.state_normalizer(self.state)
-        action = self.network(np.stack([self.state]))
-        action = to_np(action).flatten()
+        action = self.network(self.state)
+        action = to_np(action)
         action += self.random_process.sample()
-        next_state, reward, done, info = self.task.step(action)
+        next_state, reward, done, _ = self.task.step(action)
         next_state = self.config.state_normalizer(next_state)
         self.episode_reward += reward
         reward = self.config.reward_normalizer(reward)
-        self.replay.feed([self.state, action, reward, next_state, int(done)])
-        if done:
-            next_state = None
+        self.replay.feed([self.state, action, reward, next_state, np.asarray(done, dtype=np.uint8)])
+        if done[0]:
             self.episode_rewards.append(self.episode_reward)
             self.episode_reward = 0
+            self.random_process.reset_states()
         self.state = next_state
         self.total_steps += 1
 
         if self.replay.size() >= config.min_memory_size:
             experiences = self.replay.sample()
             states, actions, rewards, next_states, terminals = experiences
+            states = states.squeeze(1)
+            actions = actions.squeeze(1)
+            rewards = tensor(rewards)
+            next_states = next_states.squeeze(1)
+            terminals = tensor(terminals)
 
             phi_next = self.target_network.feature(next_states)
             a_next = self.target_network.actor(phi_next)
             q_next = self.target_network.critic(phi_next, a_next)
-            terminals = tensor(terminals).unsqueeze(1)
-            rewards = tensor(rewards).unsqueeze(1)
             q_next = config.discount * q_next * (1 - terminals)
             q_next.add_(rewards)
             q_next = q_next.detach()
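The agent-side changes above drop the manual np.stack batching because the task is now vectorized: states and actions already carry a leading environment dimension, done arrives as an array (hence done[0] and the np.asarray cast when feeding the replay buffer), and sampled batches are squeezed back to plain 2-D tensors before the update. The computation at the end of the hunk is the standard DDPG bootstrap target; below is a minimal NumPy sketch of just that step, with all three inputs stubbed by random placeholders rather than the repository's actual tensors.

import numpy as np

# Minimal sketch of the DDPG target value:
#     y = r + gamma * (1 - terminal) * Q'(s', mu'(s'))
# The inputs below are random stand-ins for what the target network
# would produce from a sampled replay batch; shapes mirror the diff above.
discount = 0.99
batch_size = 32

rewards = np.random.randn(batch_size, 1).astype(np.float32)              # r
terminals = np.random.randint(0, 2, (batch_size, 1)).astype(np.float32)  # 0/1 done flags
q_next = np.random.randn(batch_size, 1).astype(np.float32)               # Q'(s', mu'(s'))

# Multiplying by (1 - terminals) zeroes the bootstrap at episode ends,
# which is why `done` is stored as a 0/1 uint8 array in the replay buffer.
q_target = rewards + discount * (1.0 - terminals) * q_next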

deep_rl/component/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -2,4 +2,6 @@
 from .replay import *
 from .task import *
 from .random_process import *
-from .bench import *
+from .bench import *
+
+from .envs import VecTask
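VecTask itself lives in deep_rl/component/envs.py, which is not part of this diff. Judging from the agent changes above, it wraps a single environment behind a vectorized interface, so reset and step return arrays with a leading env dimension of size 1 (matching done[0] in DDPG_agent.py). The sketch below is only a guess at that minimal contract, with every name other than VecTask hypothetical; the real class may look quite different.

import numpy as np
import gym

# Hypothetical single-env vectorized wrapper, sketching the interface
# the updated DDPG agent appears to rely on.
class SingleEnvVecTaskSketch:
    def __init__(self, env_id):
        self.env = gym.make(env_id)

    def reset(self):
        # Stack to shape (1, *obs_shape): a leading env dimension.
        return np.stack([self.env.reset()])

    def step(self, actions):
        # `actions` has shape (1, *action_shape); unwrap for the raw env.
        state, reward, done, info = self.env.step(actions[0])
        if done:
            state = self.env.reset()
        # Return arrays so the agent can index done[0] and feed the
        # transition straight into the replay buffer.
        return np.stack([state]), np.asarray([reward]), np.asarray([done]), [info]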

deep_rl/model/__init__.py

Lines changed: 0 additions & 2 deletions
This file was deleted.

deep_rl/model/action_conditional_video_prediction.py

Lines changed: 0 additions & 193 deletions
This file was deleted.

deep_rl/model/dataset.py

Lines changed: 0 additions & 81 deletions
This file was deleted.
