Commit 7b807d9

Add project MountainCarContinuous_PPO with 16 processes
1 parent 3514d83 commit 7b807d9

14 files changed: +1372 −0 lines

MountainCarContinuous_PPO/MountainCarContinuous_PPO_VecEnv-16proc_21epis.ipynb

Lines changed: 405 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
import torch
import torch.nn as nn

from utils import AddBias, init, init_normc_

"""
Modify standard PyTorch distributions so they are compatible with this code.
"""

FixedCategorical = torch.distributions.Categorical

old_sample = FixedCategorical.sample
FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1)

log_prob_cat = FixedCategorical.log_prob
FixedCategorical.log_probs = lambda self, actions: log_prob_cat(self, actions.squeeze(-1)).unsqueeze(-1)

FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True)

FixedNormal = torch.distributions.Normal
log_prob_normal = FixedNormal.log_prob
FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum(-1, keepdim=True)

entropy = FixedNormal.entropy
FixedNormal.entropy = lambda self: entropy(self).sum(-1)

FixedNormal.mode = lambda self: self.mean


class Categorical(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(Categorical, self).__init__()

        init_ = lambda m: init(m,
                               nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0),
                               gain=0.01)

        self.linear = init_(nn.Linear(num_inputs, num_outputs))

    def forward(self, x):
        x = self.linear(x)
        return FixedCategorical(logits=x)


class DiagGaussian(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(DiagGaussian, self).__init__()

        init_ = lambda m: init(m,
                               init_normc_,
                               lambda x: nn.init.constant_(x, 0))

        self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
        self.logstd = AddBias(torch.zeros(num_outputs))

    def forward(self, x):
        action_mean = self.fc_mean(x)

        # An ugly hack for my KFAC implementation.
        zeros = torch.zeros(action_mean.size())
        if x.is_cuda:
            zeros = zeros.cuda()

        action_logstd = self.logstd(zeros)
        return FixedNormal(action_mean, action_logstd.exp())
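
For orientation, here is a minimal usage sketch of the DiagGaussian head defined above. The batch size, hidden size, and variable names are illustrative assumptions, and it assumes the repo's utils module (AddBias, init, init_normc_) is importable; it is not part of this commit.

# Hypothetical usage sketch (not part of this commit): a DiagGaussian head turns
# actor features into a Normal distribution whose log_probs/entropy/mode follow
# the patched interfaces above.
import torch

hidden_size, action_dim = 64, 1           # assumed sizes for MountainCarContinuous
dist_head = DiagGaussian(hidden_size, action_dim)

features = torch.randn(16, hidden_size)   # e.g. one feature vector per parallel env
dist = dist_head(features)                # FixedNormal with diagonal covariance

action = dist.sample()                    # shape (16, action_dim)
log_prob = dist.log_probs(action)         # summed over action dims, shape (16, 1)
entropy = dist.entropy()                  # summed over action dims, shape (16,)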
MountainCarContinuous_PPO/dummy_vec_env.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import numpy as np
from init_vec_env import VecEnv

class DummyVecEnv(VecEnv):
    def __init__(self, env_fns):
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
        self.ts = np.zeros(len(self.envs), dtype='int')
        self.actions = None

    def step_async(self, actions):
        self.actions = actions

    def step_wait(self):
        results = [env.step(a) for (a, env) in zip(self.actions, self.envs)]
        obs, rews, dones, infos = map(np.array, zip(*results))
        self.ts += 1
        for (i, done) in enumerate(dones):
            if done:
                obs[i] = self.envs[i].reset()
                self.ts[i] = 0
        self.actions = None
        return np.array(obs), np.array(rews), np.array(dones), infos

    def reset(self):
        results = [env.reset() for env in self.envs]
        return np.array(results)

    def close(self):
        return
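
A minimal sketch of how DummyVecEnv might be driven directly, assuming the gym MountainCarContinuous-v0 environment; the number of copies and the zero actions are illustrative, not part of the commit.

# Hypothetical usage sketch (not part of this commit): DummyVecEnv steps each
# wrapped environment sequentially in a single process.
import gym
import numpy as np

env_fns = [lambda: gym.make('MountainCarContinuous-v0') for _ in range(4)]
venv = DummyVecEnv(env_fns)

obs = venv.reset()                            # stacked observations, shape (4, 2)
actions = np.zeros((4, 1), dtype=np.float32)  # one action per environment
obs, rews, dones, infos = venv.step(actions)  # step() = step_async() + step_wait()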

MountainCarContinuous_PPO/envs.py

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
import gym
import numpy as np
import torch
from gym.spaces.box import Box

from init_vec_env import VecEnvWrapper
from dummy_vec_env import DummyVecEnv
from subproc_vec_env import SubprocVecEnv

class AddTimestep(gym.ObservationWrapper):
    def __init__(self, env=None):
        super(AddTimestep, self).__init__(env)
        self.observation_space = Box(
            self.observation_space.low[0],
            self.observation_space.high[0],
            [self.observation_space.shape[0] + 1],
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return np.concatenate((observation, [self.env._elapsed_steps]))

class VecPyTorch(VecEnvWrapper):
    def __init__(self, venv, device):
        """Convert numpy observations and rewards to PyTorch tensors on `device`."""
        super(VecPyTorch, self).__init__(venv)
        self.device = device
        # TODO: Fix data types

    def reset(self):
        obs = self.venv.reset()
        obs = torch.from_numpy(obs).float().to(self.device)
        return obs

    def step_async(self, actions):
        actions = actions.squeeze(1).cpu().numpy()
        self.venv.step_async(actions)

    def step_wait(self):
        obs, reward, done, info = self.venv.step_wait()
        obs = torch.from_numpy(obs).float().to(self.device)
        reward = torch.from_numpy(np.expand_dims(np.stack(reward), 1)).float()
        return obs, reward, done, info


def make_env(env_id, seed, rank, log_dir, add_timestep, allow_early_resets):
    def _thunk():
        env = gym.make(env_id)
        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if add_timestep and len(obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        #if log_dir is not None:
        #    env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
        #                        allow_early_resets=allow_early_resets)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        '''
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)
        '''

        return env

    return _thunk

def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, add_timestep, device, allow_early_resets):
    envs = [make_env(env_name, seed, i, log_dir, add_timestep, allow_early_resets) for i in range(num_processes)]

    if len(envs) > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)

    envs = VecPyTorch(envs, device)

    '''
    if len(envs.observation_space.shape) == 3:
        print('Creating frame stacking wrapper')
        envs = VecPyTorchFrameStack(envs, 4, device)
        #print(envs.observation_space)
    '''

    return envs
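
A minimal sketch of how make_vec_envs might be called to build the 16-process vectorized environment the notebook title refers to. The argument values are illustrative assumptions, and SubprocVecEnv comes from subproc_vec_env.py, which is imported above but not shown in this diff.

# Hypothetical usage sketch (not part of this commit): with num_processes > 1,
# make_vec_envs wraps SubprocVecEnv in VecPyTorch, so reset()/step() exchange tensors.
import torch

device = torch.device('cpu')
envs = make_vec_envs('MountainCarContinuous-v0', seed=0, num_processes=16,
                     gamma=0.99, log_dir=None, add_timestep=False,
                     device=device, allow_early_resets=False)

obs = envs.reset()                        # torch.FloatTensor, shape (16, 2)
actions = torch.zeros(16, 1, 1)           # VecPyTorch.step_async squeezes dim 1
obs, rewards, dones, infos = envs.step(actions)
envs.close()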
45.9 KB binary file (diff not rendered)
MountainCarContinuous_PPO/init_vec_env.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
from abc import ABC, abstractmethod

class VecEnv(ABC):

    def __init__(self, num_envs, observation_space, action_space):
        self.num_envs = num_envs
        self.observation_space = observation_space
        self.action_space = action_space

    """
    An abstract asynchronous, vectorized environment.
    """
    @abstractmethod
    def reset(self):
        """
        Reset all the environments and return an array of
        observations.
        If step_async is still doing work, that work will
        be cancelled and step_wait() should not be called
        until step_async() is invoked again.
        """
        pass

    @abstractmethod
    def step_async(self, actions):
        """
        Tell all the environments to start taking a step
        with the given actions.
        Call step_wait() to get the results of the step.
        You should not call this if a step_async run is
        already pending.
        """
        pass

    @abstractmethod
    def step_wait(self):
        """
        Wait for the step taken with step_async().
        Returns (obs, rews, dones, infos):
         - obs: an array of observations
         - rews: an array of rewards
         - dones: an array of "episode done" booleans
         - infos: an array of info objects
        """
        pass

    @abstractmethod
    def close(self):
        """
        Clean up the environments' resources.
        """
        pass

    def step(self, actions):
        self.step_async(actions)
        return self.step_wait()

    def render(self):
        #logger.warn('Render not defined for %s'%self)
        pass

class VecEnvWrapper(VecEnv):
    def __init__(self, venv, observation_space=None, action_space=None):
        self.venv = venv
        VecEnv.__init__(self,
                        num_envs=venv.num_envs,
                        observation_space=observation_space or venv.observation_space,
                        action_space=action_space or venv.action_space)

    def step_async(self, actions):
        self.venv.step_async(actions)

    @abstractmethod
    def reset(self):
        pass

    @abstractmethod
    def step_wait(self):
        pass

    def close(self):
        return self.venv.close()

    def render(self):
        self.venv.render()

class CloudpickleWrapper(object):
    """
    Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle)
    """
    def __init__(self, x):
        self.x = x
    def __getstate__(self):
        import cloudpickle
        return cloudpickle.dumps(self.x)
    def __setstate__(self, ob):
        import pickle
        self.x = pickle.loads(ob)
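
A minimal sketch of why CloudpickleWrapper exists: it lets closures such as the _thunk returned by make_env survive pickling when they are handed to worker processes, since plain pickle cannot serialize lambdas. The thunk below is an illustrative assumption (not from the commit), and the sketch assumes the cloudpickle package is installed.

# Hypothetical usage sketch (not part of this commit): round-trip a closure
# through standard pickle by wrapping it in CloudpickleWrapper.
import pickle

thunk = lambda: 'payload produced by a closure'

wrapped = CloudpickleWrapper(thunk)
blob = pickle.dumps(wrapped)        # __getstate__ routes the payload through cloudpickle
restored = pickle.loads(blob)       # __setstate__ restores the callable
print(restored.x())                 # -> 'payload produced by a closure'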
