Commit 7b807d9

Add project MountainCarContinuous_PPO with 16 processes
1 parent 3514d83 commit 7b807d9

14 files changed: +1372 −0 lines

MountainCarContinuous_PPO/MountainCarContinuous_PPO_VecEnv-16proc_21epis.ipynb

Lines changed: 405 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
import torch
import torch.nn as nn

from utils import AddBias, init, init_normc_

"""
Modify standard PyTorch distributions so they are compatible with this code.
"""

FixedCategorical = torch.distributions.Categorical

old_sample = FixedCategorical.sample
FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1)

log_prob_cat = FixedCategorical.log_prob
FixedCategorical.log_probs = lambda self, actions: log_prob_cat(self, actions.squeeze(-1)).unsqueeze(-1)

FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True)

FixedNormal = torch.distributions.Normal
log_prob_normal = FixedNormal.log_prob
FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum(-1, keepdim=True)

entropy = FixedNormal.entropy
FixedNormal.entropy = lambda self: entropy(self).sum(-1)

FixedNormal.mode = lambda self: self.mean


class Categorical(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(Categorical, self).__init__()

        init_ = lambda m: init(m,
                               nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0),
                               gain=0.01)

        self.linear = init_(nn.Linear(num_inputs, num_outputs))

    def forward(self, x):
        x = self.linear(x)
        return FixedCategorical(logits=x)


class DiagGaussian(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(DiagGaussian, self).__init__()

        init_ = lambda m: init(m,
                               init_normc_,
                               lambda x: nn.init.constant_(x, 0))

        self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
        self.logstd = AddBias(torch.zeros(num_outputs))

    def forward(self, x):
        action_mean = self.fc_mean(x)

        # An ugly hack for my KFAC implementation.
        zeros = torch.zeros(action_mean.size())
        if x.is_cuda:
            zeros = zeros.cuda()

        action_logstd = self.logstd(zeros)
        return FixedNormal(action_mean, action_logstd.exp())
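
For orientation, here is a minimal usage sketch of the DiagGaussian head defined above. The batch size, hidden size, and variable names are illustrative assumptions, and it assumes the repo's utils module (AddBias, init, init_normc_) is importable; it is not part of this commit.

# Hypothetical usage sketch (not part of this commit): a DiagGaussian head turns
# actor features into a Normal distribution whose log_probs/entropy/mode follow
# the patched interfaces above.
import torch

hidden_size, action_dim = 64, 1           # assumed sizes for MountainCarContinuous
dist_head = DiagGaussian(hidden_size, action_dim)

features = torch.randn(16, hidden_size)   # e.g. one feature vector per parallel env
dist = dist_head(features)                # FixedNormal with diagonal covariance

action = dist.sample()                    # shape (16, action_dim)
log_prob = dist.log_probs(action)         # summed over action dims, shape (16, 1)
entropy = dist.entropy()                  # summed over action dims, shape (16,)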
MountainCarContinuous_PPO/dummy_vec_env.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
import numpy as np
from init_vec_env import VecEnv

class DummyVecEnv(VecEnv):
    def __init__(self, env_fns):
        self.envs = [fn() for fn in env_fns]
        env = self.envs[0]
        VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
        self.ts = np.zeros(len(self.envs), dtype='int')
        self.actions = None

    def step_async(self, actions):
        self.actions = actions

    def step_wait(self):
        results = [env.step(a) for (a, env) in zip(self.actions, self.envs)]
        obs, rews, dones, infos = map(np.array, zip(*results))
        self.ts += 1
        for (i, done) in enumerate(dones):
            if done:
                obs[i] = self.envs[i].reset()
                self.ts[i] = 0
        self.actions = None
        return np.array(obs), np.array(rews), np.array(dones), infos

    def reset(self):
        results = [env.reset() for env in self.envs]
        return np.array(results)

    def close(self):
        return
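
A minimal sketch of how DummyVecEnv might be driven directly, assuming the gym MountainCarContinuous-v0 environment; the number of copies and the zero actions are illustrative, not part of the commit.

# Hypothetical usage sketch (not part of this commit): DummyVecEnv steps each
# wrapped environment sequentially in a single process.
import gym
import numpy as np

env_fns = [lambda: gym.make('MountainCarContinuous-v0') for _ in range(4)]
venv = DummyVecEnv(env_fns)

obs = venv.reset()                            # stacked observations, shape (4, 2)
actions = np.zeros((4, 1), dtype=np.float32)  # one action per environment
obs, rews, dones, infos = venv.step(actions)  # step() = step_async() + step_wait()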

MountainCarContinuous_PPO/envs.py

Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
import gym
import numpy as np
import torch
from gym.spaces.box import Box

from init_vec_env import VecEnvWrapper
from dummy_vec_env import DummyVecEnv
from subproc_vec_env import SubprocVecEnv

class AddTimestep(gym.ObservationWrapper):
    def __init__(self, env=None):
        super(AddTimestep, self).__init__(env)
        self.observation_space = Box(
            self.observation_space.low[0],
            self.observation_space.high[0],
            [self.observation_space.shape[0] + 1],
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return np.concatenate((observation, [self.env._elapsed_steps]))

class VecPyTorch(VecEnvWrapper):
    def __init__(self, venv, device):
        """Convert numpy observations and rewards to PyTorch tensors on `device`."""
        super(VecPyTorch, self).__init__(venv)
        self.device = device
        # TODO: Fix data types

    def reset(self):
        obs = self.venv.reset()
        obs = torch.from_numpy(obs).float().to(self.device)
        return obs

    def step_async(self, actions):
        actions = actions.squeeze(1).cpu().numpy()
        self.venv.step_async(actions)

    def step_wait(self):
        obs, reward, done, info = self.venv.step_wait()
        obs = torch.from_numpy(obs).float().to(self.device)
        reward = torch.from_numpy(np.expand_dims(np.stack(reward), 1)).float()
        return obs, reward, done, info


def make_env(env_id, seed, rank, log_dir, add_timestep, allow_early_resets):
    def _thunk():
        env = gym.make(env_id)
        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if add_timestep and len(obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        #if log_dir is not None:
        #    env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
        #                        allow_early_resets=allow_early_resets)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        '''
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)
        '''

        return env

    return _thunk

def make_vec_envs(env_name, seed, num_processes, gamma, log_dir, add_timestep, device, allow_early_resets):
    envs = [make_env(env_name, seed, i, log_dir, add_timestep, allow_early_resets) for i in range(num_processes)]

    if len(envs) > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)

    envs = VecPyTorch(envs, device)

    '''
    if len(envs.observation_space.shape) == 3:
        print('Creating frame stacking wrapper')
        envs = VecPyTorchFrameStack(envs, 4, device)
        #print(envs.observation_space)
    '''

    return envs
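
A minimal sketch of how make_vec_envs might be called to build the 16-process vectorized environment the notebook title refers to. The argument values are illustrative assumptions, and SubprocVecEnv comes from subproc_vec_env.py, which is imported above but not shown in this diff.

# Hypothetical usage sketch (not part of this commit): with num_processes > 1,
# make_vec_envs wraps SubprocVecEnv in VecPyTorch, so reset()/step() exchange tensors.
import torch

device = torch.device('cpu')
envs = make_vec_envs('MountainCarContinuous-v0', seed=0, num_processes=16,
                     gamma=0.99, log_dir=None, add_timestep=False,
                     device=device, allow_early_resets=False)

obs = envs.reset()                        # torch.FloatTensor, shape (16, 2)
actions = torch.zeros(16, 1, 1)           # VecPyTorch.step_async squeezes dim 1
obs, rewards, dones, infos = envs.step(actions)
envs.close()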
45.9 KB binary file (diff not rendered)
MountainCarContinuous_PPO/init_vec_env.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
from abc import ABC, abstractmethod

class VecEnv(ABC):

    def __init__(self, num_envs, observation_space, action_space):
        self.num_envs = num_envs
        self.observation_space = observation_space
        self.action_space = action_space

    """
    An abstract asynchronous, vectorized environment.
    """
    @abstractmethod
    def reset(self):
        """
        Reset all the environments and return an array of
        observations.
        If step_async is still doing work, that work will
        be cancelled and step_wait() should not be called
        until step_async() is invoked again.
        """
        pass

    @abstractmethod
    def step_async(self, actions):
        """
        Tell all the environments to start taking a step
        with the given actions.
        Call step_wait() to get the results of the step.
        You should not call this if a step_async run is
        already pending.
        """
        pass

    @abstractmethod
    def step_wait(self):
        """
        Wait for the step taken with step_async().
        Returns (obs, rews, dones, infos):
         - obs: an array of observations
         - rews: an array of rewards
         - dones: an array of "episode done" booleans
         - infos: an array of info objects
        """
        pass

    @abstractmethod
    def close(self):
        """
        Clean up the environments' resources.
        """
        pass

    def step(self, actions):
        self.step_async(actions)
        return self.step_wait()

    def render(self):
        #logger.warn('Render not defined for %s'%self)
        pass

class VecEnvWrapper(VecEnv):
    def __init__(self, venv, observation_space=None, action_space=None):
        self.venv = venv
        VecEnv.__init__(self,
                        num_envs=venv.num_envs,
                        observation_space=observation_space or venv.observation_space,
                        action_space=action_space or venv.action_space)

    def step_async(self, actions):
        self.venv.step_async(actions)

    @abstractmethod
    def reset(self):
        pass

    @abstractmethod
    def step_wait(self):
        pass

    def close(self):
        return self.venv.close()

    def render(self):
        self.venv.render()

class CloudpickleWrapper(object):
    """
    Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle)
    """
    def __init__(self, x):
        self.x = x
    def __getstate__(self):
        import cloudpickle
        return cloudpickle.dumps(self.x)
    def __setstate__(self, ob):
        import pickle
        self.x = pickle.loads(ob)
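
A minimal sketch of why CloudpickleWrapper exists: it lets closures such as the _thunk returned by make_env survive pickling when they are handed to worker processes, since plain pickle cannot serialize lambdas. The thunk below is an illustrative assumption (not from the commit), and the sketch assumes the cloudpickle package is installed.

# Hypothetical usage sketch (not part of this commit): round-trip a closure
# through standard pickle by wrapping it in CloudpickleWrapper.
import pickle

thunk = lambda: 'payload produced by a closure'

wrapped = CloudpickleWrapper(thunk)
blob = pickle.dumps(wrapped)        # __getstate__ routes the payload through cloudpickle
restored = pickle.loads(blob)       # __setstate__ restores the callable
print(restored.x())                 # -> 'payload produced by a closure'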
