Commit 6186bb3 ("first commit", 0 parents)

31 files changed: +638, -0 lines

.gitignore (+26 lines)

##### FILE TYPES #####

*.tar
*.pt
*.DS_Store
*.pyc
*.mp4
*.zip

##### DIRECTORIES #####

./pong-v0
./breakout-v0
./spaceinvaders-v0

./overfit-pong-v0
./overfit-breakout-v0
./overfit-spaceinvaders-v0

__pycache__
.ipynb_checkpoints/*


##### NAMES #####

*ubyte

README.md (+51 lines)

Visualizing and Understanding Atari Agents
=======
Sam Greydanus. October 2017. MIT License.

Oregon State University College of Engineering. [Explainable AI Project](http://twitter.com/DARPA/status/872547502616182785). Supported by DARPA.

_Written in PyTorch_

Strong agents
--------

![breakout-tunneling.gif](static/breakout-tunneling.gif)
![pong-killshot.gif](static/pong-killshot.gif)
![spaceinv-aiming.gif](static/spaceinv-aiming.gif)

Overfit agents
--------
* WITHOUT saliency:
  * overfit agent: https://youtu.be/TgTpF-EXPwc
  * control agent: https://youtu.be/i3Br2PzE49I
* WITH saliency:
  * overfit agent: https://youtu.be/eeXLUI73RTo
  * control agent: https://youtu.be/xXGC6CQW97E

Learning
--------
![breakout-learning-2000.gif](static/breakout-learning-2000.gif)

About
--------
Code for the results in the paper [Visualizing and Understanding Atari Agents](https://arxiv.org/).

For a quick comparison of Jacobian saliency with our perturbation-based saliency, see [this Jupyter notebook](https://nbviewer.jupyter.org/github/greydanus/visualize_atari/blob/master/jacobian-vs-perturbation.ipynb).

**Abstract.** Deep reinforcement learning (deep RL) agents have achieved remarkable success in a broad range of game-playing and continuous control tasks. While these agents are effective at maximizing rewards, it is often unclear what strategies they use to do so. In this paper, we take a step toward explaining deep RL agents through a case study in three Atari 2600 environments. In particular, we focus on understanding agents in terms of their visual attentional patterns during decision making. To this end, we introduce a method for generating rich saliency maps and use it to explain 1) what strong agents attend to, 2) whether agents are making decisions for the right or wrong reasons, and 3) how agents evolve during the learning phase. We also test our method on non-expert human subjects and find that it improves their ability to reason about these agents. Our techniques are general and, though we focus on Atari, our long-term objective is to produce tools that explain any deep RL policy.

Pretrained models
--------
Trained models were obtained using [this repo](https://github.com/greydanus/baby-a3c) (default hyperparameters).
1. Download from [https://goo.gl/fqwJDB](https://goo.gl/fqwJDB)
2. Unzip the file in this directory.

Dependencies
--------
All code is written in Python 3.6. You will need:

* NumPy
* SciPy
* Matplotlib
* [PyTorch 0.2](http://pytorch.org/): easier to write and debug than TensorFlow :)
* [Jupyter](https://jupyter.org/)
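
Quick start: a minimal sketch of how the pieces above fit together, assuming the pretrained checkpoints are unzipped into `./breakout-v0/`, an existing `./movies/` output directory, ffmpeg on the PATH, and the repo importable as the `visualize_atari` package.

```python
# Minimal quick-start sketch; the paths and package name are assumptions based on
# the code in this commit (checkpoints in ./breakout-v0/, output in ./movies/).
from visualize_atari import make_movie

make_movie('Breakout-v0', checkpoint='*.tar', num_frames=20, first_frame=150,
           resolution=75, save_dir='./movies/', density=5, radius=5, prefix='demo')
```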

__init__.py (+5 lines)

from .saliency import *
from .rollout import *
from .make_movie import *
from .policy import *
from .overfit_atari import *

jacobian-vs-perturbation.ipynb (+246 lines)

(Large notebook diff; not rendered by default.)
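
The notebook's contents are not rendered in this diff. For orientation, the Jacobian baseline it compares against amounts to backpropagating the chosen action's logit to the input pixels. Below is a minimal sketch of that idea, not the notebook's own code, reusing `prepro`, `NNPolicy`, and the `history` dict defined in the modules later in this commit.

```python
# Illustrative sketch of Jacobian (gradient) saliency; an assumption about the
# baseline, not code from the notebook. `model` is an NNPolicy, `history` comes
# from rollout(), and `prepro` is the 80x80 preprocessing lambda from saliency.py.
import torch
import numpy as np
from torch.autograd import Variable

def jacobian_saliency(model, history, ix):
    state = Variable(torch.Tensor(prepro(history['ins'][ix])).view(1, 1, 80, 80),
                     requires_grad=True)
    hx = Variable(torch.Tensor(history['hx'][ix - 1]).view(1, -1))
    cx = Variable(torch.Tensor(history['cx'][ix - 1]).view(1, -1))
    _, logit, _ = model((state, (hx, cx)))
    action = int(logit.data.max(1)[1][0])         # greedy action at this frame
    logit[0, action].backward()                   # d(logit_action) / d(input pixels)
    return np.abs(state.grad.data.numpy()[0, 0])  # 80x80 gradient-magnitude map
```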

make_movie.py (+78 lines)

# Visualizing and Understanding Atari Agents | Sam Greydanus | 2017 | MIT License

from __future__ import print_function
import warnings ; warnings.filterwarnings('ignore') # mute warnings, live dangerously

import matplotlib as mpl ; mpl.use("Agg") # select a non-interactive backend *before* importing pyplot
import matplotlib.pyplot as plt
import matplotlib.animation as manimation

import gym, os, sys, time, argparse

sys.path.append('..')
from visualize_atari import *

def make_movie(env_name, checkpoint='*.tar', num_frames=20, first_frame=0, resolution=75, \
        save_dir='./movies/', density=5, radius=5, prefix='default', overfit_mode=False):

    # set up dir variables and environment
    load_dir = '{}{}/'.format('overfit-' if overfit_mode else '', env_name.lower())
    meta = get_env_meta(env_name)
    env = gym.make(env_name) if not overfit_mode else OverfitAtari(env_name, load_dir+'expert/', seed=0) # make a seeded env

    # set up agent
    model = NNPolicy(channels=1, num_actions=env.action_space.n)
    model.try_load(load_dir, checkpoint=checkpoint)

    # get a rollout of the policy
    movie_title = "{}-{}-{}.mp4".format(prefix, num_frames, env_name.lower())
    print('\tmaking movie "{}" using checkpoint at {}{}'.format(movie_title, load_dir, checkpoint))
    max_ep_len = first_frame + num_frames + 1
    torch.manual_seed(0)
    history = rollout(model, env, max_ep_len=max_ep_len)
    print()

    # make the movie!
    start = time.time()
    FFMpegWriter = manimation.writers['ffmpeg']
    metadata = dict(title=movie_title, artist='greydanus', comment='atari-saliency-video')
    writer = FFMpegWriter(fps=8, metadata=metadata)

    prog = '' ; total_frames = len(history['ins'])
    f = plt.figure(figsize=[6, 6*1.3], dpi=resolution)
    with writer.saving(f, save_dir + movie_title, resolution):
        for i in range(num_frames):
            ix = first_frame+i
            if ix < total_frames: # prevent the loop from trying to process a frame ix greater than the rollout length
                frame = history['ins'][ix].squeeze().copy()
                actor_saliency = score_frame(model, history, ix, radius, density, interp_func=occlude, mode='actor')
                critic_saliency = score_frame(model, history, ix, radius, density, interp_func=occlude, mode='critic')

                frame = saliency_on_atari_frame(actor_saliency, frame, fudge_factor=meta['actor_ff'], channel=2)
                frame = saliency_on_atari_frame(critic_saliency, frame, fudge_factor=meta['critic_ff'], channel=0)

                plt.imshow(frame) ; plt.title(env_name.lower(), fontsize=15)
                writer.grab_frame() ; f.clear()

                tstr = time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - start))
                print('\ttime: {} | progress: {:.1f}%'.format(tstr, 100*i/min(num_frames, total_frames)), end='\r')
    print('\nfinished.')

# the make_movie function can also be imported and called from another script; this block handles command-line use
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-e', '--env', default='Breakout-v0', type=str, help='gym environment')
    parser.add_argument('-d', '--density', default=5, type=int, help='density of grid of gaussian blurs')
    parser.add_argument('-r', '--radius', default=5, type=int, help='radius of gaussian blur')
    parser.add_argument('-f', '--num_frames', default=20, type=int, help='number of frames in movie')
    parser.add_argument('-i', '--first_frame', default=150, type=int, help='index of first frame')
    parser.add_argument('-dpi', '--resolution', default=75, type=int, help='resolution (dpi)')
    parser.add_argument('-s', '--save_dir', default='./movies/', type=str, help='dir to save agent logs and checkpoints')
    parser.add_argument('-p', '--prefix', default='default', type=str, help='prefix to help make video name unique')
    parser.add_argument('-c', '--checkpoint', default='*.tar', type=str, help='checkpoint name (in case there is more than one)')
    parser.add_argument('-o', '--overfit_mode', action='store_true', help='analyze an overfit environment (see paper)') # store_true avoids the type=bool pitfall where any non-empty string parses as True
    args = parser.parse_args()

    make_movie(args.env, args.checkpoint, args.num_frames, args.first_frame, args.resolution,
               args.save_dir, args.density, args.radius, args.prefix, args.overfit_mode)
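
For the overfit-agent videos linked in the README, the same entry point runs with `overfit_mode=True`. A sketch of that call, assuming the overfit checkpoints were unzipped into `./overfit-breakout-v0/` with an `expert/` subdirectory (the layout that `load_dir + 'expert/'` above expects):

```python
# Sketch only: reproduce an overfit-agent saliency movie. The directory layout
# (./overfit-breakout-v0/ with *.tar checkpoints and an expert/ subfolder) is an
# assumption based on load_dir above; adjust to wherever the files actually live.
make_movie('Breakout-v0', num_frames=20, first_frame=150,
           prefix='overfit', overfit_mode=True)
```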

overfit_atari.py (+49 lines)

# Visualizing and Understanding Atari Agents | Sam Greydanus | 2017 | MIT License

from __future__ import print_function
import warnings ; warnings.filterwarnings('ignore') # mute warnings, live dangerously ;)

import torch
from torch.autograd import Variable
import torch.nn.functional as F

import gym, sys
import numpy as np
from scipy.misc import imresize # preserves single-pixel info _unlike_ img = img[::2,::2]

sys.path.append('..')
from visualize_atari import *

prepro = lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255.

class OverfitAtari():
    def __init__(self, env_name, expert_dir, seed=0):
        self.atari = gym.make(env_name) ; self.atari.seed(seed)
        self.action_space = self.atari.action_space
        self.expert = NNPolicy(channels=1, num_actions=self.action_space.n)
        self.expert.try_load(expert_dir)
        self.cx = Variable(torch.zeros(1, 256)) # lstm memory vector
        self.hx = Variable(torch.zeros(1, 256)) # lstm activation vector

    def seed(self, s):
        self.atari.seed(s) ; torch.manual_seed(s)

    def reset(self):
        self.cx = Variable(torch.zeros(1, 256))
        self.hx = Variable(torch.zeros(1, 256))
        return self.atari.reset()

    def step(self, action):
        state, reward, done, info = self.atari.step(action)

        expert_state = torch.Tensor(prepro(state)) # get the expert policy and incorporate it into the environment
        _, logit, (hx, cx) = self.expert((Variable(expert_state.view(1,1,80,80)), (self.hx, self.cx)))
        self.hx, self.cx = Variable(hx.data), Variable(cx.data)

        expert_action = int(F.softmax(logit).data.max(1)[1][0,0])
        target = torch.zeros(logit.size()) ; target[0,expert_action] = 1 # one-hot expert policy, returned in place of gym's `info`
        j = 72 ; k = 5 # position and spacing of the on-screen action marker
        expert_action = expert_action if False else np.random.randint(self.atari.action_space.n) # NOTE: the displayed marker is driven by a *random* action; flip the `if False` toggle to display the expert's action instead
        for i in range(self.atari.action_space.n):
            state[37:41, j + k*i: j+1+k*i,:] = 250 if expert_action == i else 50 # draw the marker into the frame
        return state, reward, done, target
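
A small usage sketch, with an assumed checkpoint path, showing the non-standard return value: `step` hands back the expert's one-hot policy where a gym env would normally return `info`.

```python
# Sketch, assuming expert weights in ./overfit-breakout-v0/expert/ (path is an assumption).
env = OverfitAtari('Breakout-v0', './overfit-breakout-v0/expert/', seed=0)
obs = env.reset()
for _ in range(5):
    obs, reward, done, expert_target = env.step(env.action_space.sample())
    # expert_target is a 1 x num_actions tensor, one-hot on the expert's chosen action
    print(expert_target.numpy())
```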

policy.py (+42 lines)

# Visualizing and Understanding Atari Agents | Sam Greydanus | 2017 | MIT License

from __future__ import print_function
import warnings ; warnings.filterwarnings('ignore') # mute warnings, live dangerously ;)

import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn

import glob
import numpy as np
from scipy.misc import imresize # preserves single-pixel info _unlike_ img = img[::2,::2]

class NNPolicy(torch.nn.Module): # an actor-critic neural network
    def __init__(self, channels, num_actions):
        super(NNPolicy, self).__init__()
        self.conv1 = nn.Conv2d(channels, 32, 3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.lstm = nn.LSTMCell(32 * 5 * 5, 256)
        self.critic_linear, self.actor_linear = nn.Linear(256, 1), nn.Linear(256, num_actions)

    def forward(self, inputs):
        inputs, (hx, cx) = inputs
        x = F.elu(self.conv1(inputs))
        x = F.elu(self.conv2(x))
        x = F.elu(self.conv3(x))
        x = F.elu(self.conv4(x))
        x = x.view(-1, 32 * 5 * 5)
        hx, cx = self.lstm(x, (hx, cx))
        return self.critic_linear(hx), self.actor_linear(hx), (hx, cx)

    def try_load(self, save_dir, checkpoint='*.tar'):
        paths = glob.glob(save_dir + checkpoint) ; step = 0
        if len(paths) > 0:
            ckpts = [int(s.split('.')[-2]) for s in paths]
            ix = np.argmax(ckpts) ; step = ckpts[ix]
            self.load_state_dict(torch.load(paths[ix]))
        print("\tno saved models") if step == 0 else print("\tloaded model: {}".format(paths[ix])) # use == rather than `is` for integer comparison
        return step
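
For reference, a minimal sketch of a single forward pass: four stride-2 convolutions shrink an 80x80 input to 5x5, which matches the `32 * 5 * 5` LSTM input size.

```python
# Sketch: one forward step through the actor-critic on a blank 80x80 frame.
import torch
from torch.autograd import Variable

model = NNPolicy(channels=1, num_actions=4)  # Breakout-v0 has 4 discrete actions
state = Variable(torch.zeros(1, 1, 80, 80))
hx, cx = Variable(torch.zeros(1, 256)), Variable(torch.zeros(1, 256))
value, logit, (hx, cx) = model((state, (hx, cx)))
print(value.size(), logit.size())  # torch.Size([1, 1]) torch.Size([1, 4])
```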

rollout.py (+43 lines)

# Visualizing and Understanding Atari Agents | Sam Greydanus | 2017 | MIT License

from __future__ import print_function
import warnings ; warnings.filterwarnings('ignore') # mute warnings, live dangerously ;)

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F

import numpy as np
from scipy.misc import imresize # preserves single-pixel info _unlike_ img = img[::2,::2]

prepro = lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255.

def rollout(model, env, max_ep_len=3e3, render=False):
    history = {'ins': [], 'logits': [], 'values': [], 'outs': [], 'hx': [], 'cx': []}

    state = torch.Tensor(prepro(env.reset())) # get first state
    episode_length, epr, eploss, done = 0, 0, 0, False # bookkeeping
    hx, cx = Variable(torch.zeros(1, 256)), Variable(torch.zeros(1, 256))

    while not done and episode_length <= max_ep_len:
        episode_length += 1
        value, logit, (hx, cx) = model((Variable(state.view(1,1,80,80)), (hx, cx)))
        hx, cx = Variable(hx.data), Variable(cx.data)
        prob = F.softmax(logit)

        action = prob.max(1)[1].data # greedy action; use prob.multinomial().data[0] to sample instead
        obs, reward, done, expert_policy = env.step(action.numpy()[0])
        if render: env.render()
        state = torch.Tensor(prepro(obs)) ; epr += reward

        # save info!
        history['ins'].append(obs)
        history['hx'].append(hx.squeeze(0).data.numpy())
        history['cx'].append(cx.squeeze(0).data.numpy())
        history['logits'].append(logit.data.numpy()[0])
        history['values'].append(value.data.numpy()[0])
        history['outs'].append(prob.data.numpy()[0])
        print('\tstep # {}, reward {:.0f}'.format(episode_length, epr), end='\r')

    return history
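
A usage sketch for collecting the `history` dict that the saliency code below consumes, assuming a checkpoint directory laid out as in the README.

```python
# Sketch, assuming pretrained checkpoints in ./breakout-v0/ (path is an assumption).
import gym, torch

env = gym.make('Breakout-v0') ; env.seed(0)
model = NNPolicy(channels=1, num_actions=env.action_space.n)
model.try_load('./breakout-v0/')
torch.manual_seed(0)
history = rollout(model, env, max_ep_len=200)
print(len(history['ins']), history['outs'][0].shape)  # frames collected, action-prob shape
```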

saliency.py (+76 lines)

# Visualizing and Understanding Atari Agents | Sam Greydanus | 2017 | MIT License

from __future__ import print_function
import warnings ; warnings.filterwarnings('ignore') # mute warnings, live dangerously ;)

import torch
from torch.autograd import Variable
import torch.nn.functional as F

import numpy as np
from scipy.ndimage.filters import gaussian_filter
from scipy.misc import imresize # preserves single-pixel info _unlike_ img = img[::2,::2]

prepro = lambda img: imresize(img[35:195].mean(2), (80,80)).astype(np.float32).reshape(1,80,80)/255.
searchlight = lambda I, mask: I*mask + gaussian_filter(I, sigma=3)*(1-mask) # choose an area NOT to blur
occlude = lambda I, mask: I*(1-mask) + gaussian_filter(I, sigma=3)*mask # choose an area to blur

def get_mask(center, size, r):
    y,x = np.ogrid[-center[0]:size[0]-center[0], -center[1]:size[1]-center[1]]
    keep = x*x + y*y <= 1
    mask = np.zeros(size) ; mask[keep] = 1 # select a small circle of pixels at the center
    mask = gaussian_filter(mask, sigma=r) # blur it into an approximately 2D-Gaussian bump of std r
    return mask/mask.max()

def run_through_model(model, history, ix, interp_func=None, mask=None, blur_memory=None, mode='actor'):
    if mask is None:
        im = prepro(history['ins'][ix])
    else:
        assert interp_func is not None, "interp func cannot be none"
        im = interp_func(prepro(history['ins'][ix]).squeeze(), mask).reshape(1,80,80) # perturb input I -> I'
    tens_state = torch.Tensor(im)
    state = Variable(tens_state.unsqueeze(0), volatile=True)
    hx = Variable(torch.Tensor(history['hx'][ix-1]).view(1,-1))
    cx = Variable(torch.Tensor(history['cx'][ix-1]).view(1,-1))
    if blur_memory is not None: cx.mul_(1-blur_memory) # perturb memory vector
    return model((state, (hx, cx)))[0] if mode == 'critic' else model((state, (hx, cx)))[1]

def score_frame(model, history, ix, r, d, interp_func, mode='actor'):
    # r: radius of blur
    # d: density of scores (if d==1, get a score for every pixel;
    # if d==2, every other pixel, i.e. 25% of the pixels of a 2D image)
    assert mode in ['actor', 'critic'], 'mode must be either "actor" or "critic"'
    L = run_through_model(model, history, ix, interp_func, mask=None, mode=mode)
    scores = np.zeros((int(80/d)+1,int(80/d)+1)) # saliency scores S(t,i,j)
    for i in range(0,80,d):
        for j in range(0,80,d):
            mask = get_mask(center=[i,j], size=[80,80], r=r)
            l = run_through_model(model, history, ix, interp_func, mask=mask, mode=mode)
            scores[int(i/d),int(j/d)] = (L-l).pow(2).sum().mul_(.5).data[0]
    pmax = scores.max()
    scores = imresize(scores, size=[80,80], interp='bilinear').astype(np.float32)
    return pmax * scores / scores.max()

def saliency_on_atari_frame(saliency, atari, fudge_factor, channel=2, sigma=0):
    # sometimes saliency maps are a bit clearer if you blur them
    # slightly...sigma adjusts the radius of that blur
    pmax = saliency.max()
    S = imresize(saliency, size=[160,160], interp='bilinear').astype(np.float32)
    S = S if sigma == 0 else gaussian_filter(S, sigma=sigma)
    S -= S.min() ; S = fudge_factor*pmax * S / S.max()
    I = atari.astype('uint16')
    I[35:195,:,channel] += S.astype('uint16') # add the saliency heatmap to one color channel of the game area
    I = I.clip(1,255).astype('uint8')
    return I

def get_env_meta(env_name):
    meta = {}
    if env_name=="Pong-v0":
        meta['critic_ff'] = 600 ; meta['actor_ff'] = 500
    elif env_name=="Breakout-v0":
        meta['critic_ff'] = 600 ; meta['actor_ff'] = 300
    elif env_name=="SpaceInvaders-v0":
        meta['critic_ff'] = 400 ; meta['actor_ff'] = 400
    else:
        print('environment "{}" not supported'.format(env_name))
    return meta
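
Putting the pieces together, a minimal sketch that scores one frame of a rollout and overlays the actor saliency on the raw Atari image, mirroring what make_movie.py does per frame; `model` and `history` are assumed to come from the rollout example above.

```python
# Sketch: perturbation saliency for a single frame; assumes ix is within the
# rollout and ix > 0 so the stored LSTM state from the previous step exists.
ix = 50
meta = get_env_meta('Breakout-v0')
actor_sal = score_frame(model, history, ix, r=5, d=5, interp_func=occlude, mode='actor')
frame = history['ins'][ix].squeeze().copy()
frame = saliency_on_atari_frame(actor_sal, frame, fudge_factor=meta['actor_ff'], channel=2)
print(frame.shape)  # (210, 160, 3) RGB frame, blue channel carrying the actor saliency
```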
