mit-han-lab
diff --git a/‎.gitmodules
+3 b/‎.gitmodules
+3
diff --git a/‎tsm_fpga/README.md
+34 b/‎tsm_fpga/README.md
+34
diff --git a/‎tsm_fpga/fpga_build/model_tf_split/Makefile
+38 b/‎tsm_fpga/fpga_build/model_tf_split/Makefile
+38
diff --git a/‎tsm_fpga/fpga_build/model_tf_split/calib_input_split.py
+63 b/‎tsm_fpga/fpga_build/model_tf_split/calib_input_split.py
+63
diff --git a/‎tsm_fpga/fpga_build/model_tf_split/compile_split.sh
+28 b/‎tsm_fpga/fpga_build/model_tf_split/compile_split.sh
+28
diff --git a/‎tsm_fpga/fpga_build/model_tf_split/quantize_results/.gitkeep b/‎tsm_fpga/fpga_build/model_tf_split/quantize_results/.gitkeep
diff --git a/‎tsm_fpga/fpga_build/model_tf_split/quantize_split.sh
+42 b/‎tsm_fpga/fpga_build/model_tf_split/quantize_split.sh
+42
diff --git a/‎tsm_fpga/fpga_build/model_tf_split/src/helper.h
+23 b/‎tsm_fpga/fpga_build/model_tf_split/src/helper.h
+23
@@ -0,0 +1,3 @@
+[submodule "tsm_fpga/tf_models"]
+	path = tsm_fpga/tf_models
+	url = https://github.com/JoshNoel/tf_models
@@ -0,0 +1,34 @@
+# TSM Deployed to FPGA
+
+We deploy TSM to FPGA using the Vitis-AI framework. To do so, we generate a tensorflow implementation of the TSM model, and pipeline the network such that all shift operations are isolated. This allows deployment of the Shift operation to CPU and the remaining operations to the Vitis-AI DPU IP.
+
+We must take additional steps to deploy this pipelined model. First, isolating the shift operations results in a number of separate DPU kernels for the separate portions of the network (11 for MobileNetV2 TSM). These kernels must be quantized to int8 and compiled for the DPU separately.
+
+To quantize the split model, we dump intermediate activations from the unsplit implementation at the locations of DPU kernel inputs. These inputs are then used as input to the Vitis-AI quantizer. Once quantized, the resulting splits of the model can be compiled into the final demo executable.
+
+![split-mbv2](https://github.com/mit-han-lab/temporal-shift-module/raw/master/tsm_fpga/images/split_mobilenetv2_bottleneck.png)
+
+## FPGA Setup
+
+To build the FPGA project, ensure you have initialized the tensorflow-slim submodule (`git submodule update --init --recursive`).
+
+This was tested with the ZCU104 MPSOC DPU TRD in the Vitis-AI repository and the Ultra96V2 Avnet 2020.1 beta branch (https://github.com/Avnet/vitis/tree/2020.1) (See the following guide for additional build instructions https://www.hackster.io/AlbertaBeef/vitis-ai-1-1-flow-for-avnet-vitis-platforms-part-2-f18be4)
+
+### 1) Dump Split TF Models
+The `mobilenet_v2_tfslim.py` script is the primary script used to build the online-TSM model for FPGA. To generate the split model, set `SPLIT_MODEL`, `SPLIT_EXPORT`, and `EXPORT` to True at the top of the file. After running the script, you will see the split model dumped to the `model_tf_split_*` directories.
+
+### 2) Dump Quantization Inputs
+To gather quantization information, one must run the unsplit models. To do so, first set the quantization data paths at the TODOs at the top of the files. Then set `SPLIT_MODEL`, `SPLIT_EXPORT`, and `EXPORT` to False. Finally, set the corresponding `QUANTIZE_*` flag and the `DUMP_QUANTIZE` flag to True to enable quantization.
+
+### 3) Quantize & Compile DPU Kernels
+Once quantization data is generated (see `inputs.pickle` and `quantize_info.txt` under the `model_tf_split_export/*` directories), one can move to the `fpga_build` directory to quantize and compile each split of the model.
+
+Update `compile_split.sh` to use the correct target architecture variable. Use the `quantize_split.sh` and `compile_split.sh` files to launch `vai_q_tensorflow` and `vai_c_tensorflow` respectively (from within the docker container).
+
+### 4) Compile demo executable
+Once model quantization is complete, one can run `make ultra96v2.tsm_online` or `make zcu104.tsm_online` in the `fpga_build/model_tf_split` directory to generate the demo executable for a given target from the src files and generated DPU kernels.
+
+## Ultra96V2 Online-TSM Jester Demo
+
+On Ultra96V2 we achieve an inference throughput of 37 FPS with a power consumption of 10.6W.
+
@@ -0,0 +1,38 @@
+
+# Disable make's built-in implicit rules so that none of them can match
+# unexpectedly (e.g. a default rule trying to rebuild Makefile from Makefile.o).
+MAKEFLAGS += -r
+
+# make always predefines CXX (default value "g++"), so "CXX ?= ..." would never
+# take effect.  Replace only that built-in default; a CXX coming from the
+# environment or the command line is left untouched.
+ifeq ($(origin CXX),default)
+CXX = aarch64-xilinx-linux-g++
+endif
+CFLAGS += -O3 -g -Wall -Wpointer-arith -std=c++14 -ffast-math -mcpu=cortex-a53
+LDFLAGS += -L./ -ln2cube -lpthread -lopencv_core -lopencv_imgproc -lopencv_videoio -lopencv_imgcodecs -lopencv_highgui
+CFLAGS += -fdiagnostics-color=always
+
+# Directory layout: sources, per-board DPU compile output, shared object dir.
+SRC = ./src
+COMPILE_RESULTS = compile_results
+BUILD = build
+
+# Let "%.cpp" prerequisites be found inside $(SRC).
+VPATH = $(SRC)
+# Expanded once (:=) so the source tree is not re-globbed on every reference.
+CPP_FILES := $(wildcard $(SRC)/*.cpp)
+OBJ := $(patsubst $(SRC)/%.cpp, $(BUILD)/%.o, $(CPP_FILES))
+
+# One phony target per supported board; each links <board>/tsm_online.
+TARGETS = zcu104.tsm_online ultra96v2.tsm_online
+.PHONY: all clean copy $(TARGETS)
+
+all : $(TARGETS)
+
+# Per-target settings, expanded when the recipe runs so that $@ names the
+# target being built:
+#   SUBDIR - board directory (target name without the ".tsm_online" suffix)
+#   ELF    - every *.elf file found under that board's compile results
+# The find pattern is quoted so the shell cannot expand it against the current
+# directory before find sees it.
+$(TARGETS) : SUBDIR = $(patsubst %.tsm_online,%,$@)
+$(TARGETS) : ELF = $(shell find $(SUBDIR)/$(COMPILE_RESULTS) -name '*.elf')
+# Link the shared objects with the board's .elf files into <board>/tsm_online.
+$(TARGETS) : %.tsm_online : $(OBJ)
+	mkdir -p $(SUBDIR)/$(BUILD)
+	$(CXX) $(CFLAGS) $^ $(ELF) -o $*/tsm_online $(LDFLAGS)
+
+# Copy a board's demo executable to a remote device, building it first:
+#   make ultra96v2.copy REMOTE=user@host
+# The stem selects which board's executable (<stem>/tsm_online) is sent.
+# NOTE(review): the previous recipe used "%(dir %@)", which is not make syntax
+# and reached the shell verbatim, and depended on a "tsm_online" file that no
+# rule produces.  The intended destination syntax was unclear - confirm that
+# passing it through REMOTE matches the expected workflow.
+REMOTE ?=
+%.copy : %.tsm_online
+	@test -n "$(REMOTE)" || { echo "REMOTE is not set (usage: make $@ REMOTE=user@host)" >&2; exit 1; }
+	scp $*/tsm_online $(REMOTE):~/tsm_online/
+
+
+# Compile one source file (located through VPATH) into the object directory.
+# The directory is created on demand; nothing else in this Makefile creates
+# $(BUILD), so without this the compiler cannot write its output on a clean tree.
+$(BUILD)/%.o : %.cpp
+	@mkdir -p $(@D)
+	$(CXX) -c $(CFLAGS) $< -o $@
+
+# Remove everything this Makefile generates: the shared object directory, the
+# per-board build directories and executables written by the link rule, and a
+# tsm_online left in the current directory.
+clean :
+	$(RM) -r $(BUILD) $(patsubst %.tsm_online,%/$(BUILD),$(TARGETS))
+	$(RM) tsm_online $(patsubst %.tsm_online,%/tsm_online,$(TARGETS))
@@ -0,0 +1,63 @@
+import os
+from PIL import Image
+import numpy as np
+import random
+import pickle
+
+IMAGENET_PATH = "/MEng/Data/ILSVRC2012_img_val/"
+MEAN = [0.485, 0.456, 0.406]
+STD = [0.229, 0.224, .225]
+
+CALIB_BASE_PATH=os.getenv("CALIB_BASE_PATH")
+if CALIB_BASE_PATH is None:
+    raise ValueError("Environment variable CALIB_BASE_PATH not set")
+
+CALIB_MODEL_SPLIT=os.getenv("CALIB_MODEL_SPLIT")
+if CALIB_MODEL_SPLIT is None:
+    raise ValueError("Environment variable CALIB_MODEL_SPLIT not set")
+
+quantize_info_path = os.path.join(CALIB_BASE_PATH, f"model_tf_split_{CALIB_MODEL_SPLIT}/quantize_info.txt")
+input_info_path = os.path.join(CALIB_BASE_PATH, f"model_tf_split_{CALIB_MODEL_SPLIT}/inputs.pickle")
+
+input_shapes = {}
+with open(quantize_info_path) as f:
+    lines = f.readlines()
+    raw_input_names = []
+    raw_input_shapes = []
+    for i in range(len(lines)):
+        if "--input_nodes" in lines[i]:
+            raw_input_names = lines[i+1].rstrip()
+        if "--input_shapes" in lines[i]:
+            raw_input_shapes = lines[i+1].rstrip()
+
+    raw_input_names = raw_input_names.split(",")
+    raw_input_shapes = raw_input_shapes.split(":")
+    raw_input_shapes = [[int(x) for x in shape.split(',')] for shape in raw_input_shapes]
+    input_shapes = dict(zip(raw_input_names, raw_input_shapes))
+
+
+input_data = {}
+# shift_concat, resid
+with open(input_info_path, 'rb') as f:
+    input_data = pickle.load(f)
+
+def input_fn(iter):
+    #files = sorted(os.listdir(IMAGENET_PATH))
+    #img = Image.open(os.path.join(IMAGENET_PATH,files[iter])).resize((224, 224))
+    #img = np.array(img) / 255.0
+    ##img = (img -  MEAN) / STD
+    #img = np.transpose(img, axes=[2, 0, 1])
+    #img = np.expand_dims(img, axis=0)
+    #return {"input_node": img}
+    inputs = {}
+    for name,shape in input_shapes.items():
+        if "/input" in name:
+            inputs[name] = np.array(input_data[iter]["resid"])
+            #inputs[name] = np.array(input_data["0"]["resid"])
+        else:
+            inputs[name] = np.array(input_data[iter]["shift_concat"])
+            #inputs[name] = np.array(input_data["0"]["shift_concat"])
+
+    #inputs = {name: np.random.rand(*shape) for name,shape in input_shapes.items()}
+
+    return inputs
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+num_splits=$(ls quantize_results | grep ^quantize_results_.* | wc -l)
+
+#ZCU104_arch="/opt/vitis_ai/compiler/arch/DPUCZDX8G/ZCU104/arch.json"
+ZCU104_arch="../zcu104_arch/arch.json"
+ULTRA96V2_arch="../ultra96v2_arch/arch.json"
+
+ZCU104_out="zcu104/compile_results"
+ULTRA96V2_out="ultra96v2/compile_results"
+
+echo "Compiling $num_splits splits..."
+
+for ((i=0;i<num_splits;i++)); do
+    printf "\n================ Compiling split # $i ====================\n"
+
+    tee_append=""
+    if [[ $i -ne 0 ]]; then
+        tee_append="-a"
+    fi
+
+    vai_c_tensorflow --arch "$ZCU104_arch" \
+        --frozen_pb "quantize_results/quantize_results_$i/deploy_model.pb" \
+        --output_dir "$ZCU104_out/compile_results_$i" \
+        --net_name "tsm_mobilenet_v2_$i" \
+        --options "{'save_kernel':'','dump':'graph','split_io_mem':'','mode':'normal'}" \
+        2>&1 | tee $tee_append compile_log.txt
+done
@@ -0,0 +1,42 @@
+#!/bin/bash
+set -e
+
+export DECENT_DEBUG=3
+
+# Constants
+num_splits=11
+calib_iter=50
+
+# Path to output directory of TF
+base_path="$1"
+base_options="--calib_iter $calib_iter --input_fn calib_input_split.input_fn"
+
+num_split_dirs=$(ls "$base_path" | wc -l)
+
+if [[ num_splits -ne $num_split_dirs ]]; then
+    echo "Number of outputs split directories from: \n\
+        '$base_path' ($num_split_dirs)\n not equal to coded num_splits ($num_splits)"
+fi
+
+export CALIB_BASE_PATH="$base_path"
+
+if [[ $# -eq 0 ]]; then
+    echo "Missing arg: Provide path to base split model dir"
+    exit 1
+fi
+
+for ((i=0;i<num_splits;i++)); do
+    printf "\n================ Quantizing split # $i ====================\n"
+    model_dir="$base_path/model_tf_split_$i"
+    config=$(<"$model_dir/quantize_info.txt")
+    export CALIB_MODEL_SPLIT=$i
+
+    tee_append=""
+    if [[ $i -ne 0 ]]; then
+        tee_append="-a"
+    fi
+
+    vai_q_tensorflow quantize --output_dir "quantize_results/quantize_results_$i" $base_options --input_frozen_graph "$model_dir/model_tf_split_$i.pb" \
+        $(echo $config) 2>&1 | tee $tee_append quantize_log.txt
+done
+
@@ -0,0 +1,23 @@
+#include <dirent.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+// List the entries of directory `path`.
+//
+// Returns "<path>/<name>" for every entry whose name does not start with '.'
+// (this also excludes "." and ".."), sorted in ascending order.  A trailing
+// '/' on `path` is not duplicated.  Returns an empty vector when `path` is
+// empty or the directory cannot be opened.
+//
+// Declared inline because the definition lives in a header: without it,
+// including the header from two translation units is a multiple-definition
+// link error.
+inline std::vector<std::string> listDir(const std::string& path) {
+    std::vector<std::string> res;
+
+    // back() on an empty string is undefined behaviour; nothing to list anyway.
+    if (path.empty())
+        return res;
+    const std::string prepend = (path.back() == '/') ? path : path + "/";
+
+    DIR *df = opendir(path.c_str());
+    if (!df)
+        return res;
+
+    while (struct dirent *file = readdir(df)) {
+        // d_name is an array member, so it can never be null; only the
+        // leading-dot check is needed.
+        if (file->d_name[0] == '.')
+            continue;
+        res.push_back(prepend + file->d_name);
+    }
+    closedir(df);
+
+    std::sort(res.begin(), res.end());
+    return res;
+}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[submodule "tsm_fpga/tf_models"]`
	`2`	`+ path = tsm_fpga/tf_models`
	`3`	`+ url = https://github.com/JoshNoel/tf_models`