#!/bin/bash
# Force error checking
set -e
# Force tests to be verbose
set -x
# make python output unbuffered to get more accurate timestamps
export PYTHONUNBUFFERED=1
topdir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )/..
source $topdir/qa/setup_test_common.sh
# Set runner for python tests
export PYTHONPATH=${PYTHONPATH}:$topdir/qa:$topdir/dali/test/python
python_test_runner_package="nose nose2 nose-timer nose2-test-timer"
# use DALI nose wrapper to patch nose to support Python 3.10
python_test_runner="python -m nose_wrapper"
python_test_args="--verbose --with-timer --timer-top-n 20 -s"
python_invoke_test="${python_test_runner} ${python_test_args}"
# New framework for Python tests
# During the transition we run both
# When all tests are ported, the old one will be removed
python_new_test_runner="python -m nose2"
python_new_test_args="--verbose --plugin=nose2_test_timer.plugin --with-timer --timer-color --timer-top-n 20"
python_new_invoke_test="${python_new_test_runner} ${python_new_test_args}"
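# Typical usage (an illustrative sketch, not executed here; the exact wrapper layout and
# file names are assumptions): a concrete test script defines `pip_packages` and a
# `test_body` function and then sources the template, e.g.
#
#   pip_packages='${python_test_runner_package} numpy'
#   test_body() {
#     ${python_invoke_test} test_my_feature.py
#   }
#   source ../test_template.sh   # which in turn sources this file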
# Set the proper CUDA version for packages that require it, like MXNet
pip_packages=$(eval "echo \"${pip_packages}\"" | sed "s/##CUDA_VERSION##/${CUDA_VERSION}/")
last_config_index=$($topdir/qa/setup_packages.py -n -u $pip_packages --cuda ${CUDA_VERSION})
install_pip_pkg() {
install_cmd="$@"
# try installing from our download dir first (--no-index); if the package is not there, install it from the index
${install_cmd} --no-index || ${install_cmd}
}
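# illustrative call (package name made up): install_pip_pkg "pip install numpy -f /pip-packages"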
if [ -n "$gather_pip_packages" ]
then
# early exit
return 0
fi
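# note: `return` (rather than `exit`) is used above because this file is expected to be
# sourced by the calling test script, not executed directly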
# disable the sanitizer, as on some OSes it hangs git clone
export OLD_LD_PRELOAD=${LD_PRELOAD}
export LD_PRELOAD=""
source $topdir/qa/setup_dali_extra.sh
export LD_PRELOAD=${OLD_LD_PRELOAD}
target_dir=${target_dir-./}
cd ${target_dir}
# Limit to only one configuration (First version of each package)
if [[ $one_config_only = true ]]; then
echo "Limiting test run to one configuration of packages (first version of each)"
last_config_index=$(( 0 > $last_config_index ? $last_config_index : 0 ))
fi
# some global test setup
if [ "$(type -t do_once)" = 'function' ]; then
do_once
fi
prolog=${prolog-:}
epilog=${epilog-:}
# get the number of elements in the `prolog` array
number_of_prolog_elms=${#prolog[@]}
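# illustrative example (helper names are an assumption): a caller may set
#   prolog=(: enable_conda)
#   epilog=(: disable_conda)
# so that each package configuration is tested both outside and inside conda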
enable_sanitizer() {
# suppress leaks that are false positives or not related to DALI
export LSAN_OPTIONS=suppressions=$topdir/qa/leak.sup
export ASAN_OPTIONS=symbolize=1:protect_shadow_gap=0:log_path=sanitizer.log:start_deactivated=true:allocator_may_return_null=1:detect_leaks=1:fast_unwind_on_malloc=0:verify_asan_link_order=0:detect_container_overflow=0
export ASAN_SYMBOLIZER_PATH=$(which llvm-symbolizer)
# avoid python false positives
export PYTHONMALLOC=malloc
# if something calls dlclose on a module that leaks, and it happens before ASAN can extract symbols,
# we get "unknown module" in the stack trace; to prevent this, preload a dlclose that does nothing
echo "int dlclose(void* a) { return 0; }" > /tmp/fake_dlclose.c && gcc -shared -o /tmp/libfakeclose.so /tmp/fake_dlclose.c
# for an unknown reason, with PYTHONMALLOC=malloc the more recent ASAN can deadlock: while capturing
# the backtrace for a `new` call it calls malloc, which is intercepted and triggers another
# backtrace capture, but `_Unwind_Find_FDE` is not reentrant (it uses a mutex), which leads to a deadlock
gcc -shared -fPIC $topdir/qa/test_wrapper_pre.c -o /tmp/pre.so
gcc -shared -fPIC $topdir/qa/test_wrapper_post.c -o /tmp/post.so
export OLD_LD_PRELOAD=${LD_PRELOAD}
export LD_PRELOAD="/tmp/pre.so /usr/lib/x86_64-linux-gnu/libasan.so /tmp/glibc_fix.so /tmp/post.so /usr/lib/x86_64-linux-gnu/libstdc++.so /tmp/libfakeclose.so"
# Workaround for bug in asan ignoring RPATHs https://bugzilla.redhat.com/show_bug.cgi?id=1449604
export OLD_LD_LIBRARY_PATH2=${LD_LIBRARY_PATH} # OLD_LD_LIBRARY_PATH variable name already used
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(python -c 'import nvidia.nvimgcodec as n; import os; print(os.path.dirname(n.__file__))')
}
# turn off the sanitizer to avoid breaking any unrelated system tools
disable_sanitizer() {
export ASAN_OPTIONS=start_deactivated=true:detect_leaks=0:detect_container_overflow=0
export LD_PRELOAD=${OLD_LD_PRELOAD}
export LD_LIBRARY_PATH=${OLD_LD_LIBRARY_PATH2}
unset ASAN_SYMBOLIZER_PATH
unset PYTHONMALLOC
}
# Wrap test_body in a subshell, where it can safely run with `set -e`, while `set -e`
# is turned off in the current shell so the error code can be intercepted.
# When sanitizers are on, use `set +e` instead, so all tests run regardless of the result
# and as much sanitizer output as possible is collected.
test_body_wrapper() {(
if [ -n "$DALI_ENABLE_SANITIZERS" ]; then
set +e
enable_sanitizer
else
set -e
fi
test_body
if [ -n "$DALI_ENABLE_SANITIZERS" ]; then
disable_sanitizer
fi
)}
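# note: the wrapper must be called as a plain command (as done further below), not as e.g.
# `if test_body_wrapper; then ...` - in a conditional context bash ignores `set -e`
# inside the subshell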
process_sanitizers_logs() {
find $topdir -iname "sanitizer.log.*" -print0 | xargs -0 -I file cat file > $topdir/sanitizer.log
if [ -e $topdir/sanitizer.log ]; then
cat $topdir/sanitizer.log
grep -q ERROR $topdir/sanitizer.log && exit 1 || true
fi
find $topdir -iname "sanitizer.log*" -delete
}
# get extra index URLs for the given packages - PEP 503 Python package index
extra_indices=$($topdir/qa/setup_packages.py -u $pip_packages --cuda ${CUDA_VERSION} -e)
extra_indices_string=""
for e in ${extra_indices}; do
extra_indices_string="${extra_indices_string} --extra-index-url=${e}"
done
# get link index URLs for the given packages - a URL or path to an HTML file with links to archives
link_indices=$($topdir/qa/setup_packages.py -u $pip_packages --cuda ${CUDA_VERSION} -k)
link_indices_string=""
for e in ${link_indices}; do
link_indices_string="${link_indices_string} -f ${e}"
done
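# illustrative result (URLs made up): extra_indices_string="--extra-index-url=https://example.com/simple"
#                                     link_indices_string="-f https://example.com/links.html"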
# store the original LD_LIBRARY_PATH
OLD_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
for i in `seq 0 $last_config_index`;
do
echo "Test run $i"
# seq from 0 to number of elements in `prolog` array - 1
for variant in $(seq 0 $((${number_of_prolog_elms}-1))); do
${prolog[variant]}
echo "Test variant run: $variant"
# install the latest CUDA wheels for CUDA 11.x and above tests, but only on x86_64,
# and skip it when CUDA from the system should be used instead of wheels
# (DO_NOT_INSTALL_CUDA_WHEEL) or when running inside conda
version_ge "${CUDA_VERSION}" "110" && \
if [ "$(uname -m)" == "x86_64" ] && [ -z "${DO_NOT_INSTALL_CUDA_WHEEL}" ] && [ -z "${CONDA_PREFIX}" ]; then
NPP_VERSION=$(if [[ $DALI_CUDA_MAJOR_VERSION == "12" ]]; then echo "==12.2.5.30"; else echo ""; fi)
install_pip_pkg "pip install --upgrade nvidia-npp-cu${DALI_CUDA_MAJOR_VERSION}${NPP_VERSION} \
nvidia-nvjpeg-cu${DALI_CUDA_MAJOR_VERSION} \
nvidia-nvjpeg2k-cu${DALI_CUDA_MAJOR_VERSION} \
nvidia-nvtiff-cu${DALI_CUDA_MAJOR_VERSION} \
nvidia-cufft-cu${DALI_CUDA_MAJOR_VERSION} \
-f /pip-packages"
fi
# install packages
inst=$($topdir/qa/setup_packages.py -i $i -u $pip_packages --cuda ${CUDA_VERSION})
if [ -n "$inst" ]; then
for pkg in ${inst}
do
install_pip_pkg "pip install $pkg -f /pip-packages ${link_indices_string} ${extra_indices_string}"
done
# If we just installed tensorflow, we need to reinstall the DALI TF plugin
if [[ "$inst" == *tensorflow* ]]; then
# The package name can be nvidia-dali-tf-plugin, nvidia-dali-tf-plugin-weekly or nvidia-dali-tf-plugin-nightly
# A different DALI may have been installed as a dependency of nvidia-dali-tf-plugin, so uninstall it too
pip uninstall -y `pip list | grep nvidia-dali-tf-plugin | cut -d " " -f1` || true
# don't reinstall DALI when in conda, as we use the conda package
if [ -z "${CONDA_PREFIX}" ]; then
pip uninstall -y `pip list | grep nvidia-dali | cut -d " " -f1` || true
pip install /opt/dali/nvidia_dali*.whl;
fi
pip install /opt/dali/nvidia_dali_tf_plugin*.tar.gz
fi
# if we are using any CUDA or nvidia-tensorflow wheels (nvidia-npp, nvidia-nvjpeg, nvidia-cufft, ...),
# trim LD_LIBRARY_PATH so CUDA is taken from the wheels and not from /usr/local/,
# but keep the compat directory on the path
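# for example (paths are illustrative), "/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/lib"
# would be reduced to just "/usr/local/cuda/compat"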
if [[ "$inst" == *nvidia-n* ]] || [[ "$inst" == *nvidia-c* ]] || [[ "$inst" == *nvidia-t* ]]; then
TAIL=${LD_LIBRARY_PATH#*compat}
LD_LIBRARY_PATH=${LD_LIBRARY_PATH/$TAIL/}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH##*:}
fi
fi
# test code
# Run test_body in a subshell: "exit on error" is turned off in the current shell
# but stays active inside the subshell (thanks to the wrapper), so we can intercept
# the first error that happens. test_body_wrapper cannot be used inside any
# conditional, as that would disable the "exit on error" behaviour inside it.
set +e
test_body_wrapper
RV=$?
set -e
if [ -n "$DALI_ENABLE_SANITIZERS" ]; then
process_sanitizers_logs
fi
# restore the original LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$OLD_LD_LIBRARY_PATH
if [ $RV -gt 0 ]; then
# if sanitizers are enabled don't capture core
if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
mkdir -p $topdir/core_artifacts
cp core* $topdir/core_artifacts || true
fi
exit ${RV}
fi
# remove packages
remove=$($topdir/qa/setup_packages.py -r -u $pip_packages --cuda ${CUDA_VERSION})
if [ -n "$remove" ]; then
pip uninstall -y $remove
fi
${epilog[variant]}
done
done