Skip to content

Commit b9a4e95

Browse files
committed
ready for release
1 parent e9a6b9c commit b9a4e95

File tree

4 files changed

+34
-79
lines changed

4 files changed

+34
-79
lines changed

PKGBUILD

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
2+
# Maintainer: Alvin Zhu <[email protected]>
3+
pkgname=python-nbasm
4+
pkgver=11.6.2
5+
pkgrel=1
6+
pkgdesc="NB Assembler"
7+
arch=('any')
8+
url="https://github.com/AlvinZhu/NBAssembler"
9+
license=('GPL')
10+
groups=('aur-alvin')
11+
depends=('python')
12+
optdepends=("CUDA: NVIDIA's GPU programming toolkit")
13+
makedepends=('python-setuptools')
14+
15+
package() {
16+
cp -r ../nbas ../nbasm ../setup.py "$srcdir"
17+
python setup.py install --root="$pkgdir/" --optimize=1
18+
}

nbas/__main__.py

Lines changed: 6 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def decompile_ptx(asm_path, ptx_path, define_list):
145145
print(ptx, end='')
146146

147147

148-
def assemble(asm_path, out_cubin_path, define_list, out_asm_path, sort_banks):
148+
def assemble(asm_path, out_cubin_path, define_list, out_asm_path, sort_banks, strip):
149149
if not out_cubin_path:
150150
out_cubin_path = 'out.cubin'
151151
cubin = Cubin()
@@ -202,69 +202,12 @@ def assemble(asm_path, out_cubin_path, define_list, out_asm_path, sort_banks):
202202
for constant in cubin.constant_dict.values():
203203
constant_asm += constant.print() + '\n'
204204
asm = header_asm + global_asm + constant_asm + kernel_asm
205-
with open(out_asm_path, 'w') as f:
206-
f.write(asm)
207-
208-
209-
def preprocess(asm_path, out_asm_path, define_list, strip):
210-
cubin = Cubin()
211-
212-
define_dict = {}
213-
for define in define_list:
214-
if not define:
215-
continue
216-
d = define.split('=')
217-
if len(d) < 2:
218-
exec(f'{define} = True', define_dict)
219-
else:
220-
exec(f'{define}', define_dict)
221205

222-
cubin.load_asm(asm_path, define_dict)
223-
header_asm = cubin.header.print() + '\n'
224-
global_asm = ''
225-
constant_asm = ''
226-
kernel_asm = ''
206+
if strip:
207+
asm = strip_comment(asm)
227208

228-
consts = set()
229-
globals_ = set()
230-
for kernel in cubin.kernel_dict.values():
231-
# Unmap global, const0, const3
232-
cubin.unmap_constant3(kernel)
233-
kernel.unmap_reg()
234-
kernel.unmap_constant0()
235-
kernel.unmap_jump()
236-
kernel.unmap_global()
237-
kernel.schedule()
238-
kernel.sort_banks()
239-
cubin.map_constant3(kernel)
240-
kernel.map_global()
241-
kernel.map_constant0()
242-
kernel.map_jump(rel=True)
243-
kernel.mark_const2()
244-
kernel_asm += '\n' + kernel.print()
245-
consts = consts.union(kernel.consts)
246-
globals_ = globals_.union(kernel.globals)
247-
248-
for global_ in cubin.global_dict.values():
249-
if global_.name in globals_:
250-
global_asm += global_.print() + '\n'
251-
for global_ in cubin.global_init_dict.values():
252-
if global_.name in globals_:
253-
global_asm += global_.print() + '\n'
254-
for constant in cubin.constant_dict.values():
255-
if constant.name in consts or 'ALL_CONST3' in consts:
256-
constant_asm += constant.print() + '\n'
257-
258-
asm = header_asm + global_asm + constant_asm + kernel_asm
259-
260-
if strip:
261-
asm = strip_comment(asm)
262-
263-
if out_asm_path:
264209
with open(out_asm_path, 'w') as f:
265210
f.write(asm)
266-
else:
267-
print(asm, end='')
268211

269212

270213
def test_cubin(cubin_path, kernel_names, global_only, check=False):
@@ -397,14 +340,8 @@ def main():
397340
parser_as.add_argument('-D', '--define', metavar='DEFINE', nargs='+', type=str, default='',
398341
help='define variable for embedded python code')
399342
parser_as.add_argument('-d', '--debug', metavar='OUTPUT_ASM', type=str, default='', help='output asm for debug')
400-
parser_as.add_argument('-s', '--sort', action='store_true', help='sort banks')
401-
402-
parser_pre = subparsers.add_parser('pre', help='preprocess asm')
403-
parser_pre.add_argument('asm', help='input asm', metavar='ASM')
404-
parser_pre.add_argument('-D', '--define', metavar='DEFINE', nargs='+', type=str, default='',
405-
help='define variable for embedded python code')
406-
parser_pre.add_argument('-o', '--output', metavar='OUTPUT', type=str, default='', help='output asm file path')
407-
parser_pre.add_argument('-s', '--strip', action='store_true', help='strip comment')
343+
parser_as.add_argument('-b', '--bank', action='store_true', help='sort banks')
344+
parser_as.add_argument('-s', '--strip', action='store_true', help='strip comment')
408345

409346
parser_pdas = subparsers.add_parser('dcc', help='decompile asm to ptx')
410347
parser_pdas.add_argument('asm', help='input asm', metavar='ASM')
@@ -435,9 +372,7 @@ def main():
435372
global_only=args.global_only, no_line_info=args.no_line_info)
436373
elif args.cmd == 'as':
437374
assemble(asm_path=args.asm, out_cubin_path=args.output, define_list=args.define, out_asm_path=args.debug,
438-
sort_banks=args.sort)
439-
elif args.cmd == 'pre':
440-
preprocess(asm_path=args.asm, out_asm_path=args.output, define_list=args.define, strip=args.strip)
375+
sort_banks=args.bank, strip=args.strip)
441376
elif args.cmd == 'dcc':
442377
decompile_ptx(asm_path=args.asm, ptx_path=args.output, define_list=args.define)
443378
elif args.cmd == 'test':

nbas/tool.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ def load_ptx_line_info(path):
3838

3939

4040
def disassemble_nv(binary, arch):
41-
# 使用nvdisasm 反汇编
4241
fd, tmp_file = mkstemp()
4342
os.close(fd)
4443
with open(tmp_file, 'wb') as f:

test.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import os
55
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
6-
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
6+
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
77

88
import pycuda.driver as cuda
99
import pycuda.tools
@@ -15,20 +15,23 @@
1515
mod = cuda.module_from_file('test.cubin')
1616
kernel = mod.get_function("kTest")
1717

18-
a = np.array([0x70000]*THREADS).astype(np.uint32)
18+
a = np.array([0x3]*THREADS).astype(np.uint32)
1919
a_gpu, a_size = mod.get_global('a')
2020

21-
b = np.array([0xf]*THREADS).astype(np.uint32)
21+
b = np.array([0x2]*THREADS).astype(np.uint32)
2222
b_gpu, b_size = mod.get_global('b')
2323

24-
d = np.array([0]*THREADS).astype(np.uint32)
25-
d_gpu, d_size = mod.get_global('d')
24+
c = np.array([0]*THREADS).astype(np.uint32)
25+
c_gpu, c_size = mod.get_global('c')
26+
27+
# d = np.array([0]*THREADS).astype(np.uint32)
28+
# d_gpu, d_size = mod.get_global('d')
2629

2730
cuda.memcpy_htod(a_gpu, a)
2831
cuda.memcpy_htod(b_gpu, b)
2932

3033
kernel(block=(1, 1, 1))
3134

32-
cuda.memcpy_dtoh(d, d_gpu)
35+
cuda.memcpy_dtoh(c, c_gpu)
3336

34-
print(f'Result = {np.unique(d)[0]:#0x}')
37+
print(f'Result = {np.unique(c)[0]:#0x}')

0 commit comments

Comments
 (0)