 import sys
 import exodus3 as ex
 import numpy as np
-import _pygenten as gt
-import _phys_utils as pu
+import pygenten as gt
+import pygenten._phys_utils as pu

 def torus_to_tensor(base_filename, axis, num_procs_per_poloidal_plane, tol=1.0e-10):
     # error check the processor decomposition
     num_procs = gt.num_procs()
-    if num_procs % num_procs_per_poloidal_plane > 0:
-        raise Exception('Invalid num_procs_per_poloidal_plane (' + str(num_procs_per_poloidal_plane) + '): must divide num MPI ranks (' + str(num_procs) + ') evenly')
+    if num_procs_per_poloidal_plane > 0:
+        if num_procs % num_procs_per_poloidal_plane > 0:
+            raise Exception('Invalid num_procs_per_poloidal_plane (' + str(num_procs_per_poloidal_plane) + '): must divide num MPI ranks (' + str(num_procs) + ') evenly')
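+    # a non-positive num_procs_per_poloidal_plane now selects the serial path:
+    # all node data is gathered to rank 0 instead of being block-distributed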

     # get filename for this proc
     rank = gt.proc_rank()
@@ -74,19 +75,21 @@ def torus_to_tensor(base_filename, axis, num_procs_per_poloidal_plane,tol=1.0e-1
     total_ref_nodes = pu.global_go_sum(num_ref_nodes)
     total_nodes = pu.global_go_sum(num_nodes)
     total_thetas = len(unique_thetas)
+    num_theta_procs = -1
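+    # -1 is a sentinel: it is only overwritten when a 2-D decomposition is requested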
     if total_nodes != total_ref_nodes*total_thetas:
         msg = 'Toroidal decomposition failure: total_nodes != total_ref_nodes*total_thetas'
         msg = msg + ' (' + str(total_nodes) + ' != ' + str(total_ref_nodes) + '*' + str(total_thetas) + ')'
         raise Exception(msg)
-    if num_procs_per_poloidal_plane > total_ref_nodes:
-        msg = 'Invalid num_procs_per_poloidal_plane (' + str(num_procs_per_poloidal_plane) + '): '
-        msg = msg + 'must be less than number of nodes per poloidal plane (' + str(total_ref_nodes) + ')'
-        raise Exception(msg)
-    num_procs_per_theta = num_procs // num_procs_per_poloidal_plane
-    if num_procs_per_theta > total_thetas:
-        msg = 'Invalid num_procs_per_theta (num_procs/num_procs_per_poloidal_plane = ' + str(num_procs_per_theta) + '): '
-        msg = msg + 'must be less than number of poloidal planes in the mesh (' + str(total_thetas) + ')'
-        raise Exception(msg)
+    if num_procs_per_poloidal_plane > 0:
+        if num_procs_per_poloidal_plane > total_ref_nodes:
+            msg = 'Invalid num_procs_per_poloidal_plane (' + str(num_procs_per_poloidal_plane) + '): '
+            msg = msg + 'must be less than number of nodes per poloidal plane (' + str(total_ref_nodes) + ')'
+            raise Exception(msg)
+        num_theta_procs = num_procs // num_procs_per_poloidal_plane
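+        # e.g. 8 MPI ranks with num_procs_per_poloidal_plane = 2 gives
+        # num_theta_procs = 4 blocks along the theta direction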
+        if num_theta_procs > total_thetas:
+            msg = 'Invalid num_theta_procs (num_procs/num_procs_per_poloidal_plane = ' + str(num_theta_procs) + '): '
+            msg = msg + 'must be less than number of poloidal planes in the mesh (' + str(total_thetas) + ')'
+            raise Exception(msg)

     # get the associated gids and r and a values
     ref_gids = np.zeros(num_ref_nodes, dtype=np.longlong)
@@ -124,11 +127,20 @@ def torus_to_tensor(base_filename, axis, num_procs_per_poloidal_plane,tol=1.0e-1
         cids[i] = tids[i]*total_ref_nodes + rids[i]

     # determine which cids should be on which procs based on the user defined decomposition
-    redistributed_cids = distribute_composite_ids_across_procs(num_procs_per_theta, total_thetas, num_procs_per_poloidal_plane, total_ref_nodes)
+    redistributed_cids = -1*np.ones(0, dtype=np.longlong)
+    global_blocking = []
+    parallel_map = []
+    if num_procs_per_poloidal_plane > 0:
+        redistributed_cids, global_blocking = distribute_composite_ids_across_procs(num_theta_procs, total_thetas, num_procs_per_poloidal_plane, total_ref_nodes)
+    else:
+        redistributed_cids = distribute_composite_ids_to_root(num_theta_procs, total_thetas, num_procs_per_poloidal_plane, total_ref_nodes)
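+        # on the serial path only rank 0 is assigned cids; every other rank
+        # receives an empty id array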
     redistributed_num_nodes = len(redistributed_cids)

     # use cids to redistribute data across procs
     redistributed_node_data = pu.redistribute_data_across_procs(cids, node_data, redistributed_cids)
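+    # ranks that own no cids receive no data and return empty results immediately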
+    tensor = np.zeros((0, 0, 0, 0), dtype=np.double)
+    if len(redistributed_node_data) == 0:
+        return tensor, global_blocking, parallel_map

     # back out tids and rids from redistributed cids
     redistributed_tids = -1*np.ones(redistributed_num_nodes, dtype=np.longlong)
@@ -149,7 +161,18 @@ def torus_to_tensor(base_filename, axis, num_procs_per_poloidal_plane,tol=1.0e-1
         for v in range(num_vars):
             for t in range(num_times):
                 tensor[redistributed_tids[i]-start_tid, redistributed_rids[i]-start_rid, v, t] = redistributed_node_data[i, t*num_vars + v]
-    return tensor
+
+    # add var and time dimensions to global blocking
+    if num_procs_per_poloidal_plane > 0:
+        vt_blocking = np.zeros((2, num_procs + 1), dtype=np.longlong)
+        vt_blocking[0, 1] = num_vars
+        vt_blocking[1, 1] = num_times
+        global_blocking = np.vstack([global_blocking, vt_blocking])
+        parallel_map = np.ones((4), dtype=np.longlong)
+        parallel_map[0] = num_theta_procs
+        parallel_map[1] = num_procs_per_poloidal_plane
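+        # entries 2 and 3 of parallel_map stay 1: the var and time dimensions
+        # are not distributed across ranks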
+
+    return tensor, global_blocking, parallel_map

 def get_cylindrical_coordinates(x, y, z, axis, tol):
     """
@@ -326,6 +349,32 @@ def distribute_composite_ids_across_procs(num_procs_x,total_x,num_procs_y,total_
     for i in range(num_target_xids):
         for j in range(num_target_yids):
             target_cids[i*num_target_yids + j] = (start_xid + i)*total_y + (start_yid + j)
+
+    # create the global blocking array needed for dist tensor context
+    global_blocking = np.zeros((2, num_procs + 1), dtype=np.longlong)
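+    # row d, entry i+1 holds the exclusive end offset of block i in dimension d;
+    # any remainder is spread one extra id per low-numbered block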
+    for i in range(num_procs_x):
+        global_blocking[0, i+1] = global_blocking[0, i] + total_x // num_procs_x
+        if i < total_x % num_procs_x:
+            global_blocking[0, i+1] += 1
+    for i in range(num_procs_y):
+        global_blocking[1, i+1] = global_blocking[1, i] + total_y // num_procs_y
+        if i < total_y % num_procs_y:
+            global_blocking[1, i+1] += 1
+
+    return target_cids, global_blocking
+
+def distribute_composite_ids_to_root(num_procs_x, total_x, num_procs_y, total_y):
+    """
+    Assign every composite id spanned by the two coordinate directions to the
+    root processor (rank 0); all other processors receive an empty id array
+    """
+
+    # only the root rank owns ids in this decomposition
+    target_cids = -1*np.ones(0, dtype=np.longlong)
+    rank = gt.proc_rank()
+    if rank == 0:
+        total_cids = total_x*total_y
+        target_cids = np.arange(0, total_cids, 1, dtype=np.longlong)  # stop is exclusive
     return target_cids

 if __name__ == "__main__":
@@ -340,7 +389,7 @@ def distribute_composite_ids_across_procs(num_procs_x,total_x,num_procs_y,total_
     tol = 1.0e-10
     if len(sys.argv) >= 5:
         tol = np.double(sys.argv[4])
-    tensor = torus_to_tensor(base_filename, axis, num_procs_per_poloidal_plane, tol)
+    tensor, global_blocking, parallel_map = torus_to_tensor(base_filename, axis, num_procs_per_poloidal_plane, tol)
     gt.finalizeGenten()
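With this change, `torus_to_tensor` returns a `(tensor, global_blocking, parallel_map)` triple rather than a bare tensor. A minimal driver sketch that consumes the new return values is shown below; the module name `torus_to_tensor` and the `mpiexec` invocation are illustrative assumptions, not part of the commit:

```python
# hypothetical driver; run under MPI, e.g.: mpiexec -n 8 python driver.py mesh z 2
import sys

import pygenten as gt
from torus_to_tensor import torus_to_tensor  # assumed module name

base_filename = sys.argv[1]                      # Exodus base filename
axis = sys.argv[2]                               # symmetry axis of the torus
num_procs_per_poloidal_plane = int(sys.argv[3])  # <= 0 gathers everything to rank 0

# tensor is this rank's (theta, node, var, time) block, empty if the rank owns
# no data; on the 2-D path global_blocking holds per-dimension block offsets
# and parallel_map the processor grid, on the serial path both are empty lists
tensor, global_blocking, parallel_map = torus_to_tensor(
    base_filename, axis, num_procs_per_poloidal_plane)

if tensor.size > 0:
    print('rank', gt.proc_rank(), 'holds a block of shape', tensor.shape)

gt.finalizeGenten()
```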