DISPATCH
amr_io_mod.f90
1 !===============================================================================
2 !> Module handling convenient AMR I/O. We wish to have a file format that makes
3 !> it easy to read when restarting jobs, and which makes it easy to read with
4 !> Python. We can rely on the existing mechanism to write patch metadata in the
5 !> form of file run/snapno/rank_rankno_patches.nml with namelist data for each
6 !> patch. This may be replaced later with a file written with mpi_buffer_mod.
7 !>
8 !> We can reuse the functionality in mpi_file_mod, and build a task list with
9 !> patch data, ready for output. We also reuse the mechanism which counts down
10 !> the number of tasks still remaining to be added to the list -- this saves
11 !> debugging efforts, since it is known to work and is rather delicate, since
12 !> new AMR patches may be added after the I/O process has started, until it is
13 !> complete, with all existing tasks having passed the current out_next value.
14 !>
15 !> Note that the output procedure is called from inside a critical region in
16 !> data_io_mod.f90, and hence everything in output and the procedures it calls
17 !> is trivially thread safe; only the last thread to reach next_out enters here,
18 !> and while it is active other threads are waiting at the start of the critical
19 !> region. When they enter, they find that the output has been completed.
20 !>
21 !> Call hierarchy:
22 !>
23 !> experiment_t%output ! request patch output
24 !> gpatch_t%output ! catch output request
25 !> data_io_t%output ! select output method
26 !> amr_io_t%output ! check if final thread & rank
27 !> amr_io_t%output_list ! run through list
28 !> mpi_file_t%open ! open data/run/IOUT/snapshot.dat
29 !> amr_io%t%output_buffer ! fill buffer for variable iv
30 !> mpi_file_t%write ! compute offset and size
31 !> MPI_File_write_at ! write out
32 !> mpi_file_t%close ! close file
33 !>
34 !> OpenMPI aspects: All threads call experiment_t%output repeatedly, but most
35 !> of the time they return from data_io%output, after finding that the time has
36 !> not advanced enough. When reaching a time where they should add their data
37 !> to the output list, they must operate one-at-a-time, so that part should be
38 !> inside a unique critical region (or should use a lock).
39 !>
40 !> data_io%output indeed has name critical regions around all calls that end up
41 !> inside amr_io_mod, so only one thread can ever be inside the procedures below.
42 !> But we must also make sure that when that thread decides to do collective MPI
43 !> calls, which may only be done by one thread on some systems (laptops), all
44 !> other threads must be held up at the very same critical region (data_io_cr).
45 !> This is arranged by using an OMP lock in data_io_mod -- cf. comments there.
46 !===============================================================================
47 MODULE amr_io_mod
48  USE io_mod
49  USE trace_mod
50  USE task_mod
51  USE patch_mod
52  USE link_mod
53  USE list_mod
54  USE mpi_file_mod
55  USE mpi_mod
56  USE time_slices_mod
57  implicit none
58  private
59  !.............................................................................
60  type, public:: amr_io_t
61  integer:: iout ! snapshot number
62  integer:: n(3) ! 3-D array dims in output
63  integer:: nv ! number of variables in output
64  integer:: task_size ! words per variable in a task
65  integer:: var_size ! words per variable in the run
66  integer:: rank_size ! words per variable in the rank
67  integer(8):: rank_offset ! offset from variable start
68  integer(8):: task_offset=0_8 ! offset in buffer
69  type(mpi_counter_t):: ranks_counter ! number of ranks not ready
70  type(mpi_counter_t):: offset_counter ! rank offset in variable
71  type(list_t), pointer:: list => null()
72  real, allocatable:: buffer(:,:,:,:,:)
73  type(mpi_file_t):: file
74  contains
75  procedure:: init
76  procedure:: output
77  procedure:: check
78  procedure:: output_list
79  procedure:: output_buffer
80  procedure:: input
81  end type
82  !.............................................................................
83  integer:: verbose=0 ! verbosity of log info
84  integer:: nt=2 ! number of time slots in output
85  type(amr_io_t), public:: amr_io ! global instance
86 CONTAINS
87 
88 !===============================================================================
89 !> Initialize AMR I/O, by initializing the rank counter; the counter resets to
90 !> the initial value after hitting zero.
91 !===============================================================================
92 SUBROUTINE init (self, patch)
93  class(amr_io_t):: self
94  class(patch_t):: patch
95  namelist /amr_io_params/ verbose, nt
96  integer:: iostat
97  logical, save:: first_time = .true.
98  !-----------------------------------------------------------------------------
99  call trace%begin ('amr_io_t%init')
100  if (first_time) then
101  first_time = .false.
102  rewind(io%input)
103  read (io%input, amr_io_params, iostat=iostat)
104  write (io%output, amr_io_params)
105  call self%ranks_counter%init (mpi%size)
106  call self%offset_counter%init (0_8)
107  if (nt == 2) then
108  time_slices%order = 2
109  end if
110  end if
111  if (verbose > 0) &
112  write (stdout,"('amr_io_t%init: ',a,i6)") &
113  'ranks%i =', self%ranks_counter%i
114  call trace%end ()
115 END SUBROUTINE init
116 
117 !===============================================================================
118 !> AMR I/O output. For the most exact restart results, the two time slices
119 !> bracketing the out_next time should both be saved -- interpolation in time
120 !> would not conserve mass, momentum, energu, and magnetic flux divergence.
121 !===============================================================================
122 SUBROUTINE output (self, patch, count)
123  class(amr_io_t):: self
124  class(patch_t):: patch
125  class(task_t), pointer:: task
126  class(patch_t), pointer:: out
127  integer:: count, l(3), u(3), n(3), prv, now, remains, it, iv
128  real, allocatable:: tmp(:,:,:,:)
129  logical, save:: first_time=.true.
130  !-----------------------------------------------------------------------------
131  call trace%begin ('amr_io_t%output')
132  !-----------------------------------------------------------------------------
133  ! Allocate a task with two time slots, for the two bracketing time slices,
134  ! and with a size that optionally include the guard zones.
135  !-----------------------------------------------------------------------------
136  allocate (out)
137  out%id = patch%id
138  out%nt = nt
139  out%nv = patch%nv
140  out%nw = 1
141  if (io%guard_zones) then
142  l = patch%mesh%lb
143  u = patch%mesh%ub
144  else
145  l = patch%mesh%li
146  u = patch%mesh%ui
147  end if
148  n(1:3) = u-l+1
149  allocate (out%mem(n(1),n(2),n(3),out%nv,out%nt,out%nw))
150  if (first_time) then
151  end if
152  now = patch%it
153  if (patch%istep > 1) then
154  prv = 1 + modulo(now-2,patch%nt)
155  else
156  prv = now
157  end if
158  allocate (out%t(2), out%dt(2))
159  if (nt == 2) then
160  io%nml_version = 3
161  !---------------------------------------------------------------------------
162  ! Copy the two bracketing time slots to the output task
163  !---------------------------------------------------------------------------
164  do iv=1,patch%nv
165  call convert_variables (patch, out%mem(:,:,:,iv,1,1), iv, prv)
166  call convert_variables (patch, out%mem(:,:,:,iv,2,1), iv, now)
167  end do
168  out%t(1) = patch%t(prv)
169  out%t(2) = patch%t(now)
170  out%dt(1) = patch%dt(prv)
171  out%dt(2) = patch%dt(now)
172  else
173  !---------------------------------------------------------------------------
174  ! Alternatively, use time-slice interpolation to a single output time
175  !---------------------------------------------------------------------------
176  allocate (tmp(n(1),n(2),n(3),patch%nt-1))
177  do iv=1,patch%nv
178  do it=1,patch%nt-1
179  call convert_variables (patch, tmp(:,:,:,it), iv, it)
180  end do
181  call time_slices%interpolate (patch, tmp, out%mem(:,:,:,iv,1,1))
182  end do
183  deallocate (tmp)
184  end if
185  self%task_size = product(n)*nt
186  self%task_offset = self%task_offset + self%task_size
187  patch%amr_offset = self%task_offset
188  !-----------------------------------------------------------------------------
189  ! Append the task copy to a list_t instance
190  !-----------------------------------------------------------------------------
191  if (.not.associated(self%list)) then
192  allocate (self%list)
193  call self%list%init
194  end if
195  task => out
196  call self%list%append_task (task)
197  !-----------------------------------------------------------------------------
198  ! When the last task has been appended, decrement the counter that counts
199  ! the number of ranks that are not yet ready
200  !-----------------------------------------------------------------------------
201  if (verbose > 1) &
202  write (stdout,"('amr_io_t%output: ',a,i6)") &
203  'count =', count
204  if (count == 0) then
205  !---------------------------------------------------------------------------
206  ! Increase the offset into the variable block by the size of the output
207  ! buffer (computed when allocated) in this rank
208  !---------------------------------------------------------------------------
209  self%n = n
210  self%nv = patch%nv
211  remains = self%ranks_counter%update(-1) - 1
212  !---------------------------------------------------------------------------
213  ! If other ranks remain, set a flag forcing data_io_t%output() to make
214  ! repeated calls to self%check
215  !---------------------------------------------------------------------------
216  if (verbose > 1) &
217  write (stdout,"('amr_io_t%output: ',a,i6)") &
218  'remains =', remains
219  self%iout = patch%iout
220  self%rank_size = self%task_size*self%list%n
221  if (remains == 0) then
222  call self%check
223  else
224  io%needs_check = .true.
225  end if
226  end if
227  call trace%end ()
228 END SUBROUTINE output
229 
230 !===============================================================================
231 !> Check if all ranks are ready to do I/O
232 !===============================================================================
233 SUBROUTINE check (self)
234  class(amr_io_t):: self
235  integer:: remains
236  !-----------------------------------------------------------------------------
237  call trace%begin ('amr_io_t%check')
238  remains = self%ranks_counter%update(0)
239  if (verbose > 1) &
240  write (stdout,"('amr_io_t%check: ',a,i6)") &
241  'remains =', remains
242  if (remains == 0) then
243  !---------------------------------------------------------------------------
244  ! Now that all ranks are ready, the total size per variable may be computed,
245  ! and the contents of the task list may be written out
246  !---------------------------------------------------------------------------
247  if (verbose > 0) &
248  write (stdout,"('amr_io_t%check: ',a,i10)") &
249  'var_size =', self%var_size
250  call self%output_list
251  !---------------------------------------------------------------------------
252  ! As output is now complete, the output task list should be reset, and the
253  ! MPI master should reset the rank count
254  !---------------------------------------------------------------------------
255  call self%list%reset
256  io%needs_check = .false.
257  self%task_offset = 0_8
258  if (mpi%master) then
259  call trace%begin ('counter_t%begin')
260  call self%ranks_counter%reset (mpi%size)
261  call self%offset_counter%reset (0_8)
262  call trace%end ()
263  end if
264  end if
265  call trace%end ()
266 END SUBROUTINE check
267 
268 !===============================================================================
269 !> This procedure is called when all ranks are ready to do I/O. For now, we do
270 !> it in a blocking fashion
271 !===============================================================================
272 SUBROUTINE output_list (self)
273  class(amr_io_t):: self
274  class(link_t), pointer:: link
275  class(task_t), pointer:: task
276  class(patch_t), pointer:: patch
277  integer:: i, iv, n(5)
278  integer(8):: rank_offset, var_size
279  character(len=64):: filename
280  namelist /offset_nml/ rank_offset, var_size
281  !-----------------------------------------------------------------------------
282  ! For each variable, allocate output buffer, fill it, and write it out
283  !-----------------------------------------------------------------------------
284  call trace%begin ('amr_io_t%fill_buffer')
285  n(1:3) = self%n
286  n(4) = nt
287  n(5) = self%list%n
288  allocate (self%buffer(n(1),n(2),n(3),n(4),n(5)))
289  !-----------------------------------------------------------------------------
290  ! Compute offset and size
291  !-----------------------------------------------------------------------------
292  self%rank_offset = self%offset_counter%update (self%rank_size)
293  self%var_size = self%offset_counter%update (0_8)
294  io%gb_out = self%var_size*self%nv*4.0/1024.**3
295  if (verbose > 0) &
296  write (stdout,"('amr_io_t%output: ',a,5i10)") &
297  'task_size, rank_size, var_size, rank_offset =', &
298  self%task_size, self%rank_size, self%var_size, self%rank_offset
299  !-----------------------------------------------------------------------------
300  ! Open the output file and loop over variables
301  !-----------------------------------------------------------------------------
302  link => self%list%head
303  task => link%task
304  write (filename,'(a,i5.5,"/snapshot.dat")') &
305  trim(io%outputname), self%iout
306  if (verbose > 0) &
307  write (stdout,"('amr_io_t%fill_buffer: ',a)") &
308  'filename = '//trim(filename)
309  !$omp atomic write
310  io%halt = .true.
311  !$omp end atomic
312  call self%file%openw (filename)
313  do iv=1,self%nv
314  i = 1
315  link => self%list%head
316  do while (associated(link))
317  task => link%task
318  select type (task)
319  class is (patch_t)
320 !print *, shape(self%buffer), 'buffer'
321 !print *, shape(task%mem), 'mem'
322  self%buffer(:,:,:,:,i) = task%mem(:,:,:,iv,:,1)
323  i = i + 1
324  end select
325  link => link%next
326  end do
327  call self%output_buffer (iv)
328  end do
329  deallocate (self%buffer)
330  !-----------------------------------------------------------------------------
331  ! Close the file, which ends the critical collective call section, so we can
332  ! now remove the io%halt trap, which prevents other threads from getting into
333  ! trouble in the mean time.
334  !-----------------------------------------------------------------------------
335  call self%file%close
336  !$omp atomic write
337  io%halt = .false.
338  !$omp end atomic
339  !-----------------------------------------------------------------------------
340  ! Open the file "data/run/rank_rrrrr_patches.nml", appending offset
341  !-----------------------------------------------------------------------------
342  rank_offset = self%rank_offset
343  var_size = self%var_size
344  flush (io_unit%nml2)
345  write (io_unit%nml2, offset_nml)
346  flush (io_unit%nml2)
347  call trace%end ()
348 END SUBROUTINE output_list
349 
350 !===============================================================================
351 !> Actual AMR I/O output. Compute offset into file, open the file, output the
352 !> buffer, and close the file. The file operations are collective, and hence
353 !> this needs to be done by all ranks together, and by just one thread in each,
354 !> while all other threads either wait (conservative choice), or continue.
355 !===============================================================================
356 SUBROUTINE output_buffer (self, iv)
357  class(amr_io_t):: self
358  integer:: iv
359  integer(8):: offset
360  !-----------------------------------------------------------------------------
361  call trace%begin ('amr_io_t%output_buffer')
362  !-----------------------------------------------------------------------------
363  ! The offset in the snapshot file to this buffer is the offset over total
364  ! variable size for this snapshot, plus the rank offset in a variable
365  !-----------------------------------------------------------------------------
366  offset = (iv-1)*self%var_size + self%rank_offset
367  if (verbose > 0) &
368  write (stdout,"('amr_io_t%output_buffer: ',a,2i12)") &
369  'iv, offset =', iv, offset
370  call self%file%write (4_8*offset, self%rank_size, self%buffer)
371  call trace%end ()
372 END SUBROUTINE output_buffer
373 
374 !===============================================================================
375 !> AMR I/O input. The patch is assumed to exist, with correct dimensions of
376 !> %mem(), based on the reading of metadata.
377 !===============================================================================
378 SUBROUTINE input (self, patch)
379  class(amr_io_t):: self
380  class(patch_t):: patch
381  !-----------------------------------------------------------------------------
382  call trace%begin ('amr_io_t%input')
383  call trace%end ()
384 END SUBROUTINE input
385 
386 !===============================================================================
387 ! Copy variables from `patch%mem` to `mem` array, converting variables based
388 ! on the I/O format parameter. In the process, the time slot ordering is
389 ! rearranged into increasing time order.
390 !===============================================================================
391 SUBROUTINE convert_variables (patch, mem, iv, it)
392  class(patch_t):: patch
393  real:: mem(:,:,:)
394  integer:: iv, it, jt
395  integer:: io_format, l(3), u(3), i, j, k
396  !...........................................................................
397  call trace%begin ('amr_io-t%convert_variables')
398  if (io%guard_zones) then
399  l = patch%mesh%lb
400  u = max(patch%mesh%ub,patch%mesh%gn)
401  else
402  l = patch%mesh%li
403  u = patch%mesh%ui
404  end if
405  io_format = io%format
406  if (io_format >= 10) then
407  io_format = io_format-10
408  else if (io_format >= 6) then
409  io_format = io_format-6
410  end if
411  jt = patch%iit(it)
412  if (io_format >= 2 .and. iv==patch%idx%d) then
413  ! --- Convert density to log density ---
414  mem(:,:,:) = log( &
415  patch%mem(l(1):u(1),l(2):u(2),l(3):u(3),iv ,jt,1))
416  else if ((io%format == 0 .or. io%format==2) .and. patch%solver_is('ramses') &
417  .and. iv >= patch%idx%px .and. iv <= patch%idx%pz) then
418  ! --- Convert momentum to velocity ---
419  mem(:,:,:) &
420  = patch%mem(l(1):u(1),l(2):u(2),l(3):u(3),iv ,jt,1) &
421  / patch%mem(l(1):u(1),l(2):u(2),l(3):u(3),patch%idx%d,jt,1)
422  else
423  mem(:,:,:) = &
424  patch%mem(l(1):u(1),l(2):u(2),l(3):u(3),iv ,jt,1)
425  end if
426  call trace%end()
427 END SUBROUTINE convert_variables
428 
429 END MODULE amr_io_mod
Compute time derivative from a sequence of time slices, using Lagrange interpolation.
Module with list handling for generic class task_t objects.
Definition: list_mod.f90:4
Template module for patches, which adds pointers to memory and mesh, and number of dimensions and var...
Definition: patch_mod.f90:6
Module for handling blocking and non-blocking MPI parallel I/O to a single file.
Definition: mpi_file_mod.f90:4
Module handling convenient AMR I/O. We wish to have a file format that makes it easy to read when res...
Definition: amr_io_mod.f90:47
Definition: io_mod.f90:4
Template module for tasks.
Definition: task_mod.f90:4