diff --git a/.github/scripts/prebuild-case-optimization.sh b/.github/scripts/prebuild-case-optimization.sh index 938ce2f438..f580770330 100755 --- a/.github/scripts/prebuild-case-optimization.sh +++ b/.github/scripts/prebuild-case-optimization.sh @@ -1,10 +1,10 @@ #!/bin/bash -# Pre-builds all benchmark cases with --case-optimization. -# No GPU hardware needed — compilation only. +# Pre-builds all benchmark cases with --case-optimization using --dry-run so +# binaries are cached before the GPU run job. No simulation is executed. # Can run in two modes: # 1. Direct (Frontier login nodes): pass cluster/device/interface as args -# 2. Inside SLURM (Phoenix): uses $job_device/$job_interface from submit-slurm-job.sh +# 2. Inside SLURM (Phoenix/frontier_amd): uses $job_device/$job_interface # Usage: bash prebuild-case-optimization.sh [ ] set -e @@ -22,14 +22,18 @@ case "$cluster" in *) echo "ERROR: Unknown cluster '$cluster'"; exit 1 ;; esac -source .github/scripts/clean-build.sh -clean_build +# Phoenix starts fresh (no prior dep build); other clusters pre-build deps via +# build.sh first, so we must preserve them and only clean MFC target staging. +if [ "$cluster" = "phoenix" ]; then + source .github/scripts/clean-build.sh + clean_build +else + find build/staging -maxdepth 1 -regex '.*/[0-9a-f]+' -type d -exec rm -rf {} + 2>/dev/null || true + find build/install -maxdepth 1 -regex '.*/[0-9a-f]+' -type d -exec rm -rf {} + 2>/dev/null || true +fi . ./mfc.sh load -c "$flag" -m g -# Set GPU build flags from interface — this is always a GPU build. -# Don't use gpu-opts.sh since $job_device may be "cpu" when submitted -# to a CPU SLURM partition (no GPU hardware needed for compilation). case "$job_interface" in acc) gpu_opts="--gpu acc" ;; omp) gpu_opts="--gpu mp" ;; @@ -38,5 +42,5 @@ esac for case in benchmarks/*/case.py; do echo "=== Pre-building: $case ===" - ./mfc.sh build -i "$case" --case-optimization $gpu_opts -j 8 + ./mfc.sh run "$case" --case-optimization $gpu_opts -j 8 --dry-run done diff --git a/.github/scripts/run_case_optimization.sh b/.github/scripts/run_case_optimization.sh index b5aeb35856..e0d2f797b6 100755 --- a/.github/scripts/run_case_optimization.sh +++ b/.github/scripts/run_case_optimization.sh @@ -23,13 +23,14 @@ benchmarks=( # For Frontier/Frontier AMD: deps were fetched on the login node via --deps-only; # build case-optimized binaries here on the compute node before running. -# For Phoenix: prebuild-case-optimization.sh already built everything in a prior SLURM job. +# For Phoenix and frontier_amd: prebuild-case-optimization.sh already built +# everything in a prior SLURM job (via --dry-run), so skip the build here. # # Clean stale MFC target staging before building. On self-hosted CI runners, # corrupted intermediate files from a prior failed build (e.g. CCE optcg crash) # can persist and poison subsequent builds. Each case-opt config gets its own # hash-named staging dir, but install dirs and other artifacts may be stale. -if [ "$job_cluster" != "phoenix" ]; then +if [ "$job_cluster" != "phoenix" ] && [ "$job_cluster" != "frontier_amd" ]; then # Clean stale MFC target dirs (hash-named) from prior builds, but # preserve dependency dirs (hipfort, fftw, etc.) since the compute # node has no internet to re-fetch them. diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bfda53f87c..50da79b0d4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -659,12 +659,16 @@ jobs: if: matrix.cluster == 'phoenix' run: bash .github/scripts/submit-slurm-job.sh .github/scripts/prebuild-case-optimization.sh cpu ${{ matrix.interface }} ${{ matrix.cluster }} + - name: Pre-Build (SLURM) + if: matrix.cluster == 'frontier_amd' + run: bash .github/scripts/submit-slurm-job.sh .github/scripts/prebuild-case-optimization.sh gpu ${{ matrix.interface }} ${{ matrix.cluster }} + - name: Build & Run Case-Optimization Tests - if: matrix.cluster != 'phoenix' + if: matrix.cluster != 'phoenix' && matrix.cluster != 'frontier_amd' run: bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} - name: Run Case-Optimization Tests - if: matrix.cluster == 'phoenix' + if: matrix.cluster == 'phoenix' || matrix.cluster == 'frontier_amd' run: bash .github/scripts/submit-slurm-job.sh .github/scripts/run_case_optimization.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }} - name: Cancel SLURM Jobs diff --git a/CMakeLists.txt b/CMakeLists.txt index e696d20b61..72258149f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -644,7 +644,7 @@ exit 0 target_link_options(${a_target} PRIVATE -fopenmp) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -fopenmp-target-fast -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription) - target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) + target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a -flto-partitions=${MFC_BUILD_JOBS}) endif() endif() @@ -710,14 +710,15 @@ exit 0 PRIVATE -DFRONTIER_UNIFIED) endif() - find_library(HIP_LIB amdhip64 + find_library(HIP_LIB amdhip64 HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) find_library(HIPFORT_AMDGCN_LIB hipfort-amdgcn HINTS "$ENV{OLCF_AFAR_ROOT}/lib" REQUIRED) target_include_directories(${a_target} PRIVATE "$ENV{OLCF_AFAR_ROOT}/include/hipfort/amdgcn") target_link_libraries(${a_target} PRIVATE - ${HIP_LIB} ${HIPFORT_AMDGCN_LIB} flang_rt.hostdevice) + ${HIP_LIB} ${HIPFORT_AMDGCN_LIB}) + endif() elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") @@ -790,6 +791,12 @@ if (MFC_POST_PROCESS) # -O0 is in response to https://github.com/MFlowCode/MFC-develop/issues/95 target_compile_options(post_process PRIVATE -O0) + + # flang-23/LLD defaults to PIE; SILO and LAPACK static libs on Frontier are + # non-PIC, producing R_X86_64_32 relocations that LLD rejects in PIE mode. + if (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + target_link_options(post_process PRIVATE -no-pie) + endif() endif() if (MFC_SYSCHECK) diff --git a/examples/3D_performance_test/case.py b/examples/3D_performance_test/case.py index 4d4a4dc4b5..4e08a8ea1b 100644 --- a/examples/3D_performance_test/case.py +++ b/examples/3D_performance_test/case.py @@ -6,7 +6,7 @@ json.dumps( { # Logistics - "run_time_info": "T", + "run_time_info": "F", # Computational Domain Parameters "x_domain%beg": 0.0e00, "x_domain%end": 4.0e-03 / 1.0e-03, diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 4f35e3e77e..cd15530368 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -193,7 +193,7 @@ contains $:GPU_UPDATE(device='[isc1, isc2, isc3]') - if (chemistry .or. dummy) then + if (chemistry) then ! Set offsets based on direction using array indexing offsets = 0 offsets(idir) = 1 diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 122fb73a86..aed6e616c4 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -186,7 +186,6 @@ module m_global_parameters logical :: E_wrt logical, dimension(num_fluids_max) :: alpha_rho_e_wrt logical :: fft_wrt - logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug logical :: pres_wrt logical, dimension(num_fluids_max) :: alpha_wrt logical :: gamma_wrt @@ -397,7 +396,6 @@ contains file_per_process = .false. E_wrt = .false. fft_wrt = .false. - dummy = .false. pres_wrt = .false. alpha_wrt = .false. gamma_wrt = .false. diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index e93d4c3122..ae850a9134 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -186,7 +186,6 @@ module m_global_parameters real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) integer :: buff_size !< Number of ghost cells for boundary condition storage logical :: fft_wrt - logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug contains @@ -303,7 +302,6 @@ contains elliptic_smoothing = .false. fft_wrt = .false. - dummy = .false. simplex_perturb = .false. simplex_params%perturb_vel(:) = .false. diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index f5bc800e54..4de261864d 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -454,14 +454,16 @@ contains call s_mpi_abort('Fatal Error: Inconsistent allocation of source_spatials') end if - $:GPU_UPDATE(device='[source_spatials(ai)%coord]') - $:GPU_UPDATE(device='[source_spatials(ai)%val]') - if (support(ai) >= 5) then - if (dim == 2) then - $:GPU_UPDATE(device='[source_spatials(ai)%angle]') - end if - if (dim == 3) then - $:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]') + if (count > 0) then + $:GPU_UPDATE(device='[source_spatials(ai)%coord]') + $:GPU_UPDATE(device='[source_spatials(ai)%val]') + if (support(ai) >= 5) then + if (dim == 2) then + $:GPU_UPDATE(device='[source_spatials(ai)%angle]') + end if + if (dim == 3) then + $:GPU_UPDATE(device='[source_spatials(ai)%xyz_to_r_ratios]') + end if end if end if end do diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index e6e14aeb5d..19c1d798ff 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -529,7 +529,7 @@ contains #:for CBC_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (cbc_dir == ${CBC_DIR}$ .and. recon_type == WENO_TYPE) then ! PI2 of flux_rs_vf and flux_src_rs_vf at j = 1/2 - if (weno_order == 3 .or. dummy) then + if (weno_order == 3) then call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, F_rs${XYZ}$_vf, F_src_rs${XYZ}$_vf, is1, is2, & & is3, idwbuff(2)%beg, idwbuff(3)%beg) @@ -557,7 +557,7 @@ contains end if ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 - if (weno_order == 5 .or. dummy) then + if (weno_order == 5) then call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, F_rs${XYZ}$_vf, F_src_rs${XYZ}$_vf, is1, is2, & & is3, idwbuff(2)%beg, idwbuff(3)%beg) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index a5fdb3ef81..8ffcba6101 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -220,7 +220,7 @@ contains #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') icfl_max_loc = maxval(icfl_sf) #:endcall GPU_PARALLEL - if (viscous .or. dummy) then + if (viscous) then #:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') vcfl_max_loc = maxval(vcfl_sf) Rc_min_loc = minval(Rc_sf) diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index ff94f3a138..2ee806d928 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -30,12 +30,12 @@ module m_fftw type(c_ptr) :: fwd_plan, bwd_plan type(c_ptr) :: fftw_real_data, fftw_cmplx_data, fftw_fltr_cmplx_data - integer :: real_size, cmplx_size, x_size, batch_size, Nfq + integer :: real_size, cmplx_size, x_size, batch_size, Nfq, i2 real(c_double), pointer :: data_real(:) !< Real data complex(c_double_complex), pointer :: data_cmplx(:) !< Complex data in Fourier space complex(c_double_complex), pointer :: data_fltr_cmplx(:) !< Filtered complex data in Fourier space #if defined(MFC_GPU) - $:GPU_DECLARE(create='[real_size, cmplx_size, x_size, batch_size, Nfq]') + $:GPU_DECLARE(create='[real_size, cmplx_size, x_size, batch_size, Nfq, i2]') real(dp), allocatable, target :: data_real_gpu(:) complex(dp), allocatable, target :: data_cmplx_gpu(:) @@ -76,8 +76,8 @@ contains allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank)) gpu_fft_size(1) = real_size - iembed(1) = 0 - oembed(1) = 0 + iembed(1) = real_size + oembed(1) = cmplx_size $:GPU_ENTER_DATA(copyin='[real_size, cmplx_size, x_size, sys_size, batch_size, Nfq]') $:GPU_UPDATE(device='[real_size, cmplx_size, x_size, sys_size, batch_size]') #else @@ -189,6 +189,9 @@ contains $:END_GPU_PARALLEL_LOOP() do i = 1, fourier_rings + i2 = i + $:GPU_UPDATE(device='[i2]') + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m @@ -199,11 +202,11 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i2, l) end do end do end do @@ -241,13 +244,13 @@ contains #endif #:endcall GPU_HOST_DATA - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + $:GPU_PARALLEL_LOOP(collapse=3) do k = 1, sys_size do j = 0, m do l = 0, p data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k & & - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + q_cons_vf(k)%sf(j, i2, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) end do end do end do diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 8842759cbe..b91dd99e47 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -464,7 +464,6 @@ module m_global_parameters $:GPU_DECLARE(create='[Bx0]') logical :: fft_wrt - logical :: dummy !< AMDFlang workaround for case-optimization + GPU-kernel bug !> @name Continuum damage model parameters !> @{! real(wp) :: tau_star !< Stress threshold for continuum damage modeling @@ -695,7 +694,6 @@ contains #:endfor fft_wrt = .false. - dummy = .false. do j = 1, num_probes_max acoustic(j)%pulse = dflt_int diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 9b9cf64133..5f1fa9d73b 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -302,7 +302,7 @@ contains call s_populate_F_igr_buffers(bc_type, jac_sf) - if (igr_iter_solver == 1 .or. dummy) then ! Jacobi iteration + if (igr_iter_solver == 1) then ! Jacobi iteration $:GPU_PARALLEL_LOOP(private='[j, k, l]', collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index c8eff18847..d604648a76 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -27,15 +27,11 @@ module m_muscl type(int_bounds_info) :: is1_muscl, is2_muscl, is3_muscl $:GPU_DECLARE(create='[is1_muscl, is2_muscl, is3_muscl]') - !> @name The cell-average variables that will be MUSCL-reconstructed. Formerly, they are stored in v_vf. However, they are - !! transferred to v_rs_wsL and v_rs_wsR as to be reshaped (RS) and/or characteristically decomposed. The reshaping allows the - !! muscl procedure to be independent of the coordinate direction of the reconstruction. Lastly, notice that the left (L) and - !! right (R) results of the characteristic decomposition are stored in custom-constructed muscl- stencils (WS) that are annexed - !! to each position of a given scalar field. + !> @name The cell-average variables that will be MUSCL-reconstructed, unpacked into an array for performance !> @{ - real(wp), allocatable, dimension(:,:,:,:) :: v_rs_ws_x_muscl, v_rs_ws_y_muscl, v_rs_ws_z_muscl + real(wp), allocatable, dimension(:,:,:,:) :: v_rs_ws_muscl !> @} - $:GPU_DECLARE(create='[v_rs_ws_x_muscl, v_rs_ws_y_muscl, v_rs_ws_z_muscl]') + $:GPU_DECLARE(create='[v_rs_ws_muscl]') contains @@ -60,8 +56,7 @@ contains is3_muscl%end = p - is3_muscl%beg - @:ALLOCATE(v_rs_ws_x_muscl(is1_muscl%beg:is1_muscl%end, is2_muscl%beg:is2_muscl%end, is3_muscl%beg:is3_muscl%end, & - & 1:sys_size)) + @:ALLOCATE(v_rs_ws_muscl(is1_muscl%beg:is1_muscl%end, is2_muscl%beg:is2_muscl%end, is3_muscl%beg:is3_muscl%end, 1:sys_size)) if (n == 0) return @@ -77,9 +72,6 @@ contains is3_muscl%end = p - is3_muscl%beg - @:ALLOCATE(v_rs_ws_y_muscl(is2_muscl%beg:is2_muscl%end, is1_muscl%beg:is1_muscl%end, is3_muscl%beg:is3_muscl%end, & - & 1:sys_size)) - if (p == 0) return ! initializing in z-direction @@ -87,23 +79,19 @@ contains is1_muscl%beg = -buff_size; is1_muscl%end = m - is1_muscl%beg is3_muscl%beg = -buff_size; is3_muscl%end = p - is3_muscl%beg - @:ALLOCATE(v_rs_ws_z_muscl(is3_muscl%beg:is3_muscl%end, is2_muscl%beg:is2_muscl%end, is1_muscl%beg:is1_muscl%end, & - & 1:sys_size)) - end subroutine s_initialize_muscl_module !> Perform MUSCL reconstruction of left and right cell-boundary values from cell-averaged variables - subroutine s_muscl(v_vf, vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, muscl_dir, is1_muscl_d, & + subroutine s_muscl(v_vf, vL_rs_vf_x, vR_rs_vf_x, muscl_dir, is1_muscl_d, & & is2_muscl_d, is3_muscl_d) - type(scalar_field), dimension(1:), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_rs_vf_x, vL_rs_vf_y, & - & vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z - integer, intent(in) :: muscl_dir + type(scalar_field), dimension(1:), intent(in) :: v_vf + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_rs_vf_x, vR_rs_vf_x + integer, intent(in) :: muscl_dir type(int_bounds_info), intent(in) :: is1_muscl_d, is2_muscl_d, is3_muscl_d - integer :: j, k, l, i - real(wp) :: slopeL, slopeR, slope + integer :: j, k, l, i + real(wp) :: slopeL, slopeR, slope is1_muscl = is1_muscl_d is2_muscl = is2_muscl_d @@ -111,11 +99,7 @@ contains $:GPU_UPDATE(device='[is1_muscl, is2_muscl, is3_muscl]') - if (muscl_order /= 1 .or. dummy) then - call s_initialize_muscl(v_vf, muscl_dir) - end if - - if (muscl_order == 1 .or. dummy) then + if (muscl_order == 1) then if (muscl_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) @@ -130,26 +114,26 @@ contains end do $:END_GPU_PARALLEL_LOOP() else if (muscl_dir == 2) then - $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + do j = is1_muscl%beg, is1_muscl%end + do k = is2_muscl%beg, is2_muscl%end + vL_rs_vf_x(k, j, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_x(k, j, l, i) = v_vf(i)%sf(k, j, l) end do end do end do end do $:END_GPU_PARALLEL_LOOP() else if (muscl_dir == 3) then - $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) - do l = is3_muscl%beg, is3_muscl%end + do j = is1_muscl%beg, is1_muscl%end do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + do l = is3_muscl%beg, is3_muscl%end + vL_rs_vf_x(l, k, j, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_x(l, k, j, i) = v_vf(i)%sf(l, k, j) end do end do end do @@ -158,17 +142,39 @@ contains end if end if - if (muscl_order == 2 .or. dummy) then + v_size = ubound(v_vf, 1) + $:GPU_UPDATE(device='[v_size]') + + if (muscl_order /= 1) then + $:GPU_PARALLEL_LOOP(private='[j, k, l, i]', collapse=4) + do i = 1, v_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + v_rs_ws_muscl(j, k, l, i) = v_vf(i)%sf(j, k, l) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + + if (muscl_order == 2) then ! MUSCL Reconstruction - #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + #:for MUSCL_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1_muscl', 'is2_muscl', 'is3_muscl'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2_muscl', 'is1_muscl', 'is3_muscl'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3_muscl', 'is2_muscl', 'is1_muscl')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (muscl_dir == ${MUSCL_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=4,private='[i, j, k, l, slopeL, slopeR, slope]') - do l = is3_muscl%beg, is3_muscl%end - do k = is2_muscl%beg, is2_muscl%end - do j = is1_muscl%beg, is1_muscl%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end do i = 1, v_size - slopeL = v_rs_ws_${XYZ}$_muscl(j + 1, k, l, i) - v_rs_ws_${XYZ}$_muscl(j, k, l, i) - slopeR = v_rs_ws_${XYZ}$_muscl(j, k, l, i) - v_rs_ws_${XYZ}$_muscl(j - 1, k, l, i) + slopeL = v_rs_ws_muscl(${SF(' + 1')}$, i) - v_rs_ws_muscl(${SF('')}$, i) + slopeR = v_rs_ws_muscl(${SF('')}$, i) - v_rs_ws_muscl(${SF(' - 1')}$, i) slope = 0._wp if (muscl_lim == 0) then ! unlimited (central difference) @@ -200,10 +206,10 @@ contains end if ! reconstruct from left side - vL_rs_vf_${XYZ}$ (j, k, l, i) = v_rs_ws_${XYZ}$_muscl(j, k, l, i) - (5.e-1_wp*slope) + vL_rs_vf_x(j, k, l, i) = v_rs_ws_muscl(${SF('')}$, i) - (5.e-1_wp*slope) ! reconstruct from the right side - vR_rs_vf_${XYZ}$ (j, k, l, i) = v_rs_ws_${XYZ}$_muscl(j, k, l, i) + (5.e-1_wp*slope) + vR_rs_vf_x(j, k, l, i) = v_rs_ws_muscl(${SF('')}$, i) + (5.e-1_wp*slope) end do end do end do @@ -217,8 +223,7 @@ contains call nvtxStartRange("WENO-INTCOMP") #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then - call s_thinc_compression(v_rs_ws_${XYZ}$_muscl, vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, & - & vR_rs_vf_z, muscl_dir, is1_muscl, is2_muscl, is3_muscl) + call s_thinc_compression(v_rs_ws_muscl, vL_rs_vf_x, vR_rs_vf_x, muscl_dir, is1_muscl, is2_muscl, is3_muscl) end if #:endfor call nvtxEndRange() @@ -226,78 +231,10 @@ contains end subroutine s_muscl - !> Reshape cell-averaged variable data into direction-local work arrays for MUSCL reconstruction - subroutine s_initialize_muscl(v_vf, muscl_dir) - - type(scalar_field), dimension(:), intent(in) :: v_vf - integer, intent(in) :: muscl_dir - integer :: j, k, l, q !< Generic loop iterators - ! Determine MUSCL-reconstructed variables and map coordinate directions - - v_size = ubound(v_vf, 1) - $:GPU_UPDATE(device='[v_size]') - - if (muscl_dir == 1) then - $:GPU_PARALLEL_LOOP(private='[j, k, l, q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_x_muscl(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - - ! Reshaping/Projecting onto Characteristic Fields in y-direction - if (n == 0) return - - if (muscl_dir == 2) then - $:GPU_PARALLEL_LOOP(private='[j, k, l, q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_y_muscl(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - - ! Reshaping/Projecting onto Characteristic Fields in z-direction - if (p == 0) return - if (muscl_dir == 3) then - $:GPU_PARALLEL_LOOP(private='[j, k, l, q]', collapse=4) - do j = 1, v_size - do q = is3_muscl%beg, is3_muscl%end - do l = is2_muscl%beg, is2_muscl%end - do k = is1_muscl%beg - muscl_polyn, is1_muscl%end + muscl_polyn - v_rs_ws_z_muscl(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - - end subroutine s_initialize_muscl - !> Finalize the MUSCL module subroutine s_finalize_muscl_module() - @:DEALLOCATE(v_rs_ws_x_muscl) - - if (n == 0) return - - @:DEALLOCATE(v_rs_ws_y_muscl) - - if (p == 0) return - - @:DEALLOCATE(v_rs_ws_z_muscl) + @:DEALLOCATE(v_rs_ws_muscl) end subroutine s_finalize_muscl_module diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 4316c9278c..6975729c3f 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -104,11 +104,14 @@ module m_rhs $:GPU_DECLARE(create='[alf_sum]') real(wp), allocatable, dimension(:,:,:) :: blkmod1, blkmod2, alpha1, alpha2, Kterm - real(wp), allocatable, dimension(:,:,:,:) :: qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf - real(wp), allocatable, dimension(:,:,:,:) :: dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf + real(wp), allocatable, dimension(:,:,:,:) :: qL_rsx_vf, qR_rsx_vf + real(wp), allocatable, dimension(:,:,:,:) :: dqL_rsx_vf, dqR_rsx_vf $:GPU_DECLARE(create='[blkmod1, blkmod2, alpha1, alpha2, Kterm]') - $:GPU_DECLARE(create='[qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf]') - $:GPU_DECLARE(create='[dqL_rsx_vf, dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf]') + $:GPU_DECLARE(create='[qL_rsx_vf, qR_rsx_vf]') + $:GPU_DECLARE(create='[dqL_rsx_vf, dqR_rsx_vf]') + + integer :: iglob + $:GPU_DECLARE(create='[iglob]') contains @@ -253,7 +256,7 @@ contains end do end if - if ((.not. igr) .or. dummy) then + if ((.not. igr)) then @:ALLOCATE(dq_prim_dx_qp(1:1)) @:ALLOCATE(dq_prim_dy_qp(1:1)) @:ALLOCATE(dq_prim_dz_qp(1:1)) @@ -285,30 +288,6 @@ contains @:ALLOCATE(qR_rsx_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end, & & 1:sys_size)) - if (n > 0) then - @:ALLOCATE(qL_rsy_vf(idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, idwbuff(3)%beg:idwbuff(3)%end, & - & 1:sys_size)) - @:ALLOCATE(qR_rsy_vf(idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, idwbuff(3)%beg:idwbuff(3)%end, & - & 1:sys_size)) - else - @:ALLOCATE(qL_rsy_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end, & - & 1:sys_size)) - @:ALLOCATE(qR_rsy_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end, & - & 1:sys_size)) - end if - - if (p > 0) then - @:ALLOCATE(qL_rsz_vf(idwbuff(3)%beg:idwbuff(3)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, & - & 1:sys_size)) - @:ALLOCATE(qR_rsz_vf(idwbuff(3)%beg:idwbuff(3)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, & - & 1:sys_size)) - else - @:ALLOCATE(qL_rsz_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end, & - & 1:sys_size)) - @:ALLOCATE(qR_rsz_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end, & - & 1:sys_size)) - end if - if (.not. viscous) then do i = 1, num_dims @:ALLOCATE(dqL_prim_dx_n(i)%vf(1:sys_size)) @@ -414,30 +393,6 @@ contains & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) @:ALLOCATE(dqR_rsx_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - - if (n > 0) then - @:ALLOCATE(dqL_rsy_vf(idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, & - & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - @:ALLOCATE(dqR_rsy_vf(idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, & - & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - else - @:ALLOCATE(dqL_rsy_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & - & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - @:ALLOCATE(dqR_rsy_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & - & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - end if - - if (p > 0) then - @:ALLOCATE(dqL_rsz_vf(idwbuff(3)%beg:idwbuff(3)%end, idwbuff(2)%beg:idwbuff(2)%end, & - & idwbuff(1)%beg:idwbuff(1)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - @:ALLOCATE(dqR_rsz_vf(idwbuff(3)%beg:idwbuff(3)%end, idwbuff(2)%beg:idwbuff(2)%end, & - & idwbuff(1)%beg:idwbuff(1)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - else - @:ALLOCATE(dqL_rsz_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & - & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - @:ALLOCATE(dqR_rsz_vf(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, & - & idwbuff(3)%beg:idwbuff(3)%end, eqn_idx%mom%beg:eqn_idx%mom%end)) - end if end if else @:ALLOCATE(dq_prim_dx_qp(1)%vf(1:sys_size)) @@ -534,7 +489,7 @@ contains call cpu_time(t_start) - if (.not. igr .or. dummy) then + if (.not. igr) then ! Association/Population of Working Variables $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) do i = 1, sys_size @@ -572,12 +527,12 @@ contains end if end if - if (igr .or. dummy) then + if (igr) then call nvtxStartRange("RHS-COMMUNICATION") call s_populate_variables_buffers(bc_type, q_cons_vf, pb_in, mv_in, q_T_sf) call nvtxEndRange end if - if (.not. igr .or. dummy) then + if (.not. igr) then call nvtxStartRange("RHS-CONVERT") call s_convert_conservative_to_primitive_variables(q_cons_qp%vf, q_T_sf, q_prim_qp%vf, idwint) call nvtxEndRange @@ -600,11 +555,11 @@ contains if (qbmm) call s_mom_inv(q_cons_qp%vf, q_prim_qp%vf, mom_sp, mom_3d, pb_in, rhs_pb, mv_in, rhs_mv, idwbuff(1), & & idwbuff(2), idwbuff(3)) - if ((viscous .and. .not. igr) .or. dummy) then + if ((viscous .and. .not. igr)) then call nvtxStartRange("RHS-VISCOUS") - call s_get_viscous(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n, qL_prim, qR_rsx_vf, & - & qR_rsy_vf, qR_rsz_vf, dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n, qR_prim, q_prim_qp, & - & dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp, idwbuff(1), idwbuff(2), idwbuff(3)) + call s_get_viscous(qL_rsx_vf, dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n, qL_prim, qR_rsx_vf, dqR_prim_dx_n, & + & dqR_prim_dy_n, dqR_prim_dz_n, qR_prim, q_prim_qp, dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp, & + & idwbuff(1), idwbuff(2), idwbuff(3)) call nvtxEndRange end if @@ -620,8 +575,9 @@ contains call nvtxEndRange end if + ! Loop over coordinate directions for dimensional splitting do id = 1, num_dims - if (igr .or. dummy) then + if (igr) then if (id == 1) then $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) do l = -1, p + 1 @@ -650,59 +606,80 @@ contains call nvtxEndRange end if end if - if ((.not. igr) .or. dummy) then + if (.not. igr) then ! Reconstructing Primitive/Conservative Variables call nvtxStartRange("RHS-RECONSTRUCTION") if (.not. surface_tension) then - if (all(Re_size == 0) .or. int_comp > 0) then + if ((.not. weno_Re_flux) .or. int_comp > 0) then ! Reconstruct densitiess iv%beg = 1; iv%end = sys_size - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(1:sys_size), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(1:sys_size), qL_rsx_vf, qR_rsx_vf, id) else iv%beg = 1; iv%end = eqn_idx%cont%end - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qR_rsx_vf, id) + + iv%beg = eqn_idx%mom%beg; iv%end = eqn_idx%mom%end; iglob = id + $:GPU_UPDATE(device='[iv, iglob]') + + $:GPU_PARALLEL_LOOP(collapse=4, private='[i, j, k, l]') + do i = iv%beg, iv%end + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + qL_rsx_vf(j, k, l, i) = qL_prim(iglob)%vf(i)%sf(j, k, l) + qR_rsx_vf(j, k, l, i) = qR_prim(iglob)%vf(i)%sf(j, k, l) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() iv%beg = eqn_idx%E; iv%end = sys_size - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qR_rsx_vf, id) end if else if (int_comp > 0) then ! THINC reads cont and adv from v_rs_ws; must reconstruct full sys_size range to populate both iv%beg = 1; iv%end = sys_size - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(1:sys_size), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(1:sys_size), qL_rsx_vf, qR_rsx_vf, id) ! Surface tension requires first-order energy; overwrite the higher-order result from the full pass above iv%beg = eqn_idx%E; iv%end = eqn_idx%E - call s_reconstruct_cell_boundary_values_first_order(q_prim_qp%vf(eqn_idx%E), qL_rsx_vf, qL_rsy_vf, & - & qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) - else if (all(Re_size == 0)) then + call s_reconstruct_cell_boundary_values_first_order(q_prim_qp%vf(eqn_idx%E), qL_rsx_vf, qR_rsx_vf, id) + else if ((.not. weno_Re_flux)) then iv%beg = 1; iv%end = eqn_idx%E - 1 - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qR_rsx_vf, id) iv%beg = eqn_idx%E; iv%end = eqn_idx%E - call s_reconstruct_cell_boundary_values_first_order(q_prim_qp%vf(eqn_idx%E), qL_rsx_vf, qL_rsy_vf, & - & qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values_first_order(q_prim_qp%vf(eqn_idx%E), qL_rsx_vf, qR_rsx_vf, id) iv%beg = eqn_idx%E + 1; iv%end = sys_size - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qR_rsx_vf, id) else iv%beg = 1; iv%end = eqn_idx%cont%end - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qR_rsx_vf, id) + + iv%beg = eqn_idx%mom%beg; iv%end = eqn_idx%mom%end; iglob = id + $:GPU_UPDATE(device='[iv, iglob]') + + $:GPU_PARALLEL_LOOP(collapse=4, private='[i, j, k, l]') + do i = iv%beg, iv%end + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + qL_rsx_vf(j, k, l, i) = qL_prim(iglob)%vf(i)%sf(j, k, l) + qR_rsx_vf(j, k, l, i) = qR_prim(iglob)%vf(i)%sf(j, k, l) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() iv%beg = eqn_idx%E; iv%end = eqn_idx%E - call s_reconstruct_cell_boundary_values_first_order(q_prim_qp%vf(eqn_idx%E), qL_rsx_vf, qL_rsy_vf, & - & qL_rsz_vf, qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values_first_order(q_prim_qp%vf(eqn_idx%E), qL_rsx_vf, qR_rsx_vf, id) iv%beg = eqn_idx%E + 1; iv%end = sys_size - call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, id) + call s_reconstruct_cell_boundary_values(q_prim_qp%vf(iv%beg:iv%end), qL_rsx_vf, qR_rsx_vf, id) end if end if @@ -710,18 +687,16 @@ contains if (weno_Re_flux) then iv%beg = eqn_idx%mom%beg; iv%end = eqn_idx%mom%end call s_reconstruct_cell_boundary_values_visc_deriv(dq_prim_dx_qp(1)%vf(iv%beg:iv%end), dqL_rsx_vf, & - & dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf, id, dqL_prim_dx_n(id)%vf(iv%beg:iv%end), & - & dqR_prim_dx_n(id)%vf(iv%beg:iv%end), idwbuff(1), idwbuff(2), idwbuff(3)) + & dqR_rsx_vf, id, dqL_prim_dx_n(id)%vf(iv%beg:iv%end), dqR_prim_dx_n(id)%vf(iv%beg:iv%end), idwbuff(1), & + & idwbuff(2), idwbuff(3)) if (n > 0) then call s_reconstruct_cell_boundary_values_visc_deriv(dq_prim_dy_qp(1)%vf(iv%beg:iv%end), dqL_rsx_vf, & - & dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf, id, & - & dqL_prim_dy_n(id)%vf(iv%beg:iv%end), dqR_prim_dy_n(id)%vf(iv%beg:iv%end), idwbuff(1), idwbuff(2), & - & idwbuff(3)) + & dqR_rsx_vf, id, dqL_prim_dy_n(id)%vf(iv%beg:iv%end), dqR_prim_dy_n(id)%vf(iv%beg:iv%end), & + & idwbuff(1), idwbuff(2), idwbuff(3)) if (p > 0) then call s_reconstruct_cell_boundary_values_visc_deriv(dq_prim_dz_qp(1)%vf(iv%beg:iv%end), dqL_rsx_vf, & - & dqL_rsy_vf, dqL_rsz_vf, dqR_rsx_vf, dqR_rsy_vf, dqR_rsz_vf, id, & - & dqL_prim_dz_n(id)%vf(iv%beg:iv%end), dqR_prim_dz_n(id)%vf(iv%beg:iv%end), idwbuff(1), & - & idwbuff(2), idwbuff(3)) + & dqR_rsx_vf, id, dqL_prim_dz_n(id)%vf(iv%beg:iv%end), dqR_prim_dz_n(id)%vf(iv%beg:iv%end), & + & idwbuff(1), idwbuff(2), idwbuff(3)) end if end if end if @@ -740,10 +715,10 @@ contains ! Computing Riemann Solver Flux and Source Flux call nvtxStartRange("RHS-RIEMANN-SOLVER") - call s_riemann_solver(qR_rsx_vf, qR_rsy_vf, qR_rsz_vf, dqR_prim_dx_n(id)%vf, dqR_prim_dy_n(id)%vf, & - & dqR_prim_dz_n(id)%vf, qR_prim(id)%vf, qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & - & dqL_prim_dx_n(id)%vf, dqL_prim_dy_n(id)%vf, dqL_prim_dz_n(id)%vf, qL_prim(id)%vf, & - & q_prim_qp%vf, flux_n(id)%vf, flux_src_n(id)%vf, flux_gsrc_n(id)%vf, id, irx, iry, irz) + call s_riemann_solver(qR_rsx_vf, dqR_prim_dx_n(id)%vf, dqR_prim_dy_n(id)%vf, dqR_prim_dz_n(id)%vf, & + & qR_prim(id)%vf, qL_rsx_vf, dqL_prim_dx_n(id)%vf, dqL_prim_dy_n(id)%vf, & + & dqL_prim_dz_n(id)%vf, qL_prim(id)%vf, q_prim_qp%vf, flux_n(id)%vf, flux_src_n(id)%vf, & + & flux_gsrc_n(id)%vf, id, irx, iry, irz) call nvtxEndRange ! Additional physics and source terms RHS addition for advection source @@ -858,7 +833,7 @@ contains ! END: Additional physics and source terms if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then - if (.not. igr .or. dummy) then + if (.not. igr) then $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end @@ -1432,7 +1407,7 @@ contains end if if (cyl_coord .and. ((bc_y%beg == -2) .or. (bc_y%beg == -14))) then - if (viscous .or. dummy) then + if (viscous) then if (p > 0) then call s_compute_viscous_stress_cylindrical_boundary(q_prim_vf, & & dq_prim_dx_vf(eqn_idx%mom%beg:eqn_idx%mom%end), dq_prim_dy_vf(eqn_idx%mom%beg:eqn_idx%mom%end), & @@ -1519,7 +1494,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(private='[i, j, l]', collapse=2) do l = 0, p do j = 0, m @@ -1615,17 +1590,17 @@ contains end subroutine s_compute_additional_physics_rhs !> Reconstruct left and right cell-boundary values from cell-averaged variables - subroutine s_reconstruct_cell_boundary_values(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, norm_dir) + subroutine s_reconstruct_cell_boundary_values(v_vf, vL_x, vR_x, norm_dir) type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x, vL_y, vL_z - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_x, vR_y, vR_z + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_x integer, intent(in) :: norm_dir integer :: recon_dir !< Coordinate direction of the reconstruction integer :: i, j, k, l #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl','MUSCL_TYPE')] - if (recon_type == ${TYPE}$ .or. dummy) then + if (recon_type == ${TYPE}$) then ! Reconstruction in s1-direction if (norm_dir == 1) then is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) @@ -1641,38 +1616,26 @@ contains is1%end = is1%end - ${SCHEME}$_polyn end if - if (n > 0) then - if (p > 0) then - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,iv%beg:iv%end), vL_z(:,:,:, & - & iv%beg:iv%end), vR_x(:,:,:,iv%beg:iv%end), vR_y(:,:,:,iv%beg:iv%end), vR_z(:,:,:, & - & iv%beg:iv%end), recon_dir, is1, is2, is3) - else - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,iv%beg:iv%end), vL_z(:,:,:, & - & :), vR_x(:,:,:,iv%beg:iv%end), vR_y(:,:,:,iv%beg:iv%end), vR_z(:,:,:,:), recon_dir, & - & is1, is2, is3) - end if - else - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,:), vL_z(:,:,:,:), vR_x(:,:,:, & - & iv%beg:iv%end), vR_y(:,:,:,:), vR_z(:,:,:,:), recon_dir, is1, is2, is3) - end if + call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vR_x(:,:,:,iv%beg:iv%end), recon_dir, is1, & + & is2, is3) end if #:endfor end subroutine s_reconstruct_cell_boundary_values !> Perform first-order (piecewise constant) reconstruction of left and right cell-boundary values - subroutine s_reconstruct_cell_boundary_values_first_order(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, norm_dir) + subroutine s_reconstruct_cell_boundary_values_first_order(v_vf, vL_x, vR_x, norm_dir) type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x, vL_y, vL_z - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_x, vR_y, vR_z + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_x integer, intent(in) :: norm_dir integer :: recon_dir !< Coordinate direction of the reconstruction integer :: i, j, k, l ! Reconstruction in s1-direction #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl', 'MUSCL_TYPE')] - if (recon_type == ${TYPE}$ .or. dummy) then + if (recon_type == ${TYPE}$) then if (norm_dir == 1) then is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) recon_dir = 1; is1%beg = is1%beg + ${SCHEME}$_polyn @@ -1692,7 +1655,7 @@ contains #:endfor if (recon_dir == 1) then - $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end do k = is2%beg, is2%end @@ -1705,26 +1668,26 @@ contains end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 2) then - $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + do j = is1%beg, is1%end + do k = is2%beg, is2%end + vL_x(k, j, l, i) = v_vf(i)%sf(k, j, l) + vR_x(k, j, l, i) = v_vf(i)%sf(k, j, l) end do end do end do end do $:END_GPU_PARALLEL_LOOP() else if (recon_dir == 3) then - $:GPU_PARALLEL_LOOP(private='[i, j, k, l]', collapse=4) + $:GPU_PARALLEL_LOOP(collapse=4) do i = iv%beg, iv%end - do l = is3%beg, is3%end + do j = is1%beg, is1%end do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + do l = is3%beg, is3%end + vL_x(l, k, j, i) = v_vf(i)%sf(l, k, j) + vR_x(l, k, j, i) = v_vf(i)%sf(l, k, j) end do end do end do @@ -1767,14 +1730,6 @@ contains if (.not. igr) then @:DEALLOCATE(qL_rsx_vf, qR_rsx_vf) - if (n > 0) then - @:DEALLOCATE(qL_rsy_vf, qR_rsy_vf) - end if - - if (p > 0) then - @:DEALLOCATE(qL_rsz_vf, qR_rsz_vf) - end if - if (viscous) then do l = eqn_idx%mom%beg, eqn_idx%mom%end @:DEALLOCATE(dq_prim_dx_qp(1)%vf(l)%sf) @@ -1826,14 +1781,6 @@ contains if (weno_Re_flux) then @:DEALLOCATE(dqL_rsx_vf, dqR_rsx_vf) - - if (n > 0) then - @:DEALLOCATE(dqL_rsy_vf, dqR_rsy_vf) - end if - - if (p > 0) then - @:DEALLOCATE(dqL_rsz_vf, dqR_rsz_vf) - end if end if do i = 1, num_dims diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 78a00759ad..7672094f55 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -36,36 +36,26 @@ module m_riemann_solvers !! dy or dz. !> @{ real(wp), allocatable, dimension(:,:,:,:) :: flux_rsx_vf, flux_src_rsx_vf - real(wp), allocatable, dimension(:,:,:,:) :: flux_rsy_vf, flux_src_rsy_vf - real(wp), allocatable, dimension(:,:,:,:) :: flux_rsz_vf, flux_src_rsz_vf - $:GPU_DECLARE(create='[flux_rsx_vf, flux_src_rsx_vf, flux_rsy_vf, flux_src_rsy_vf, flux_rsz_vf, flux_src_rsz_vf]') + $:GPU_DECLARE(create='[flux_rsx_vf, flux_src_rsx_vf]') !> @} !> The cell-boundary values of the geometrical source flux that are computed through the chosen Riemann problem solver by using !! the left and right states given in qK_prim_rs_vf. Currently 2D axisymmetric for inviscid only. !> @{ real(wp), allocatable, dimension(:,:,:,:) :: flux_gsrc_rsx_vf - real(wp), allocatable, dimension(:,:,:,:) :: flux_gsrc_rsy_vf - real(wp), allocatable, dimension(:,:,:,:) :: flux_gsrc_rsz_vf - $:GPU_DECLARE(create='[flux_gsrc_rsx_vf, flux_gsrc_rsy_vf, flux_gsrc_rsz_vf]') + $:GPU_DECLARE(create='[flux_gsrc_rsx_vf]') !> @} ! Cell-boundary velocity from Riemann solution; used for source flux real(wp), allocatable, dimension(:,:,:,:) :: vel_src_rsx_vf - real(wp), allocatable, dimension(:,:,:,:) :: vel_src_rsy_vf - real(wp), allocatable, dimension(:,:,:,:) :: vel_src_rsz_vf - $:GPU_DECLARE(create='[vel_src_rsx_vf, vel_src_rsy_vf, vel_src_rsz_vf]') + $:GPU_DECLARE(create='[vel_src_rsx_vf]') real(wp), allocatable, dimension(:,:,:,:) :: mom_sp_rsx_vf - real(wp), allocatable, dimension(:,:,:,:) :: mom_sp_rsy_vf - real(wp), allocatable, dimension(:,:,:,:) :: mom_sp_rsz_vf - $:GPU_DECLARE(create='[mom_sp_rsx_vf, mom_sp_rsy_vf, mom_sp_rsz_vf]') + $:GPU_DECLARE(create='[mom_sp_rsx_vf]') real(wp), allocatable, dimension(:,:,:,:) :: Re_avg_rsx_vf - real(wp), allocatable, dimension(:,:,:,:) :: Re_avg_rsy_vf - real(wp), allocatable, dimension(:,:,:,:) :: Re_avg_rsz_vf - $:GPU_DECLARE(create='[Re_avg_rsx_vf, Re_avg_rsy_vf, Re_avg_rsz_vf]') + $:GPU_DECLARE(create='[Re_avg_rsx_vf]') !> @name Indical bounds in the s1-, s2- and s3-directions !> @{ @@ -85,14 +75,13 @@ contains !> Dispatch to the subroutines that are utilized to compute the Riemann problem solution. For additional information please !! reference: 1) s_hll_riemann_solver 2) s_hllc_riemann_solver 3) s_lf_riemann_solver 4) s_hlld_riemann_solver - subroutine s_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & + subroutine s_riemann_solver(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & - & qL_prim_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, & - & q_prim_vf, flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + & qL_prim_vf, qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, flux_vf, & + & flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qL_prim_rsy_vf, & - & qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: qL_prim_vf, qR_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: dqL_prim_dx_vf, dqR_prim_dx_vf, dqL_prim_dy_vf, & & dqR_prim_dy_vf, dqL_prim_dz_vf, dqR_prim_dz_vf @@ -103,10 +92,9 @@ contains #:for NAME, NUM in [('hll', 1), ('hllc', 2), ('hlld', 4), ('lf', 5)] if (riemann_solver == ${NUM}$) then - call s_${NAME}$_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & - & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, & - & dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, flux_vf, & - & flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + call s_${NAME}$_riemann_solver(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, qL_prim_vf, & + & qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, & + & q_prim_vf, flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) end if #:endfor @@ -137,14 +125,13 @@ contains end subroutine s_compute_viscous_source_flux !> HLL approximate Riemann solver, Harten et al. SIAM Review (1983) - subroutine s_hll_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & + subroutine s_hll_riemann_solver(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & - & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, & + & flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qL_prim_rsy_vf, & - & qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: qL_prim_vf, qR_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: dqL_prim_dx_vf, dqR_prim_dx_vf, dqL_prim_dy_vf, & & dqR_prim_dy_vf, dqL_prim_dz_vf, dqR_prim_dz_vf @@ -212,13 +199,17 @@ contains integer :: i, j, k, l, q !< Generic loop iterators ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions - call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & - & dqL_prim_dy_vf, dqL_prim_dz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, norm_dir, ix, iy, iz) + call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & + & qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz) ! Reshaping inputted data based on dimensional splitting direction call s_initialize_riemann_solver(flux_src_vf, norm_dir) - #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + #:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2', 'is1', 'is3'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3', 'is2', 'is1')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, & & alpha_R, tau_e_L, tau_e_R, Re_L, Re_R, s_L, s_R, s_S, Ys_L, Ys_R, xi_field_L, xi_field_R, & @@ -229,49 +220,49 @@ contains & gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, qv_avg, c_L, c_R, G_L, G_R, rho_avg, H_avg, c_avg, & & gamma_avg, ptilde_L, ptilde_R, vel_L_rms, vel_R_rms, vel_avg_rms, Ms_L, Ms_R, pres_SL, & & pres_SR, alpha_L_sum, alpha_R_sum, flux_tau_L, flux_tau_R]', copyin='[norm_dir]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%cont%end + i) + vel_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%cont%end + i) + vel_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%cont%end + i) vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E) + pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) + pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) if (mhd) then if (n == 0) then ! 1D: constant Bx; By, Bz as variables B%L(1) = Bx0 B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg + 1) + B%L(2) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg) + B%R(2) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg) + B%L(3) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg + 1) + B%R(3) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + 1) else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg + 2) + B%L(1) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg) + B%R(1) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg) + B%L(2) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg + 1) + B%R(2) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + 1) + B%L(3) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg + 2) + B%R(3) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + 2) end if end if @@ -342,8 +333,8 @@ contains if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%species%beg, eqn_idx%species%end - Ys_L(i - eqn_idx%species%beg + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - eqn_idx%species%beg + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + Ys_L(i - eqn_idx%species%beg + 1) = qL_prim_rsx_vf(${SF('')}$, i) + Ys_R(i - eqn_idx%species%beg + 1) = qR_prim_rsx_vf(${SF(' + 1')}$, i) end do call get_mixture_molecular_weight(Ys_L, MW_L) @@ -446,14 +437,14 @@ contains end do if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%damage)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%damage)), 0._wp) + G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) end if $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%stress%beg - 1 + i) + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) ! Elastic contribution to energy if G large enough TODO take out if statement if stable without if ((G_L > 1000) .and. (G_R > 1000)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) @@ -492,7 +483,7 @@ contains end if $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + Re_avg_rsx_vf(${SF('')}$, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do end if @@ -568,17 +559,17 @@ contains if (.not. relativity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, & - & i) = (s_M*alpha_rho_R(i)*vel_R(norm_dir) - s_P*alpha_rho_L(i) & - & *vel_L(norm_dir) + s_M*s_P*(alpha_rho_L(i) - alpha_rho_R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & i) = (s_M*alpha_rho_R(i)*vel_R(norm_dir) - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + & + s_M*s_P*(alpha_rho_L(i) - alpha_rho_R(i)))/(s_M - s_P) end do else if (relativity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, & - & i) = (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) - s_P*Ga%L*alpha_rho_L(i) & - & *vel_L(norm_dir) + s_M*s_P*(Ga%L*alpha_rho_L(i) - Ga%R*alpha_rho_R(i))) & - & /(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & i) = (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) - s_P*Ga%L*alpha_rho_L(i) & + & *vel_L(norm_dir) + s_M*s_P*(Ga%L*alpha_rho_L(i) - Ga%R*alpha_rho_R(i)))/(s_M & + & - s_P) end do end if @@ -588,54 +579,52 @@ contains do i = 1, 3 ! Flux of rho*v_i in the ${XYZ}$ direction = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + ! delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + i) = (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) - B%R(i) & - & *B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(rho_L*vel_L(i) & - & *vel_L(norm_dir) - B%L(i)*B%L(norm_dir) + dir_flg(i)*(pres_L + pres_mag%L) & - & ) + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + i) = (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) - B%R(i) & + & *B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(rho_L*vel_L(i) & + & *vel_L(norm_dir) - B%L(i)*B%L(norm_dir) + dir_flg(i)*(pres_L + pres_mag%L)) & + & + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i)))/(s_M - s_P) end do else if (mhd .and. relativity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, 3 ! Flux of m_i in the ${XYZ}$ direction = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + ! delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + i) = (s_M*(cm%R(i)*vel_R(norm_dir) - b4%R(i) & - & /Ga%R*B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(cm%L(i) & - & *vel_L(norm_dir) - b4%L(i)/Ga%L*B%L(norm_dir) + dir_flg(i)*(pres_L & - & + pres_mag%L)) + s_M*s_P*(cm%L(i) - cm%R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + i) = (s_M*(cm%R(i)*vel_R(norm_dir) - b4%R(i) & + & /Ga%R*B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(cm%L(i) & + & *vel_L(norm_dir) - b4%L(i)/Ga%L*B%L(norm_dir) + dir_flg(i)*(pres_L + pres_mag%L) & + & ) + s_M*s_P*(cm%L(i) - cm%R(i)))/(s_M - s_P) end do else if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *(pres_L - ptilde_L)) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & - & *pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) - s_P*(rho_L*vel_L(dir_idx(1)) & + & *vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + & + s_M*s_P*(rho_L*vel_L(dir_idx(i)) - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) & + & + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do else if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*pres_R - tau_e_R(dir_idx_tau(i))) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *pres_L - tau_e_L(dir_idx_tau(i))) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + dir_flg(dir_idx(i))*pres_R - tau_e_R(dir_idx_tau(i))) & + & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*pres_L & + & - tau_e_L(dir_idx_tau(i))) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) end do else $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*pres_R) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *pres_L) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) - rho_R*vel_R(dir_idx(i)))) & - & /(s_M - s_P) + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) & - & - vel_L(dir_idx(i))) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + dir_flg(dir_idx(i))*pres_R) - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) & + & + dir_flg(dir_idx(i))*pres_L) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & + & *pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do end if @@ -643,24 +632,23 @@ contains if (mhd .and. (.not. relativity)) then ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) & - & - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - & - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir) & - & *(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) + s_M*s_P*(E_L & - & - E_R))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir) & + & *(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) - s_P*(vel_L(norm_dir) & + & *(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) & + & + vel_L(3)*B%L(3))) + s_M*s_P*(E_L - E_R))/(s_M - s_P) #:endif else if (mhd .and. relativity) then ! energy flux = m_${XYZ}$ - mass flux Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - & - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) + s_M*s_P*(E_L & - & - E_R))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + & - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) + s_M*s_P*(E_L - E_R)) & + & /(s_M - s_P) else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - & - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + s_M*s_P*(E_L - E_R))/(s_M & - & - s_P) + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) - s_P*vel_L(dir_idx(1) & + & )*(E_L + pres_L - ptilde_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & + & *pcorr*(vel_R_rms - vel_L_rms)/2._wp else if (hypoelasticity) then flux_tau_L = 0._wp; flux_tau_R = 0._wp $:GPU_LOOP(parallelism='[seq]') @@ -668,53 +656,53 @@ contains flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) end do - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - & - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) + s_M*s_P*(E_L - E_R)) & - & /(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + & - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) + s_M*s_P*(E_L - E_R))/(s_M & + & - s_P) else - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R) - s_P*vel_L(dir_idx(1)) & - & *(E_L + pres_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & - & *pcorr*(vel_R_rms - vel_L_rms)/2._wp + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R) - s_P*vel_L(dir_idx(1))*(E_L & + & + pres_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms & + & - vel_L_rms)/2._wp end if ! Elastic Stresses if (hypoelasticity) then do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 ! TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%stress%beg - 1 + i) = (s_M*(rho_R*vel_R(dir_idx(1))*tau_e_R(i)) & - & - s_P*(rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + s_M*s_P*(rho_L*tau_e_L(i) & - & - rho_R*tau_e_R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%stress%beg - 1 + i) = (s_M*(rho_R*vel_R(dir_idx(1))*tau_e_R(i)) & + & - s_P*(rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + s_M*s_P*(rho_L*tau_e_L(i) & + & - rho_R*tau_e_R(i)))/(s_M - s_P) end do end if ! Advection flux and source: interface velocity for volume fraction transport $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_rs${XYZ}$_vf(j, k, l, i) = (qL_prim_rs${XYZ}$_vf(j, k, l, i) - qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i))*s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & i) - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, i) = (qL_prim_rsx_vf(${SF('')}$, i) - qR_prim_rsx_vf(${SF(' + 1')}$, & + & i))*s_M*s_P/(s_M - s_P) + flux_src_rsx_vf(${SF('')}$, i) = (s_M*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i) - s_P*qL_prim_rsx_vf(${SF('')}$, i))/(s_M - s_P) end do if (bubbles_euler) then ! From HLLC: Kills mass transport @ bubble gas density if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end) = 0._wp + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end) = 0._wp end if end if if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%species%beg, eqn_idx%species%end - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + Y_L = qL_prim_rsx_vf(${SF('')}$, i) + Y_R = qR_prim_rsx_vf(${SF(' + 1')}$, i) - flux_rs${XYZ}$_vf(j, k, l, & - & i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - & + s_M*s_P*(Y_L*rho_L - Y_R*rho_R))/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_rsx_vf(${SF('')}$, & + & i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + & + s_M*s_P*(Y_L*rho_L - Y_R*rho_R))/(s_M - s_P) + flux_src_rsx_vf(${SF('')}$, i) = 0._wp end do end if @@ -735,30 +723,28 @@ contains ! flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) $:GPU_LOOP(parallelism='[seq]') do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%B%beg + i) = (s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1) & - & *B%R(norm_dir)) - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1) & - & *B%L(norm_dir)) + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%B%beg + i) = (s_M*(vel_R(dir_idx(1))*B%R(i + 1) - vel_R(i + 1) & + & *B%R(norm_dir)) - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i + 1) & + & *B%L(norm_dir)) + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) end do if (hyper_cleaning) then ! propagate magnetic field divergence as a wave - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + norm_dir - 1) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%B%beg + norm_dir - 1) + (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, & - & l, eqn_idx%psi) - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%psi))/(s_M - s_P) - - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%psi) = (hyper_cleaning_speed**2*(s_M*B%R(norm_dir) & - & - s_P*B%L(norm_dir)) + s_M*s_P*(qL_prim_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%psi) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & eqn_idx%psi)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, eqn_idx%B%beg + norm_dir - 1) = flux_rsx_vf(${SF('')}$, & + & eqn_idx%B%beg + norm_dir - 1) + (s_M*qR_prim_rsx_vf(${SF(' + 1')}$, & + & eqn_idx%psi) - s_P*qL_prim_rsx_vf(${SF('')}$, eqn_idx%psi))/(s_M - s_P) + + flux_rsx_vf(${SF('')}$, & + & eqn_idx%psi) = (hyper_cleaning_speed**2*(s_M*B%R(norm_dir) & + & - s_P*B%L(norm_dir)) + s_M*s_P*(qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%psi) - qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%psi)))/(s_M - s_P) else ! Without hyperbolic cleaning, make sure flux of B_normal is identically zero - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + norm_dir - 1) = 0._wp + flux_rsx_vf(${SF('')}$, eqn_idx%B%beg + norm_dir - 1) = 0._wp end if end if - flux_src_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg) = 0._wp + flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = 0._wp end if #:if (NORM_DIR == 2) @@ -766,26 +752,26 @@ contains ! Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%E - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + 2) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + 2) - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%cont%end + 2) = flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + 2) - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do end if if (cyl_coord .and. hypoelasticity) then ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + 2) = flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + 2) + (s_M*tau_e_R(4) - s_P*tau_e_L(4))/(s_M - s_P) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%cont%end + 2) = flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + 2) + (s_M*tau_e_R(4) - s_P*tau_e_L(4))/(s_M - s_P) $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%stress%beg, eqn_idx%stress%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do end if #:endif @@ -796,7 +782,7 @@ contains end if #:endfor - if (viscous .or. dummy) then + if (viscous) then if (weno_Re_flux) then call s_compute_viscous_source_flux(qL_prim_vf(eqn_idx%mom%beg:eqn_idx%mom%end), & & dqL_prim_dx_vf(eqn_idx%mom%beg:eqn_idx%mom%end), & @@ -825,14 +811,13 @@ contains end subroutine s_hll_riemann_solver !> Lax-Friedrichs (Rusanov) approximate Riemann solver - subroutine s_lf_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & + subroutine s_lf_riemann_solver(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & - & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, & + & flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qL_prim_rsy_vf, & - & qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: qL_prim_vf, qR_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: dqL_prim_dx_vf, dqR_prim_dx_vf, dqL_prim_dy_vf, & & dqR_prim_dy_vf, dqL_prim_dz_vf, dqR_prim_dz_vf @@ -903,13 +888,17 @@ contains integer, dimension(3) :: idx_right_phys !< Physical (j,k,l) indices for right state. ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions - call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & - & dqL_prim_dy_vf, dqL_prim_dz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, norm_dir, ix, iy, iz) + call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & + & qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz) ! Reshaping inputted data based on dimensional splitting direction call s_initialize_riemann_solver(flux_src_vf, norm_dir) - #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + #:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2', 'is1', 'is3'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3', 'is2', 'is1')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, q, alpha_rho_L, alpha_rho_R, vel_L, vel_R, alpha_L, & & alpha_R, tau_e_L, tau_e_R, G_L, G_R, Re_L, Re_R, rho_avg, h_avg, gamma_avg, s_L, s_R, s_S, & @@ -920,49 +909,49 @@ contains & rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, qv_L, qv_R, c_L, c_R, E_L, E_R, H_L, & & H_R, ptilde_L, ptilde_R, s_M, s_P, xi_M, xi_P, Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi, & & Cp_L, Cp_R, Cv_L, Cv_R, R_gas_L, R_gas_R, MW_L, MW_R, T_L, T_R, Y_L, Y_R]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%cont%end + i) + vel_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%cont%end + i) + vel_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%cont%end + i) vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E) + pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) + pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) if (mhd) then if (n == 0) then ! 1D: constant Bx; By, Bz as variables B%L(1) = Bx0 B%R(1) = Bx0 - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + 1) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg + 1) + B%L(2) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg) + B%R(2) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg) + B%L(3) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg + 1) + B%R(3) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + 1) else ! 2D/3D: Bx, By, Bz as variables - B%L(1) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg) - B%R(1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg) - B%L(2) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + 1) - B%R(2) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg + 1) - B%L(3) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + 2) - B%R(3) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg + 2) + B%L(1) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg) + B%R(1) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg) + B%L(2) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg + 1) + B%R(2) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + 1) + B%L(3) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg + 2) + B%R(3) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + 2) end if end if @@ -1033,8 +1022,8 @@ contains if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%species%beg, eqn_idx%species%end - Ys_L(i - eqn_idx%species%beg + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - eqn_idx%species%beg + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + Ys_L(i - eqn_idx%species%beg + 1) = qL_prim_rsx_vf(${SF('')}$, i) + Ys_R(i - eqn_idx%species%beg + 1) = qR_prim_rsx_vf(${SF(' + 1')}$, i) end do call get_mixture_molecular_weight(Ys_L, MW_L) @@ -1135,13 +1124,13 @@ contains end do if (cont_damage) then - G_L = G_L*max((1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%damage)), 0._wp) - G_R = G_R*max((1._wp - qR_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%damage)), 0._wp) + G_L = G_L*max((1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) + G_R = G_R*max((1._wp - qR_prim_rsx_vf(${SF('')}$, eqn_idx%damage)), 0._wp) end if do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%stress%beg - 1 + i) + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) ! Elastic contribution to energy if G large enough TODO take out if statement if stable without if ((G_L > 1000) .and. (G_R > 1000)) then E_L = E_L + (tau_e_L(i)*tau_e_L(i))/(4._wp*G_L) @@ -1194,17 +1183,17 @@ contains if (.not. relativity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, & - & i) = (s_M*alpha_rho_R(i)*vel_R(norm_dir) - s_P*alpha_rho_L(i) & - & *vel_L(norm_dir) + s_M*s_P*(alpha_rho_L(i) - alpha_rho_R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & i) = (s_M*alpha_rho_R(i)*vel_R(norm_dir) - s_P*alpha_rho_L(i)*vel_L(norm_dir) & + & + s_M*s_P*(alpha_rho_L(i) - alpha_rho_R(i)))/(s_M - s_P) end do else if (relativity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, & - & i) = (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) - s_P*Ga%L*alpha_rho_L(i) & - & *vel_L(norm_dir) + s_M*s_P*(Ga%L*alpha_rho_L(i) - Ga%R*alpha_rho_R(i))) & - & /(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & i) = (s_M*Ga%R*alpha_rho_R(i)*vel_R(norm_dir) - s_P*Ga%L*alpha_rho_L(i) & + & *vel_L(norm_dir) + s_M*s_P*(Ga%L*alpha_rho_L(i) - Ga%R*alpha_rho_R(i)))/(s_M & + & - s_P) end do end if @@ -1214,54 +1203,52 @@ contains do i = 1, 3 ! Flux of rho*v_i in the ${XYZ}$ direction = rho * v_i * v_${XYZ}$ - B_i * B_${XYZ}$ + ! delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + i) = (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) - B%R(i) & - & *B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(rho_L*vel_L(i) & - & *vel_L(norm_dir) - B%L(i)*B%L(norm_dir) + dir_flg(i)*(pres_L + pres_mag%L) & - & ) + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + i) = (s_M*(rho_R*vel_R(i)*vel_R(norm_dir) - B%R(i) & + & *B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(rho_L*vel_L(i) & + & *vel_L(norm_dir) - B%L(i)*B%L(norm_dir) + dir_flg(i)*(pres_L + pres_mag%L)) & + & + s_M*s_P*(rho_L*vel_L(i) - rho_R*vel_R(i)))/(s_M - s_P) end do else if (mhd .and. relativity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, 3 ! Flux of m_i in the ${XYZ}$ direction = m_i * v_${XYZ}$ - b_i/Gamma * B_${XYZ}$ + ! delta_(${XYZ}$,i) * p_tot - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + i) = (s_M*(cm%R(i)*vel_R(norm_dir) - b4%R(i) & - & /Ga%R*B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(cm%L(i) & - & *vel_L(norm_dir) - b4%L(i)/Ga%L*B%L(norm_dir) + dir_flg(i)*(pres_L & - & + pres_mag%L)) + s_M*s_P*(cm%L(i) - cm%R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + i) = (s_M*(cm%R(i)*vel_R(norm_dir) - b4%R(i) & + & /Ga%R*B%R(norm_dir) + dir_flg(i)*(pres_R + pres_mag%R)) - s_P*(cm%L(i) & + & *vel_L(norm_dir) - b4%L(i)/Ga%L*B%L(norm_dir) + dir_flg(i)*(pres_L + pres_mag%L) & + & ) + s_M*s_P*(cm%L(i) - cm%R(i)))/(s_M - s_P) end do else if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *(pres_L - ptilde_L)) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & - & *pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + dir_flg(dir_idx(i))*(pres_R - ptilde_R)) - s_P*(rho_L*vel_L(dir_idx(1)) & + & *vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*(pres_L - ptilde_L)) & + & + s_M*s_P*(rho_L*vel_L(dir_idx(i)) - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) & + & + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do else if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*pres_R - tau_e_R(dir_idx_tau(i))) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *pres_L - tau_e_L(dir_idx_tau(i))) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & - & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + dir_flg(dir_idx(i))*pres_R - tau_e_R(dir_idx_tau(i))) & + & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*pres_L & + & - tau_e_L(dir_idx_tau(i))) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) end do else $:GPU_LOOP(parallelism='[seq]') do i = 1, num_vels - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*pres_R) & - & - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *pres_L) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) - rho_R*vel_R(dir_idx(i)))) & - & /(s_M - s_P) + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R(dir_idx(i)) & - & - vel_L(dir_idx(i))) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = (s_M*(rho_R*vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + dir_flg(dir_idx(i))*pres_R) - s_P*(rho_L*vel_L(dir_idx(1))*vel_L(dir_idx(i)) & + & + dir_flg(dir_idx(i))*pres_L) + s_M*s_P*(rho_L*vel_L(dir_idx(i)) & + & - rho_R*vel_R(dir_idx(i))))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & + & *pcorr*(vel_R(dir_idx(i)) - vel_L(dir_idx(i))) end do end if @@ -1269,24 +1256,23 @@ contains if (mhd .and. (.not. relativity)) then ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) & - & - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - & - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir) & - & *(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) + s_M*s_P*(E_L & - & - E_R))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir) & + & *(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) - s_P*(vel_L(norm_dir) & + & *(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) & + & + vel_L(3)*B%L(3))) + s_M*s_P*(E_L - E_R))/(s_M - s_P) #:endif else if (mhd .and. relativity) then ! energy flux = m_${XYZ}$ - mass flux Hard-coded for single-component for now - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & - & - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) + s_M*s_P*(E_L & - & - E_R))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*(cm%R(norm_dir) - Ga%R*alpha_rho_R(1)*vel_R(norm_dir)) & + & - s_P*(cm%L(norm_dir) - Ga%L*alpha_rho_L(1)*vel_L(norm_dir)) + s_M*s_P*(E_L - E_R)) & + & /(s_M - s_P) else if (bubbles_euler) then - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) & - & - s_P*vel_L(dir_idx(1))*(E_L + pres_L - ptilde_L) + s_M*s_P*(E_L - E_R))/(s_M & - & - s_P) + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms - vel_L_rms)/2._wp + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R - ptilde_R) - s_P*vel_L(dir_idx(1) & + & )*(E_L + pres_L - ptilde_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & + & *pcorr*(vel_R_rms - vel_L_rms)/2._wp else if (hypoelasticity) then flux_tau_L = 0._wp; flux_tau_R = 0._wp $:GPU_LOOP(parallelism='[seq]') @@ -1294,53 +1280,53 @@ contains flux_tau_L = flux_tau_L + tau_e_L(dir_idx_tau(i))*vel_L(dir_idx(i)) flux_tau_R = flux_tau_R + tau_e_R(dir_idx_tau(i))*vel_R(dir_idx(i)) end do - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & - & - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) + s_M*s_P*(E_L - E_R)) & - & /(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*(vel_R(dir_idx(1))*(E_R + pres_R) - flux_tau_R) & + & - s_P*(vel_L(dir_idx(1))*(E_L + pres_L) - flux_tau_L) + s_M*s_P*(E_L - E_R))/(s_M & + & - s_P) else - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R) - s_P*vel_L(dir_idx(1)) & - & *(E_L + pres_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R) & - & *pcorr*(vel_R_rms - vel_L_rms)/2._wp + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = (s_M*vel_R(dir_idx(1))*(E_R + pres_R) - s_P*vel_L(dir_idx(1))*(E_L & + & + pres_L) + s_M*s_P*(E_L - E_R))/(s_M - s_P) + (s_M/s_L)*(s_P/s_R)*pcorr*(vel_R_rms & + & - vel_L_rms)/2._wp end if ! Elastic Stresses if (hypoelasticity) then do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 ! TODO: this indexing may be slow - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%stress%beg - 1 + i) = (s_M*(rho_R*vel_R(dir_idx(1))*tau_e_R(i)) & - & - s_P*(rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + s_M*s_P*(rho_L*tau_e_L(i) & - & - rho_R*tau_e_R(i)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%stress%beg - 1 + i) = (s_M*(rho_R*vel_R(dir_idx(1))*tau_e_R(i)) & + & - s_P*(rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + s_M*s_P*(rho_L*tau_e_L(i) & + & - rho_R*tau_e_R(i)))/(s_M - s_P) end do end if ! Advection flux and source: interface velocity for volume fraction transport $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_rs${XYZ}$_vf(j, k, l, i) = (qL_prim_rs${XYZ}$_vf(j, k, l, i) - qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i))*s_M*s_P/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = (s_M*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & i) - s_P*qL_prim_rs${XYZ}$_vf(j, k, l, i))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, i) = (qL_prim_rsx_vf(${SF('')}$, i) - qR_prim_rsx_vf(${SF(' + 1')}$, & + & i))*s_M*s_P/(s_M - s_P) + flux_src_rsx_vf(${SF('')}$, i) = (s_M*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i) - s_P*qL_prim_rsx_vf(${SF('')}$, i))/(s_M - s_P) end do if (bubbles_euler) then ! From HLLC: Kills mass transport @ bubble gas density if (num_fluids > 1) then - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end) = 0._wp + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end) = 0._wp end if end if if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%species%beg, eqn_idx%species%end - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + Y_L = qL_prim_rsx_vf(${SF('')}$, i) + Y_R = qR_prim_rsx_vf(${SF(' + 1')}$, i) - flux_rs${XYZ}$_vf(j, k, l, & - & i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & - & + s_M*s_P*(Y_L*rho_L - Y_R*rho_R))/(s_M - s_P) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_rsx_vf(${SF('')}$, & + & i) = (s_M*Y_R*rho_R*vel_R(dir_idx(1)) - s_P*Y_L*rho_L*vel_L(dir_idx(1)) & + & + s_M*s_P*(Y_L*rho_L - Y_R*rho_R))/(s_M - s_P) + flux_src_rsx_vf(${SF('')}$, i) = 0._wp end do end if @@ -1361,14 +1347,13 @@ contains ! flux = (1 - delta(z,${XYZ}$)) * (v_${XYZ}$ * B_z - v_z * B_${XYZ}$) $:GPU_LOOP(parallelism='[seq]') do i = 0, 2 - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%B%beg + i) = (1 - dir_flg(i + 1))*(s_M*(vel_R(dir_idx(1)) & - & *B%R(i + 1) - vel_R(i + 1)*B%R(norm_dir)) - s_P*(vel_L(dir_idx(1)) & - & *B%L(i + 1) - vel_L(i + 1)*B%L(norm_dir)) + s_M*s_P*(B%L(i + 1) & - & - B%R(i + 1)))/(s_M - s_P) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%B%beg + i) = (1 - dir_flg(i + 1))*(s_M*(vel_R(dir_idx(1))*B%R(i + 1) & + & - vel_R(i + 1)*B%R(norm_dir)) - s_P*(vel_L(dir_idx(1))*B%L(i + 1) - vel_L(i & + & + 1)*B%L(norm_dir)) + s_M*s_P*(B%L(i + 1) - B%R(i + 1)))/(s_M - s_P) end do end if - flux_src_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg) = 0._wp + flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = 0._wp end if #:if (NORM_DIR == 2) @@ -1376,26 +1361,26 @@ contains ! Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%E - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + 2) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + 2) - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%cont%end + 2) = flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + 2) - (s_M*pres_R - s_P*pres_L)/(s_M - s_P) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do end if if (cyl_coord .and. hypoelasticity) then ! += tau_sigmasigma using HLL - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + 2) = flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + 2) + (s_M*tau_e_R(4) - s_P*tau_e_L(4))/(s_M - s_P) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%cont%end + 2) = flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + 2) + (s_M*tau_e_R(4) - s_P*tau_e_L(4))/(s_M - s_P) $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%stress%beg, eqn_idx%stress%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do end if #:endif @@ -1406,7 +1391,7 @@ contains end if #:endfor - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i, j, k, l, idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, & & vel_L, vel_R, Re_L, Re_R]', copyin='[norm_dir]') do l = isz%beg, isz%end @@ -1432,25 +1417,25 @@ contains else if (norm_dir == 2) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rsy_vf(k, j, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rsy_vf(k + 1, j, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(j, k, l, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(j, k + 1, l, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = qL_prim_rsy_vf(k, j, l, eqn_idx%mom%beg + i - 1) - vel_R(i) = qR_prim_rsy_vf(k + 1, j, l, eqn_idx%mom%beg + i - 1) + vel_L(i) = qL_prim_rsx_vf(j, k, l, eqn_idx%mom%beg + i - 1) + vel_R(i) = qR_prim_rsx_vf(j, k + 1, l, eqn_idx%mom%beg + i - 1) end do else $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rsz_vf(l, k, j, eqn_idx%E + i) - alpha_R(i) = qR_prim_rsz_vf(l + 1, k, j, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(j, k, l, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(j, k, l + 1, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = qL_prim_rsz_vf(l, k, j, eqn_idx%mom%beg + i - 1) - vel_R(i) = qR_prim_rsz_vf(l + 1, k, j, eqn_idx%mom%beg + i - 1) + vel_L(i) = qL_prim_rsx_vf(j, k, l, eqn_idx%mom%beg + i - 1) + vel_R(i) = qR_prim_rsx_vf(j, k, l + 1, eqn_idx%mom%beg + i - 1) end do end if @@ -1698,14 +1683,13 @@ contains end subroutine s_lf_riemann_solver !> HLLC Riemann solver with contact restoration, Toro et al. Shock Waves (1994) - subroutine s_hllc_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & + subroutine s_hllc_riemann_solver(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & - & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, & + & flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qL_prim_rsy_vf, & - & qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf - type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: qL_prim_vf, qR_prim_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: dqL_prim_dx_vf, dqR_prim_dx_vf, dqL_prim_dy_vf, & & dqR_prim_dy_vf, dqL_prim_dz_vf, dqR_prim_dz_vf @@ -1792,15 +1776,19 @@ contains integer :: Re_max, i, j, k, l, q !< Generic loop iterators ! Populating the buffers of the left and right Riemann problem states variables, based on the choice of boundary conditions - call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & - & dqL_prim_dy_vf, dqL_prim_dz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, norm_dir, ix, iy, iz) + call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & + & qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz) ! Reshaping inputted data based on dimensional splitting direction call s_initialize_riemann_solver(flux_src_vf, norm_dir) - #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + #:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2', 'is1', 'is3'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3', 'is2', 'is1')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then ! 6-EQUATION MODEL WITH HLLC HLLC star-state flux with contact wave speed s_S if (model_eqns == 3) then @@ -1815,9 +1803,9 @@ contains & vel_avg_rms, vel_L_tmp, vel_R_tmp, Ms_L, Ms_R, pres_SL, pres_SR, alpha_L_sum, & & alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, & & xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end vel_L_rms = 0._wp; vel_R_rms = 0._wp rho_L = 0._wp; rho_R = 0._wp gamma_L = 0._wp; gamma_R = 0._wp @@ -1827,14 +1815,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%cont%end + i) + vel_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%cont%end + i) + vel_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%cont%end + i) vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E) + pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) + pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) rho_L = 0._wp gamma_L = 0._wp @@ -1852,43 +1840,43 @@ contains if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, & - & l, eqn_idx%E + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) + qL_prim_rsx_vf(${SF('')}$, i) = max(0._wp, qL_prim_rsx_vf(${SF('')}$, i)) + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) = min(max(0._wp, qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%E + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) = min(max(0._wp, & - & qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)), 1._wp) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + qR_prim_rsx_vf(${SF(' + 1')}$, i) = max(0._wp, qR_prim_rsx_vf(${SF(' + 1')}$, i)) + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) = min(max(0._wp, & + & qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)), 1._wp) + alpha_R_sum = alpha_R_sum + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) = qL_prim_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & eqn_idx%E + i)/max(alpha_R_sum, sgm_eps) + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) = qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%E + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) = qR_prim_rsx_vf(${SF(' + 1')}$, & + & eqn_idx%E + i)/max(alpha_R_sum, sgm_eps) end do end if $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) - - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg + i - 1) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%adv%beg + i - 1) + rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) + gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) + + rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) + gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) + + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%adv%beg + i - 1) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%adv%beg + i - 1) end do if (viscous) then @@ -1900,8 +1888,8 @@ contains if (Re_size(i) > 0) Re_R(i) = 0._wp $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + Re_idx(i, q))/Res_gs(i, q) + Re_L(i) - Re_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + Re_idx(i, q))/Res_gs(i, & + Re_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, q) + Re_L(i) + Re_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, q))/Res_gs(i, & & q) + Re_R(i) end do Re_L(i) = 1._wp/max(Re_L(i), sgm_eps) @@ -1916,8 +1904,8 @@ contains if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%stress%beg - 1 + i) + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) end do G_L = 0._wp; G_R = 0._wp $:GPU_LOOP(parallelism='[seq]') @@ -1944,8 +1932,8 @@ contains if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%xi%beg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%xi%beg - 1 + i) + xi_field_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%xi%beg - 1 + i) + xi_field_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%xi%beg - 1 + i) end do G_L = 0._wp; G_R = 0._wp $:GPU_LOOP(parallelism='[seq]') @@ -1956,13 +1944,13 @@ contains end do ! Elastic contribution to energy if G large enough if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%xi%end + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%xi%end + 1) + E_L = E_L + G_L*qL_prim_rsx_vf(${SF('')}$, eqn_idx%xi%end + 1) + E_R = E_R + G_R*qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%xi%end + 1) end if $:GPU_LOOP(parallelism='[seq]') do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%stress%beg - 1 + i) + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) end do end if @@ -1985,7 +1973,7 @@ contains if (viscous) then $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + Re_avg_rsx_vf(${SF('')}$, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do end if @@ -2074,23 +2062,23 @@ contains ! COMPUTING FLUXES MASS FLUX. $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do ! MOMENTUM FLUX. f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = rho_Star*vel_K_Star*(dir_flg(dir_idx(i)) & - & *vel_K_Star + (1._wp - dir_flg(dir_idx(i)))*(xi_M*vel_L(dir_idx(i)) & - & + xi_P*vel_R(dir_idx(i)))) + dir_flg(dir_idx(i))*p_Star + (s_M/s_L) & - & *(s_P/s_R)*dir_flg(dir_idx(i))*pcorr + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = rho_Star*vel_K_Star*(dir_flg(dir_idx(i)) & + & *vel_K_Star + (1._wp - dir_flg(dir_idx(i)))*(xi_M*vel_L(dir_idx(i)) & + & + xi_P*vel_R(dir_idx(i)))) + dir_flg(dir_idx(i))*p_Star + (s_M/s_L)*(s_P/s_R) & + & *dir_flg(dir_idx(i))*pcorr end do ! ENERGY FLUX. f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%E) = (E_star + p_Star)*vel_K_Star + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + flux_rsx_vf(${SF('')}$, eqn_idx%E) = (E_star + p_Star)*vel_K_Star + (s_M/s_L)*(s_P/s_R)*pcorr*s_S ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then @@ -2098,9 +2086,9 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + dir_idx(i)) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) - xi_M*tau_e_L(dir_idx_tau(i)) & - & - xi_P*tau_e_R(dir_idx_tau(i)) + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end + dir_idx(i)) = flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) - xi_M*tau_e_L(dir_idx_tau(i)) & + & - xi_P*tau_e_R(dir_idx_tau(i)) ! ENERGY ELASTIC FLUX. flux_ene_e = flux_ene_e - xi_M*(vel_L(dir_idx(i))*tau_e_L(dir_idx_tau(i)) & & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i)) & @@ -2108,24 +2096,23 @@ contains & *tau_e_R(dir_idx_tau(i)) + s_P*(xi_R*((s_S - vel_R(i)) & & *(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) end do - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%E) = flux_rs${XYZ}$_vf(j, k, l, eqn_idx%E) + flux_ene_e + flux_rsx_vf(${SF('')}$, eqn_idx%E) = flux_rsx_vf(${SF('')}$, eqn_idx%E) + flux_ene_e end if ! VOLUME FRACTION FLUX. $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*s_S + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*s_S + flux_rsx_vf(${SF('')}$, i) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i)*s_S + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, i)*s_S end do ! Advection velocity source: interface velocity for volume fraction transport $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, & - & dir_idx(i)) = xi_M*(vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *(s_S*(xi_MP*xi_L_m1 + 1) - vel_L(dir_idx(i)))) & - & + xi_P*(vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*(s_S*(xi_PP*xi_R_m1 & - & + 1) - vel_R(dir_idx(i)))) + vel_src_rsx_vf(${SF('')}$, & + & dir_idx(i)) = xi_M*(vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & + & *(s_S*(xi_MP*xi_L_m1 + 1) - vel_L(dir_idx(i)))) + xi_P*(vel_R(dir_idx(i)) & + & + dir_flg(dir_idx(i))*(s_S*(xi_PP*xi_R_m1 + 1) - vel_R(dir_idx(i)))) end do ! INTERNAL ENERGIES ADVECTION FLUX. K-th pressure and velocity in preparation for the internal @@ -2138,28 +2125,27 @@ contains & *xi_R**(1._wp/gammas(i) + 1._wp) - pi_infs(i)/(1._wp + gammas(i)) - pres_R) & & + pres_R) - flux_rs${XYZ}$_vf(j, k, l, i + eqn_idx%int_en%beg - 1) = ((xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i + eqn_idx%adv%beg - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & i + eqn_idx%adv%beg - 1))*(gammas(i)*p_K_Star + pi_infs(i)) & - & + (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i + eqn_idx%cont%beg - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & i + eqn_idx%cont%beg - 1))*qvs(i))*vel_K_Star + (s_M/s_L)*(s_P/s_R) & - & *pcorr*s_S*(xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i + eqn_idx%adv%beg - 1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & i + eqn_idx%adv%beg - 1)) + flux_rsx_vf(${SF('')}$, i + eqn_idx%int_en%beg - 1) = ((xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i + eqn_idx%adv%beg - 1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i + eqn_idx%adv%beg - 1))*(gammas(i)*p_K_Star + pi_infs(i)) & + & + (xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i + eqn_idx%cont%beg - 1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i + eqn_idx%cont%beg - 1))*qvs(i))*vel_K_Star + (s_M/s_L)*(s_P/s_R) & + & *pcorr*s_S*(xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i + eqn_idx%adv%beg - 1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i + eqn_idx%adv%beg - 1)) end do - flux_src_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = vel_src_rsx_vf(${SF('')}$, dir_idx(1)) ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S)) & - & *(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) & - & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) & - & - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) & + & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) & + & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) end do end if @@ -2167,18 +2153,17 @@ contains if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%xi%beg - 1 + i) = xi_M*(s_S/(s_L - s_S)) & - & *(s_L*rho_L*xi_field_L(i) - rho_L*vel_L(dir_idx(1))*xi_field_L(i)) & - & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - & - rho_R*vel_R(dir_idx(1))*xi_field_R(i)) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%xi%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + & - rho_L*vel_L(dir_idx(1))*xi_field_L(i)) + xi_P*(s_S/(s_R - s_S)) & + & *(s_R*rho_R*xi_field_R(i) - rho_R*vel_R(dir_idx(1))*xi_field_R(i)) end do end if ! COLOR FUNCTION FLUX if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%c) = (xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%c) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%c))*s_S + flux_rsx_vf(${SF('')}$, eqn_idx%c) = (xi_M*qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%c) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%c))*s_S end if ! Geometrical source flux for cylindrical coordinates @@ -2187,20 +2172,20 @@ contains ! Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%E - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%int_en%beg, eqn_idx%int_en%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg - 1 + dir_idx(1)) = flux_gsrc_rs${XYZ}$_vf(j, k, & - & l, eqn_idx%mom%beg - 1 + dir_idx(1)) - p_Star + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%mom%beg - 1 + dir_idx(1)) = flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%mom%beg - 1 + dir_idx(1)) - p_Star ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do end if #:endif @@ -2208,14 +2193,13 @@ contains if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg - 1 + dir_idx(1)) = flux_gsrc_rs${XYZ}$_vf(j, k, & - & l, eqn_idx%mom%beg - 1 + dir_idx(1)) - p_Star + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%mom%beg - 1 + dir_idx(1)) = flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%mom%beg - 1 + dir_idx(1)) - p_Star - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%mom%end) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg + 1) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif end do @@ -2233,9 +2217,9 @@ contains & alpha_R_sum, rho_Star, E_Star, p_Star, p_K_Star, vel_K_star, s_L, s_R, s_M, s_P, s_S, & & xi_M, xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, & & Xs_R, Gamma_iL, Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end vel_L_rms = 0._wp; vel_R_rms = 0._wp rho_L = 0._wp; rho_R = 0._wp gamma_L = 0._wp; gamma_R = 0._wp @@ -2244,27 +2228,27 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%cont%end + i) + vel_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%cont%end + i) + vel_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%cont%end + i) vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') @@ -2280,8 +2264,8 @@ contains qv_R = qv_R + alpha_rho_R(i)*qvs(i) end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E) + pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) + pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms + qv_L E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms + qv_R @@ -2344,60 +2328,59 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, & - & i) = xi_M*alpha_rho_L(i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & - & + xi_P*alpha_rho_R(i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, & + & i) = xi_M*alpha_rho_L(i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*alpha_rho_R(i) & + & *(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do ! Momentum flux. f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(i)) + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + (1._wp & - & - dir_flg(dir_idx(i)))*vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) & - & + dir_flg(dir_idx(i))*pres_L) + xi_P*(rho_R*(vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + (1._wp & - & - dir_flg(dir_idx(i)))*vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) & - & + dir_flg(dir_idx(i))*pres_R) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(i) & + & ) + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + (1._wp - dir_flg(dir_idx(i))) & + & *vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + dir_flg(dir_idx(i))*pres_L) & + & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + (1._wp - dir_flg(dir_idx(i))) & + & *vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + dir_flg(dir_idx(i))*pres_R) end do if (bubbles_euler) then ! Put p_tilde in $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + dir_idx(i)) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) + xi_M*(dir_flg(dir_idx(i)) & - & *(-1._wp*ptilde_L)) + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end + dir_idx(i)) = flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) + xi_M*(dir_flg(dir_idx(i))*(-1._wp*ptilde_L) & + & ) + xi_P*(dir_flg(dir_idx(i))*(-1._wp*ptilde_R)) end do end if - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%E) = 0._wp + flux_rsx_vf(${SF('')}$, eqn_idx%E) = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%alf, eqn_idx%alf ! only advect the void fraction - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do ! Advection velocity source: interface velocity for volume fraction transport $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(i)) = 0._wp + vel_src_rsx_vf(${SF('')}$, dir_idx(i)) = 0._wp ! IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp end do - flux_src_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = vel_src_rsx_vf(${SF('')}$, dir_idx(1)) ! Add advection flux for bubble variables if (bubbles_euler) then $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%bub%beg, eqn_idx%bub%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & - & + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*nbub_L*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & + & + xi_P*nbub_R*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do end if @@ -2408,20 +2391,20 @@ contains ! Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%E - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & + & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & + & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do end if #:endif @@ -2429,17 +2412,16 @@ contains if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%mom%end) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg + 1) + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & + & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif end do @@ -2456,9 +2438,9 @@ contains & xi_P, xi_L, xi_R, xi_L_m1, xi_R_m1, xi_MP, xi_PP, nbub_L, nbub_R, PbwR3Lbar, PbwR3Rbar, & & R3Lbar, R3Rbar, R3V2Lbar, R3V2Rbar, Ys_L, Ys_R, Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, & & Gamma_iR, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end vel_L_rms = 0._wp; vel_R_rms = 0._wp rho_L = 0._wp; rho_R = 0._wp gamma_L = 0._wp; gamma_R = 0._wp @@ -2467,16 +2449,16 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do vel_L_rms = 0._wp; vel_R_rms = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%cont%end + i) + vel_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%cont%end + i) + vel_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%cont%end + i) vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do @@ -2485,33 +2467,33 @@ contains if (mpp_lim .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) + gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) + rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) + gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) end do else if (num_fluids > 2) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) + gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) + rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) + gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) end do else - rho_L = qL_prim_rs${XYZ}$_vf(j, k, l, 1) + rho_L = qL_prim_rsx_vf(${SF('')}$, 1) gamma_L = gammas(1) pi_inf_L = pi_infs(1) qv_L = qvs(1) - rho_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, 1) + rho_R = qR_prim_rsx_vf(${SF(' + 1')}$, 1) gamma_R = gammas(1) pi_inf_R = pi_infs(1) qv_R = qvs(1) @@ -2529,9 +2511,9 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 1, Re_size(i) - Re_L(i) = (1._wp - qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + Re_idx(i, & + Re_L(i) = (1._wp - qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + Re_idx(i, & & q)))/Res_gs(i, q) + Re_L(i) - Re_R(i) = (1._wp - qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + Re_idx(i, & + Re_R(i) = (1._wp - qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + Re_idx(i, & & q)))/Res_gs(i, q) + Re_R(i) end do @@ -2541,8 +2523,8 @@ contains end if end if - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E) + pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) + pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) E_L = gamma_L*pres_L + pi_inf_L + 5.e-1_wp*rho_L*vel_L_rms E_R = gamma_R*pres_R + pi_inf_R + 5.e-1_wp*rho_R*vel_R_rms @@ -2553,21 +2535,21 @@ contains if (avg_state == 2) then $:GPU_LOOP(parallelism='[seq]') do i = 1, nb - R0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, rs(i)) - R0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, rs(i)) + R0_L(i) = qL_prim_rsx_vf(${SF('')}$, rs(i)) + R0_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, rs(i)) - V0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, vs(i)) - V0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, vs(i)) + V0_L(i) = qL_prim_rsx_vf(${SF('')}$, vs(i)) + V0_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, vs(i)) if (.not. polytropic .and. .not. qbmm) then - P0_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, ps(i)) - P0_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, ps(i)) + P0_L(i) = qL_prim_rsx_vf(${SF('')}$, ps(i)) + P0_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, ps(i)) end if end do if (.not. qbmm) then if (adv_n) then - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%n) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%n) + nbub_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%n) + nbub_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%n) else nbub_L = 0._wp nbub_R = 0._wp @@ -2577,14 +2559,14 @@ contains nbub_R = nbub_R + (R0_R(i)**3._wp)*weight(i) end do - nbub_L = (3._wp/(4._wp*pi))*qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + num_fluids)/nbub_L - nbub_R = (3._wp/(4._wp*pi))*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & + nbub_L = (3._wp/(4._wp*pi))*qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + num_fluids)/nbub_L + nbub_R = (3._wp/(4._wp*pi))*qR_prim_rsx_vf(${SF(' + 1')}$, & & eqn_idx%E + num_fluids)/nbub_R end if else ! nb stored in 0th moment of first R0 bin in variable conversion module - nbub_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%bub%beg) - nbub_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%bub%beg) + nbub_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%bub%beg) + nbub_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%bub%beg) end if $:GPU_LOOP(parallelism='[seq]') @@ -2596,14 +2578,14 @@ contains end do if (qbmm) then - PbwR3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 4) - PbwR3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 4) + PbwR3Lbar = mom_sp_rsx_vf(${SF('')}$, 4) + PbwR3Rbar = mom_sp_rsx_vf(${SF(' + 1')}$, 4) - R3Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 1) - R3Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 1) + R3Lbar = mom_sp_rsx_vf(${SF('')}$, 1) + R3Rbar = mom_sp_rsx_vf(${SF(' + 1')}$, 1) - R3V2Lbar = mom_sp_rs${XYZ}$_vf(j, k, l, 3) - R3V2Rbar = mom_sp_rs${XYZ}$_vf(j + 1, k, l, 3) + R3V2Lbar = mom_sp_rsx_vf(${SF('')}$, 3) + R3V2Rbar = mom_sp_rsx_vf(${SF(' + 1')}$, 3) else PbwR3Lbar = 0._wp PbwR3Rbar = 0._wp @@ -2653,7 +2635,7 @@ contains if (viscous) then $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + Re_avg_rsx_vf(${SF('')}$, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do end if @@ -2710,14 +2692,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do if (bubbles_euler .and. (num_fluids > 1)) then ! Kill mass transport @ gas density - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end) = 0._wp + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end) = 0._wp end if ! Momentum flux. f = \rho u u + p I, q = \rho u, q_star = \xi * \rho*(s_star, v, w) @@ -2740,64 +2722,62 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(i)) + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + (1._wp & - & - dir_flg(dir_idx(i)))*vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) & - & + dir_flg(dir_idx(i))*(pres_L)) + xi_P*(rho_R*(vel_R(dir_idx(1)) & - & *vel_R(dir_idx(i)) + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + (1._wp & - & - dir_flg(dir_idx(i)))*vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) & - & + dir_flg(dir_idx(i))*(pres_R)) + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i)) & - & *pcorr + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(i) & + & ) + s_M*(xi_L*(dir_flg(dir_idx(i))*s_S + (1._wp - dir_flg(dir_idx(i))) & + & *vel_L(dir_idx(i))) - vel_L(dir_idx(i)))) + dir_flg(dir_idx(i))*(pres_L)) & + & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(i)) & + & + s_P*(xi_R*(dir_flg(dir_idx(i))*s_S + (1._wp - dir_flg(dir_idx(i))) & + & *vel_R(dir_idx(i))) - vel_R(dir_idx(i)))) + dir_flg(dir_idx(i))*(pres_R)) & + & + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr end do ! Energy flux. f = u*(E+p), q = E, q_star = \xi*E+(s-u)(\rho s_star + p/(s-u)) - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = xi_M*(vel_L(dir_idx(1))*(E_L + pres_L) + s_M*(xi_L*(E_L + (s_S & - & - vel_L(dir_idx(1)))*(rho_L*s_S + (pres_L)/(s_L - vel_L(dir_idx(1))))) - E_L)) & - & + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R) + s_P*(xi_R*(E_R + (s_S & - & - vel_R(dir_idx(1)))*(rho_R*s_S + (pres_R)/(s_R - vel_R(dir_idx(1))))) - E_R)) & - & + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = xi_M*(vel_L(dir_idx(1))*(E_L + pres_L) + s_M*(xi_L*(E_L + (s_S & + & - vel_L(dir_idx(1)))*(rho_L*s_S + (pres_L)/(s_L - vel_L(dir_idx(1))))) - E_L)) & + & + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R) + s_P*(xi_R*(E_R + (s_S - vel_R(dir_idx(1)) & + & )*(rho_R*s_S + (pres_R)/(s_R - vel_R(dir_idx(1))))) - E_R)) + (s_M/s_L)*(s_P/s_R) & + & *pcorr*s_S ! Volume fraction flux $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do ! Advection velocity source: interface velocity for volume fraction transport $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, & - & dir_idx(i)) = xi_M*(vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *s_M*xi_L_m1) + xi_P*(vel_R(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *s_P*xi_R_m1) + vel_src_rsx_vf(${SF('')}$, & + & dir_idx(i)) = xi_M*(vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*s_M*xi_L_m1) & + & + xi_P*(vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*s_P*xi_R_m1) ! IF ( (model_eqns == 4) .or. (num_fluids==1) ) vel_src_rs_vf(dir_idx(i))%sf(j,k,l) = 0._wp end do - flux_src_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = vel_src_rsx_vf(${SF('')}$, dir_idx(1)) ! Add advection flux for bubble variables $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%bub%beg, eqn_idx%bub%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*nbub_L*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*nbub_R*qR_prim_rs${XYZ}$_vf(j & - & + 1, k, l, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*nbub_L*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & + & + xi_P*nbub_R*qR_prim_rsx_vf(${SF(' + 1')}$, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do if (qbmm) then - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%bub%beg) = xi_M*nbub_L*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & - & + xi_P*nbub_R*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%bub%beg) = xi_M*nbub_L*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & + & + xi_P*nbub_R*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end if if (adv_n) then - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%n) = xi_M*nbub_L*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & - & + xi_P*nbub_R*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%n) = xi_M*nbub_L*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & + & + xi_P*nbub_R*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end if ! Geometrical source flux for cylindrical coordinates @@ -2806,20 +2786,20 @@ contains ! Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%E - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & + & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & + & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do end if #:endif @@ -2827,18 +2807,17 @@ contains if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%mom%end) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg + 1) + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & + & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif end do @@ -2856,9 +2835,9 @@ contains & vel_L_tmp, vel_R_tmp, Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR, & & tau_e_L, tau_e_R, xi_field_L, xi_field_R, Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2, G_L, & & G_R]', copyin='[is1, is2, is3]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end vel_L_rms = 0._wp; vel_R_rms = 0._wp rho_L = 0._wp; rho_R = 0._wp gamma_L = 0._wp; gamma_R = 0._wp @@ -2868,55 +2847,55 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + i) - vel_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%cont%end + i) + vel_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%cont%end + i) + vel_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%cont%end + i) vel_L_rms = vel_L_rms + vel_L(i)**2._wp vel_R_rms = vel_R_rms + vel_R(i)**2._wp end do - pres_L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E) - pres_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E) + pres_L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) + pres_R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) ! Change this by splitting it into the cases present in the bubbles_euler if (mpp_lim) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, i) = max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, l, i)) - qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) = min(max(0._wp, qL_prim_rs${XYZ}$_vf(j, k, & - & l, eqn_idx%E + i)), 1._wp) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) = max(0._wp, qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) = min(max(0._wp, & - & qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)), 1._wp) - alpha_L_sum = alpha_L_sum + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R_sum = alpha_R_sum + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + qL_prim_rsx_vf(${SF('')}$, i) = max(0._wp, qL_prim_rsx_vf(${SF('')}$, i)) + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) = min(max(0._wp, qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%E + i)), 1._wp) + qR_prim_rsx_vf(${SF(' + 1')}$, i) = max(0._wp, qR_prim_rsx_vf(${SF(' + 1')}$, i)) + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) = min(max(0._wp, & + & qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)), 1._wp) + alpha_L_sum = alpha_L_sum + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R_sum = alpha_R_sum + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) = qL_prim_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E + i)/max(alpha_L_sum, sgm_eps) - qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & eqn_idx%E + i)/max(alpha_R_sum, sgm_eps) + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) = qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%E + i)/max(alpha_L_sum, sgm_eps) + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) = qR_prim_rsx_vf(${SF(' + 1')}$, & + & eqn_idx%E + i)/max(alpha_R_sum, sgm_eps) end do end if $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_L = rho_L + qL_prim_rs${XYZ}$_vf(j, k, l, i) - gamma_L = gamma_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_L = pi_inf_L + qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i)*pi_infs(i) - qv_L = qv_L + qL_prim_rs${XYZ}$_vf(j, k, l, i)*qvs(i) - - rho_R = rho_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - gamma_R = gamma_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*gammas(i) - pi_inf_R = pi_inf_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i)*pi_infs(i) - qv_R = qv_R + qR_prim_rs${XYZ}$_vf(j + 1, k, l, i)*qvs(i) + rho_L = rho_L + qL_prim_rsx_vf(${SF('')}$, i) + gamma_L = gamma_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*gammas(i) + pi_inf_L = pi_inf_L + qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i)*pi_infs(i) + qv_L = qv_L + qL_prim_rsx_vf(${SF('')}$, i)*qvs(i) + + rho_R = rho_R + qR_prim_rsx_vf(${SF(' + 1')}$, i) + gamma_R = gamma_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*gammas(i) + pi_inf_R = pi_inf_R + qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i)*pi_infs(i) + qv_R = qv_R + qR_prim_rsx_vf(${SF(' + 1')}$, i)*qvs(i) end do Re_max = 0 @@ -2944,8 +2923,8 @@ contains c_sum_Yi_Phi = 0.0_wp $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%species%beg, eqn_idx%species%end - Ys_L(i - eqn_idx%species%beg + 1) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Ys_R(i - eqn_idx%species%beg + 1) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + Ys_L(i - eqn_idx%species%beg + 1) = qL_prim_rsx_vf(${SF('')}$, i) + Ys_R(i - eqn_idx%species%beg + 1) = qR_prim_rsx_vf(${SF(' + 1')}$, i) end do call get_mixture_molecular_weight(Ys_L, MW_L) @@ -3005,8 +2984,8 @@ contains if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%stress%beg - 1 + i) + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) end do G_L = 0._wp G_R = 0._wp @@ -3034,8 +3013,8 @@ contains if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - xi_field_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%xi%beg - 1 + i) - xi_field_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%xi%beg - 1 + i) + xi_field_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%xi%beg - 1 + i) + xi_field_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%xi%beg - 1 + i) end do G_L = 0._wp G_R = 0._wp @@ -3047,13 +3026,13 @@ contains end do ! Elastic contribution to energy if G large enough if (G_L > verysmall .and. G_R > verysmall) then - E_L = E_L + G_L*qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%xi%end + 1) - E_R = E_R + G_R*qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%xi%end + 1) + E_L = E_L + G_L*qL_prim_rsx_vf(${SF('')}$, eqn_idx%xi%end + 1) + E_R = E_R + G_R*qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%xi%end + 1) end if $:GPU_LOOP(parallelism='[seq]') do i = 1, b_size - 1 - tau_e_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%stress%beg - 1 + i) - tau_e_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%stress%beg - 1 + i) + tau_e_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%stress%beg - 1 + i) + tau_e_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%stress%beg - 1 + i) end do end if @@ -3079,7 +3058,7 @@ contains end if $:GPU_LOOP(parallelism='[seq]') do i = 1, 2 - Re_avg_rs${XYZ}$_vf(j, k, l, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) + Re_avg_rsx_vf(${SF('')}$, i) = 2._wp/(1._wp/Re_L(i) + 1._wp/Re_R(i)) end do end if @@ -3153,33 +3132,32 @@ contains ! COMPUTING THE HLLC FLUXES MASS FLUX. $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%cont%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do ! MOMENTUM FLUX. f = \rho u u - \sigma, q = \rho u, q_star = \xi * \rho*(s_star, v, w) identity: ! xi*(dir_flg*s_S+(1-dir_flg)*u_i)-u_i = (dir_flg*s_L/R+(1-dir_flg)*u_i)*xi_m1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(i)) + s_M*(dir_flg(dir_idx(i))*s_L + (1._wp & - & - dir_flg(dir_idx(i)))*vel_L(dir_idx(i)))*xi_L_m1) + dir_flg(dir_idx(i)) & - & *(pres_L)) + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(i)) & - & + s_P*(dir_flg(dir_idx(i))*s_R + (1._wp - dir_flg(dir_idx(i))) & - & *vel_R(dir_idx(i)))*xi_R_m1) + dir_flg(dir_idx(i))*(pres_R)) + (s_M/s_L) & - & *(s_P/s_R)*dir_flg(dir_idx(i))*pcorr + flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) = xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(i) & + & ) + s_M*(dir_flg(dir_idx(i))*s_L + (1._wp - dir_flg(dir_idx(i))) & + & *vel_L(dir_idx(i)))*xi_L_m1) + dir_flg(dir_idx(i))*(pres_L)) & + & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(i)) + s_P*(dir_flg(dir_idx(i)) & + & *s_R + (1._wp - dir_flg(dir_idx(i)))*vel_R(dir_idx(i)))*xi_R_m1) & + & + dir_flg(dir_idx(i))*(pres_R)) + (s_M/s_L)*(s_P/s_R)*dir_flg(dir_idx(i))*pcorr end do ! ENERGY FLUX. f = u*(E-\sigma), q = E, q_star = \xi*E+(s-u)(\rho s_star - \sigma/(s-u)) ! xi*(E+expr)-E = E*xi_m1 + xi*expr avoids E*(xi-1) cancellation - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%E) = xi_M*(vel_L(dir_idx(1))*(E_L + pres_L) + s_M*(E_L*xi_L_m1 & - & + xi_L*(s_S - vel_L(dir_idx(1)))*(rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1))) & - & ))) + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R) + s_P*(E_R*xi_R_m1 + xi_R*(s_S & - & - vel_R(dir_idx(1)))*(rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1)))))) & - & + (s_M/s_L)*(s_P/s_R)*pcorr*s_S + flux_rsx_vf(${SF('')}$, & + & eqn_idx%E) = xi_M*(vel_L(dir_idx(1))*(E_L + pres_L) + s_M*(E_L*xi_L_m1 + xi_L*(s_S & + & - vel_L(dir_idx(1)))*(rho_L*s_S + pres_L/(s_L - vel_L(dir_idx(1)))))) & + & + xi_P*(vel_R(dir_idx(1))*(E_R + pres_R) + s_P*(E_R*xi_R_m1 + xi_R*(s_S & + & - vel_R(dir_idx(1)))*(rho_R*s_S + pres_R/(s_R - vel_R(dir_idx(1)))))) + (s_M/s_L) & + & *(s_P/s_R)*pcorr*s_S ! ELASTICITY. Elastic shear stress additions for the momentum and energy flux if (elasticity) then @@ -3187,9 +3165,9 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims ! MOMENTUM ELASTIC FLUX. - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + dir_idx(i)) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(i)) - xi_M*tau_e_L(dir_idx_tau(i)) & - & - xi_P*tau_e_R(dir_idx_tau(i)) + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end + dir_idx(i)) = flux_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(i)) - xi_M*tau_e_L(dir_idx_tau(i)) & + & - xi_P*tau_e_R(dir_idx_tau(i)) ! ENERGY ELASTIC FLUX. flux_ene_e = flux_ene_e - xi_M*(vel_L(dir_idx(i))*tau_e_L(dir_idx_tau(i)) & & + s_M*(xi_L*((s_S - vel_L(i))*(tau_e_L(dir_idx_tau(i)) & @@ -3197,70 +3175,66 @@ contains & *tau_e_R(dir_idx_tau(i)) + s_P*(xi_R*((s_S - vel_R(i)) & & *(tau_e_R(dir_idx_tau(i))/(s_R - vel_R(i)))))) end do - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%E) = flux_rs${XYZ}$_vf(j, k, l, eqn_idx%E) + flux_ene_e + flux_rsx_vf(${SF('')}$, eqn_idx%E) = flux_rsx_vf(${SF('')}$, eqn_idx%E) + flux_ene_e end if ! HYPOELASTIC STRESS EVOLUTION FLUX. if (hypoelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%stress%end - eqn_idx%stress%beg + 1 - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S)) & - & *(s_L*rho_L*tau_e_L(i) - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) & - & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*tau_e_R(i) & - & - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%stress%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*tau_e_L(i) & + & - rho_L*vel_L(dir_idx(1))*tau_e_L(i)) + xi_P*(s_S/(s_R - s_S)) & + & *(s_R*rho_R*tau_e_R(i) - rho_R*vel_R(dir_idx(1))*tau_e_R(i)) end do end if ! VOLUME FRACTION FLUX. $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_rs${XYZ}$_vf(j, k, l, i) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, & - & k, l, i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, i) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & i)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, & + & i)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end do ! VOLUME FRACTION SOURCE FLUX. $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_src_rs${XYZ}$_vf(j, k, l, & - & dir_idx(i)) = xi_M*(vel_L(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *s_M*xi_L_m1) + xi_P*(vel_R(dir_idx(i)) + dir_flg(dir_idx(i)) & - & *s_P*xi_R_m1) + vel_src_rsx_vf(${SF('')}$, & + & dir_idx(i)) = xi_M*(vel_L(dir_idx(i)) + dir_flg(dir_idx(i))*s_M*xi_L_m1) & + & + xi_P*(vel_R(dir_idx(i)) + dir_flg(dir_idx(i))*s_P*xi_R_m1) end do ! COLOR FUNCTION FLUX if (surface_tension) then - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%c) = xi_M*qL_prim_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%c)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & - & + xi_P*qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & eqn_idx%c)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_rsx_vf(${SF('')}$, eqn_idx%c) = xi_M*qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%c)*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & + & + xi_P*qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%c)*(vel_R(dir_idx(1)) + s_P*xi_R_m1) end if ! Hyperelastic reference map flux for material deformation tracking if (hyperelasticity) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%xi%beg - 1 + i) = xi_M*(s_S/(s_L - s_S)) & - & *(s_L*rho_L*xi_field_L(i) - rho_L*vel_L(dir_idx(1))*xi_field_L(i)) & - & + xi_P*(s_S/(s_R - s_S))*(s_R*rho_R*xi_field_R(i) & - & - rho_R*vel_R(dir_idx(1))*xi_field_R(i)) + flux_rsx_vf(${SF('')}$, & + & eqn_idx%xi%beg - 1 + i) = xi_M*(s_S/(s_L - s_S))*(s_L*rho_L*xi_field_L(i) & + & - rho_L*vel_L(dir_idx(1))*xi_field_L(i)) + xi_P*(s_S/(s_R - s_S)) & + & *(s_R*rho_R*xi_field_R(i) - rho_R*vel_R(dir_idx(1))*xi_field_R(i)) end do end if - flux_src_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg) = vel_src_rs${XYZ}$_vf(j, k, l, dir_idx(1)) + flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = vel_src_rsx_vf(${SF('')}$, dir_idx(1)) if (chemistry) then $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%species%beg, eqn_idx%species%end - Y_L = qL_prim_rs${XYZ}$_vf(j, k, l, i) - Y_R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + Y_L = qL_prim_rsx_vf(${SF('')}$, i) + Y_R = qR_prim_rsx_vf(${SF(' + 1')}$, i) - flux_rs${XYZ}$_vf(j, k, l, & - & i) = xi_M*rho_L*Y_L*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & - & + xi_P*rho_R*Y_R*(vel_R(dir_idx(1)) + s_P*xi_R_m1) - flux_src_rs${XYZ}$_vf(j, k, l, i) = 0.0_wp + flux_rsx_vf(${SF('')}$, & + & i) = xi_M*rho_L*Y_L*(vel_L(dir_idx(1)) + s_M*xi_L_m1) & + & + xi_P*rho_R*Y_R*(vel_R(dir_idx(1)) + s_P*xi_R_m1) + flux_src_rsx_vf(${SF('')}$, i) = 0.0_wp end do end if @@ -3270,20 +3244,20 @@ contains ! Substituting the advective flux into the inviscid geometrical source flux $:GPU_LOOP(parallelism='[seq]') do i = 1, eqn_idx%E - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = flux_rs${XYZ}$_vf(j, k, l, i) + flux_gsrc_rsx_vf(${SF('')}$, i) = flux_rsx_vf(${SF('')}$, i) end do ! Recalculating the radial momentum geometric source flux - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%cont%end + dir_idx(1)) = xi_M*(rho_L*(vel_L(dir_idx(1)) & + & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & + & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + & + xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) ! Geometrical source of the void fraction(s) is zero $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do end if #:endif @@ -3291,18 +3265,17 @@ contains if (grid_geometry == 3) then $:GPU_LOOP(parallelism='[seq]') do i = 1, sys_size - flux_gsrc_rs${XYZ}$_vf(j, k, l, i) = 0._wp + flux_gsrc_rsx_vf(${SF('')}$, i) = 0._wp end do - flux_gsrc_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1)) & - & *vel_L(dir_idx(1)) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & - & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & - & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp & - & - dir_flg(dir_idx(1)))*vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) - flux_gsrc_rs${XYZ}$_vf(j, k, l, eqn_idx%mom%end) = flux_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%mom%beg + 1) + flux_gsrc_rsx_vf(${SF('')}$, & + & eqn_idx%mom%beg + 1) = -xi_M*(rho_L*(vel_L(dir_idx(1))*vel_L(dir_idx(1) & + & ) + s_M*(xi_L*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_L(dir_idx(1))) - vel_L(dir_idx(1))))) & + & - xi_P*(rho_R*(vel_R(dir_idx(1))*vel_R(dir_idx(1)) & + & + s_P*(xi_R*(dir_flg(dir_idx(1))*s_S + (1._wp - dir_flg(dir_idx(1))) & + & *vel_R(dir_idx(1))) - vel_R(dir_idx(1))))) + flux_gsrc_rsx_vf(${SF('')}$, eqn_idx%mom%end) = flux_rsx_vf(${SF('')}$, eqn_idx%mom%beg + 1) end if #:endif end do @@ -3314,7 +3287,7 @@ contains #:endfor ! Computing HLLC flux and source flux for Euler system of equations - if (viscous .or. dummy) then + if (viscous) then if (weno_Re_flux) then call s_compute_viscous_source_flux(qL_prim_vf(eqn_idx%mom%beg:eqn_idx%mom%end), & & dqL_prim_dx_vf(eqn_idx%mom%beg:eqn_idx%mom%end), & @@ -3339,8 +3312,7 @@ contains end if if (surface_tension) then - call s_compute_capillary_source_flux(vel_src_rsx_vf, vel_src_rsy_vf, vel_src_rsz_vf, flux_src_vf, norm_dir, isx, isy, & - & isz) + call s_compute_capillary_source_flux(vel_src_rsx_vf, flux_src_vf, norm_dir, isx, isy, isz) end if call s_finalize_riemann_solver(flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir) @@ -3348,14 +3320,12 @@ contains end subroutine s_hllc_riemann_solver !> HLLD Riemann solver for MHD, Miyoshi & Kusano JCP (2005) - subroutine s_hlld_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & - - & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + subroutine s_hlld_riemann_solver(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, & - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qL_prim_rsy_vf, & - & qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf + & dqL_prim_dz_vf, qL_prim_vf, qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, qR_prim_vf, q_prim_vf, & + & flux_vf, flux_src_vf, flux_gsrc_vf, norm_dir, ix, iy, iz) + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: dqL_prim_dx_vf, dqR_prim_dx_vf, dqL_prim_dy_vf, & & dqR_prim_dy_vf, dqL_prim_dz_vf, dqR_prim_dz_vf @@ -3394,13 +3364,17 @@ contains real(wp) :: v_double, w_double, By_double, Bz_double, E_doubleL, E_doubleR, E_double integer :: i, j, k, l - call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & - & dqL_prim_dy_vf, dqL_prim_dz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, norm_dir, ix, iy, iz) + call s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, dqL_prim_dy_vf, dqL_prim_dz_vf, & + & qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz) call s_initialize_riemann_solver(flux_src_vf, norm_dir) - #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + #:for NORM_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1', 'is2', 'is3'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2', 'is1', 'is3'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3', 'is2', 'is1')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (norm_dir == ${NORM_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres, E, & & H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, & @@ -3408,47 +3382,46 @@ contains & pTot_L, pTot_R, p_star, rhoL_star, rhoR_star, E_starL, E_starR, sqrt_rhoL_star, & & sqrt_rhoR_star, denom_ds, sign_Bx, vL_star, vR_star, wL_star, wR_star, v_double, w_double, & & By_double, Bz_double, E_doubleL, E_doubleR, E_double]', copyin='[norm_dir]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end ! (1) Extract the left/right primitive states do i = 1, eqn_idx%cont%end - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + alpha_rho_L(i) = qL_prim_rsx_vf(${SF('')}$, i) + alpha_rho_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, i) end do ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic do i = 1, num_vels - vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + dir_idx(i)) - vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%cont%end + dir_idx(i)) + vel%L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%cont%end + dir_idx(i)) + vel%R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%cont%end + dir_idx(i)) end do vel_rms%L = sum(vel%L**2._wp) vel_rms%R = sum(vel%R**2._wp) do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E + i) + alpha_L(i) = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E + i) + alpha_R(i) = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E + i) end do - pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%E) - pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%E) + pres%L = qL_prim_rsx_vf(${SF('')}$, eqn_idx%E) + pres%R = qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%E) ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic if (mhd) then if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated - B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg), qL_prim_rs${XYZ}$_vf(j, k, l, & - & eqn_idx%B%beg + 1)] - B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, & - & l, eqn_idx%B%beg + 1)] + B%L = [Bx0, qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg), qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%B%beg + 1)] + B%R = [Bx0, qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg), qR_prim_rsx_vf(${SF(' + 1')}$, & + & eqn_idx%B%beg + 1)] else ! 2D/3D: Bx, By, Bz as variables - B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + dir_idx(1) - 1), qL_prim_rs${XYZ}$_vf(j, & - & k, l, eqn_idx%B%beg + dir_idx(2) - 1), qL_prim_rs${XYZ}$_vf(j, & - & k, l, eqn_idx%B%beg + dir_idx(3) - 1)] - B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, eqn_idx%B%beg + dir_idx(1) - 1), & - & qR_prim_rs${XYZ}$_vf(j + 1, k, l, & - & eqn_idx%B%beg + dir_idx(2) - 1), qR_prim_rs${XYZ}$_vf(j + 1, k, & - & l, eqn_idx%B%beg + dir_idx(3) - 1)] + B%L = [qL_prim_rsx_vf(${SF('')}$, eqn_idx%B%beg + dir_idx(1) - 1), qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%B%beg + dir_idx(2) - 1), qL_prim_rsx_vf(${SF('')}$, & + & eqn_idx%B%beg + dir_idx(3) - 1)] + B%R = [qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + dir_idx(1) - 1), & + & qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + dir_idx(2) - 1), & + & qR_prim_rsx_vf(${SF(' + 1')}$, eqn_idx%B%beg + dir_idx(3) - 1)] end if end if @@ -3567,28 +3540,29 @@ contains end if ! (12) Write HLLD flux to output arrays - flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component + flux_rsx_vf(${SF('')}$, 1) = F_hlld(1) ! TODO multi-component ! Momentum - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + dir_idx(1)) = F_hlld(2) - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + dir_idx(2)) = F_hlld(3) - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%cont%end + dir_idx(3)) = F_hlld(4) + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end + dir_idx(1)) = F_hlld(2) + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end + dir_idx(2)) = F_hlld(3) + flux_rsx_vf(${SF('')}$, eqn_idx%cont%end + dir_idx(3)) = F_hlld(4) ! Magnetic field if (n == 0) then - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg) = F_hlld(5) - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + 1) = F_hlld(6) + flux_rsx_vf(${SF('')}$, eqn_idx%B%beg) = F_hlld(5) + flux_rsx_vf(${SF('')}$, eqn_idx%B%beg + 1) = F_hlld(6) else - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + dir_idx(2) - 1) = F_hlld(5) - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%B%beg + dir_idx(3) - 1) = F_hlld(6) + flux_rsx_vf(${SF('')}$, eqn_idx%B%beg + dir_idx(1) - 1) = 0._wp + flux_rsx_vf(${SF('')}$, eqn_idx%B%beg + dir_idx(2) - 1) = F_hlld(5) + flux_rsx_vf(${SF('')}$, eqn_idx%B%beg + dir_idx(3) - 1) = F_hlld(6) end if ! Energy - flux_rs${XYZ}$_vf(j, k, l, eqn_idx%E) = F_hlld(7) + flux_rsx_vf(${SF('')}$, eqn_idx%E) = F_hlld(7) ! Volume fractions $:GPU_LOOP(parallelism='[seq]') do i = eqn_idx%adv%beg, eqn_idx%adv%end - flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) + flux_rsx_vf(${SF('')}$, i) = 0._wp ! TODO multi-component (zero for now) end do - flux_src_rs${XYZ}$_vf(j, k, l, eqn_idx%adv%beg) = 0._wp + flux_src_rsx_vf(${SF('')}$, eqn_idx%adv%beg) = 0._wp end do end do end do @@ -3632,65 +3606,26 @@ contains is1%beg = -1; is2%beg = 0; is3%beg = 0 is1%end = m; is2%end = n; is3%end = p - @:ALLOCATE(flux_rsx_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:sys_size)) - @:ALLOCATE(flux_gsrc_rsx_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:sys_size)) - @:ALLOCATE(flux_src_rsx_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, eqn_idx%adv%beg:sys_size)) - @:ALLOCATE(vel_src_rsx_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:num_vels)) - if (qbmm) then - @:ALLOCATE(mom_sp_rsx_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4)) - end if - - if (viscous) then - @:ALLOCATE(Re_avg_rsx_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:2)) - end if - - if (n == 0) return - - is1%beg = -1; is2%beg = 0; is3%beg = 0 - is1%end = n; is2%end = m; is3%end = p - - @:ALLOCATE(flux_rsy_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:sys_size)) - @:ALLOCATE(flux_gsrc_rsy_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:sys_size)) - @:ALLOCATE(flux_src_rsy_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, eqn_idx%adv%beg:sys_size)) - @:ALLOCATE(vel_src_rsy_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:num_vels)) - + @:ALLOCATE(flux_rsx_vf(-1:m, -1:n, -1:p, 1:sys_size)) + @:ALLOCATE(flux_gsrc_rsx_vf(-1:m, -1:n, -1:p, 1:sys_size)) + @:ALLOCATE(flux_src_rsx_vf(-1:m, -1:n, -1:p, eqn_idx%adv%beg:sys_size)) + @:ALLOCATE(vel_src_rsx_vf(-1:m, -1:n, -1:p, 1:num_vels)) if (qbmm) then - @:ALLOCATE(mom_sp_rsy_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4)) + @:ALLOCATE(mom_sp_rsx_vf(-1:m+1, -1:n+1, -1:p+1, 1:4)) end if if (viscous) then - @:ALLOCATE(Re_avg_rsy_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:2)) - end if - - if (p == 0) return - - is1%beg = -1; is2%beg = 0; is3%beg = 0 - is1%end = p; is2%end = n; is3%end = m - - @:ALLOCATE(flux_rsz_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:sys_size)) - @:ALLOCATE(flux_gsrc_rsz_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:sys_size)) - @:ALLOCATE(flux_src_rsz_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, eqn_idx%adv%beg:sys_size)) - @:ALLOCATE(vel_src_rsz_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:num_vels)) - - if (qbmm) then - @:ALLOCATE(mom_sp_rsz_vf(is1%beg:is1%end + 1, is2%beg:is2%end, is3%beg:is3%end, 1:4)) - end if - - if (viscous) then - @:ALLOCATE(Re_avg_rsz_vf(is1%beg:is1%end, is2%beg:is2%end, is3%beg:is3%end, 1:2)) + @:ALLOCATE(Re_avg_rsx_vf(-1:m, -1:n, -1:p, 1:2)) end if end subroutine s_initialize_riemann_solvers_module !> Populate the left and right Riemann state variable buffers based on boundary conditions - subroutine s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & - - & dqL_prim_dy_vf, dqL_prim_dz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, & - & dqR_prim_dz_vf, norm_dir, ix, iy, iz) + subroutine s_populate_riemann_states_variables_buffers(qL_prim_rsx_vf, dqL_prim_dx_vf, & - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qL_prim_rsy_vf, & - & qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf + & dqL_prim_dy_vf, dqL_prim_dz_vf, qR_prim_rsx_vf, dqR_prim_dx_vf, dqR_prim_dy_vf, dqR_prim_dz_vf, norm_dir, ix, iy, iz) + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf type(scalar_field), allocatable, dimension(:), intent(inout) :: dqL_prim_dx_vf, dqR_prim_dx_vf, dqL_prim_dy_vf, & & dqR_prim_dy_vf, dqL_prim_dz_vf, dqR_prim_dz_vf @@ -3740,7 +3675,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) do i = eqn_idx%mom%beg, eqn_idx%mom%end do l = isz%beg, isz%end @@ -3789,7 +3724,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) do i = eqn_idx%mom%beg, eqn_idx%mom%end do l = isz%beg, isz%end @@ -3834,13 +3769,13 @@ contains do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end - qL_prim_rsy_vf(-1, k, l, i) = qR_prim_rsy_vf(0, k, l, i) + qL_prim_rsx_vf(k, -1, l, i) = qR_prim_rsx_vf(k, 0, l, i) end do end do end do $:END_GPU_PARALLEL_LOOP() - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) do i = eqn_idx%mom%beg, eqn_idx%mom%end do l = isz%beg, isz%end @@ -3881,13 +3816,13 @@ contains do i = 1, sys_size do l = is3%beg, is3%end do k = is2%beg, is2%end - qR_prim_rsy_vf(n + 1, k, l, i) = qL_prim_rsy_vf(n, k, l, i) + qR_prim_rsx_vf(k, n + 1, l, i) = qL_prim_rsx_vf(k, n, l, i) end do end do end do $:END_GPU_PARALLEL_LOOP() - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) do i = eqn_idx%mom%beg, eqn_idx%mom%end do l = isz%beg, isz%end @@ -3928,15 +3863,15 @@ contains if (bc_z%beg == BC_RIEMANN_EXTRAP) then ! Riemann state extrap. BC at beginning $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qL_prim_rsz_vf(-1, k, l, i) = qR_prim_rsz_vf(0, k, l, i) + do k = is2%beg, is2%end + do l = is3%beg, is3%end + qL_prim_rsx_vf(l, k, -1, i) = qR_prim_rsx_vf(l, k, 0, i) end do end do end do $:END_GPU_PARALLEL_LOOP() - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) do i = eqn_idx%mom%beg, eqn_idx%mom%end do k = isy%beg, isy%end @@ -3971,15 +3906,15 @@ contains $:GPU_PARALLEL_LOOP(collapse=3) do i = 1, sys_size - do l = is3%beg, is3%end - do k = is2%beg, is2%end - qR_prim_rsz_vf(p + 1, k, l, i) = qL_prim_rsz_vf(p, k, l, i) + do k = is2%beg, is2%end + do l = is3%beg, is3%end + qR_prim_rsx_vf(l, k, p + 1, i) = qL_prim_rsx_vf(l, k, p, i) end do end do end do $:END_GPU_PARALLEL_LOOP() - if (viscous .or. dummy) then + if (viscous) then $:GPU_PARALLEL_LOOP(collapse=3) do i = eqn_idx%mom%beg, eqn_idx%mom%end do k = isy%beg, isy%end @@ -4026,7 +3961,7 @@ contains ! Reshaping Inputted Data in x-direction if (norm_dir == 1) then - if (viscous .or. (surface_tension) .or. dummy) then + if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(collapse=4) do i = eqn_idx%mom%beg, eqn_idx%E do l = is3%beg, is3%end @@ -4072,7 +4007,7 @@ contains ! Reshaping Inputted Data in y-direction else if (norm_dir == 2) then - if (viscous .or. (surface_tension) .or. dummy) then + if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(collapse=4) do i = eqn_idx%mom%beg, eqn_idx%E do l = is3%beg, is3%end @@ -4108,7 +4043,7 @@ contains do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end + 1 - mom_sp_rsy_vf(j, k, l, i) = mom_sp(i)%sf(k, j, l) + mom_sp_rsx_vf(k, j, l, i) = mom_sp(i)%sf(k, j, l) end do end do end do @@ -4118,7 +4053,7 @@ contains ! Reshaping Inputted Data in z-direction else - if (viscous .or. (surface_tension) .or. dummy) then + if (viscous .or. (surface_tension)) then $:GPU_PARALLEL_LOOP(collapse=4) do i = eqn_idx%mom%beg, eqn_idx%E do j = is1%beg, is1%end @@ -4154,7 +4089,7 @@ contains do l = is3%beg, is3%end do k = is2%beg, is2%end do j = is1%beg, is1%end + 1 - mom_sp_rsz_vf(j, k, l, i) = mom_sp(i)%sf(l, k, j) + mom_sp_rsx_vf(l, k, j, i) = mom_sp(i)%sf(l, k, j) end do end do end do @@ -4247,14 +4182,14 @@ contains vel_src_int = vel_src_rsx_vf(j, k, l,1:num_dims) r_eff = y_cc(k) case (2) ! y-face (radial face in r_cyl direction) - Re_s = Re_avg_rsy_vf(k, j, l, 1) - Re_b = Re_avg_rsy_vf(k, j, l, 2) - vel_src_int = vel_src_rsy_vf(k, j, l,1:num_dims) + Re_s = Re_avg_rsx_vf(j, k, l, 1) + Re_b = Re_avg_rsx_vf(j, k, l, 2) + vel_src_int = vel_src_rsx_vf(j, k, l,1:num_dims) r_eff = y_cb(k) case (3) ! z-face (azimuthal face in theta_cyl direction) - Re_s = Re_avg_rsz_vf(l, k, j, 1) - Re_b = Re_avg_rsz_vf(l, k, j, 2) - vel_src_int = vel_src_rsz_vf(l, k, j,1:num_dims) + Re_s = Re_avg_rsx_vf(j, k, l, 1) + Re_b = Re_avg_rsx_vf(j, k, l, 2) + vel_src_int = vel_src_rsx_vf(j, k, l,1:num_dims) r_eff = y_cc(k) end select @@ -4416,16 +4351,16 @@ contains vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) end do else if (norm_dir == 2) then - Re_shear = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 1) - Re_bulk = Re_avg_rsy_vf(k_loop, j_loop, l_loop, 2) + Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) + Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsy_vf(k_loop, j_loop, l_loop, i_dim) + vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) end do else - Re_shear = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 1) - Re_bulk = Re_avg_rsz_vf(l_loop, k_loop, j_loop, 2) + Re_shear = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 1) + Re_bulk = Re_avg_rsx_vf(j_loop, k_loop, l_loop, 2) do i_dim = 1, num_dims - vel_src_at_interface(i_dim) = vel_src_rsz_vf(l_loop, k_loop, j_loop, i_dim) + vel_src_at_interface(i_dim) = vel_src_rsx_vf(j_loop, k_loop, l_loop, i_dim) end do end if @@ -4533,7 +4468,7 @@ contains do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end - flux_vf(i)%sf(k, j, l) = flux_rsy_vf(j, k, l, i) + flux_vf(i)%sf(k, j, l) = flux_rsx_vf(k, j, l, i) end do end do end do @@ -4546,7 +4481,7 @@ contains do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end - flux_gsrc_vf(i)%sf(k, j, l) = flux_gsrc_rsy_vf(j, k, l, i) + flux_gsrc_vf(i)%sf(k, j, l) = flux_gsrc_rsx_vf(k, j, l, i) end do end do end do @@ -4558,7 +4493,7 @@ contains do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end - flux_src_vf(eqn_idx%adv%beg)%sf(k, j, l) = flux_src_rsy_vf(j, k, l, eqn_idx%adv%beg) + flux_src_vf(eqn_idx%adv%beg)%sf(k, j, l) = flux_src_rsx_vf(k, j, l, eqn_idx%adv%beg) end do end do end do @@ -4570,7 +4505,7 @@ contains do l = is3%beg, is3%end do j = is1%beg, is1%end do k = is2%beg, is2%end - flux_src_vf(i)%sf(k, j, l) = flux_src_rsy_vf(j, k, l, i) + flux_src_vf(i)%sf(k, j, l) = flux_src_rsx_vf(k, j, l, i) end do end do end do @@ -4584,7 +4519,7 @@ contains do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end - flux_vf(i)%sf(l, k, j) = flux_rsz_vf(j, k, l, i) + flux_vf(i)%sf(l, k, j) = flux_rsx_vf(l, k, j, i) end do end do end do @@ -4596,7 +4531,7 @@ contains do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end - flux_gsrc_vf(i)%sf(l, k, j) = flux_gsrc_rsz_vf(j, k, l, i) + flux_gsrc_vf(i)%sf(l, k, j) = flux_gsrc_rsx_vf(l, k, j, i) end do end do end do @@ -4608,7 +4543,7 @@ contains do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end - flux_src_vf(eqn_idx%adv%beg)%sf(l, k, j) = flux_src_rsz_vf(j, k, l, eqn_idx%adv%beg) + flux_src_vf(eqn_idx%adv%beg)%sf(l, k, j) = flux_src_rsx_vf(l, k, j, eqn_idx%adv%beg) end do end do end do @@ -4620,7 +4555,7 @@ contains do j = is1%beg, is1%end do k = is2%beg, is2%end do l = is3%beg, is3%end - flux_src_vf(i)%sf(l, k, j) = flux_src_rsz_vf(j, k, l, i) + flux_src_vf(i)%sf(l, k, j) = flux_src_rsx_vf(l, k, j, i) end do end do end do @@ -4681,32 +4616,6 @@ contains @:DEALLOCATE(mom_sp_rsx_vf) end if - if (n == 0) return - - if (viscous) then - @:DEALLOCATE(Re_avg_rsy_vf) - end if - @:DEALLOCATE(vel_src_rsy_vf) - @:DEALLOCATE(flux_rsy_vf) - @:DEALLOCATE(flux_src_rsy_vf) - @:DEALLOCATE(flux_gsrc_rsy_vf) - if (qbmm) then - @:DEALLOCATE(mom_sp_rsy_vf) - end if - - if (p == 0) return - - if (viscous) then - @:DEALLOCATE(Re_avg_rsz_vf) - end if - @:DEALLOCATE(vel_src_rsz_vf) - @:DEALLOCATE(flux_rsz_vf) - @:DEALLOCATE(flux_src_rsz_vf) - @:DEALLOCATE(flux_gsrc_rsz_vf) - if (qbmm) then - @:DEALLOCATE(mom_sp_rsz_vf) - end if - end subroutine s_finalize_riemann_solvers_module end module m_riemann_solvers diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 65cc825e40..95e68a7c80 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -933,10 +933,10 @@ contains ! Computation of parameters, allocation of memory, association of pointers, and/or execution of any other tasks that are ! needed to properly configure the modules. The preparations below DO DEPEND on the grid being complete. - if (igr .or. dummy) then + if (igr) then call s_initialize_igr_module() end if - if (.not. igr .or. dummy) then + if (.not. igr) then if (recon_type == WENO_TYPE) then call s_initialize_weno_module() else if (recon_type == MUSCL_TYPE) then diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 9a4d0c6425..0b7caf7a47 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -31,9 +31,9 @@ module m_surface_tension !> @name cell boundary reconstructed gradient components and magnitude !> @{ - real(wp), allocatable, dimension(:,:,:,:) :: gL_x, gR_x, gL_y, gR_y, gL_z, gR_z + real(wp), allocatable, dimension(:,:,:,:) :: gL_x, gR_x !> @} - $:GPU_DECLARE(create='[gL_x, gR_x, gL_y, gR_y, gL_z, gR_z]') + $:GPU_DECLARE(create='[gL_x, gR_x]') type(int_bounds_info) :: is1, is2, is3, iv $:GPU_DECLARE(create='[is1, is2, is3, iv]') @@ -55,24 +55,12 @@ contains @:ALLOCATE(gL_x(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end, num_dims + 1)) @:ALLOCATE(gR_x(idwbuff(1)%beg:idwbuff(1)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(3)%beg:idwbuff(3)%end, num_dims + 1)) - @:ALLOCATE(gL_y(idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, idwbuff(3)%beg:idwbuff(3)%end, num_dims + 1)) - @:ALLOCATE(gR_y(idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, idwbuff(3)%beg:idwbuff(3)%end, num_dims + 1)) - - if (p > 0) then - @:ALLOCATE(gL_z(idwbuff(3)%beg:idwbuff(3)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, & - & num_dims + 1)) - @:ALLOCATE(gR_z(idwbuff(3)%beg:idwbuff(3)%end, idwbuff(2)%beg:idwbuff(2)%end, idwbuff(1)%beg:idwbuff(1)%end, & - & num_dims + 1)) - end if - end subroutine s_initialize_surface_tension_module !> Compute the capillary source flux from reconstructed color-gradient fields - subroutine s_compute_capillary_source_flux(vSrc_rsx_vf, vSrc_rsy_vf, vSrc_rsz_vf, flux_src_vf, id, isx, isy, isz) + subroutine s_compute_capillary_source_flux(vSrc_rsx_vf, flux_src_vf, id, isx, isy, isz) - real(wp), dimension(-1:,0:,0:,1:), intent(in) :: vSrc_rsx_vf - real(wp), dimension(-1:,0:,0:,1:), intent(in) :: vSrc_rsy_vf - real(wp), dimension(-1:,0:,0:,1:), intent(in) :: vSrc_rsz_vf + real(wp), dimension(-1:,-1:,-1:,1:), intent(in) :: vSrc_rsx_vf type(scalar_field), dimension(sys_size), intent(inout) :: flux_src_vf integer, intent(in) :: id type(int_bounds_info), intent(in) :: isx, isy, isz @@ -134,18 +122,18 @@ contains do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end - w1L = gL_y(k, j, l, 1) - w2L = gL_y(k, j, l, 2) + w1L = gL_x(j, k, l, 1) + w2L = gL_x(j, k, l, 2) w3L = 0._wp - if (p > 0) w3L = gL_y(k, j, l, 3) + if (p > 0) w3L = gL_x(j, k, l, 3) - w1R = gR_y(k + 1, j, l, 1) - w2R = gR_y(k + 1, j, l, 2) + w1R = gR_x(j, k + 1, l, 1) + w2R = gR_x(j, k + 1, l, 2) w3R = 0._wp - if (p > 0) w3R = gR_y(k + 1, j, l, 3) + if (p > 0) w3R = gR_x(j, k + 1, l, 3) - normWL = gL_y(k, j, l, num_dims + 1) - normWR = gR_y(k + 1, j, l, num_dims + 1) + normWL = gL_x(j, k, l, num_dims + 1) + normWR = gR_x(j, k + 1, l, num_dims + 1) w1 = (w1L + w1R)/2._wp w2 = (w2L + w2R)/2._wp @@ -160,11 +148,11 @@ contains & k, l) + Omega(2, i) flux_src_vf(eqn_idx%E)%sf(j, k, l) = flux_src_vf(eqn_idx%E)%sf(j, k, l) + Omega(2, & - & i)*vSrc_rsy_vf(k, j, l, i) + & i)*vSrc_rsx_vf(j, k, l, i) end do flux_src_vf(eqn_idx%E)%sf(j, k, l) = flux_src_vf(eqn_idx%E)%sf(j, k, & - & l) + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) + & l) + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 2) end if end do end do @@ -177,18 +165,18 @@ contains do l = isz%beg, isz%end do k = isy%beg, isy%end do j = isx%beg, isx%end - w1L = gL_z(l, k, j, 1) - w2L = gL_z(l, k, j, 2) + w1L = gL_x(j, k, l, 1) + w2L = gL_x(j, k, l, 2) w3L = 0._wp - if (p > 0) w3L = gL_z(l, k, j, 3) + if (p > 0) w3L = gL_x(j, k, l, 3) - w1R = gR_z(l + 1, k, j, 1) - w2R = gR_z(l + 1, k, j, 2) + w1R = gR_x(j, k, l + 1, 1) + w2R = gR_x(j, k, l + 1, 2) w3R = 0._wp - if (p > 0) w3R = gR_z(l + 1, k, j, 3) + if (p > 0) w3R = gR_x(j, k, l + 1, 3) - normWL = gL_z(l, k, j, num_dims + 1) - normWR = gR_z(l + 1, k, j, num_dims + 1) + normWL = gL_x(j, k, l, num_dims + 1) + normWR = gR_x(j, k, l + 1, num_dims + 1) w1 = (w1L + w1R)/2._wp w2 = (w2L + w2R)/2._wp @@ -203,11 +191,11 @@ contains & k, l) + Omega(3, i) flux_src_vf(eqn_idx%E)%sf(j, k, l) = flux_src_vf(eqn_idx%E)%sf(j, k, l) + Omega(3, & - & i)*vSrc_rsz_vf(l, k, j, i) + & i)*vSrc_rsx_vf(j, k, l, i) end do flux_src_vf(eqn_idx%E)%sf(j, k, l) = flux_src_vf(eqn_idx%E)%sf(j, k, & - & l) + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsz_vf(l, k, j, 3) + & l) + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsx_vf(j, k, l, 3) end if end do end do @@ -289,84 +277,33 @@ contains iv%beg = 1; iv%end = num_dims + 1 ! reconstruct gradient components at cell boundaries - do i = 1, num_dims - call s_reconstruct_cell_boundary_values_capillary(c_divs, gL_x, gL_y, gL_z, gR_x, gR_y, gR_z, i) - end do + call s_reconstruct_cell_boundary_values_capillary(c_divs, gL_x, gR_x, i) end subroutine s_get_capillary !> Reconstruct left and right cell-boundary values of capillary variables - subroutine s_reconstruct_cell_boundary_values_capillary(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, norm_dir) - - type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,iv%beg:), intent(out) :: vL_x, vL_y, vL_z - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,iv%beg:), intent(out) :: vR_x, vR_y, vR_z - integer, intent(in) :: norm_dir - integer :: recon_dir !< Coordinate direction of the reconstruction - integer :: i, j, k, l - - #:for SCHEME, TYPE in [('weno', 'WENO_TYPE'),('muscl', 'MUSCL_TYPE')] - if (recon_type == ${TYPE}$ .or. dummy) then - ! Reconstruction in s1-direction - - if (norm_dir == 1) then - is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) - recon_dir = 1; is1%beg = is1%beg + ${SCHEME}$_polyn - is1%end = is1%end - ${SCHEME}$_polyn - else if (norm_dir == 2) then - is1 = idwbuff(2); is2 = idwbuff(1); is3 = idwbuff(3) - recon_dir = 2; is1%beg = is1%beg + ${SCHEME}$_polyn - is1%end = is1%end - ${SCHEME}$_polyn - else - is1 = idwbuff(3); is2 = idwbuff(2); is3 = idwbuff(1) - recon_dir = 3; is1%beg = is1%beg + ${SCHEME}$_polyn - is1%end = is1%end - ${SCHEME}$_polyn - end if - - $:GPU_UPDATE(device='[is1, is2, is3, iv]') - end if - #:endfor - - if (recon_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do + subroutine s_reconstruct_cell_boundary_values_capillary(v_vf, vL_x, vR_x, norm_dir) + + type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,iv%beg:), intent(out) :: vL_x + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,iv%beg:), intent(out) :: vR_x + integer, intent(in) :: norm_dir + integer :: i, j, k, l + + $:GPU_UPDATE(device='[iv]') + + $:GPU_PARALLEL_LOOP(collapse=4, private='[i, j, k, l]') + do i = iv%beg, iv%end + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do end do - $:END_GPU_PARALLEL_LOOP() - else if (recon_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - else if (recon_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_reconstruct_cell_boundary_values_capillary @@ -383,11 +320,6 @@ contains @:DEALLOCATE(gL_x, gR_x) - @:DEALLOCATE(gL_y, gR_y) - if (p > 0) then - @:DEALLOCATE(gL_z, gR_z) - end if - end subroutine s_finalize_surface_tension_module end module m_surface_tension diff --git a/src/simulation/m_thinc.fpp b/src/simulation/m_thinc.fpp index a52ec19d76..3947df1032 100644 --- a/src/simulation/m_thinc.fpp +++ b/src/simulation/m_thinc.fpp @@ -291,71 +291,67 @@ contains !> @brief Applies THINC (int_comp=1) or MTHINC (int_comp=2) interface compression to sharpen volume-fraction and density !! reconstructions at material interfaces. Called after WENO/MUSCL reconstruction per direction. - subroutine s_thinc_compression(v_rs_ws, vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, recon_dir, & - & is1_d, is2_d, is3_d) + subroutine s_thinc_compression(v_rs_ws, vL_rs_vf_x, vR_rs_vf_x, recon_dir, is1_d, is2_d, is3_d) - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(in) :: v_rs_ws - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_rs_vf_x, vL_rs_vf_y, & - & vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z - integer, intent(in) :: recon_dir + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(in) :: v_rs_ws + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_rs_vf_x, vR_rs_vf_x + integer, intent(in) :: recon_dir type(int_bounds_info), intent(in) :: is1_d, is2_d, is3_d - integer :: j, k, l, ix, iy, iz - real(wp) :: aCL, aCR, aC, aTHINC, qmin, qmax, A, B, C - real(wp) :: sgn, moncon, beta_eff - real(wp) :: nh1, nh2, nh3, d_local, rho1, rho2 - real(wp) :: rho_b, rho_e - - #:for REC_DIR, XYZ, CC_PRI in [(1, 'x', 'x_cc'), (2, 'y', 'y_cc'), (3, 'z', 'z_cc')] + integer :: j, k, l + real(wp) :: aCL, aCR, aC, aTHINC, qmin, qmax, A, B, C + real(wp) :: sgn, moncon, beta_eff + real(wp) :: nh1, nh2, nh3, d_local, rho1, rho2 + real(wp) :: rho_b, rho_e + + #:for REC_DIR, XYZ, CC_PRI, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'x_cc', 'j', '{STENCIL_IDX}, k, l', 'is1_d', 'is2_d', 'is3_d'), & + (2, 'y', 'y_cc', 'k', 'j, {STENCIL_IDX}, l', 'is2_d', 'is1_d', 'is3_d'), & + (3, 'z', 'z_cc', 'l', 'j, k, {STENCIL_IDX}', 'is3_d', 'is2_d', 'is1_d')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (recon_dir == ${REC_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, ix, iy, iz, aCL, aC, aCR, aTHINC, moncon, sgn, qmin, qmax, A, & - & B, C, beta_eff, nh1, nh2, nh3, d_local, rho1, rho2, rho_b, rho_e]') - do l = is3_d%beg, is3_d%end - do k = is2_d%beg, is2_d%end - do j = is1_d%beg, is1_d%end - aCL = v_rs_ws(j - 1, k, l, eqn_idx%adv%beg) - aC = v_rs_ws(j, k, l, eqn_idx%adv%beg) - aCR = v_rs_ws(j + 1, k, l, eqn_idx%adv%beg) + $:GPU_PARALLEL_LOOP(collapse=3,private='[j, k, l, aCL, aC, aCR, aTHINC, moncon, sgn, qmin, qmax, A, B, C, & + & beta_eff, nh1, nh2, nh3, d_local, rho1, rho2, rho_b, rho_e]') + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end + aCL = v_rs_ws(${SF(' - 1')}$, eqn_idx%adv%beg) + aC = v_rs_ws(${SF('')}$, eqn_idx%adv%beg) + aCR = v_rs_ws(${SF(' + 1')}$, eqn_idx%adv%beg) if (aC >= ic_eps .and. aC <= 1._wp - ic_eps) then if (int_comp == 2 .and. n > 0) then ! MTHINC ! Map reshaped (j,k,l) to physical (ix,iy,iz) - #:if REC_DIR == 1 - ix = j; iy = k; iz = l - #:elif REC_DIR == 2 - ix = k; iy = j; iz = l - #:else - ix = l; iy = k; iz = j - #:endif - - nh1 = mthinc_nhat(1, ix, iy, iz) - nh2 = mthinc_nhat(2, ix, iy, iz) - nh3 = mthinc_nhat(3, ix, iy, iz) - d_local = mthinc_d(ix, iy, iz) + + nh1 = mthinc_nhat(1, j, k, l) + nh2 = mthinc_nhat(2, j, k, l) + nh3 = mthinc_nhat(3, j, k, l) + d_local = mthinc_d(j, k, l) ! Skip if no valid normal was computed if (nh1*nh1 + nh2*nh2 + nh3*nh3 > 5e-1_wp) then - rho1 = v_rs_ws(j, k, l, eqn_idx%cont%beg)/aC - rho2 = v_rs_ws(j, k, l, eqn_idx%cont%end)/(1._wp - aC) + rho1 = v_rs_ws(${SF('')}$, eqn_idx%cont%beg)/aC + rho2 = v_rs_ws(${SF('')}$, eqn_idx%cont%end)/(1._wp - aC) ! Left face aTHINC = f_mthinc_face_average(nh1, nh2, nh3, d_local, ic_beta, ${REC_DIR}$, -5e-1_wp, & & num_dims) if (aTHINC < ic_eps) aTHINC = ic_eps if (aTHINC > 1._wp - ic_eps) aTHINC = 1._wp - ic_eps - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%beg) = rho1*aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%end) = rho2*(1._wp - aTHINC) - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%beg) = aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC + vL_rs_vf_x(j, k, l, eqn_idx%cont%beg) = rho1*aTHINC + vL_rs_vf_x(j, k, l, eqn_idx%cont%end) = rho2*(1._wp - aTHINC) + vL_rs_vf_x(j, k, l, eqn_idx%adv%beg) = aTHINC + vL_rs_vf_x(j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC ! Right face aTHINC = f_mthinc_face_average(nh1, nh2, nh3, d_local, ic_beta, ${REC_DIR}$, 5e-1_wp, & & num_dims) if (aTHINC < ic_eps) aTHINC = ic_eps if (aTHINC > 1._wp - ic_eps) aTHINC = 1._wp - ic_eps - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%beg) = rho1*aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%end) = rho2*(1._wp - aTHINC) - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%beg) = aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC + vR_rs_vf_x(j, k, l, eqn_idx%cont%beg) = rho1*aTHINC + vR_rs_vf_x(j, k, l, eqn_idx%cont%end) = rho2*(1._wp - aTHINC) + vR_rs_vf_x(j, k, l, eqn_idx%adv%beg) = aTHINC + vR_rs_vf_x(j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC end if else ! THINC moncon = (aCR - aC)*(aC - aCL) @@ -376,26 +372,26 @@ contains B = exp(sgn*beta_eff*(2._wp*C - 1._wp)) A = (B/cosh(beta_eff) - 1._wp)/tanh(beta_eff) - rho_b = v_rs_ws(j, k, l, eqn_idx%cont%beg)/aC - rho_e = v_rs_ws(j, k, l, eqn_idx%cont%end)/(1._wp - aC) + rho_b = v_rs_ws(${SF('')}$, eqn_idx%cont%beg)/aC + rho_e = v_rs_ws(${SF('')}$, eqn_idx%cont%end)/(1._wp - aC) ! Left face aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sgn*A) if (aTHINC < ic_eps) aTHINC = ic_eps if (aTHINC > 1._wp - ic_eps) aTHINC = 1._wp - ic_eps - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%beg) = rho_b*aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%end) = rho_e*(1._wp - aTHINC) - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%beg) = aTHINC - vL_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC + vL_rs_vf_x(j, k, l, eqn_idx%cont%beg) = rho_b*aTHINC + vL_rs_vf_x(j, k, l, eqn_idx%cont%end) = rho_e*(1._wp - aTHINC) + vL_rs_vf_x(j, k, l, eqn_idx%adv%beg) = aTHINC + vL_rs_vf_x(j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC ! Right face aTHINC = qmin + 5e-1_wp*qmax*(1._wp + sgn*(tanh(beta_eff) + A)/(1._wp + A*tanh(beta_eff))) if (aTHINC < ic_eps) aTHINC = ic_eps if (aTHINC > 1._wp - ic_eps) aTHINC = 1._wp - ic_eps - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%beg) = rho_b*aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%cont%end) = rho_e*(1._wp - aTHINC) - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%beg) = aTHINC - vR_rs_vf_${XYZ}$ (j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC + vR_rs_vf_x(j, k, l, eqn_idx%cont%beg) = rho_b*aTHINC + vR_rs_vf_x(j, k, l, eqn_idx%cont%end) = rho_e*(1._wp - aTHINC) + vR_rs_vf_x(j, k, l, eqn_idx%adv%beg) = aTHINC + vR_rs_vf_x(j, k, l, eqn_idx%adv%end) = 1._wp - aTHINC end if end if end if diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 130053fce5..4d142eca55 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -471,10 +471,10 @@ contains if (s == 1) then if (run_time_info) then - if (igr .or. dummy) then + if (igr) then call s_write_run_time_information(q_cons_ts(1)%vf, t_step) end if - if (.not. igr .or. dummy) then + if (.not. igr) then call s_write_run_time_information(q_prim_vf, t_step) end if end if @@ -638,7 +638,7 @@ contains real(wp) :: dt_local integer :: j, k, l !< Generic loop iterators - if (.not. igr .or. dummy) then + if (.not. igr) then call s_convert_conservative_to_primitive_variables(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, idwint) end if diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index dfa32e5c98..55cdcc343e 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -478,16 +478,14 @@ contains end subroutine s_compute_viscous_stress_cylindrical_boundary !> Computes viscous terms - subroutine s_get_viscous(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n, & + subroutine s_get_viscous(qL_prim_rsx_vf, dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n, & - & qL_prim, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n, qR_prim, & - & q_prim_qp, dq_prim_dx_qp, dq_prim_dy_qp, dq_prim_dz_qp, ix, iy, iz) + & qL_prim, qR_prim_rsx_vf, dqR_prim_dx_n, dqR_prim_dy_n, dqR_prim_dz_n, qR_prim, q_prim_qp, dq_prim_dx_qp, dq_prim_dy_qp, & + & dq_prim_dz_qp, ix, iy, iz) - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf, & - & qL_prim_rsy_vf, qR_prim_rsy_vf, qL_prim_rsz_vf, qR_prim_rsz_vf - - type(vector_field), dimension(num_dims), intent(inout) :: qL_prim, qR_prim - type(vector_field), intent(in) :: q_prim_qp + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: qL_prim_rsx_vf, qR_prim_rsx_vf + type(vector_field), dimension(num_dims), intent(inout) :: qL_prim, qR_prim + type(vector_field), intent(in) :: q_prim_qp type(vector_field), dimension(1:num_dims), intent(inout) :: dqL_prim_dx_n, dqR_prim_dx_n, dqL_prim_dy_n, dqR_prim_dy_n, & & dqL_prim_dz_n, dqR_prim_dz_n @@ -495,17 +493,15 @@ contains type(int_bounds_info), intent(in) :: ix, iy, iz integer :: i, j, k, l - do i = 1, num_dims - iv%beg = eqn_idx%mom%beg; iv%end = eqn_idx%mom%end - - $:GPU_UPDATE(device='[iv]') - - call s_reconstruct_cell_boundary_values_visc(q_prim_qp%vf(iv%beg:iv%end), qL_prim_rsx_vf, qL_prim_rsy_vf, & - & qL_prim_rsz_vf, qR_prim_rsx_vf, qR_prim_rsy_vf, qR_prim_rsz_vf, i, qL_prim(i)%vf(iv%beg:iv%end), & - & qR_prim(i)%vf(iv%beg:iv%end), ix, iy, iz) - end do + iv%beg = eqn_idx%mom%beg; iv%end = eqn_idx%mom%end + $:GPU_UPDATE(device='[iv]') if (weno_Re_flux) then + do i = 1, num_dims + call s_reconstruct_cell_boundary_values_visc(q_prim_qp%vf(iv%beg:iv%end), qL_prim_rsx_vf, qR_prim_rsx_vf, i, & + & qL_prim(i)%vf(iv%beg:iv%end), qR_prim(i)%vf(iv%beg:iv%end), ix, iy, iz) + end do + ! Compute velocity gradients via divergence theorem on cell-boundary reconstructed values do i = 1, num_dims if (i == 1) then @@ -848,20 +844,20 @@ contains end subroutine s_get_viscous !> Reconstruct left and right cell-boundary values of viscous primitive variables - subroutine s_reconstruct_cell_boundary_values_visc(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, norm_dir, vL_prim_vf, & + subroutine s_reconstruct_cell_boundary_values_visc(v_vf, vL_x, vR_x, norm_dir, vL_prim_vf, & & vR_prim_vf, ix, iy, iz) type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf type(scalar_field), dimension(iv%beg:iv%end), intent(inout) :: vL_prim_vf, vR_prim_vf - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x, vL_y, vL_z, vR_x, vR_y, vR_z + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_x, vR_x integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz integer :: recon_dir !< Coordinate direction of the WENO reconstruction integer :: i, j, k, l #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl','MUSCL_TYPE')] - if (recon_type == ${TYPE}$ .or. dummy) then + if (recon_type == ${TYPE}$) then ! Reconstruction in s1-direction if (norm_dir == 1) then @@ -879,25 +875,13 @@ contains end if $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]') - if (n > 0) then - if (p > 0) then - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,iv%beg:iv%end), vL_z(:,:,:, & - & iv%beg:iv%end), vR_x(:,:,:,iv%beg:iv%end), vR_y(:,:,:,iv%beg:iv%end), vR_z(:,:,:, & - & iv%beg:iv%end), recon_dir, is1_viscous, is2_viscous, is3_viscous) - else - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,iv%beg:iv%end), vL_z(:,:,:, & - & :), vR_x(:,:,:,iv%beg:iv%end), vR_y(:,:,:,iv%beg:iv%end), vR_z(:,:,:,:), recon_dir, & - & is1_viscous, is2_viscous, is3_viscous) - end if - else - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,:), vL_z(:,:,:,:), vR_x(:,:,:, & - & iv%beg:iv%end), vR_y(:,:,:,:), vR_z(:,:,:,:), recon_dir, is1_viscous, is2_viscous, & - & is3_viscous) - end if + + call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vR_x(:,:,:,iv%beg:iv%end), recon_dir, & + & is1_viscous, is2_viscous, is3_viscous) end if #:endfor - if (viscous .or. dummy) then + if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) @@ -905,8 +889,8 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) + vL_prim_vf(i)%sf(k, j, l) = vL_x(k, j, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_x(k, j, l, i) end do end do end do @@ -918,8 +902,8 @@ contains do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) + vL_prim_vf(i)%sf(l, k, j) = vL_x(l, k, j, i) + vR_prim_vf(i)%sf(l, k, j) = vR_x(l, k, j, i) end do end do end do @@ -945,17 +929,16 @@ contains end subroutine s_reconstruct_cell_boundary_values_visc !> Reconstruct left and right cell-boundary values of viscous primitive variable derivatives - subroutine s_reconstruct_cell_boundary_values_visc_deriv(v_vf, vL_x, vL_y, vL_z, vR_x, vR_y, vR_z, norm_dir, vL_prim_vf, & + subroutine s_reconstruct_cell_boundary_values_visc_deriv(v_vf, vL_x, vR_x, norm_dir, vL_prim_vf, & & vR_prim_vf, ix, iy, iz) - type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,iv%beg:), intent(inout) :: vL_x, vL_y, vL_z, vR_x, & - & vR_y, vR_z + type(scalar_field), dimension(iv%beg:iv%end), intent(in) :: v_vf + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,iv%beg:), intent(inout) :: vL_x, vR_x type(scalar_field), dimension(iv%beg:iv%end), intent(inout) :: vL_prim_vf, vR_prim_vf - type(int_bounds_info), intent(in) :: ix, iy, iz - integer, intent(in) :: norm_dir - integer :: recon_dir !< Coordinate direction of the WENO reconstruction - integer :: i, j, k, l + type(int_bounds_info), intent(in) :: ix, iy, iz + integer, intent(in) :: norm_dir + integer :: recon_dir !< Coordinate direction of the WENO reconstruction + integer :: i, j, k, l #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl','MUSCL_TYPE')] if (recon_type == ${TYPE}$) then @@ -975,25 +958,13 @@ contains is1_viscous%end = is1_viscous%end - ${SCHEME}$_polyn end if $:GPU_UPDATE(device='[is1_viscous, is2_viscous, is3_viscous, iv]') - if (n > 0) then - if (p > 0) then - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,iv%beg:iv%end), vL_z(:,:,:, & - & iv%beg:iv%end), vR_x(:,:,:,iv%beg:iv%end), vR_y(:,:,:,iv%beg:iv%end), vR_z(:,:,:, & - & iv%beg:iv%end), recon_dir, is1_viscous, is2_viscous, is3_viscous) - else - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,iv%beg:iv%end), vL_z(:,:,:, & - & :), vR_x(:,:,:,iv%beg:iv%end), vR_y(:,:,:,iv%beg:iv%end), vR_z(:,:,:,:), recon_dir, & - & is1_viscous, is2_viscous, is3_viscous) - end if - else - call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vL_y(:,:,:,:), vL_z(:,:,:,:), vR_x(:,:,:, & - & iv%beg:iv%end), vR_y(:,:,:,:), vR_z(:,:,:,:), recon_dir, is1_viscous, is2_viscous, & - & is3_viscous) - end if + + call s_${SCHEME}$ (v_vf(iv%beg:iv%end), vL_x(:,:,:,iv%beg:iv%end), vR_x(:,:,:,iv%beg:iv%end), recon_dir, & + & is1_viscous, is2_viscous, is3_viscous) end if #:endfor - if (viscous .or. dummy) then + if (viscous) then if (weno_Re_flux) then if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) @@ -1001,8 +972,8 @@ contains do l = is3_viscous%beg, is3_viscous%end do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) + vL_prim_vf(i)%sf(k, j, l) = vL_x(k, j, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_x(k, j, l, i) end do end do end do @@ -1014,8 +985,8 @@ contains do j = is1_viscous%beg, is1_viscous%end do k = is2_viscous%beg, is2_viscous%end do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) + vL_prim_vf(i)%sf(l, k, j) = vL_x(l, k, j, i) + vR_prim_vf(i)%sf(l, k, j) = vR_x(l, k, j, i) end do end do end do diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 5ccd85756f..78e7678b1a 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -16,17 +16,13 @@ module m_weno use m_thinc, only: s_thinc_compression use m_nvtx - private; public :: s_initialize_weno_module, s_initialize_weno, s_finalize_weno_module, s_weno + private; public :: s_initialize_weno_module, s_finalize_weno_module, s_weno, s_pack_weno_input_arr - !> @name The cell-average variables that will be WENO-reconstructed. Formerly, they are stored in v_vf. However, they are - !! transferred to v_rs_wsL and v_rs_wsR as to be reshaped (RS) and/or characteristically decomposed. The reshaping allows the - !! WENO procedure to be independent of the coordinate direction of the reconstruction. Lastly, notice that the left (L) and - !! right (R) results of the characteristic decomposition are stored in custom-constructed WENO- stencils (WS) that are annexed - !! to each position of a given scalar field. + !> @name The cell-average variables that will be WENO-reconstructed unpacked into an array for performance !> @{ - real(wp), allocatable, dimension(:,:,:,:) :: v_rs_ws_x, v_rs_ws_y, v_rs_ws_z + real(wp), allocatable, dimension(:,:,:,:) :: v_rs_weno !> @} - $:GPU_DECLARE(create='[v_rs_ws_x, v_rs_ws_y, v_rs_ws_z]') + $:GPU_DECLARE(create='[v_rs_weno]') ! WENO Coefficients @@ -123,7 +119,7 @@ contains call s_compute_weno_coefficients(1, is1_weno) - @:ALLOCATE(v_rs_ws_x(is1_weno%beg:is1_weno%end, is2_weno%beg:is2_weno%end, is3_weno%beg:is3_weno%end, 1:sys_size)) + @:ALLOCATE(v_rs_weno(is1_weno%beg:is1_weno%end, is2_weno%beg:is2_weno%end, is3_weno%beg:is3_weno%end, 1:sys_size)) ! Allocating/Computing WENO Coefficients in y-direction if (n == 0) return @@ -150,8 +146,6 @@ contains call s_compute_weno_coefficients(2, is2_weno) - @:ALLOCATE(v_rs_ws_y(is2_weno%beg:is2_weno%end, is1_weno%beg:is1_weno%end, is3_weno%beg:is3_weno%end, 1:sys_size)) - ! Allocating/Computing WENO Coefficients in z-direction if (p == 0) return @@ -170,8 +164,6 @@ contains call s_compute_weno_coefficients(3, is3_weno) - @:ALLOCATE(v_rs_ws_z(is3_weno%beg:is3_weno%end, is2_weno%beg:is2_weno%end, is1_weno%beg:is1_weno%end, 1:sys_size)) - end subroutine s_initialize_weno_module !> Compute WENO polynomial coefficients, ideal weights, and smoothness indicators for a given direction @@ -873,14 +865,33 @@ contains end subroutine s_compute_weno_coefficients + subroutine s_pack_weno_input_arr(v_vf) + + type(scalar_field), dimension(1:), intent(in) :: v_vf + integer :: i, j, k, l, n_vars + + $:GPU_PARALLEL_LOOP(collapse=4) + do i = 1, v_size + do l = idwbuff(3)%beg, idwbuff(3)%end + do k = idwbuff(2)%beg, idwbuff(2)%end + do j = idwbuff(1)%beg, idwbuff(1)%end + v_rs_weno(j, k, l, i) = v_vf(i)%sf(j, k, l) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_pack_weno_input_arr + !> Perform WENO reconstruction of left and right cell-boundary values from cell-averaged variables - subroutine s_weno(v_vf, vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z, weno_dir, is1_weno_d, & + subroutine s_weno(v_vf, vL_rs_vf_x, vR_rs_vf_x, weno_dir, is1_weno_d, & & is2_weno_d, is3_weno_d) type(scalar_field), dimension(1:), intent(in) :: v_vf - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z - real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_rs_vf_x, vR_rs_vf_y, vR_rs_vf_z + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_rs_vf_x + real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vR_rs_vf_x integer, intent(in) :: weno_dir type(int_bounds_info), intent(in) :: is1_weno_d, is2_weno_d, is3_weno_d @@ -902,6 +913,7 @@ contains real(wp), dimension(-3:3) :: v !< temporary field value array for clarity (WENO7 only) real(wp) :: tau integer :: i, j, k, l, q + real(wp) :: vp0, vp1, vp2, vp3, vm1, vm2, vm3 is1_weno = is1_weno_d is2_weno = is2_weno_d @@ -909,14 +921,13 @@ contains $:GPU_UPDATE(device='[is1_weno, is2_weno, is3_weno]') - if (weno_order /= 1 .or. dummy) then - call s_initialize_weno(v_vf, weno_dir) - end if + v_size = ubound(v_vf, 1) + $:GPU_UPDATE(device='[v_size]') - if (weno_order == 1 .or. dummy) then + if (weno_order == 1) then if (weno_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) + do i = 1, v_size do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -929,12 +940,12 @@ contains $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) + do i = 1, v_size do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_rs_vf_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + do j = is1_weno%beg, is1_weno%end + do k = is2_weno%beg, is2_weno%end + vL_rs_vf_x(k, j, l, i) = v_vf(i)%sf(k, j, l) + vR_rs_vf_x(k, j, l, i) = v_vf(i)%sf(k, j, l) end do end do end do @@ -942,12 +953,12 @@ contains $:END_GPU_PARALLEL_LOOP() else if (weno_dir == 3) then $:GPU_PARALLEL_LOOP(collapse=4) - do i = 1, ubound(v_vf, 1) - do l = is3_weno%beg, is3_weno%end + do i = 1, v_size + do j = is1_weno%beg, is1_weno%end do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - vL_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_rs_vf_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + do l = is3_weno%beg, is3_weno%end + vL_rs_vf_x(l, k, j, i) = v_vf(i)%sf(l, k, j) + vR_rs_vf_x(l, k, j, i) = v_vf(i)%sf(l, k, j) end do end do end do @@ -955,83 +966,93 @@ contains $:END_GPU_PARALLEL_LOOP() end if end if - if (weno_order == 3 .or. dummy) then - #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + + if (weno_order /= 1) then + call s_pack_weno_input_arr(v_vf) + end if + + if (weno_order == 3) then + #:for WENO_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1_weno', 'is2_weno', 'is3_weno'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2_weno', 'is1_weno', 'is3_weno'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3_weno', 'is2_weno', 'is1_weno')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=4,private='[beta, dvd, poly, omega, alpha, tau, q]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end + $:GPU_PARALLEL_LOOP(collapse=4,private='[beta, dvd, poly, omega, alpha, tau, q, vp0, vp1, vm1]') + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end do i = 1, v_size ! reconstruct from left side alpha(:) = 0._wp - omega(:) = 0._wp - beta(:) = weno_eps - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) - v_rs_ws_${XYZ}$ (j - 1, k, l, i) + vp0 = v_rs_weno(${SF('')}$, i) + vm1 = v_rs_weno(${SF(' - 1')}$, i) + vp1 = v_rs_weno(${SF(' + 1')}$, i) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 1, 0)*dvd(-1) + dvd(0) = vp1 - vp0 + dvd(-1) = vp0 - vm1 - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(0)*dvd(0) + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(-1)*dvd(-1) + weno_eps + poly(0) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 0, 0)*dvd(0) + poly(1) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 1, 0)*dvd(-1) + + beta(0) = beta_coef_${XYZ}$ (${SV}$, 0, 0)*dvd(0)*dvd(0) + weno_eps + beta(1) = beta_coef_${XYZ}$ (${SV}$, 1, 0)*dvd(-1)*dvd(-1) + weno_eps if (wenojs) then do q = 0, weno_num_stencils - alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do else if (mapped_weno) then do q = 0, weno_num_stencils - alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do omega = alpha/sum(alpha) do q = 0, weno_num_stencils - alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, & - & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, & - & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j)))) + alpha(q) = (d_cbL_${XYZ}$ (q, ${SV}$)*(1._wp + d_cbL_${XYZ}$ (q, & + & ${SV}$) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, & + & ${SV}$)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, ${SV}$)))) end do else if (wenoz) then ! Borges, et al. (2008) tau = abs(beta(1) - beta(0)) do q = 0, weno_num_stencils - alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + tau/beta(q)) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)*(1._wp + tau/beta(q)) end do end if - omega = alpha/sum(alpha) - vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + vL_rs_vf_x(j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) ! reconstruct from right side - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 0, 0)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) + poly(0) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 0, 0)*dvd(0) + poly(1) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 1, 0)*dvd(-1) if (wenojs) then do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do else if (mapped_weno) then do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do omega = alpha/sum(alpha) do q = 0, weno_num_stencils - alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, & - & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, & - & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j)))) + alpha(q) = (d_cbR_${XYZ}$ (q, ${SV}$)*(1._wp + d_cbR_${XYZ}$ (q, & + & ${SV}$) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, & + & ${SV}$)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, ${SV}$)))) end do else if (wenoz) then do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + tau/beta(q)) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)*(1._wp + tau/beta(q)) end do end if - omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + vR_rs_vf_x(j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) end do end do end do @@ -1040,37 +1061,44 @@ contains end if #:endfor end if - if (weno_order == 5 .or. dummy) then + if (weno_order == 5) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 1 - #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + #:for WENO_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1_weno', 'is2_weno', 'is3_weno'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2_weno', 'is1_weno', 'is3_weno'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3_weno', 'is2_weno', 'is1_weno')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd, poly, beta, alpha, omega, tau, delta, q]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end + $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd, poly, beta, alpha, omega, tau, delta, q, vp0, vm1, vm2, & + & vp1, vp2]') + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end $:GPU_LOOP(parallelism='[seq]') do i = 1, v_size ! reconstruct from left side alpha(:) = 0._wp - omega(:) = 0._wp - delta(:) = 0._wp - beta(:) = weno_eps - - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 0, & - & 0)*dvd(1) + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 1, & - & 0)*dvd(0) + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 2, & - & 0)*dvd(-1) + poly_coef_cbL_${XYZ}$ (j, 2, 1)*dvd(-2) - - ! Jiang & Shu (1996) sum-of-squares form on uniform grids: all terms non-negative, no - ! cancellation. On non-uniform grids, fall back to precomputed coefficients. + + vp0 = v_rs_weno(${SF('')}$, i) + vm1 = v_rs_weno(${SF(' - 1')}$, i) + vm2 = v_rs_weno(${SF(' - 2')}$, i) + vp1 = v_rs_weno(${SF(' + 1')}$, i) + vp2 = v_rs_weno(${SF(' + 2')}$, i) + + dvd(1) = vp2 - vp1 + dvd(0) = vp1 - vp0 + dvd(-1) = vp0 - vm1 + dvd(-2) = vm1 - vm2 + + poly(0) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 0, & + & 0)*dvd(1) + poly_coef_cbL_${XYZ}$ (${SV}$, 0, 1)*dvd(0) + poly(1) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 1, & + & 0)*dvd(0) + poly_coef_cbL_${XYZ}$ (${SV}$, 1, 1)*dvd(-1) + poly(2) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 2, & + & 0)*dvd(-1) + poly_coef_cbL_${XYZ}$ (${SV}$, 2, 1)*dvd(-2) + if (uniform_grid(${WENO_DIR}$)) then beta(0) = 13._wp/12._wp*(dvd(1) - dvd(0))**2 + 0.25_wp*(dvd(1) - 3._wp*dvd(0))**2 & & + weno_eps @@ -1078,46 +1106,49 @@ contains beta(2) = 13._wp/12._wp*(dvd(-1) - dvd(-2))**2 + 0.25_wp*(3._wp*dvd(-1) - dvd(-2))**2 & & + weno_eps else - beta(0) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(1)*dvd(1) + beta_coef_${XYZ}$ (j, 0, & - & 1)*dvd(1)*dvd(0) + beta_coef_${XYZ}$ (j, 0, 2)*dvd(0)*dvd(0) + weno_eps - beta(1) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(0)*dvd(0) + beta_coef_${XYZ}$ (j, 1, & - & 1)*dvd(0)*dvd(-1) + beta_coef_${XYZ}$ (j, 1, 2)*dvd(-1)*dvd(-1) + weno_eps - beta(2) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(-1)*dvd(-1) + beta_coef_${XYZ}$ (j, 2, & - & 1)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (j, 2, 2)*dvd(-2)*dvd(-2) + weno_eps + beta(0) = beta_coef_${XYZ}$ (${SV}$, 0, 0)*dvd(1)*dvd(1) + beta_coef_${XYZ}$ (${SV}$, & + & 0, 1)*dvd(1)*dvd(0) + beta_coef_${XYZ}$ (${SV}$, 0, 2)*dvd(0)*dvd(0) + weno_eps + beta(1) = beta_coef_${XYZ}$ (${SV}$, 1, 0)*dvd(0)*dvd(0) + beta_coef_${XYZ}$ (${SV}$, & + & 1, 1)*dvd(0)*dvd(-1) + beta_coef_${XYZ}$ (${SV}$, 1, & + & 2)*dvd(-1)*dvd(-1) + weno_eps + beta(2) = beta_coef_${XYZ}$ (${SV}$, 2, & + & 0)*dvd(-1)*dvd(-1) + beta_coef_${XYZ}$ (${SV}$, 2, & + & 1)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (${SV}$, 2, 2)*dvd(-2)*dvd(-2) + weno_eps end if if (wenojs) then do q = 0, weno_num_stencils - alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do else if (mapped_weno) then do q = 0, weno_num_stencils - alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do omega = alpha/sum(alpha) do q = 0, weno_num_stencils - alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, & - & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, & - & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j)))) + alpha(q) = (d_cbL_${XYZ}$ (q, ${SV}$)*(1._wp + d_cbL_${XYZ}$ (q, & + & ${SV}$) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, & + & ${SV}$)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, ${SV}$)))) end do else if (wenoz) then ! Borges, et al. (2008) + tau = abs(beta(2) - beta(0)) ! Equation 25 $:GPU_LOOP(parallelism='[seq]') - do q = 0, weno_num_stencils ! Equation 28 (note: weno_eps was already added to beta) - alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))) + do q = 0, weno_num_stencils + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)*(1._wp + (tau/beta(q))) + ! Equation 28 (note: weno_eps was already added to beta) end do else if (teno) then ! Fu, et al. (2016) Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 - tau = abs(beta(2) - beta(0)) ! Equation 25 + tau = abs(beta(2) - beta(0)) $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) - alpha(q) = 1._wp + tau/beta(q) + alpha(q) = 1._wp + tau/beta(q) ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) alpha(q) = (alpha(q)**3._wp)**2._wp end do - omega = alpha/sum(alpha) + omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils @@ -1126,7 +1157,7 @@ contains else delta(q) = 1._wp end if - alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, j) ! Equation 27 + alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, ${SV}$) ! Equation 27 end do end if @@ -1134,40 +1165,40 @@ contains omega(1) = alpha(1)/(alpha(0) + alpha(1) + alpha(2)) omega(2) = alpha(2)/(alpha(0) + alpha(1) + alpha(2)) - vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + vL_rs_vf_x(j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) ! reconstruct from right side - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 0, & - & 0)*dvd(1) + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(0) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 1, & - & 0)*dvd(0) + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(-1) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 2, & - & 0)*dvd(-1) + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) + poly(0) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 0, & + & 0)*dvd(1) + poly_coef_cbR_${XYZ}$ (${SV}$, 0, 1)*dvd(0) + poly(1) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 1, & + & 0)*dvd(0) + poly_coef_cbR_${XYZ}$ (${SV}$, 1, 1)*dvd(-1) + poly(2) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 2, & + & 0)*dvd(-1) + poly_coef_cbR_${XYZ}$ (${SV}$, 2, 1)*dvd(-2) if (wenojs) then do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do else if (mapped_weno) then do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do omega = alpha/sum(alpha) do q = 0, weno_num_stencils - alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, & - & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, & - & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j)))) + alpha(q) = (d_cbR_${XYZ}$ (q, ${SV}$)*(1._wp + d_cbR_${XYZ}$ (q, & + & ${SV}$) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, & + & ${SV}$)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, ${SV}$)))) end do else if (wenoz) then $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)*(1._wp + (tau/beta(q))) end do else if (teno) then $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) + alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, ${SV}$) end do end if @@ -1175,7 +1206,7 @@ contains omega(1) = alpha(1)/(alpha(0) + alpha(1) + alpha(2)) omega(2) = alpha(2)/(alpha(0) + alpha(1) + alpha(2)) - vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + vR_rs_vf_x(j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) end do end do end do @@ -1183,50 +1214,68 @@ contains $:END_GPU_PARALLEL_LOOP() if (mp_weno) then - call s_preserve_monotonicity(v_rs_ws_${XYZ}$, vL_rs_vf_${XYZ}$, vR_rs_vf_${XYZ}$) + call s_preserve_monotonicity(v_rs_weno, vL_rs_vf_x, vR_rs_vf_x, weno_dir) end if end if #:endfor #:endif end if - if (weno_order == 7 .or. dummy) then + if (weno_order == 7) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 2 - #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] + #:for WENO_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1_weno', 'is2_weno', 'is3_weno'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2_weno', 'is1_weno', 'is3_weno'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3_weno', 'is2_weno', 'is1_weno')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[poly, beta, alpha, omega, tau, delta, dvd, v, q]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end + $:GPU_PARALLEL_LOOP(collapse=3,private='[poly, beta, alpha, omega, tau, delta, dvd, v, q, vp0, vp1, vp2, & + & vp3, vm1, vm2, vm3]') + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end $:GPU_LOOP(parallelism='[seq]') do i = 1, v_size alpha(:) = 0._wp - omega(:) = 0._wp - delta(:) = 0._wp - beta(:) = weno_eps - ! temporary field value array for clarity - if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3,k, l, i) + vp0 = v_rs_weno(${SF('')}$, i) + vm1 = v_rs_weno(${SF(' - 1')}$, i) + vm2 = v_rs_weno(${SF(' - 2')}$, i) + vm3 = v_rs_weno(${SF(' - 3')}$, i) + vp1 = v_rs_weno(${SF(' + 1')}$, i) + vp2 = v_rs_weno(${SF(' + 2')}$, i) + vp3 = v_rs_weno(${SF(' + 3')}$, i) + + if (teno) then + v(-3) = vm3 + v(-2) = vm2 + v(-1) = vm1 + v(0) = vp0 + v(1) = vp1 + v(2) = vp2 + v(3) = vp3 + end if if (.not. teno) then - dvd(2) = v_rs_ws_${XYZ}$ (j + 3, k, l, i) - v_rs_ws_${XYZ}$ (j + 2, k, l, i) - dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) - v_rs_ws_${XYZ}$ (j + 1, k, l, i) - dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) - v_rs_ws_${XYZ}$ (j, k, l, i) - dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) - v_rs_ws_${XYZ}$ (j - 1, k, l, i) - dvd(-2) = v_rs_ws_${XYZ}$ (j - 1, k, l, i) - v_rs_ws_${XYZ}$ (j - 2, k, l, i) - dvd(-3) = v_rs_ws_${XYZ}$ (j - 2, k, l, i) - v_rs_ws_${XYZ}$ (j - 3, k, l, i) - - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 0, & - & 0)*dvd(2) + poly_coef_cbL_${XYZ}$ (j, 0, 1)*dvd(1) + poly_coef_cbL_${XYZ}$ (j, & - & 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 1, & - & 0)*dvd(1) + poly_coef_cbL_${XYZ}$ (j, 1, 1)*dvd(0) + poly_coef_cbL_${XYZ}$ (j, & - & 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 2, & - & 0)*dvd(0) + poly_coef_cbL_${XYZ}$ (j, 2, & - & 1)*dvd(-1) + poly_coef_cbL_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbL_${XYZ}$ (j, 3, & - & 0)*dvd(-1) + poly_coef_cbL_${XYZ}$ (j, 3, & - & 1)*dvd(-2) + poly_coef_cbL_${XYZ}$ (j, 3, 2)*dvd(-3) + dvd(2) = vp3 - vp2 + dvd(1) = vp2 - vp1 + dvd(0) = vp1 - vp0 + dvd(-1) = vp0 - vm1 + dvd(-2) = vm1 - vm2 + dvd(-3) = vm2 - vm3 + + poly(3) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 0, & + & 0)*dvd(2) + poly_coef_cbL_${XYZ}$ (${SV}$, 0, & + & 1)*dvd(1) + poly_coef_cbL_${XYZ}$ (${SV}$, 0, 2)*dvd(0) + poly(2) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 1, & + & 0)*dvd(1) + poly_coef_cbL_${XYZ}$ (${SV}$, 1, & + & 1)*dvd(0) + poly_coef_cbL_${XYZ}$ (${SV}$, 1, 2)*dvd(-1) + poly(1) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 2, & + & 0)*dvd(0) + poly_coef_cbL_${XYZ}$ (${SV}$, 2, & + & 1)*dvd(-1) + poly_coef_cbL_${XYZ}$ (${SV}$, 2, 2)*dvd(-2) + poly(0) = vp0 + poly_coef_cbL_${XYZ}$ (${SV}$, 3, & + & 0)*dvd(-1) + poly_coef_cbL_${XYZ}$ (${SV}$, 3, & + & 1)*dvd(-2) + poly_coef_cbL_${XYZ}$ (${SV}$, 3, 2)*dvd(-3) else #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 ! (Fu, et al., 2016) Table 1 Note: Unlike TENO5, TENO7 stencils differ from WENO7 @@ -1242,29 +1291,30 @@ contains end if if (.not. teno) then - beta(3) = beta_coef_${XYZ}$ (j, 0, 0)*dvd(2)*dvd(2) + beta_coef_${XYZ}$ (j, 0, & - & 1)*dvd(2)*dvd(1) + beta_coef_${XYZ}$ (j, 0, & - & 2)*dvd(2)*dvd(0) + beta_coef_${XYZ}$ (j, 0, & - & 3)*dvd(1)*dvd(1) + beta_coef_${XYZ}$ (j, 0, & - & 4)*dvd(1)*dvd(0) + beta_coef_${XYZ}$ (j, 0, 5)*dvd(0)*dvd(0) + weno_eps - - beta(2) = beta_coef_${XYZ}$ (j, 1, 0)*dvd(1)*dvd(1) + beta_coef_${XYZ}$ (j, 1, & - & 1)*dvd(1)*dvd(0) + beta_coef_${XYZ}$ (j, 1, & - & 2)*dvd(1)*dvd(-1) + beta_coef_${XYZ}$ (j, 1, & - & 3)*dvd(0)*dvd(0) + beta_coef_${XYZ}$ (j, 1, & - & 4)*dvd(0)*dvd(-1) + beta_coef_${XYZ}$ (j, 1, 5)*dvd(-1)*dvd(-1) + weno_eps - - beta(1) = beta_coef_${XYZ}$ (j, 2, 0)*dvd(0)*dvd(0) + beta_coef_${XYZ}$ (j, 2, & - & 1)*dvd(0)*dvd(-1) + beta_coef_${XYZ}$ (j, 2, & - & 2)*dvd(0)*dvd(-2) + beta_coef_${XYZ}$ (j, 2, & - & 3)*dvd(-1)*dvd(-1) + beta_coef_${XYZ}$ (j, 2, & - & 4)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (j, 2, 5)*dvd(-2)*dvd(-2) + weno_eps - - beta(0) = beta_coef_${XYZ}$ (j, 3, 0)*dvd(-1)*dvd(-1) + beta_coef_${XYZ}$ (j, 3, & - & 1)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (j, 3, & - & 2)*dvd(-1)*dvd(-3) + beta_coef_${XYZ}$ (j, 3, & - & 3)*dvd(-2)*dvd(-2) + beta_coef_${XYZ}$ (j, 3, & - & 4)*dvd(-2)*dvd(-3) + beta_coef_${XYZ}$ (j, 3, 5)*dvd(-3)*dvd(-3) + weno_eps + beta(3) = beta_coef_${XYZ}$ (${SV}$, 0, 0)*dvd(2)*dvd(2) + beta_coef_${XYZ}$ (${SV}$, & + & 0, 1)*dvd(2)*dvd(1) + beta_coef_${XYZ}$ (${SV}$, 0, & + & 2)*dvd(2)*dvd(0) + beta_coef_${XYZ}$ (${SV}$, 0, & + & 3)*dvd(1)*dvd(1) + beta_coef_${XYZ}$ (${SV}$, 0, & + & 4)*dvd(1)*dvd(0) + beta_coef_${XYZ}$ (${SV}$, 0, 5)*dvd(0)*dvd(0) + weno_eps + + beta(2) = beta_coef_${XYZ}$ (${SV}$, 1, 0)*dvd(1)*dvd(1) + beta_coef_${XYZ}$ (${SV}$, & + & 1, 1)*dvd(1)*dvd(0) + beta_coef_${XYZ}$ (${SV}$, 1, & + & 2)*dvd(1)*dvd(-1) + beta_coef_${XYZ}$ (${SV}$, 1, & + & 3)*dvd(0)*dvd(0) + beta_coef_${XYZ}$ (${SV}$, 1, & + & 4)*dvd(0)*dvd(-1) + beta_coef_${XYZ}$ (${SV}$, 1, 5)*dvd(-1)*dvd(-1) + weno_eps + + beta(1) = beta_coef_${XYZ}$ (${SV}$, 2, 0)*dvd(0)*dvd(0) + beta_coef_${XYZ}$ (${SV}$, & + & 2, 1)*dvd(0)*dvd(-1) + beta_coef_${XYZ}$ (${SV}$, 2, & + & 2)*dvd(0)*dvd(-2) + beta_coef_${XYZ}$ (${SV}$, 2, & + & 3)*dvd(-1)*dvd(-1) + beta_coef_${XYZ}$ (${SV}$, 2, & + & 4)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (${SV}$, 2, 5)*dvd(-2)*dvd(-2) + weno_eps + + beta(0) = beta_coef_${XYZ}$ (${SV}$, 3, & + & 0)*dvd(-1)*dvd(-1) + beta_coef_${XYZ}$ (${SV}$, 3, & + & 1)*dvd(-1)*dvd(-2) + beta_coef_${XYZ}$ (${SV}$, 3, & + & 2)*dvd(-1)*dvd(-3) + beta_coef_${XYZ}$ (${SV}$, 3, & + & 3)*dvd(-2)*dvd(-2) + beta_coef_${XYZ}$ (${SV}$, 3, & + & 4)*dvd(-2)*dvd(-3) + beta_coef_${XYZ}$ (${SV}$, 3, 5)*dvd(-3)*dvd(-3) + weno_eps else #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 ! High-Order Low-Dissipation Targeted ENO Schemes for Ideal Magnetohydrodynamics (Fu @@ -1288,17 +1338,17 @@ contains if (wenojs) then do q = 0, weno_num_stencils - alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do else if (mapped_weno) then do q = 0, weno_num_stencils - alpha(q) = d_cbL_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do omega = alpha/sum(alpha) do q = 0, weno_num_stencils - alpha(q) = (d_cbL_${XYZ}$ (q, j)*(1._wp + d_cbL_${XYZ}$ (q, & - & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, & - & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, j)))) + alpha(q) = (d_cbL_${XYZ}$ (q, ${SV}$)*(1._wp + d_cbL_${XYZ}$ (q, & + & ${SV}$) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbL_${XYZ}$ (q, & + & ${SV}$)**2._wp + omega(q)*(1._wp - 2._wp*d_cbL_${XYZ}$ (q, ${SV}$)))) end do else if (wenoz) then ! Castro, et al. (2010) Don & Borges (2013) also helps @@ -1306,7 +1356,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils ! wenoz_q = 2,3,4 for stability - alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))**wenoz_q) + alpha(q) = d_cbL_${XYZ}$ (q, ${SV}$)*(1._wp + (tau/beta(q))**wenoz_q) end do else if (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 @@ -1322,36 +1372,35 @@ contains else delta(q) = 1._wp end if - alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, j) ! Equation 27 + alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, ${SV}$) ! Equation 27 end do #:endif end if omega = alpha/sum(alpha) - vL_rs_vf_${XYZ}$ (j, k, l, & - & i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3) & - & *poly(3) + vL_rs_vf_x(j, k, l, & + & i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3)*poly(3) if (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 - vL_rs_vf_${XYZ}$ (j, k, l, i) = vL_rs_vf_${XYZ}$ (j, k, l, i) + omega(4)*poly(4) + vL_rs_vf_x(j, k, l, i) = vL_rs_vf_x(j, k, l, i) + omega(4)*poly(4) #:endif end if if (.not. teno) then - poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 0, & - & 0)*dvd(2) + poly_coef_cbR_${XYZ}$ (j, 0, 1)*dvd(1) + poly_coef_cbR_${XYZ}$ (j, & - & 0, 2)*dvd(0) - poly(2) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 1, & - & 0)*dvd(1) + poly_coef_cbR_${XYZ}$ (j, 1, 1)*dvd(0) + poly_coef_cbR_${XYZ}$ (j, & - & 1, 2)*dvd(-1) - poly(1) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 2, & - & 0)*dvd(0) + poly_coef_cbR_${XYZ}$ (j, 2, & - & 1)*dvd(-1) + poly_coef_cbR_${XYZ}$ (j, 2, 2)*dvd(-2) - poly(0) = v_rs_ws_${XYZ}$ (j, k, l, i) + poly_coef_cbR_${XYZ}$ (j, 3, & - & 0)*dvd(-1) + poly_coef_cbR_${XYZ}$ (j, 3, & - & 1)*dvd(-2) + poly_coef_cbR_${XYZ}$ (j, 3, 2)*dvd(-3) + poly(3) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 0, & + & 0)*dvd(2) + poly_coef_cbR_${XYZ}$ (${SV}$, 0, & + & 1)*dvd(1) + poly_coef_cbR_${XYZ}$ (${SV}$, 0, 2)*dvd(0) + poly(2) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 1, & + & 0)*dvd(1) + poly_coef_cbR_${XYZ}$ (${SV}$, 1, & + & 1)*dvd(0) + poly_coef_cbR_${XYZ}$ (${SV}$, 1, 2)*dvd(-1) + poly(1) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 2, & + & 0)*dvd(0) + poly_coef_cbR_${XYZ}$ (${SV}$, 2, & + & 1)*dvd(-1) + poly_coef_cbR_${XYZ}$ (${SV}$, 2, 2)*dvd(-2) + poly(0) = vp0 + poly_coef_cbR_${XYZ}$ (${SV}$, 3, & + & 0)*dvd(-1) + poly_coef_cbR_${XYZ}$ (${SV}$, 3, & + & 1)*dvd(-2) + poly_coef_cbR_${XYZ}$ (${SV}$, 3, 2)*dvd(-3) else #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 poly(0) = (-1._wp*v(-1) + 5._wp*v(0) + 2._wp*v(1))/6._wp @@ -1364,40 +1413,39 @@ contains if (wenojs) then do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do else if (mapped_weno) then do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)/(beta(q)**2._wp) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)/(beta(q)**2._wp) end do omega = alpha/sum(alpha) do q = 0, weno_num_stencils - alpha(q) = (d_cbR_${XYZ}$ (q, j)*(1._wp + d_cbR_${XYZ}$ (q, & - & j) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, & - & j)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, j)))) + alpha(q) = (d_cbR_${XYZ}$ (q, ${SV}$)*(1._wp + d_cbR_${XYZ}$ (q, & + & ${SV}$) - 3._wp*omega(q)) + omega(q)**2._wp)*(omega(q)/(d_cbR_${XYZ}$ (q, & + & ${SV}$)**2._wp + omega(q)*(1._wp - 2._wp*d_cbR_${XYZ}$ (q, ${SV}$)))) end do else if (wenoz) then $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils ! wenoz_q = 2,3,4 for stability - alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))**wenoz_q) + alpha(q) = d_cbR_${XYZ}$ (q, ${SV}$)*(1._wp + (tau/beta(q))**wenoz_q) end do else if (teno) then $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) + alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, ${SV}$) end do end if omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, & - & i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3) & - & *poly(3) + vR_rs_vf_x(j, k, l, & + & i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3)*poly(3) if (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 - vR_rs_vf_${XYZ}$ (j, k, l, i) = vR_rs_vf_${XYZ}$ (j, k, l, i) + omega(4)*poly(4) + vR_rs_vf_x(j, k, l, i) = vR_rs_vf_x(j, k, l, i) + omega(4)*poly(4) #:endif end if end do @@ -1412,84 +1460,18 @@ contains if (int_comp > 0 .and. v_size >= eqn_idx%adv%end) then call nvtxStartRange("WENO-INTCOMP") - #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] - if (weno_dir == ${WENO_DIR}$) then - call s_thinc_compression(v_rs_ws_${XYZ}$, vL_rs_vf_x, vL_rs_vf_y, vL_rs_vf_z, vR_rs_vf_x, vR_rs_vf_y, & - & vR_rs_vf_z, weno_dir, is1_weno, is2_weno, is3_weno) - end if - #:endfor + call s_thinc_compression(v_rs_weno, vL_rs_vf_x, vR_rs_vf_x, weno_dir, is1_weno, is2_weno, is3_weno) call nvtxEndRange() end if end subroutine s_weno - !> Set up the WENO reconstruction for a given direction - subroutine s_initialize_weno(v_vf, weno_dir) - - type(scalar_field), dimension(:), intent(in) :: v_vf - integer, intent(in) :: weno_dir - integer :: j, k, l, q - - ! Determine WENO-reconstructed variables and map coordinate directions - - v_size = ubound(v_vf, 1) - $:GPU_UPDATE(device='[v_size]') - - if (weno_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_x(k, l, q, j) = v_vf(j)%sf(k, l, q) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - - ! Reshaping/Projecting onto Characteristic Fields in y-direction - if (n == 0) return - - if (weno_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_y(k, l, q, j) = v_vf(j)%sf(l, k, q) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - - ! Reshaping/Projecting onto Characteristic Fields in z-direction - if (p == 0) return - - if (weno_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) - do j = 1, v_size - do q = is3_weno%beg, is3_weno%end - do l = is2_weno%beg, is2_weno%end - do k = is1_weno%beg - weno_polyn, is1_weno%end + weno_polyn - v_rs_ws_z(k, l, q, j) = v_vf(j)%sf(q, l, k) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if - - end subroutine s_initialize_weno - !> Enforce monotonicity-preserving bounds on the WENO reconstruction - subroutine s_preserve_monotonicity(v_rs_ws, vL_rs_vf, vR_rs_vf) + subroutine s_preserve_monotonicity(v_rs_ws, vL_rs_vf, vR_rs_vf, weno_dir) real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(in) :: v_rs_ws real(wp), dimension(idwbuff(1)%beg:,idwbuff(2)%beg:,idwbuff(3)%beg:,1:), intent(inout) :: vL_rs_vf, vR_rs_vf + integer, intent(in) :: weno_dir integer :: i, j, k, l real(wp), dimension(-1:1) :: d !< Curvature measures at the zone centers real(wp) :: d_MD, d_LC !< Median (md) curvature and large curvature (LC) measures @@ -1504,77 +1486,92 @@ contains real(wp), parameter :: beta = 4._wp/3._wp !< Local curvature freedom parameter real(wp), parameter :: alpha_mp = 2._wp real(wp), parameter :: beta_mp = 4._wp/3._wp + real(wp) :: vp0, vp1, vp2, vm1, vm2 + + #:for WENO_DIR, XYZ, STENCIL_VAR, COORDS, X_BND, Y_BND, Z_BND in & + [(1, 'x', 'j', '{STENCIL_IDX}, k, l', 'is1_weno', 'is2_weno', 'is3_weno'), & + (2, 'y', 'k', 'j, {STENCIL_IDX}, l', 'is2_weno', 'is1_weno', 'is3_weno'), & + (3, 'z', 'l', 'j, k, {STENCIL_IDX}', 'is3_weno', 'is2_weno', 'is1_weno')] + #:set SV = STENCIL_VAR + #:set SF = lambda offs: COORDS.format(STENCIL_IDX = SV + offs) + if (weno_dir == ${WENO_DIR}$) then + $:GPU_PARALLEL_LOOP(collapse=4,private='[d, vp0, vp1, vp2, vm1, vm2]') + do l = ${Z_BND}$%beg, ${Z_BND}$%end + do k = ${Y_BND}$%beg, ${Y_BND}$%end + do j = ${X_BND}$%beg, ${X_BND}$%end + do i = 1, v_size + ! Second-order undivided differences for curvature estimation + + vp0 = v_rs_ws(${SF('')}$, i) + vm1 = v_rs_ws(${SF(' - 1')}$, i) + vm2 = v_rs_ws(${SF(' - 2')}$, i) + vp1 = v_rs_ws(${SF(' + 1')}$, i) + vp2 = v_rs_ws(${SF(' + 2')}$, i) - $:GPU_PARALLEL_LOOP(collapse=4,private='[d]') - do l = is3_weno%beg, is3_weno%end - do k = is2_weno%beg, is2_weno%end - do j = is1_weno%beg, is1_weno%end - do i = 1, v_size - ! Second-order undivided differences for curvature estimation - d(-1) = v_rs_ws(j, k, l, i) + v_rs_ws(j - 2, k, l, i) - v_rs_ws(j - 1, k, l, i)*2._wp - d(0) = v_rs_ws(j + 1, k, l, i) + v_rs_ws(j - 1, k, l, i) - v_rs_ws(j, k, l, i)*2._wp - d(1) = v_rs_ws(j + 2, k, l, i) + v_rs_ws(j, k, l, i) - v_rs_ws(j + 1, k, l, i)*2._wp + d(-1) = vp0 + vm2 - vm1*2._wp + d(0) = vp1 + vm1 - vp0*2._wp + d(1) = vp2 + vp0 - vp1*2._wp - ! Median function for oscillation detection - d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1)))*abs((sign(1._wp, & - & 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1)))*(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, & - & d(0))))*min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + ! Median function for oscillation detection + d_MD = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1)))*abs((sign(1._wp, & + & 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1)))*(sign(1._wp, & + & 4._wp*d(-1) - d(0)) + sign(1._wp, d(0))))*min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + & abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0)))*abs((sign(1._wp, & - & 4._wp*d(0) - d(1)) + sign(1._wp, d(0)))*(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, & - & d(1))))*min(abs(4._wp*d(0) - d(1)), abs(d(0)), abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + d_LC = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0)))*abs((sign(1._wp, & + & 4._wp*d(0) - d(1)) + sign(1._wp, d(0)))*(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, & + & d(1))))*min(abs(4._wp*d(0) - d(1)), abs(d(0)), abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - vL_UL = v_rs_ws(j, k, l, i) - (v_rs_ws(j + 1, k, l, i) - v_rs_ws(j, k, l, i))*alpha_mp + vL_UL = vp0 - (vp1 - vp0)*alpha_mp - vL_MD = (v_rs_ws(j, k, l, i) + v_rs_ws(j - 1, k, l, i) - d_MD)*5.e-1_wp + vL_MD = (vp0 + vm1 - d_MD)*5.e-1_wp - vL_LC = v_rs_ws(j, k, l, i) - (v_rs_ws(j + 1, k, l, i) - v_rs_ws(j, k, l, i))*5.e-1_wp + beta_mp*d_LC + vL_LC = vp0 - (vp1 - vp0)*5.e-1_wp + beta_mp*d_LC - vL_min = max(min(v_rs_ws(j, k, l, i), v_rs_ws(j - 1, k, l, i), vL_MD), min(v_rs_ws(j, k, l, i), vL_UL, & - & vL_LC)) + vL_min = max(min(vp0, vm1, vL_MD), min(vp0, vL_UL, vL_LC)) - vL_max = min(max(v_rs_ws(j, k, l, i), v_rs_ws(j - 1, k, l, i), vL_MD), max(v_rs_ws(j, k, l, i), vL_UL, & - & vL_LC)) + vL_max = min(max(vp0, vm1, vL_MD), max(vp0, vL_UL, vL_LC)) - vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, & - & i)) + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i)))*min(abs(vL_min - vL_rs_vf(j, k, l, i)), & - & abs(vL_max - vL_rs_vf(j, k, l, i))) - ! END: Left Monotonicity Preserving Bound + vL_rs_vf(j, k, l, i) = vL_rs_vf(j, k, l, i) + (sign(5.e-1_wp, vL_min - vL_rs_vf(j, k, l, & + & i)) + sign(5.e-1_wp, vL_max - vL_rs_vf(j, k, l, i)))*min(abs(vL_min - vL_rs_vf(j, k, & + & l, i)), abs(vL_max - vL_rs_vf(j, k, l, i))) + ! END: Left Monotonicity Preserving Bound - ! Right Monotonicity Preserving Bound - d(-1) = v_rs_ws(j, k, l, i) + v_rs_ws(j - 2, k, l, i) - v_rs_ws(j - 1, k, l, i)*2._wp - d(0) = v_rs_ws(j + 1, k, l, i) + v_rs_ws(j - 1, k, l, i) - v_rs_ws(j, k, l, i)*2._wp - d(1) = v_rs_ws(j + 2, k, l, i) + v_rs_ws(j, k, l, i) - v_rs_ws(j + 1, k, l, i)*2._wp + ! Right Monotonicity Preserving Bound + d(-1) = vp0 + vm2 - vm1*2._wp + d(0) = vp1 + vm1 - vp0*2._wp + d(1) = vp2 + vp0 - vp1*2._wp - d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0)))*abs((sign(1._wp, & - & 4._wp*d(0) - d(1)) + sign(1._wp, d(0)))*(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, & - & d(1))))*min(abs(4._wp*d(0) - d(1)), abs(d(0)), abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp + d_MD = (sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, 4._wp*d(1) - d(0)))*abs((sign(1._wp, & + & 4._wp*d(0) - d(1)) + sign(1._wp, d(0)))*(sign(1._wp, 4._wp*d(0) - d(1)) + sign(1._wp, & + & d(1))))*min(abs(4._wp*d(0) - d(1)), abs(d(0)), abs(4._wp*d(1) - d(0)), abs(d(1)))/8._wp - d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1)))*abs((sign(1._wp, & - & 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1)))*(sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, & - & d(0))))*min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp + d_LC = (sign(1._wp, 4._wp*d(-1) - d(0)) + sign(1._wp, 4._wp*d(0) - d(-1)))*abs((sign(1._wp, & + & 4._wp*d(-1) - d(0)) + sign(1._wp, d(-1)))*(sign(1._wp, & + & 4._wp*d(-1) - d(0)) + sign(1._wp, d(0))))*min(abs(4._wp*d(-1) - d(0)), abs(d(-1)), & + & abs(4._wp*d(0) - d(-1)), abs(d(0)))/8._wp - vR_UL = v_rs_ws(j, k, l, i) + (v_rs_ws(j, k, l, i) - v_rs_ws(j - 1, k, l, i))*alpha_mp + vR_UL = vp0 + (vp0 - vm1)*alpha_mp - vR_MD = (v_rs_ws(j, k, l, i) + v_rs_ws(j + 1, k, l, i) - d_MD)*5.e-1_wp + vR_MD = (vp0 + vp1 - d_MD)*5.e-1_wp - vR_LC = v_rs_ws(j, k, l, i) + (v_rs_ws(j, k, l, i) - v_rs_ws(j - 1, k, l, i))*5.e-1_wp + beta_mp*d_LC + vR_LC = vp0 + (vp0 - vm1)*5.e-1_wp + beta_mp*d_LC - vR_min = max(min(v_rs_ws(j, k, l, i), v_rs_ws(j + 1, k, l, i), vR_MD), min(v_rs_ws(j, k, l, i), vR_UL, & - & vR_LC)) + vR_min = max(min(vp0, vp1, vR_MD), min(vp0, vR_UL, vR_LC)) - vR_max = min(max(v_rs_ws(j, k, l, i), v_rs_ws(j + 1, k, l, i), vR_MD), max(v_rs_ws(j, k, l, i), vR_UL, & - & vR_LC)) + vR_max = min(max(vp0, vp1, vR_MD), max(vp0, vR_UL, vR_LC)) - vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, & - & i)) + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i)))*min(abs(vR_min - vR_rs_vf(j, k, l, i)), & - & abs(vR_max - vR_rs_vf(j, k, l, i))) - ! END: Right Monotonicity Preserving Bound + vR_rs_vf(j, k, l, i) = vR_rs_vf(j, k, l, i) + (sign(5.e-1_wp, vR_min - vR_rs_vf(j, k, l, & + & i)) + sign(5.e-1_wp, vR_max - vR_rs_vf(j, k, l, i)))*min(abs(vR_min - vR_rs_vf(j, k, & + & l, i)), abs(vR_max - vR_rs_vf(j, k, l, i))) + ! END: Right Monotonicity Preserving Bound + end do + end do end do end do - end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() + end if + #:endfor end subroutine s_preserve_monotonicity @@ -1585,7 +1582,7 @@ contains ! Deallocating the WENO-stencil of the WENO-reconstructed variables - @:DEALLOCATE(v_rs_ws_x) + @:DEALLOCATE(v_rs_weno) ! Deallocating WENO coefficients in x-direction @:DEALLOCATE(poly_coef_cbL_x, poly_coef_cbR_x) @@ -1595,8 +1592,6 @@ contains ! Deallocating WENO coefficients in y-direction if (n == 0) return - @:DEALLOCATE(v_rs_ws_y) - @:DEALLOCATE(poly_coef_cbL_y, poly_coef_cbR_y) @:DEALLOCATE(d_cbL_y, d_cbR_y) @:DEALLOCATE(beta_coef_y) @@ -1604,8 +1599,6 @@ contains ! Deallocating WENO coefficients in z-direction if (p == 0) return - @:DEALLOCATE(v_rs_ws_z) - @:DEALLOCATE(poly_coef_cbL_z, poly_coef_cbR_z) @:DEALLOCATE(d_cbL_z, d_cbR_z) @:DEALLOCATE(beta_coef_z) diff --git a/toolchain/bootstrap/modules.sh b/toolchain/bootstrap/modules.sh index a4de3ee6c4..1beb016539 100644 --- a/toolchain/bootstrap/modules.sh +++ b/toolchain/bootstrap/modules.sh @@ -139,14 +139,15 @@ if [ ! -z ${CRAY_LD_LIBRARY_PATH+x} ] && [ "$u_c" '!=' 'c' ] && [ "$u_c" '!=' ' fi if [ "$u_c" '==' 'famd' ]; then - export OLCF_AFAR_ROOT="/sw/crusher/ums/compilers/afar/rocm-afar-8873-drop-22.2.0" + export OLCF_AFAR_ROOT="/sw/crusher/ums/compilers/afar/therock-23.1.0-gfx90a-7.12.0-bb5005b6" export PATH=${OLCF_AFAR_ROOT}/lib/llvm/bin:${PATH} export LD_LIBRARY_PATH=${OLCF_AFAR_ROOT}/lib:${OLCF_AFAR_ROOT}/lib/llvm/lib:${LD_LIBRARY_PATH} + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm-7.0.2/lib/llvm/lib:/opt/rocm-7.0.2/lib/ export CRAY_MPICH_INC="-I${OLCF_AFAR_ROOT}/include/mpich3.4a2" export CRAY_HIPFORT_INC="-I${OLCF_AFAR_ROOT}/include/hipfort/amdgcn" - export CRAY_HIPFORT_LIB="-L${OLCF_AFAR_ROOT}/lib -lhipfft" + export CRAY_HIPFORT_LIB="-L${OLCF_AFAR_ROOT}/lib -lhipfort-amdgcn -lhipfft -lamdhip64" export CRAY_HIP_INC="-I${OLCF_AFAR_ROOT}/include/hip" export CRAY_MPICH_LIB="-L${CRAY_MPICH_PREFIX}/lib \ ${CRAY_PMI_POST_LINK_OPTS} \ diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py index 722d27a0d7..01efb1a9b1 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -405,6 +405,7 @@ def configure(self, case: Case): f"-DCMAKE_INSTALL_PREFIX={install_dirpath}", f"-DMFC_SINGLE_PRECISION={'ON' if (ARG('single') or ARG('mixed')) else 'OFF'}", f"-DMFC_MIXED_PRECISION={'ON' if ARG('mixed') else 'OFF'}", + f"-DMFC_BUILD_JOBS={ARG('jobs')}", ] # Verbosity level 3 (-vvv): add cmake debug flags diff --git a/toolchain/modules b/toolchain/modules index d0496ed0ba..ea7cb36393 100644 --- a/toolchain/modules +++ b/toolchain/modules @@ -50,6 +50,7 @@ famd OLCF Frontier AMD famd-all python cmake famd-all cpe/25.09 famd-all PrgEnv-amd +famd-all amd/7.2.0 rocm/7.2.0 tuo OLCF Tuolumne tuo-all cpe/25.03 rocm/6.3.1