From 4a648aee0db862db10356c6ad71de69f4b0d6434 Mon Sep 17 00:00:00 2001
From: Nicolas Vergnes <nicolas.vergnes@hotmail.com>
Date: Fri, 10 May 2019 02:10:39 +0200
Subject: [PATCH] Test 2 GPU.

---
 sky-models/Makefile                           |  2 +-
 .../atmosphere/model/nishita/nishita96.cc     | 45 +++++++--------
 .../atmosphere/model/nishita/nishita96.h      |  6 ++
 .../atmosphere/model/nishita/nishita96_2.cu   | 57 +++++--------------
 .../atmosphere/model/nishita/nishita96_2.h    |  2 +-
 sky-models/sky-models.creator.user            |  2 +-
 6 files changed, 44 insertions(+), 70 deletions(-)

diff --git a/sky-models/Makefile b/sky-models/Makefile
index 62ca783..0adcc6a 100644
--- a/sky-models/Makefile
+++ b/sky-models/Makefile
@@ -40,7 +40,7 @@ OLD_DEBUG_FLAGS = -g
 DEBUG_FLAGS = --compiler-options="$(OLD_DEBUG_FLAGS)" -g
 
 OLD_RELEASE_FLAGS = -DNDEBUG -O3 -fexpensive-optimizations
-RELEASE_FLAGS = --compiler-options="$(OLD_RELEASE_FLAGS)"
+RELEASE_FLAGS = --compiler-options="$(OLD_RELEASE_FLAGS)" -O3
 
 DIRS := atmosphere physics parametrage util
 
diff --git a/sky-models/atmosphere/model/nishita/nishita96.cc b/sky-models/atmosphere/model/nishita/nishita96.cc
index f1c59b8..b661c82 100644
--- a/sky-models/atmosphere/model/nishita/nishita96.cc
+++ b/sky-models/atmosphere/model/nishita/nishita96.cc
@@ -35,8 +35,7 @@
 
 #include "util/progress_bar.h"
 
-#include <cuda_runtime.h>
-#include "nishita96_2.h"
+
 
 namespace {
 
@@ -49,6 +48,9 @@ constexpr Length kHorizonDist =
 
 Nishita96::Nishita96(ScatteringType scattering_type)
     : Nishita93(), scattering_type_(scattering_type) {
+    cudaMallocManaged(&_rayleigh_phase_integral, 8*sizeof(float));
+    cudaMallocManaged(&_mie_phase_integral, 8*sizeof(float));
+    cudaMallocManaged(&_sample_directions_, 8*sizeof(monvec3));
 }
 
 
@@ -186,34 +188,26 @@ RadianceSpectrum Nishita96::GetSkyRadiance(Length altitude, Angle sun_zenith,
 
    // ------------ DEBUT GPU VERSION ----------- //
 
+//  std::cout << sun_zenith.to(rad) << " " << view_zenith.to(rad) << " " << view_sun_azimuth.to(rad) << std::endl;
+//  for(int i=0; i<8; i++)
+//  {
+//      std::cout << sample_directions_[i].x() << " " << sample_directions_[i].y() << " " << sample_directions_[i].z() << " ";
+//  }
+//  std::cout << std::endl;
+
   auto t1 = std::chrono::high_resolution_clock::now();
 
-  float *_rayleigh_phase_integral;
-  float *_mie_phase_integral;
 
-  cudaMallocManaged(&_rayleigh_phase_integral, 8*sizeof(float));
-  cudaMallocManaged(&_mie_phase_integral, 8*sizeof(float));
 
   for (int i = 0; i < 8; ++i) {
     _rayleigh_phase_integral[i] = 0.0;
     _mie_phase_integral[i] = 0.0;
   }
 
-  monvec3 *_sample_directions_;
+    monvec3 _view_dir;
 
-  cudaMallocManaged(&_sample_directions_, 8*sizeof(monvec3));
-
-  for(int i=0; i<8; i++)
-  {
-      _sample_directions_[i] = {sample_directions_[i].x(), sample_directions_[i].y(), sample_directions_[i].z()};
-  }
-
-        monvec3 *_view_dir;
-
-        cudaMallocManaged(&_view_dir, sizeof(monvec3));
-
-       *_view_dir = {cos(view_sun_azimuth.to(rad)) * sin(view_zenith.to(rad)),
-            sin(view_sun_azimuth.to(rad)) * sin(view_zenith.to(rad)), cos(view_zenith.to(rad))};
+   _view_dir = {cos(view_sun_azimuth.to(rad)) * sin(view_zenith.to(rad)),
+        sin(view_sun_azimuth.to(rad)) * sin(view_zenith.to(rad)), cos(view_zenith.to(rad))};
 
     compute(_rayleigh_phase_integral, _mie_phase_integral, _sample_directions_, _view_dir);
 
@@ -223,13 +217,14 @@ RadianceSpectrum Nishita96::GetSkyRadiance(Length altitude, Angle sun_zenith,
         mie_phase_integral[i] = _mie_phase_integral[i];
     }
 
-    cudaFree(_rayleigh_phase_integral);
-    cudaFree(_mie_phase_integral);
-    cudaFree(_sample_directions_);
+//    cudaFree(_rayleigh_phase_integral);
+//    cudaFree(_mie_phase_integral);
+//    cudaFree(_sample_directions_);
+//    cudaFree(_view_dir);
 
   auto t2 = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double, std::milli> fp_ms = t2 - t1;
-          //std::cout << fp_ms.count() << " ";
+    //std::cout << fp_ms.count() << " ";
 
      // ------------ FIN GPU VERSION ----------- //
 
@@ -465,6 +460,8 @@ void Nishita96::MaybePrecomputeSingleScatteringTables(Angle sun_zenith) const {
         break;
     }
     sample_directions_[i] = Direction(sin(view_zenith), 0.0, cos(view_zenith));
+    _sample_directions_[i] = {sample_directions_[i].x(), sample_directions_[i].y(), sample_directions_[i].z()};
+
     sample_single_scattering_[i].Init(sun_zenith, view_zenith);
 
     std::stringstream filename;
diff --git a/sky-models/atmosphere/model/nishita/nishita96.h b/sky-models/atmosphere/model/nishita/nishita96.h
index b582539..7a13a83 100644
--- a/sky-models/atmosphere/model/nishita/nishita96.h
+++ b/sky-models/atmosphere/model/nishita/nishita96.h
@@ -36,6 +36,8 @@
 #include "math/ternary_function.h"
 #include "math/vector.h"
 #include "physics/units.h"
+#include <cuda_runtime.h>
+#include "nishita96_2.h"
 
 class Nishita96 : public Nishita93 {
  public:
@@ -104,6 +106,10 @@ class Nishita96 : public Nishita93 {
   mutable Angle current_sun_zenith_;
   mutable dimensional::Vector3<Number> sample_directions_[8];
   mutable SingleScatteringTable sample_single_scattering_[8];
+
+  float *_rayleigh_phase_integral;
+  float *_mie_phase_integral;
+  monvec3 *_sample_directions_;
 };
 
 #endif  // ATMOSPHERE_MODEL_NISHITA_NISHITA96_H_
diff --git a/sky-models/atmosphere/model/nishita/nishita96_2.cu b/sky-models/atmosphere/model/nishita/nishita96_2.cu
index c7fc654..e3183ec 100644
--- a/sky-models/atmosphere/model/nishita/nishita96_2.cu
+++ b/sky-models/atmosphere/model/nishita/nishita96_2.cu
@@ -24,72 +24,43 @@ return kMie * (1.0 + scattering_angle_cosine * scattering_angle_cosine) /
   pow(1.0 + g * g - 2.0 * g * scattering_angle_cosine, 1.5);
 }
 
-__global__ void ComputePhase(float* _rayleigh_phase_integral, float* _mie_phase_integral, monvec3 *_sample_directions_, monvec3* _view_dir)
+__global__ void ComputePhase(float* _rayleigh_phase_integral, float* _mie_phase_integral, monvec3 *_sample_directions_, monvec3 _view_dir)
 {
     double _dphi = 3.14159265358979323846 / 32;
 
-    int idx_i = blockIdx.x;
-    int idx_j = threadIdx.x;
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
-    __shared__ double _theta;
-    __shared__ double _cos_theta;
+    int idx_i = (int)idx/32;
+    int idx_j = idx - 32 * idx_i;
 
-    __shared__ float _temp_rayleigh_phase_integral[8];
-    __shared__ float _temp_mie_phase_integral[8];
 
-    __shared__ monvec3 samples[8];
+    double  _theta = (idx_i + 0.5) * _dphi;
+    double _cos_theta = cos(_theta);
 
-    if(idx_j == 0)
-    {
-        _theta = (idx_i + 0.5) * _dphi;
-        _cos_theta = cos(_theta);
-    }
-
-    if(idx_j < 8)
-    {
-        _temp_rayleigh_phase_integral[idx_j] = 0.0;
-        _temp_mie_phase_integral[idx_j] = 0.0;
-    }
-
-    if(idx_j > 7 && idx_j < 16)
-    {
-        samples[idx_j-8] = _sample_directions_[idx_j-8];
-    }
-
-
-    __syncthreads();
 
     double _phi = (idx_j + 0.5) * _dphi;
     double _dw = sin(_theta) * _dphi * _dphi;
     monvec3 _w = {cos(_phi) * sin(_theta), sin(_phi) * sin(_theta), _cos_theta};
-    float _drayleigh = (float) _RayleighPhaseFunction(monvec3_dot(*_view_dir, _w)) * _dw;
-    float _dmie = (float) _MiePhaseFunction(monvec3_dot(*_view_dir, _w)) * _dw;
+    float _drayleigh = (float) _RayleighPhaseFunction(monvec3_dot(_view_dir, _w)) * _dw;
+    float _dmie = (float) _MiePhaseFunction(monvec3_dot(_view_dir, _w)) * _dw;
     int _nearest_index = 0;
-    double _max_dot_product = monvec3_dot(_w, samples[0]);
+    double _max_dot_product = monvec3_dot(_w, _sample_directions_[0]);
     for (int k = 1; k < 8; ++k) {
-      double _dot_product = monvec3_dot(_w, samples[k]);
+      double _dot_product = monvec3_dot(_w, _sample_directions_[k]);
       if (_dot_product > _max_dot_product) {
         _nearest_index = k;
         _max_dot_product = _dot_product;
       }
     }
 
-    atomicAdd(_temp_rayleigh_phase_integral+_nearest_index,_drayleigh);
-    atomicAdd(_temp_mie_phase_integral+_nearest_index, _dmie);
-
-    __syncthreads();
-
-    if(idx_j < 8)
-    {
-        atomicAdd(_rayleigh_phase_integral+idx_j, _temp_rayleigh_phase_integral[idx_j]);
-        atomicAdd(_mie_phase_integral+idx_j, _temp_mie_phase_integral[idx_j]);
-    }
 
+   atomicAdd(_rayleigh_phase_integral+_nearest_index, _drayleigh);
+   atomicAdd(_mie_phase_integral+_nearest_index, _dmie);
 }
 
 
-void compute(float *ray, float *mie, monvec3 *sample, monvec3 *view_dir)
+void compute(float *ray, float *mie, monvec3 *sample, monvec3 view_dir)
 {
-    ComputePhase<<<32, 64>>>(ray, mie, sample, view_dir);
+    ComputePhase<<<1, 32*64>>>(ray, mie, sample, view_dir);
     cudaDeviceSynchronize();
 }
diff --git a/sky-models/atmosphere/model/nishita/nishita96_2.h b/sky-models/atmosphere/model/nishita/nishita96_2.h
index 7dc6ed5..7a7d431 100644
--- a/sky-models/atmosphere/model/nishita/nishita96_2.h
+++ b/sky-models/atmosphere/model/nishita/nishita96_2.h
@@ -7,6 +7,6 @@ typedef struct{
     double z;
 } monvec3;
 
-void compute(float*, float*, monvec3*, monvec3*);
+void compute(float*, float*, monvec3*, monvec3);
 
 #endif // NISHITA96_2_H
diff --git a/sky-models/sky-models.creator.user b/sky-models/sky-models.creator.user
index 013a6f6..35675f7 100644
--- a/sky-models/sky-models.creator.user
+++ b/sky-models/sky-models.creator.user
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE QtCreatorProject>
-<!-- Written by QtCreator 4.6.1, 2019-05-09T22:06:03. -->
+<!-- Written by QtCreator 4.6.1, 2019-05-09T23:58:26. -->
 <qtcreator>
  <data>
   <variable>EnvironmentId</variable>
-- 
GitLab