sobota, 6 kwietnia 2019

Reverse engineering the rendering of The Witcher 3, part 13c - witcher senses (fisheye effect & final combining)

Welcome!

This is the last part of reverse engineering witcher senses effect from The Witcher 3: Wild Hunt.

A quick recap of what we have so far: in the first part, a full-screen intensity map was generated, which tells how visible the effect will be depending on distance. In the second part I investigated the "outline map" in more detail; it is responsible for the outlines and the "moving" look of the final effect.

We have arrived at the last stop. We need to combine all of this together! The last pass is a fullscreen quad. Its inputs are: the color buffer, the outline map and the intensity map.

Before:



After:


And a video (once again) to show how the effect is applied:


As you can see, besides applying outlines to objects which Geralt can see/hear, a fisheye effect is applied to the whole screen, and the whole screen (the corners especially) turns greyish, to make you feel like a real monster hunter in action.

Full pixel shader assembly:
 ps_5_0  
    dcl_globalFlags refactoringAllowed  
    dcl_constantbuffer cb0[3], immediateIndexed  
    dcl_constantbuffer cb3[7], immediateIndexed  
    dcl_sampler s0, mode_default  
    dcl_sampler s2, mode_default  
    dcl_resource_texture2d (float,float,float,float) t0  
    dcl_resource_texture2d (float,float,float,float) t2  
    dcl_resource_texture2d (float,float,float,float) t3  
    dcl_input_ps_siv v0.xy, position  
    dcl_output o0.xyzw  
    dcl_temps 7  
   0: div r0.xy, v0.xyxx, cb0[2].xyxx  
   1: mad r0.zw, r0.xxxy, l(0.000000, 0.000000, 2.000000, 2.000000), l(0.000000, 0.000000, -1.000000, -1.000000)  
   2: mov r1.yz, abs(r0.zzwz)  
   3: div r0.z, cb0[2].x, cb0[2].y  
   4: mul r1.x, r0.z, r1.y  
   5: add r0.zw, r1.xxxz, -cb3[2].xxxy  
   6: mul_sat r0.zw, r0.zzzw, l(0.000000, 0.000000, 0.555556, 0.555556)  
   7: log r0.zw, r0.zzzw  
   8: mul r0.zw, r0.zzzw, l(0.000000, 0.000000, 2.500000, 2.500000)  
   9: exp r0.zw, r0.zzzw  
  10: dp2 r0.z, r0.zwzz, r0.zwzz  
  11: sqrt r0.z, r0.z  
  12: min r0.z, r0.z, l(1.000000)  
  13: add r0.z, -r0.z, l(1.000000)  
  14: mov_sat r0.w, cb3[6].x  
  15: add_sat r1.xy, -r0.xyxx, l(0.030000, 0.030000, 0.000000, 0.000000)  
  16: add r1.x, r1.y, r1.x  
  17: add_sat r0.xy, r0.xyxx, l(-0.970000, -0.970000, 0.000000, 0.000000)  
  18: add r0.x, r0.x, r1.x  
  19: add r0.x, r0.y, r0.x  
  20: mul r0.x, r0.x, l(20.000000)  
  21: min r0.x, r0.x, l(1.000000)  
  22: add r1.xy, v0.xyxx, v0.xyxx  
  23: div r1.xy, r1.xyxx, cb0[2].xyxx  
  24: add r1.xy, r1.xyxx, l(-1.000000, -1.000000, 0.000000, 0.000000)  
  25: dp2 r0.y, r1.xyxx, r1.xyxx  
  26: mul r1.xy, r0.yyyy, r1.xyxx  
  27: mul r0.y, r0.w, l(0.100000)  
  28: mul r1.xy, r0.yyyy, r1.xyxx  
  29: max r1.xy, r1.xyxx, l(-0.400000, -0.400000, 0.000000, 0.000000)  
  30: min r1.xy, r1.xyxx, l(0.400000, 0.400000, 0.000000, 0.000000)  
  31: mul r1.xy, r1.xyxx, cb3[1].xxxx  
  32: mul r1.zw, r1.xxxy, cb0[2].zzzw  
  33: mad r1.zw, v0.xxxy, cb0[1].zzzw, -r1.zzzw  
  34: sample_indexable(texture2d)(float,float,float,float) r2.xyz, r1.zwzz, t0.xyzw, s0  
  35: mul r3.xy, r1.zwzz, l(0.500000, 0.500000, 0.000000, 0.000000)  
  36: sample_indexable(texture2d)(float,float,float,float) r0.y, r3.xyxx, t2.yxzw, s2  
  37: mad r3.xy, r1.zwzz, l(0.500000, 0.500000, 0.000000, 0.000000), l(0.500000, 0.000000, 0.000000, 0.000000)  
  38: sample_indexable(texture2d)(float,float,float,float) r2.w, r3.xyxx, t2.yzwx, s2  
  39: mul r2.w, r2.w, l(0.125000)  
  40: mul r3.x, cb0[0].x, l(0.100000)  
  41: add r0.x, -r0.x, l(1.000000)  
  42: mul r0.xy, r0.xyxx, l(0.030000, 0.125000, 0.000000, 0.000000)  
  43: mov r3.yzw, l(0, 0, 0, 0)  
  44: mov r4.x, r0.y  
  45: mov r4.y, r2.w  
  46: mov r4.z, l(0)  
  47: loop  
  48:  ige r4.w, r4.z, l(8)  
  49:  breakc_nz r4.w  
  50:  itof r4.w, r4.z  
  51:  mad r4.w, r4.w, l(0.785375), -r3.x  
  52:  sincos r5.x, r6.x, r4.w  
  53:  mov r6.y, r5.x  
  54:  mul r5.xy, r0.xxxx, r6.xyxx  
  55:  mad r5.zw, r5.xxxy, l(0.000000, 0.000000, 0.125000, 0.125000), r1.zzzw  
  56:  mul r6.xy, r5.zwzz, l(0.500000, 0.500000, 0.000000, 0.000000)  
  57:  sample_indexable(texture2d)(float,float,float,float) r4.w, r6.xyxx, t2.yzwx, s2  
  58:  mad r4.x, r4.w, l(0.125000), r4.x  
  59:  mad r5.zw, r5.zzzw, l(0.000000, 0.000000, 0.500000, 0.500000), l(0.000000, 0.000000, 0.500000, 0.000000)  
  60:  sample_indexable(texture2d)(float,float,float,float) r4.w, r5.zwzz, t2.yzwx, s2  
  61:  mad r4.y, r4.w, l(0.125000), r4.y  
  62:  mad r5.xy, r5.xyxx, r1.xyxx, r1.zwzz  
  63:  sample_indexable(texture2d)(float,float,float,float) r5.xyz, r5.xyxx, t0.xyzw, s0  
  64:  mad r3.yzw, r5.xxyz, l(0.000000, 0.125000, 0.125000, 0.125000), r3.yyzw  
  65:  iadd r4.z, r4.z, l(1)  
  66: endloop  
  67: sample_indexable(texture2d)(float,float,float,float) r0.xy, r1.zwzz, t3.xyzw, s0  
  68: mad_sat r0.xy, -r0.xyxx, l(0.800000, 0.750000, 0.000000, 0.000000), r4.xyxx  
  69: dp3 r1.x, r3.yzwy, l(0.300000, 0.300000, 0.300000, 0.000000)  
  70: add r1.yzw, -r1.xxxx, r3.yyzw  
  71: mad r1.xyz, r0.zzzz, r1.yzwy, r1.xxxx  
  72: mad r1.xyz, r1.xyzx, l(0.600000, 0.600000, 0.600000, 0.000000), -r2.xyzx  
  73: mad r1.xyz, r0.wwww, r1.xyzx, r2.xyzx  
  74: mul r0.yzw, r0.yyyy, cb3[4].xxyz  
  75: mul r2.xyz, r0.xxxx, cb3[5].xyzx  
  76: mad r0.xyz, r0.yzwy, l(1.200000, 1.200000, 1.200000, 0.000000), r2.xyzx  
  77: mov_sat r2.xyz, r0.xyzx  
  78: dp3_sat r0.x, r0.xyzx, l(1.000000, 1.000000, 1.000000, 0.000000)  
  79: add r0.yzw, -r1.xxyz, r2.xxyz  
  80: mad o0.xyz, r0.xxxx, r0.yzwy, r1.xyzx  
  81: mov o0.w, l(1.000000)  
  82: ret   


82 lines means a lot of work! Let's get into it!

Take a look at inputs first:
   // *** Inputs       
     
   // * Zoom amount, always 1  
   float zoomAmount = cb3_v1.x;  
     
   // Another value which affect fisheye effect  
   // but always set to float2(1.0, 1.0).  
   float2 amount = cb0_v2.zw;  
     
   // Elapsed time in seconds  
   float time = cb0_v0.x;  
     
   // Colors of witcher senses  
   float3 colorInteresting = cb3_v5.rgb;  
   float3 colorTraces = cb3_v4.rgb;  
     
   // Was always set to float2(0.0, 0.0).  
   // Setting this to higher values  
   // makes "grey corners" effect weaker.  
   float2 offset = cb3_v2.xy;  
     
   // Dimensions of fullscreen  
   float2 texSize = cb0_v2.xy;  
   float2 invTexSize = cb0_v1.zw;  
   
   // Main value which causes fisheye effect [0-1]  
   const float fisheyeAmount = saturate( cb3_v6.x );  

The main value responsible for amount of the effect is fisheyeAmount. I guess it rises gradually from 0.0 to 1.0 once Geralt is starting to use his senses. Rest of values are rather constant but I guess some of them are different if user disables fisheye effect in gameplay options (I haven't checked it).


The first thing which happens in the shader is calculating mask responsible for grey corners:
   0: div r0.xy, v0.xyxx, cb0[2].xyxx   
   1: mad r0.zw, r0.xxxy, l(0.000000, 0.000000, 2.000000, 2.000000), l(0.000000, 0.000000, -1.000000, -1.000000)   
   2: mov r1.yz, abs(r0.zzwz)   
   3: div r0.z, cb0[2].x, cb0[2].y   
   4: mul r1.x, r0.z, r1.y   
   5: add r0.zw, r1.xxxz, -cb3[2].xxxy   
   6: mul_sat r0.zw, r0.zzzw, l(0.000000, 0.000000, 0.555556, 0.555556)   
   7: log r0.zw, r0.zzzw   
   8: mul r0.zw, r0.zzzw, l(0.000000, 0.000000, 2.500000, 2.500000)   
   9: exp r0.zw, r0.zzzw   
  10: dp2 r0.z, r0.zwzz, r0.zwzz   
  11: sqrt r0.z, r0.z   
  12: min r0.z, r0.z, l(1.000000)   
  13: add r0.z, -r0.z, l(1.000000)   

In HLSL we can write it this way:
   // Main uv  
   float2 uv = PosH.xy / texSize;  
     
   // Scale at first from [0-1] to [-1;1], then calculate abs  
   float2 uv3 = abs( uv * 2.0 - 1.0);   
        
   // Aspect ratio  
   float aspectRatio = texSize.x / texSize.y;  
        
   // * Mask used to make corners grey  
   float mask_gray_corners;  
   {  
     float2 newUv = float2( uv3.x * aspectRatio, uv3.y ) - offset;  
     newUv = saturate( newUv / 1.8 );  
     newUv = pow(newUv, 2.5);  
       
     mask_gray_corners = 1-min(1.0, length(newUv) );  
   }  

First, the uv coordinates are remapped to the [-1; 1] range and their absolute value is taken. Then, some clever "squeezing" takes place. The final mask looks like this:

I'll come back to this mask later.


Now I'm going to intentionally omit a few lines of assembly and take a closer look at code responsible for "zooming" effect.
  22: add r1.xy, v0.xyxx, v0.xyxx   
  23: div r1.xy, r1.xyxx, cb0[2].xyxx   
  24: add r1.xy, r1.xyxx, l(-1.000000, -1.000000, 0.000000, 0.000000)   
  25: dp2 r0.y, r1.xyxx, r1.xyxx   
  26: mul r1.xy, r0.yyyy, r1.xyxx   
  27: mul r0.y, r0.w, l(0.100000)   
  28: mul r1.xy, r0.yyyy, r1.xyxx   
  29: max r1.xy, r1.xyxx, l(-0.400000, -0.400000, 0.000000, 0.000000)   
  30: min r1.xy, r1.xyxx, l(0.400000, 0.400000, 0.000000, 0.000000)   
  31: mul r1.xy, r1.xyxx, cb3[1].xxxx   
  32: mul r1.zw, r1.xxxy, cb0[2].zzzw   
  33: mad r1.zw, v0.xxxy, cb0[1].zzzw, -r1.zzzw   

At first "double" texture coordinates are calculated and float2(1, 1) is subtracted:
   float2 uv4 = 2 * PosH.xy;  
   uv4 /= cb0_v2.xy;  
   uv4 -= float2(1.0, 1.0);  

Such texcoord can be visualised as:

Then dot product is calculated as dot(uv4, uv4), which yields a mask:

which is used to multiply with aforementioned texcoords:

Important: In upper left corner (black pixels) values are negative. The reason why they are represented as black (0.0) is limited precision of R11G11B10_FLOAT format. There is no sign bit there so we cannot store negative values.

Later an attenuation factor is calculated (As I mentioned before, fisheyeAmount changes from 0.0 to 1.0).
   float attenuation = fisheyeAmount * 0.1;  
   uv4 *= attenuation;  

Later we have a clamp (max/min) and one multiplication.
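This is how the offset is calculated (it is the value called offsetUV in the loop code further below - not to be confused with the offset constant read from cb3_v2.xy). To calculate the final uv which will be used to sample the color texture, we just subtract it: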

float2 colorUV = mainUv - offset;

Sampling with colorUV input color texture, we get distorted image around corners:



Outlines

The next step is to sample outline map to find outlines. This is quite easy, at first we find texcoords to sample interesting objects' outline, then the same for traces:
   // * Sample outline map  
        
   // interesting objects (upper left square)  
   float2 outlineUV = colorUV * 0.5;  
   float outlineInteresting = texture2.Sample( sampler2, outlineUV ).x; // r0.y  
        
   // traces (upper right square)  
   outlineUV = colorUV * 0.5 + float2(0.5, 0.0);  
   float outlineTraces = texture2.Sample( sampler2, outlineUV ).x; // r2.w  
        
   outlineInteresting /= 8.0; // r4.x  
   outlineTraces /= 8.0; // r4.y  

interesting objects from outline map
traces from outline map
It's worth to notice that we only sample .x channel from outline map and only upper squares of it are considered.

Movement

To make the traces move with time, a trick quite similar to the one from the drunk effect is used. A unit circle is introduced and we sample 8 times both the outline map (for interesting objects and for traces) and the color texture.

Note that we divided found outlines by 8.0 just a moment ago.

Because we are in texture coordinate space, $[0,1]^2$, circling around a particular pixel with a radius of 1 would give us unacceptable artifacts:


So, before going further let's find out how the radius is calculated. To do that, we have to go back to missed assembly lines 15-21. A small problem with calculation of this radius is that its calculation is scattered within shader (probably clever shader compiler optimizations or so). So, there is one part (15-21) and second one (41-42):
  15: add_sat r1.xy, -r0.xyxx, l(0.030000, 0.030000, 0.000000, 0.000000)  
  16: add r1.x, r1.y, r1.x  
  17: add_sat r0.xy, r0.xyxx, l(-0.970000, -0.970000, 0.000000, 0.000000)  
  18: add r0.x, r0.x, r1.x  
  19: add r0.x, r0.y, r0.x  
  20: mul r0.x, r0.x, l(20.000000)  
  21: min r0.x, r0.x, l(1.000000)  
  ...  
  41: add r0.x, -r0.x, l(1.000000)  
  42: mul r0.xy, r0.xyxx, l(0.030000, 0.125000, 0.000000, 0.000000)  

As you can see, we consider only texels within 0.03 of each screen edge (uv in [0.00 - 0.03] or [0.97 - 1.00]), sum their values up, multiply by 20 and clamp to 1. Here is how it looks just after lines 15-21:


 And just after line 41:

Then at line 42 we multiply above by 0.03, which is circle radius for whole screen. As you can see, the radius is getting smaller near the edges of screen.


Having that, we can take a look at the assembly responsible for movement:
  40: mul r3.x, cb0[0].x, l(0.100000)  
  41: add r0.x, -r0.x, l(1.000000)  
  42: mul r0.xy, r0.xyxx, l(0.030000, 0.125000, 0.000000, 0.000000)  
  43: mov r3.yzw, l(0, 0, 0, 0)  
  44: mov r4.x, r0.y  
  45: mov r4.y, r2.w  
  46: mov r4.z, l(0)  
  47: loop  
  48:  ige r4.w, r4.z, l(8)  
  49:  breakc_nz r4.w  
  50:  itof r4.w, r4.z  
  51:  mad r4.w, r4.w, l(0.785375), -r3.x  
  52:  sincos r5.x, r6.x, r4.w  
  53:  mov r6.y, r5.x  
  54:  mul r5.xy, r0.xxxx, r6.xyxx  
  55:  mad r5.zw, r5.xxxy, l(0.000000, 0.000000, 0.125000, 0.125000), r1.zzzw  
  56:  mul r6.xy, r5.zwzz, l(0.500000, 0.500000, 0.000000, 0.000000)  
  57:  sample_indexable(texture2d)(float,float,float,float) r4.w, r6.xyxx, t2.yzwx, s2  
  58:  mad r4.x, r4.w, l(0.125000), r4.x  
  59:  mad r5.zw, r5.zzzw, l(0.000000, 0.000000, 0.500000, 0.500000), l(0.000000, 0.000000, 0.500000, 0.000000)  
  60:  sample_indexable(texture2d)(float,float,float,float) r4.w, r5.zwzz, t2.yzwx, s2  
  61:  mad r4.y, r4.w, l(0.125000), r4.y  
  62:  mad r5.xy, r5.xyxx, r1.xyxx, r1.zwzz  
  63:  sample_indexable(texture2d)(float,float,float,float) r5.xyz, r5.xyxx, t0.xyzw, s0  
  64:  mad r3.yzw, r5.xxyz, l(0.000000, 0.125000, 0.125000, 0.125000), r3.yyzw  
  65:  iadd r4.z, r4.z, l(1)  
  66: endloop  

Let's take a moment to stop here. At line 40 we have time factor - simply elapsedTime * 0.1. At line 43 we have buffer for color texture fetched inside loop.

r0.x (lines 41-42) is radius of circle as we know it now. r4.x (line 44) is outline of interesting objects, r4.y (line 45) - outline of traces (divided previously by 8!) and r4.z (line 46) - loop counter.

As one can expect, loop has 8 iterations. We start by calculating angle in radians with i * PI_4 which gives 2*PI - full cycle. Angle is perturbed with time.

Using sincos we determine point of sampling (unit circle)  and we adjust the radius using multiplication (line 54).

After that we circle around a pixel and sample outlines and color. After the loop we will have average values (thanks to dividing by 8) of outlines and color.
   float timeParam = time * 0.1;  
     
   // adjust circle radius  
   circle_radius = 1.0 - circle_radius;  
   circle_radius *= 0.03;  
        
   float3 color_circle_main = float3(0.0, 0.0, 0.0);  
        
   [loop]  
   for (int i=0; 8 > i; i++)  
   {  
      // full 2*PI = 360 angles cycle  
      const float angleRadians = (float) i * PI_4 - timeParam;  
             
      // unit circle  
      float2 unitCircle;  
      sincos(angleRadians, unitCircle.y, unitCircle.x); // unitCircle.x = cos, unitCircle.y = sin  
             
      // adjust radius  
      unitCircle *= circle_radius;  
             
      // * base texcoords (circle) - note we also scale radius here by 8  
      // * probably because of dimensions of outline map.  
      // line 55  
      float2 uv_outline_base = colorUV + unitCircle / 8.0;  
                       
      // * interesting objects (circle)  
      float2 uv_outline_interesting_circle = uv_outline_base * 0.5;  
      float outline_interesting_circle = texture2.Sample( sampler2, uv_outline_interesting_circle ).x;  
      outlineInteresting += outline_interesting_circle / 8.0;  
             
      // * traces (circle)  
      float2 uv_outline_traces_circle = uv_outline_base * 0.5 + float2(0.5, 0.0);  
      float outline_traces_circle = texture2.Sample( sampler2, uv_outline_traces_circle ).x;  
      outlineTraces += outline_traces_circle / 8.0;  
             
      // * sample color texture (zooming effect) with perturbation  
      float2 uv_color_circle = colorUV + unitCircle * offsetUV;  
      float3 color_circle = texture0.Sample( sampler0, uv_color_circle ).rgb;  
      color_circle_main += color_circle / 8.0;  
   }  
        

Sampling of color is quite similar, but to base colorUV we add offset multiplied by "unit" circle.

Intensities

After the loop we sample intensity map and adjust final intensities (because intensity map has no idea about outlines):
  67: sample_indexable(texture2d)(float,float,float,float) r0.xy, r1.zwzz, t3.xyzw, s0  
  68: mad_sat r0.xy, -r0.xyxx, l(0.800000, 0.750000, 0.000000, 0.000000), r4.xyxx  

HLSL:
   // * Sample intensity map  
   float2 intensityMap = texture3.Sample( sampler0, colorUV ).xy;  
     
   float intensityInteresting = intensityMap.r;  
   float intensityTraces = intensityMap.g;  
        
   // * Adjust outlines  
   float mainOutlineInteresting = saturate( outlineInteresting - 0.8*intensityInteresting );  
   float mainOutlineTraces = saturate( outlineTraces - 0.75*intensityTraces ); 

Gray corners and final combining

The gray color near corners is calculated using dot product (assembly line 69):
   // * Greyish color  
   float3 color_greyish = dot( color_circle_main, float3(0.3, 0.3, 0.3) ).xxx;  



Then we have two interpolations. The first one combines the gray color with the "circled" one using the first mask I described - so the corners are gray. Additionally, there is a 0.6 factor which darkens the final image:

The second one combines the first color with the above one using fisheyeAmount. That means, the screen is getting progressively darker (thanks to 0.6 multiplication above) and more gray around corner! Genius.

HLSL:
   // * Determine main color.  
   // (1) At first, combine "circled" color with gray one.  
   // Now we have have greyish corners here.  
   float3 mainColor = lerp( color_greyish, color_circle_main, mask_gray_corners ) * 0.6;  
     
   // (2) Then mix "regular" color with the above.  
   // Please note this operation makes corners gradually gray (because fisheyeAmount rises from 0 to 1)
   // and gradually darker (because of 0.6 multiplier).  
   mainColor = lerp( color, mainColor, fisheyeAmount );  


Now we can move to outlining objects.
Colors (red and yellow) are taken from constant buffer.
   // * Determine color of witcher senses  
   float3 senses_traces = mainOutlineTraces * colorTraces;  
   float3 senses_interesting = mainOutlineInteresting * colorInteresting;  
   float3 senses_total = 1.2 * senses_traces + senses_interesting;   



Phew! We are almost at the finish line!
We have the final color, we have the color of witcher senses... all we have to do is combine them somehow!

This is not just simple adding. At first, we calculate dot product:
  78: dp3_sat r0.x, r0.xyzx, l(1.000000, 1.000000, 1.000000, 0.000000)  
   
  float dot_senses_total = saturate( dot(senses_total, float3(1.0, 1.0, 1.0) ) );  

which looks like this:

And this is, at the very end, used to interpolate between color and (saturated) witcher senses:
  76: mad r0.xyz, r0.yzwy, l(1.200000, 1.200000, 1.200000, 0.000000), r2.xyzx  
  77: mov_sat r2.xyz, r0.xyzx  
  78: dp3_sat r0.x, r0.xyzx, l(1.000000, 1.000000, 1.000000, 0.000000)  
  79: add r0.yzw, -r1.xxyz, r2.xxyz  
  80: mad o0.xyz, r0.xxxx, r0.yzwy, r1.xyzx  
  81: mov o0.w, l(1.000000)  
  82: ret  
   
   float3 senses_total = 1.2 * senses_traces + senses_interesting;   
     
   // * Final combining  
   float3 senses_total_sat = saturate(senses_total);  
   float dot_senses_total = saturate( dot(senses_total, float3(1.0, 1.0, 1.0) ) );  
        
   float3 finalColor = lerp( mainColor, senses_total_sat, dot_senses_total );  
   return float4( finalColor, 1.0 );  



This is the end.


The full shader is available here.
Comparison of my (left) and original (right) shaders:


If you have come this far, congratulations. Feel free to comment.
I hope you enjoyed this mini-series! In "witcher senses" mechanics there is a lot of brilliant ideas and final result is really convincing.

Thank you very much for reading!


PS. A decent part of this mini-series was done with High Contrast in background :)

Brak komentarzy:

Prześlij komentarz