在GPU编程中，很多状态信息需要在不同的内核执行之间进行传递和保留，以便实现高效的并行计算和数据共享。IntegratorState意为积分器的状态，他作用就是可以在不同Kernel之间进行信息共享，IntegratorState不能占用太多内存，否则会影响执行效率，他的内存管理和分配由不同的硬件来决定。

IntegratorShadowStateCPU

typedef struct IntegratorShadowStateCPU {
#define KERNEL_STRUCT_BEGIN(name) struct {
#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name;
#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
#define KERNEL_STRUCT_END(name) \
  } \
  name;
#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
  } \
  name[cpu_size];
#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
#include "kernel/integrator/shadow_state_template.h"
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
#undef KERNEL_STRUCT_ARRAY_MEMBER
#undef KERNEL_STRUCT_END
#undef KERNEL_STRUCT_END_ARRAY
} IntegratorShadowStateCPU;
typedef struct IntegratorStateCPU {
#define KERNEL_STRUCT_BEGIN(name) struct {
#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name;
#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
#define KERNEL_STRUCT_END(name) \
  } \
  name;
#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
  } \
  name[cpu_size];
#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
#include "kernel/integrator/state_template.h"
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
#undef KERNEL_STRUCT_ARRAY_MEMBER
#undef KERNEL_STRUCT_END
#undef KERNEL_STRUCT_END_ARRAY
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
  IntegratorShadowStateCPU shadow;
  IntegratorShadowStateCPU ao;
} IntegratorStateCPU;

用于CPU渲染时的IntegratorState，这里不详细研究。

IntegratorStateGPU

typedef struct IntegratorStateGPU {
#define KERNEL_STRUCT_BEGIN(name) struct {
#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name;
#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
#define KERNEL_STRUCT_END(name) \
  } \
  name;
#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
  } \
  name[gpu_size];
#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
#include "kernel/integrator/state_template.h"
#include "kernel/integrator/shadow_state_template.h"
#undef KERNEL_STRUCT_BEGIN
#undef KERNEL_STRUCT_MEMBER
#undef KERNEL_STRUCT_ARRAY_MEMBER
#undef KERNEL_STRUCT_END
#undef KERNEL_STRUCT_END_ARRAY
#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
  /* Count number of queued kernels. */
  ccl_global IntegratorQueueCounter *queue_counter;
  /* Count number of kernels queued for specific shaders. */
  ccl_global int *sort_key_counter[DEVICE_KERNEL_INTEGRATOR_NUM];
  /* Index of shadow path which will be used by a next shadow path.  */
  ccl_global int *next_shadow_path_index;
  /* Index of main path which will be used by a next shadow catcher split.  */
  ccl_global int *next_main_path_index;
  /* Partition/key offsets used when writing sorted active indices. */
  ccl_global int *sort_partition_key_offsets;
  /* Divisor used to partition active indices by locality when sorting by material.  */
  uint sort_partition_divisor;
} IntegratorStateGPU;

KERNEL_STRUCT宏定义

在IntegratorState中出现了KERNEL_STRUCT_BEGIN这样的宏定义，而后又会将其undef ，它是一个辅助的宏，用来定义IntegratorState中的各种属性。

IntegratorState中这种定义数据的格式在项目中经常使用，首先定义宏：

#define KERNEL_STRUCT_BEGIN(name) struct {
#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name;
#define KERNEL_STRUCT_END(name) \
  } \
  name;

然后就可以用这个宏做结构体的定义：

KERNEL_STRUCT_BEGIN(path)
KERNEL_STRUCT_MEMBER(path, uint32_t, sample, KERNEL_FEATURE_PATH_TRACING)
KERNEL_STRUCT_END(path)

上面的代码就相当于：

struct {
    ccl_global uint32_t *sample;
} path;

这样在需要定义大量的结构体时，可以单独把他们放到另一个文件中，再引用过来。

在parent_struct, type, name, feature这几份宏定义的参数中，看起来feature是没有用到的，只起到一个标识作用。

以IntegratorStateGPU为例，它include了两个文件state_template.h和shadow_state_template.h。state_template.h中定义了的Path、Ray、Intersection、Subsurface等结构体类型，shadow_state_template.h定义了shadow_path、shadow_ray等结构体类型。

state_template.h文件的一些定义：

KERNEL_STRUCT_BEGIN(path)
/* Index of a pixel within the device render buffer where this path will write its result.
 * To get an actual offset within the buffer the value needs to be multiplied by the
 * `kernel_data.film.pass_stride`.
 *
 * The multiplication is delayed for later, so that state can use 32bit integer. */
KERNEL_STRUCT_MEMBER(path, uint32_t, render_pixel_index, KERNEL_FEATURE_PATH_TRACING)
/* Current sample number. */
KERNEL_STRUCT_MEMBER(path, uint32_t, sample, KERNEL_FEATURE_PATH_TRACING)
/* Current ray bounce depth. */
KERNEL_STRUCT_MEMBER(path, uint16_t, bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current transparent ray bounce depth. */
KERNEL_STRUCT_MEMBER(path, uint16_t, transparent_bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current diffuse ray bounce depth. */
KERNEL_STRUCT_MEMBER(path, uint16_t, diffuse_bounce, KERNEL_FEATURE_PATH_TRACING)
/* Current glossy ray bounce depth. */
KERNEL_STRUCT_MEMBER(path, uint16_t, glossy_bounce, KERNEL_FEATURE_PATH_TRACING)
...
KERNEL_STRUCT_END(path)

这里就是定义了一个path结构体，它的成员有render_pixel_index，sample，bounce等。

INTEGRATOR_STATE宏定义

#  define INTEGRATOR_STATE(state, nested_struct, member) \
    kernel_integrator_state.nested_struct.member[state]
#  define INTEGRATOR_STATE_WRITE(state, nested_struct, member) \
    INTEGRATOR_STATE(state, nested_struct, member)

#  define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
    kernel_integrator_state.nested_struct[array_index].member[state]
#  define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
    INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)

这些宏定义是用来快速地读取或写入state中的属性的，比如想读取一个state的path中的sample属性：

sample = INTEGRATOR_STATE(state, path, sample)
// 等价于 sample = kernel_integrator_state.path.sample[state]

同理，写入此属性:

INTEGRATOR_STATE_WRITE(state, path, sample) = sample
// 等价于 kernel_integrator_state.path.sample[state] = sample

如果是数组成员的话加上一个array_index参数。

需要注意的是，IntegratorStateGPU和IntegratorStateCPU关于INTEGRATOR_STATE宏的定义是不一样的，IntegratorStateCPU中是这样定义的：

#  define INTEGRATOR_STATE(state, nested_struct, member) ((state)->nested_struct.member)
#  define INTEGRATOR_STATE_WRITE(state, nested_struct, member) ((state)->nested_struct.member)

#  define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
    ((state)->nested_struct[array_index].member)
#  define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
    ((state)->nested_struct[array_index].member)

它的state是一个结构体，以上面读取path中的sample属性为例：

sample = INTEGRATOR_STATE(state, path, sample)
// 等价于 sample = ((state)->path.sample)

在GPU中，state是一个索引，从kernel_integrator_state中根据此索引取数据，在CPU中，state是一个完整的结构体，可以直接从它的内部取数据。

在CPU中，IntegratorState的定义是这样的：

typedef IntegratorStateCPU *ccl_restrict IntegratorState;
typedef const IntegratorStateCPU *ccl_restrict ConstIntegratorState;
typedef IntegratorShadowStateCPU *ccl_restrict IntegratorShadowState;
typedef const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState;

直接将IntegratorStateCPU作为IntegratorState，

在GPU中，IntegratorState的定义是这样的：

typedef int IntegratorState;
typedef int ConstIntegratorState;
typedef int IntegratorShadowState;
typedef int ConstIntegratorShadowState;

IntegratorState就是一个int值。在CUDA中，这个IntegratorState是线程的索引，每个线程计算光线追踪的一条path。

kernel_integrator_state

通过上面对于IntegratorState的分析可以知道，GPU上的IntegratorState数据是通过索引从kernel_integrator_state获取的。这里以CUDA为例，了解kernel_integrator_state是如何定义的，包含了哪些内容。

在 src\kernel\device\cuda\globals.h 文件中，可以看到如下定义：

struct KernelParamsCUDA {
  /* Global scene data and textures */
  KernelData data;
#define KERNEL_DATA_ARRAY(type, name) const type *name;
#include "kernel/data_arrays.h"

  /* Integrator state */
  IntegratorStateGPU integrator_state;
};

#ifdef __KERNEL_GPU__
__constant__ KernelParamsCUDA kernel_params;
#endif

/* Abstraction macros */
#define kernel_data kernel_params.data
#define kernel_data_fetch(name, index) kernel_params.name[(index)]
#define kernel_data_array(name) (kernel_params.name)
#define kernel_integrator_state kernel_params.integrator_state

从这里可以看到kernel_integrator_state是一个IntegratorStateGPU类型的数据，它保存在KernelParamsCUDA结构体中。kernel_params是KernelParamsCUDA结构体的实例，它在初始化完成后将数据copy到GPU。

辅助函数

在src\kernel\integrator\state_util.h文件中定义了一些辅助函数，它利用INTEGRATOR_STATE的宏定义，将IntegratorState中对一个结构体的读写操作封装到一起，比如下面就是对IntegratorState的Ray数据的读写操作：

/* Ray */

ccl_device_forceinline void integrator_state_write_ray(IntegratorState state,
                                                       ccl_private const Ray *ccl_restrict ray)
{
  INTEGRATOR_STATE_WRITE(state, ray, P) = ray->P;
  INTEGRATOR_STATE_WRITE(state, ray, D) = ray->D;
  INTEGRATOR_STATE_WRITE(state, ray, tmin) = ray->tmin;
  INTEGRATOR_STATE_WRITE(state, ray, tmax) = ray->tmax;
  INTEGRATOR_STATE_WRITE(state, ray, time) = ray->time;
  INTEGRATOR_STATE_WRITE(state, ray, dP) = ray->dP;
  INTEGRATOR_STATE_WRITE(state, ray, dD) = ray->dD;
}

ccl_device_forceinline void integrator_state_read_ray(ConstIntegratorState state,
                                                      ccl_private Ray *ccl_restrict ray)
{
  ray->P = INTEGRATOR_STATE(state, ray, P);
  ray->D = INTEGRATOR_STATE(state, ray, D);
  ray->tmin = INTEGRATOR_STATE(state, ray, tmin);
  ray->tmax = INTEGRATOR_STATE(state, ray, tmax);
  ray->time = INTEGRATOR_STATE(state, ray, time);
  ray->dP = INTEGRATOR_STATE(state, ray, dP);
  ray->dD = INTEGRATOR_STATE(state, ray, dD);
}

🪴 Quartz 4.0

Explorer

IntegratorState

IntegratorShadowStateCPU

IntegratorStateGPU

KERNEL_STRUCT宏定义

INTEGRATOR_STATE宏定义

kernel_integrator_state

辅助函数

Graph View

Table of Contents

Backlinks