trimesh-ray-optix/ray_8cpp_source.html

#include "ray.h"

#include "ATen/core/TensorBody.h"

#include "ATen/ops/where.h"

#include "CUDABuffer.h"

#include "LaunchParams.h"

#include "base.h"

#include "c10/core/Layout.h"

#include "c10/core/ScalarType.h"

#include "c10/core/TensorOptions.h"

#include "c10/util/ArrayRef.h"

#include "optix8.h"

#include "optix_host.h"

#include "optix_types.h"

#include "sbtdef.h"

#include "type.h"

#include <limits>


namespace hmesh {


/**
 * Build a compacted OptiX triangle acceleration structure for a mesh.
 *
 * The structure is first built into a scratch output buffer and then compacted
 * into `asBuffer`; on return `asHandle` is the handle produced by the
 * compaction step. All scratch buffers are released before returning.
 *
 * @param vertices vertex tensor, read as tightly packed float3 records, one per
 *                 row (assumed float32, shape (V, 3), on the CUDA device used
 *                 by `optixContext` -- TODO confirm against callers)
 * @param faces    index tensor, read as tightly packed 32-bit index triplets,
 *                 one per row (assumed int32, shape (F, 3), same device
 *                 -- TODO confirm against callers)
 */
void OptixAccelStructureWrapperCPP::buildAccelStructure(torch::Tensor vertices,
                                                        torch::Tensor faces) {
    // The build input below declares packed strides (sizeof(vec3f) /
    // sizeof(vec3i)), so the raw pointers must refer to contiguous storage.
    // contiguous() is a no-op for tensors that already are.
    vertices = vertices.contiguous();
    faces = faces.contiguous();

    OptixAccelBuildOptions buildOptions = {};
    buildOptions.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
                              OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS;
    buildOptions.operation = OPTIX_BUILD_OPERATION_BUILD;
    buildOptions.motionOptions.numKeys = 0;

    // These locals must stay alive until optixAccelBuild has consumed the
    // build input, which stores their addresses.
    CUdeviceptr pVert = reinterpret_cast<CUdeviceptr>(vertices.data_ptr());
    CUdeviceptr pFace = reinterpret_cast<CUdeviceptr>(faces.data_ptr());
    uint32_t triangleBuildFlags =
        OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;

    OptixBuildInput buildInput = {};
    buildInput.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;

    // vertex data
    buildInput.triangleArray.vertexBuffers = &pVert;
    buildInput.triangleArray.numVertices =
        static_cast<unsigned int>(vertices.size(0));
    buildInput.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
    buildInput.triangleArray.vertexStrideInBytes = sizeof(vec3f);

    // index data
    buildInput.triangleArray.indexBuffer = pFace;
    buildInput.triangleArray.numIndexTriplets =
        static_cast<unsigned int>(faces.size(0));
    buildInput.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
    buildInput.triangleArray.indexStrideInBytes = sizeof(vec3i);
    buildInput.triangleArray.preTransform = 0;

    // a single SBT record shared by every triangle
    buildInput.triangleArray.numSbtRecords = 1;
    buildInput.triangleArray.sbtIndexOffsetBuffer = 0;
    buildInput.triangleArray.sbtIndexOffsetSizeInBytes = 0;
    buildInput.triangleArray.sbtIndexOffsetStrideInBytes = 0;
    buildInput.triangleArray.flags = &triangleBuildFlags;

    // query the scratch / output sizes required for the uncompacted build
    OptixAccelBufferSizes bufferSizes = {};
    OPTIX_CHECK(optixAccelComputeMemoryUsage(optixContext, &buildOptions,
                                             &buildInput, 1, &bufferSizes));

    CUDABuffer tempBuffer;
    CUDABuffer accelStructureBuffer;
    accelStructureBuffer.alloc(bufferSizes.outputSizeInBytes);
    tempBuffer.alloc(bufferSizes.tempSizeInBytes);

    // have the build emit the compacted size into device memory
    CUDABuffer compactedSizeBuffer;
    compactedSizeBuffer.alloc(sizeof(uint64_t));
    OptixAccelEmitDesc emitDesc = {};
    emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
    emitDesc.result = compactedSizeBuffer.d_pointer();

    OPTIX_CHECK(optixAccelBuild(
        optixContext, cuStream, &buildOptions, &buildInput, 1,
        tempBuffer.d_pointer(), tempBuffer.sizeInBytes,
        accelStructureBuffer.d_pointer(), accelStructureBuffer.sizeInBytes,
        &asHandle, &emitDesc, 1));

    CUDA_SYNC_CHECK();

    // compact the freshly built structure into the long-lived buffer
    uint64_t compactedSize = 0;
    compactedSizeBuffer.download(&compactedSize, 1);
    asBuffer.resize(compactedSize);

    OPTIX_CHECK(optixAccelCompact(optixContext, cuStream, asHandle,
                                  asBuffer.d_pointer(), compactedSize,
                                  &asHandle));

    CUDA_SYNC_CHECK();

    // the uncompacted structure and the scratch space are no longer needed
    compactedSizeBuffer.free();
    tempBuffer.free();
    accelStructureBuffer.free();
}


void OptixAccelStructureWrapperCPP::freeAccelStructure() { asBuffer.free(); }


/**
 * Validate that every tensor passed in can be handed to the launch code.
 *
 * Each tensor must report `is_cuda()` and have layout `torch::kStrided`.
 * Every violation is reported on std::cerr; all tensors are inspected even
 * after the first failure.
 *
 * @param ts tensors to validate
 * @return true only when no tensor violated a requirement
 */
template <typename... Ts> inline bool tensorInputCheck(Ts... ts) {
    // Checks one tensor and reports each problem it finds.
    const auto inspect = [](const auto &tensor) {
        bool ok = true;
        if (!tensor.is_cuda()) {
            std::cerr << "error in file " << __FILE__ << " line " << __LINE__
                      << ": input tensors must reside in cuda device.\n";
            ok = false;
        }
        if (tensor.layout() != torch::kStrided) {
            std::cerr << "error in file " << __FILE__ << " line " << __LINE__
                      << ": input tensor layout must be torch::kStrided.\n";
            ok = false;
        }
        return ok;
    };

    bool allValid = true;
    // inspect() is the left operand so it runs for every tensor, in order.
    ((allValid = inspect(ts) && allValid), ...);
    return allValid;
}


/**
 * Copy a shape and drop its trailing dimension.
 *
 * @param dims source shape
 * @return `dims` without its last entry; an empty shape is returned unchanged
 *         (calling pop_back() on an empty vector would be undefined behaviour)
 */
inline std::vector<int64_t> removeLastDim(const c10::IntArrayRef dims) {
    auto shape = dims.vec();
    if (!shape.empty())
        shape.pop_back();
    return shape;
}


/**
 * Multiply all entries of a shape together.
 *
 * @param dims shape whose extents are multiplied
 * @return the product of all entries (1 for an empty shape), accumulated in
 *         size_t arithmetic
 */
inline size_t prod(const std::vector<int64_t> &dims) {
    size_t product = 1;
    for (auto it = dims.cbegin(); it != dims.cend(); ++it)
        product *= *it;
    return product;
}


/**
 * Copy a shape and overwrite its trailing dimension.
 *
 * @param dims  source shape
 * @param value new extent for the last dimension
 * @return a copy of `dims` whose last entry is `value`; an empty shape is
 *         returned unchanged because it has no last entry to overwrite
 */
inline std::vector<int64_t> changeLastDim(const c10::IntArrayRef dims,
                                       size_t value) {
    auto shape = dims.vec();
    if (!shape.empty())
        shape.back() = static_cast<int64_t>(value);
    return shape;
}


/**
 * Reinterpret a tensor's storage pointer as a pointer to `T`.
 *
 * No dtype check is performed; the caller chooses a `T` that matches the
 * tensor's element type.
 *
 * @param t tensor whose data pointer is taken
 * @return `t.data_ptr()` converted to `T *`
 */
template <typename T> inline T *data_ptr(const torch::Tensor &t) {
    void *const raw = t.data_ptr();
    return static_cast<T *>(raw);
}


/**
 * Right-align `src` inside a fixed array of MAX_SIZE_LENGTH entries.
 *
 * The trailing entries of `dst` receive the trailing entries of `src`; any
 * leading entries not covered by `src` are set to `defaultValue`. When `src`
 * is longer than MAX_SIZE_LENGTH only its last MAX_SIZE_LENGTH entries are
 * copied.
 *
 * @param dst          destination array holding MAX_SIZE_LENGTH entries
 * @param src          values to place at the end of `dst`
 * @param defaultValue filler for the leading, uncovered entries
 */
template <typename T>
void fillArray(T *dst, c10::ArrayRef<T> src, T defaultValue) {
    const int srcLength = src.size();
    // number of leading filler slots; negative when src is longer than dst
    const int padding = MAX_SIZE_LENGTH - srcLength;
    for (int slot = 0; slot < MAX_SIZE_LENGTH; ++slot) {
        if (slot < padding)
            dst[slot] = defaultValue;
        else
            dst[slot] = src[slot - padding];
    }
}


/**
 * Launch the INTERSECTS_ANY pipeline: one launch index per ray, writing a
 * boolean hit flag for each ray.
 *
 * @param as         wrapper whose `asHandle` is traversed
 * @param origins    ray origins; every dimension except the last enumerates
 *                   rays (assumed float32 with a last dimension of 3
 *                   -- TODO confirm against the device code)
 * @param directions ray directions (assumed to match `origins` in shape and
 *                   dtype -- TODO confirm)
 * @return bool CUDA tensor shaped like `origins` without its last dimension,
 *         or a default-constructed tensor when the input check fails
 */
torch::Tensor intersectsAny(OptixAccelStructureWrapperCPP as,
                            const torch::Tensor &origins,
                            const torch::Tensor &directions) {
    if (!tensorInputCheck(origins, directions))
        return {};

    // output buffer
    const auto options =
        torch::TensorOptions().dtype(torch::kBool).device(torch::kCUDA);
    const auto resultSize = removeLastDim(origins.sizes());
    const auto nray = prod(resultSize);
    auto result = torch::empty(resultSize, options);

    // nothing to trace: skip the launch instead of issuing one of size zero
    if (nray == 0)
        return result;

    // fill launch params
    LaunchParams lp = {};
    lp.rays.origins = data_ptr<float>(origins);
    lp.rays.directions = data_ptr<float>(directions);
    lp.rays.nray = nray;
    fillArray(lp.rays.rayShape, origins.sizes(),
              std::numeric_limits<int64_t>::max());
    fillArray(lp.rays.originsStride, origins.strides(), (int64_t)0);
    fillArray(lp.rays.directionsStride, directions.strides(), (int64_t)0);
    lp.traversable = as.asHandle;
    lp.results.hit = data_ptr<bool>(result);

    CUDABuffer lpBuffer;
    lpBuffer.alloc_and_upload(&lp, 1);

    // report launch failures the same way as the other OptiX calls in this file
    OPTIX_CHECK(optixLaunch(optixPipelines[SBTType::INTERSECTS_ANY], cuStream,
                            lpBuffer.d_pointer(), sizeof(lp),
                            &sbts[SBTType::INTERSECTS_ANY], nray, 1, 1));

    lpBuffer.free();
    return result;
}


/**
 * Launch the INTERSECTS_FIRST pipeline: one launch index per ray, writing one
 * int per ray through `results.triIdx`.
 *
 * @param as         wrapper whose `asHandle` is traversed
 * @param origins    ray origins; every dimension except the last enumerates
 *                   rays (assumed float32 with a last dimension of 3
 *                   -- TODO confirm against the device code)
 * @param directions ray directions (assumed to match `origins` in shape and
 *                   dtype -- TODO confirm)
 * @return int32 CUDA tensor shaped like `origins` without its last dimension
 *         (presumably the index of the first triangle hit; the value written
 *         for a miss is decided by the device program), or a
 *         default-constructed tensor when the input check fails
 */
torch::Tensor intersectsFirst(OptixAccelStructureWrapperCPP as,
                              const torch::Tensor &origins,
                              const torch::Tensor &directions) {
    if (!tensorInputCheck(origins, directions))
        return {};

    // output buffer
    const auto options =
        torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA);
    const auto resultSize = removeLastDim(origins.sizes());
    const auto nray = prod(resultSize);
    auto result = torch::empty(resultSize, options);

    // nothing to trace: skip the launch instead of issuing one of size zero
    if (nray == 0)
        return result;

    // fill launch params
    LaunchParams lp = {};
    lp.rays.origins = data_ptr<float>(origins);
    lp.rays.directions = data_ptr<float>(directions);
    lp.rays.nray = nray;
    fillArray(lp.rays.rayShape, origins.sizes(),
              std::numeric_limits<int64_t>::max());
    fillArray(lp.rays.originsStride, origins.strides(), (int64_t)0);
    fillArray(lp.rays.directionsStride, directions.strides(), (int64_t)0);
    lp.traversable = as.asHandle;
    lp.results.triIdx = data_ptr<int>(result);

    CUDABuffer lpBuffer;
    lpBuffer.alloc_and_upload(&lp, 1);

    // report launch failures the same way as the other OptiX calls in this file
    OPTIX_CHECK(optixLaunch(optixPipelines[SBTType::INTERSECTS_FIRST],
                            cuStream, lpBuffer.d_pointer(), sizeof(lp),
                            &sbts[SBTType::INTERSECTS_FIRST], nray, 1, 1));

    lpBuffer.free();
    return result;
}


/**
 * Launch the INTERSECTS_CLOSEST pipeline: one launch index per ray, filling
 * five per-ray output buffers.
 *
 * @param as         wrapper whose `asHandle` is traversed
 * @param origins    ray origins; every dimension except the last enumerates
 *                   rays (assumed float32 with a last dimension of 3
 *                   -- TODO confirm against the device code)
 * @param directions ray directions (assumed to match `origins` in shape and
 *                   dtype -- TODO confirm)
 * @return tuple (hit, front, triIdx, location, uv):
 *         - hit:      bool tensor, ray shape
 *         - front:    bool tensor, ray shape
 *         - triIdx:   int32 tensor, ray shape
 *         - location: float32 tensor, ray shape + (3)
 *         - uv:       float32 tensor, ray shape + (2)
 *         where "ray shape" is `origins` without its last dimension. A tuple
 *         of default-constructed tensors is returned when the input check
 *         fails.
 */
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor,
           torch::Tensor>
intersectsClosest(OptixAccelStructureWrapperCPP as, torch::Tensor origins,
                  torch::Tensor directions) {
    if (!tensorInputCheck(origins, directions))
        return {};

    // output buffers
    const auto boolOptions =
        torch::TensorOptions().dtype(torch::kBool).device(torch::kCUDA);
    const auto intOptions =
        torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA);
    const auto floatOptions =
        torch::TensorOptions().dtype(torch::kFloat).device(torch::kCUDA);

    const auto rayShape = removeLastDim(origins.sizes());
    const auto nray = prod(rayShape);

    // hit mask and front-face mask
    auto hitbuf = torch::empty(rayShape, boolOptions);
    auto frontbuf = torch::empty(rayShape, boolOptions);
    // triangle index buffer
    auto tibuf = torch::empty(rayShape, intOptions);
    // intersect location buffer
    auto locbuf = torch::empty(changeLastDim(origins.sizes(), 3), floatOptions);
    // uv buffer
    auto uvbuf = torch::empty(changeLastDim(origins.sizes(), 2), floatOptions);

    // nothing to trace: skip the launch instead of issuing one of size zero
    if (nray == 0)
        return {hitbuf, frontbuf, tibuf, locbuf, uvbuf};

    // fill and upload launch params
    LaunchParams lp = {};
    lp.rays.nray = nray;
    lp.rays.origins = data_ptr<float>(origins);
    lp.rays.directions = data_ptr<float>(directions);
    fillArray(lp.rays.rayShape, origins.sizes(),
              std::numeric_limits<int64_t>::max());
    fillArray(lp.rays.originsStride, origins.strides(), (int64_t)0);
    fillArray(lp.rays.directionsStride, directions.strides(), (int64_t)0);

    lp.results.hit = data_ptr<bool>(hitbuf);
    lp.results.location = data_ptr<float3>(locbuf);
    lp.results.triIdx = data_ptr<int>(tibuf);
    lp.results.uv = data_ptr<float2>(uvbuf);
    lp.results.front = data_ptr<bool>(frontbuf);

    lp.traversable = as.asHandle;

    CUDABuffer lpBuffer;
    lpBuffer.alloc_and_upload(&lp, 1);

    // Launch! Failures are reported like the other OptiX calls in this file.
    OPTIX_CHECK(optixLaunch(optixPipelines[SBTType::INTERSECTS_CLOSEST],
                            cuStream, lpBuffer.d_pointer(), sizeof(lp),
                            &sbts[SBTType::INTERSECTS_CLOSEST], nray, 1, 1));

    lpBuffer.free();
    return {hitbuf, frontbuf, tibuf, locbuf, uvbuf};
}


/**
 * Launch the INTERSECTS_COUNT pipeline: one launch index per ray, with a
 * zero-initialised int counter per ray exposed through `results.hitCount`.
 *
 * @param as         wrapper whose `asHandle` is traversed
 * @param origins    ray origins; every dimension except the last enumerates
 *                   rays (assumed float32 with a last dimension of 3
 *                   -- TODO confirm against the device code)
 * @param directions ray directions (assumed to match `origins` in shape and
 *                   dtype -- TODO confirm)
 * @return int32 CUDA tensor shaped like `origins` without its last dimension
 *         holding the per-ray counters, or a default-constructed tensor when
 *         the input check fails
 */
torch::Tensor intersectsCount(OptixAccelStructureWrapperCPP as,
                              torch::Tensor origins, torch::Tensor directions) {
    if (!tensorInputCheck(origins, directions))
        return {};

    // first pass - get the intersection count
    const auto hitCountBufSize = removeLastDim(origins.sizes());
    const auto hitCountBufOptions =
        torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA);
    // a freshly created zeros tensor is already contiguous
    auto hitCountBuf = torch::zeros(hitCountBufSize, hitCountBufOptions);
    const auto nray = prod(hitCountBufSize);

    // nothing to trace: skip the launch instead of issuing one of size zero
    if (nray == 0)
        return hitCountBuf;

    LaunchParams lp = {};
    lp.rays.nray = nray;
    lp.rays.origins = data_ptr<float>(origins);
    lp.rays.directions = data_ptr<float>(directions);
    fillArray(lp.rays.rayShape, origins.sizes(),
              std::numeric_limits<int64_t>::max());
    fillArray(lp.rays.originsStride, origins.strides(), (int64_t)0);
    fillArray(lp.rays.directionsStride, directions.strides(), (int64_t)0);
    lp.results.hitCount = data_ptr<int>(hitCountBuf);
    lp.traversable = as.asHandle;

    CUDABuffer lpBuffer;
    lpBuffer.alloc_and_upload(&lp, 1);

    // report launch failures the same way as the other OptiX calls in this file
    OPTIX_CHECK(optixLaunch(optixPipelines[SBTType::INTERSECTS_COUNT],
                            cuStream, lpBuffer.d_pointer(), sizeof(lp),
                            &sbts[SBTType::INTERSECTS_COUNT], nray, 1, 1));

    lpBuffer.free();
    return hitCountBuf;
}


/**
 * Two-pass query that collects the hits of every ray into packed arrays.
 *
 * Pass 1 (`intersectsCount`) obtains a per-ray hit count, which is clamped to
 * MAX_ANYHIT_SIZE. The exclusive prefix sum of the clamped counts gives each
 * ray the offset of its first slot in the packed outputs. Pass 2 launches the
 * INTERSECTS_LOCATION pipeline with those counts and offsets.
 *
 * @param as         wrapper whose `asHandle` is traversed
 * @param origins    ray origins; every dimension except the last enumerates
 *                   rays (assumed float32 with a last dimension of 3
 *                   -- TODO confirm against the device code)
 * @param directions ray directions (assumed to match `origins` in shape and
 *                   dtype -- TODO confirm)
 * @return tuple (location, rayIdx, triIdx) with `nhits` rows each, where
 *         `nhits` is the sum of the clamped per-ray counts: location is
 *         float32 (nhits, 3), rayIdx and triIdx are int32 (nhits). Empty
 *         tensors are returned when there are no rays; a tuple of
 *         default-constructed tensors when the input check fails.
 */
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor>
intersectsLocation(OptixAccelStructureWrapperCPP as, torch::Tensor origins,
                   torch::Tensor directions) {
    if (!tensorInputCheck(origins, directions))
        return {};

    const auto idxbufOptions =
        torch::TensorOptions().dtype(torch::kInt).device(torch::kCUDA);
    const auto locbufOptions =
        torch::TensorOptions().dtype(torch::kFloat).device(torch::kCUDA);

    const auto nray = prod(removeLastDim(origins.sizes()));
    if (nray == 0) {
        // No rays: indexing the last element of an empty prefix sum would
        // throw, and there is nothing to launch.
        auto locbuf = torch::empty({0, 3}, locbufOptions);
        auto tibuf = torch::empty({0}, idxbufOptions);
        auto ribuf = torch::empty({0}, idxbufOptions);
        return {locbuf, ribuf, tibuf};
    }

    // first pass - get the intersection count
    auto hitCountBuf = intersectsCount(as, origins, directions);

    // second pass
    hitCountBuf = hitCountBuf.flatten();
    // at most MAX_ANYHIT_SIZE hits are recorded per ray
    hitCountBuf = torch::where(hitCountBuf <= MAX_ANYHIT_SIZE, hitCountBuf,
                               MAX_ANYHIT_SIZE);

    // Inclusive prefix sum, kept in int32 so the buffer can be read through
    // an int pointer on the device (cumsum would otherwise promote to int64).
    const auto inclusiveSum = hitCountBuf.cumsum(0, torch::kInt);
    const auto nhits = inclusiveSum[-1].item<int>();

    // Exclusive prefix sum = [0, inclusiveSum[:-1]]: the output offset of each
    // ray's first hit. It must be built from the cumulative sum, not from the
    // raw counts, otherwise the output ranges of different rays overlap.
    auto globalIdxBuf = torch::cat({torch::zeros({1}, idxbufOptions),
                                    torch::slice(inclusiveSum, 0, 0, -1)});

    // packed outputs: hit location, triangle index and ray index per hit
    auto locbuf = torch::empty({nhits, 3}, locbufOptions);
    auto tibuf = torch::empty({nhits}, idxbufOptions);
    auto ribuf = torch::empty({nhits}, idxbufOptions);

    LaunchParams lp = {};
    lp.traversable = as.asHandle;
    lp.rays.nray = nray;
    lp.rays.origins = data_ptr<float>(origins);
    lp.rays.directions = data_ptr<float>(directions);
    lp.rays.hitCounts = data_ptr<int>(hitCountBuf);
    lp.rays.globalIdx = data_ptr<int>(globalIdxBuf);
    fillArray(lp.rays.rayShape, origins.sizes(),
              std::numeric_limits<int64_t>::max());
    fillArray(lp.rays.originsStride, origins.strides(), (int64_t)0);
    fillArray(lp.rays.directionsStride, directions.strides(), (int64_t)0);
    lp.results.hitCount = data_ptr<int>(hitCountBuf);
    lp.results.location = data_ptr<float3>(locbuf);
    lp.results.triIdx = data_ptr<int>(tibuf);
    lp.results.rayIdx = data_ptr<int>(ribuf);

    CUDABuffer lpBuffer;
    lpBuffer.alloc_and_upload(&lp, 1);

    // report launch failures the same way as the other OptiX calls in this file
    OPTIX_CHECK(optixLaunch(optixPipelines[SBTType::INTERSECTS_LOCATION],
                            cuStream, lpBuffer.d_pointer(), sizeof(lp),
                            &sbts[SBTType::INTERSECTS_LOCATION], nray, 1, 1));

    lpBuffer.free();
    return {locbuf, ribuf, tibuf};
}


} // namespace hmesh

CUDABuffer.h

LaunchParams.h

base.h

hmesh
Definition base.cpp:13

hmesh::sbts
OptixShaderBindingTable sbts[SBTType::count]
Definition base.cpp:22

hmesh::MAX_ANYHIT_SIZE
constexpr int MAX_ANYHIT_SIZE
Definition LaunchParams.h:8

hmesh::data_ptr
T * data_ptr(const torch::Tensor &t)
Definition ray.cpp:147

hmesh::MAX_SIZE_LENGTH
constexpr int MAX_SIZE_LENGTH
Definition LaunchParams.h:9

hmesh::intersectsLocation
std::tuple< torch::Tensor, torch::Tensor, torch::Tensor > intersectsLocation(OptixAccelStructureWrapperCPP as, torch::Tensor origins, torch::Tensor directions)
Definition ray.cpp:325

hmesh::intersectsFirst
torch::Tensor intersectsFirst(OptixAccelStructureWrapperCPP as, const torch::Tensor &origins, const torch::Tensor &dirs)
Definition ray.cpp:191

hmesh::optixPipelines
OptixPipeline optixPipelines[SBTType::count]
Definition base.cpp:23

hmesh::intersectsClosest
std::tuple< torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor > intersectsClosest(OptixAccelStructureWrapperCPP as, torch::Tensor origins, torch::Tensor directions)
For each ray, find the closest triangle hit and return the hit mask, front-face mask, triangle index, hit location and uv.
Definition ray.cpp:233

hmesh::prod
size_t prod(const std::vector< int64_t > &dims)
Definition ray.cpp:131

hmesh::intersectsCount
torch::Tensor intersectsCount(OptixAccelStructureWrapperCPP as, torch::Tensor origins, torch::Tensor directions)
Definition ray.cpp:291

hmesh::removeLastDim
std::vector< int64_t > removeLastDim(const c10::IntArrayRef dims)
Definition ray.cpp:125

hmesh::fillArray
void fillArray(T *dst, c10::ArrayRef< T > src, T defaultValue)
Definition ray.cpp:152

hmesh::optixContext
OptixDeviceContext optixContext
Definition base.cpp:17

hmesh::changeLastDim
std::vector< int64_t > changeLastDim(const c10::IntArrayRef dims, size_t value)
Definition ray.cpp:138

hmesh::intersectsAny
torch::Tensor intersectsAny(OptixAccelStructureWrapperCPP as, const torch::Tensor &origins, const torch::Tensor &dirs)
Definition ray.cpp:161

hmesh::cuStream
CUstream cuStream
Definition base.cpp:16

hmesh::vec3f
vec< float, 3 > vec3f
Definition type.h:12

hmesh::vec3i
vec< int32_t, 3 > vec3i
Definition type.h:11

hmesh::tensorInputCheck
bool tensorInputCheck(Ts... ts)
Definition ray.cpp:104

optix8.h

OPTIX_CHECK
#define OPTIX_CHECK(call)
Definition optix8.h:41

CUDA_SYNC_CHECK
#define CUDA_SYNC_CHECK()
Definition optix8.h:51

ray.h

sbtdef.h

INTERSECTS_LOCATION
@ INTERSECTS_LOCATION
Definition sbtdef.h:14

INTERSECTS_ANY
@ INTERSECTS_ANY
Definition sbtdef.h:10

INTERSECTS_FIRST
@ INTERSECTS_FIRST
Definition sbtdef.h:11

INTERSECTS_COUNT
@ INTERSECTS_COUNT
Definition sbtdef.h:13

INTERSECTS_CLOSEST
@ INTERSECTS_CLOSEST
Definition sbtdef.h:12

hmesh::CUDABuffer
Definition CUDABuffer.h:28

hmesh::CUDABuffer::sizeInBytes
size_t sizeInBytes
Definition CUDABuffer.h:76

hmesh::CUDABuffer::d_ptr
void * d_ptr
Definition CUDABuffer.h:77

hmesh::CUDABuffer::d_pointer
CUdeviceptr d_pointer() const
Definition CUDABuffer.h:29

hmesh::CUDABuffer::free
void free()
free allocated memory
Definition CUDABuffer.h:46

hmesh::CUDABuffer::resize
void resize(size_t size)
re-size buffer to given number of bytes
Definition CUDABuffer.h:32

hmesh::CUDABuffer::download
void download(T *t, size_t count)
Definition CUDABuffer.h:69

hmesh::CUDABuffer::alloc
void alloc(size_t size)
allocate to given number of bytes
Definition CUDABuffer.h:39

hmesh::CUDABuffer::alloc_and_upload
void alloc_and_upload(const std::vector< T > &vt)
Definition CUDABuffer.h:52

hmesh::LPResult::rayIdx
int * rayIdx
Definition LaunchParams.h:37

hmesh::LPResult::front
bool * front
Definition LaunchParams.h:34

hmesh::LPResult::hit
bool * hit
Definition LaunchParams.h:33

hmesh::LPResult::uv
float2 * uv
Definition LaunchParams.h:35

hmesh::LPResult::hitCount
int * hitCount
Definition LaunchParams.h:36

hmesh::LPResult::location
float3 * location
Definition LaunchParams.h:31

hmesh::LPResult::triIdx
int * triIdx
Definition LaunchParams.h:32

hmesh::LaunchParams
Definition LaunchParams.h:40

hmesh::LaunchParams::rays
RayInput rays
Definition LaunchParams.h:42

hmesh::LaunchParams::results
LPResult results
Definition LaunchParams.h:44

hmesh::LaunchParams::traversable
OptixTraversableHandle traversable
Definition LaunchParams.h:46

hmesh::OptixAccelStructureWrapperCPP
Definition ray.h:11

hmesh::OptixAccelStructureWrapperCPP::asBuffer
CUDABuffer asBuffer
Definition ray.h:13

hmesh::OptixAccelStructureWrapperCPP::buildAccelStructure
void buildAccelStructure(torch::Tensor vertices, torch::Tensor faces)
Definition ray.cpp:27

hmesh::OptixAccelStructureWrapperCPP::asHandle
OptixTraversableHandle asHandle
Definition ray.h:12

hmesh::OptixAccelStructureWrapperCPP::freeAccelStructure
void freeAccelStructure()
Definition ray.cpp:102

hmesh::RayInput::hitCounts
int * hitCounts
Definition LaunchParams.h:25

hmesh::RayInput::globalIdx
int * globalIdx
Definition LaunchParams.h:27

hmesh::RayInput::rayShape
int64_t rayShape[MAX_SIZE_LENGTH]
Definition LaunchParams.h:15

hmesh::RayInput::directions
float * directions
Definition LaunchParams.h:21

hmesh::RayInput::directionsStride
int64_t directionsStride[MAX_SIZE_LENGTH]
Definition LaunchParams.h:23

hmesh::RayInput::origins
float * origins
Definition LaunchParams.h:17

hmesh::RayInput::nray
size_t nray
Definition LaunchParams.h:13

hmesh::RayInput::originsStride
int64_t originsStride[MAX_SIZE_LENGTH]
Definition LaunchParams.h:19

type.h