diff --git a/TracyOpenCL.hpp b/TracyOpenCL.hpp index 8026d0cb..6197ddda 100644 --- a/TracyOpenCL.hpp +++ b/TracyOpenCL.hpp @@ -64,15 +64,40 @@ namespace tracy { , m_head(0) , m_tail(0) { + int64_t tcpu, tgpu; assert(m_contextId != 255); - m_hostStartTime = Profiler::GetTime(); - m_deviceStartTime = GetDeviceTimestamp(context, device); + cl_int err = CL_SUCCESS; + cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err); + assert(err == CL_SUCCESS); + uint32_t dummyValue = 42; + cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err); + assert(err == CL_SUCCESS); + cl_event writeBufferEvent; + err = clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent); + assert(err == CL_SUCCESS); + err = clWaitForEvents(1, &writeBufferEvent); + + tcpu = Profiler::GetTime(); + + assert(err == CL_SUCCESS); + cl_int eventStatus; + err = clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); + assert(err == CL_SUCCESS); + assert(eventStatus == CL_COMPLETE); + err = clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr); + assert(err == CL_SUCCESS); + err = clReleaseEvent(writeBufferEvent); + assert(err == CL_SUCCESS); + err = clReleaseMemObject(dummyBuffer); + assert(err == CL_SUCCESS); + err = clReleaseCommandQueue(queue); + assert(err == CL_SUCCESS); auto item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuNewContext); - MemWrite(&item->gpuNewContext.cpuTime, m_hostStartTime); - MemWrite(&item->gpuNewContext.gpuTime, m_hostStartTime); + MemWrite(&item->gpuNewContext.cpuTime, tcpu); + MemWrite(&item->gpuNewContext.gpuTime, tgpu); memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); MemWrite(&item->gpuNewContext.period, 1.0f); MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); @@ -117,7 +142,7 @@ namespace tracy { auto item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuTime); - MemWrite(&item->gpuTime.gpuTime, TimestampOffset(eventTimeStamp)); + MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp); MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail); MemWrite(&item->gpuTime.context, m_contextId); Profiler::QueueSerialFinish(); @@ -154,50 +179,6 @@ namespace tracy { } private: - tracy_force_inline int64_t GetHostStartTime() const - { - return m_hostStartTime; - } - - tracy_force_inline int64_t GetDeviceStartTime() const - { - return m_deviceStartTime; - } - - tracy_force_inline int64_t TimestampOffset(int64_t deviceTimestamp) const - { - return m_hostStartTime + (deviceTimestamp - m_deviceStartTime); - } - - tracy_force_inline int64_t GetDeviceTimestamp(cl_context context, cl_device_id device) const - { - cl_ulong deviceTimestamp = 0; - cl_int err = CL_SUCCESS; - cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err); - assert(err == CL_SUCCESS); - uint32_t dummyValue = 42; - cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(uint32_t), nullptr, &err); - assert(err == CL_SUCCESS); - cl_event writeBufferEvent; - err = clEnqueueWriteBuffer(queue, dummyBuffer, CL_TRUE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent); - assert(err == CL_SUCCESS); - err = clWaitForEvents(1, &writeBufferEvent); - assert(err == CL_SUCCESS); - cl_int eventStatus; - err = clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); - assert(err == CL_SUCCESS); - assert(eventStatus == CL_COMPLETE); - err = clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &deviceTimestamp, nullptr); - assert(err == CL_SUCCESS); - err = clReleaseEvent(writeBufferEvent); - assert(err == CL_SUCCESS); - err = clReleaseMemObject(dummyBuffer); - assert(err == CL_SUCCESS); - err = clReleaseCommandQueue(queue); - assert(err == CL_SUCCESS); - - return (int64_t)deviceTimestamp; - } unsigned int m_contextId; @@ -205,8 +186,6 @@ namespace tracy { unsigned int m_head; unsigned int m_tail; - int64_t m_hostStartTime; - int64_t m_deviceStartTime; }; class OpenCLCtxScope {