diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 7e6e2a3bb8278..241f29e95752a 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -107,6 +107,7 @@ class DirectSubmissionHw { GraphicsAllocation *switchRingBuffersAllocations(); virtual uint64_t updateTagValue() = 0; virtual void getTagAddressValue(TagData &tagData) = 0; + void unblockGpu(); void cpuCachelineFlush(void *ptr, size_t size); diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index de57f11e4b007..cf59f1f7affda 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -159,6 +159,19 @@ bool DirectSubmissionHw::makeResourcesResident(DirectSubm return ret; } +template +inline void DirectSubmissionHw::unblockGpu() { + if (sfenceMode >= DirectSubmissionSfenceMode::BeforeSemaphoreOnly) { + CpuIntrinsics::sfence(); + } + + semaphoreData->QueueWorkCount = currentQueueWorkCount; + + if (sfenceMode == DirectSubmissionSfenceMode::BeforeAndAfterSemaphore) { + CpuIntrinsics::sfence(); + } +} + template inline void DirectSubmissionHw::cpuCachelineFlush(void *ptr, size_t size) { if (disableCpuCacheFlush) { @@ -273,8 +286,7 @@ bool DirectSubmissionHw::stopRingBuffer() { EncodeNoop::alignToCacheLine(ringCommandStream); cpuCachelineFlush(flushPtr, getSizeEnd()); - - semaphoreData->QueueWorkCount = currentQueueWorkCount; + this->unblockGpu(); cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize); this->handleStopRingBuffer(); @@ -478,16 +490,7 @@ bool DirectSubmissionHw::dispatchCommandBuffer(BatchBuffe reserved = *ringBufferStart; } - if (sfenceMode >= DirectSubmissionSfenceMode::BeforeSemaphoreOnly) { - CpuIntrinsics::sfence(); - } - - //unblock GPU - semaphoreData->QueueWorkCount = currentQueueWorkCount; - - if (sfenceMode == DirectSubmissionSfenceMode::BeforeAndAfterSemaphore) { - CpuIntrinsics::sfence(); - } + this->unblockGpu(); cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize); currentQueueWorkCount++; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index 22f6062675392..8281426eb0299 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -879,3 +879,26 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWor EXPECT_EQ(initialCounterValue + expectedCount, CpuIntrinsicsTests::sfenceCounter); } } + +HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenStoppingRingbufferThenProgramSfenceInstruction) { + DebugManagerStateRestore restorer{}; + + using Dispatcher = BlitterDispatcher; + + FlushStampTracker flushStamp(true); + + for (int32_t debugFlag : {-1, 0, 1, 2}) { + DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(debugFlag); + + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + EXPECT_TRUE(directSubmission.initialize(true, true)); + + auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load(); + + EXPECT_TRUE(directSubmission.stopRingBuffer()); + + uint32_t expectedCount = (debugFlag == -1) ? 2 : static_cast(debugFlag); + + EXPECT_EQ(initialCounterValue + expectedCount, CpuIntrinsicsTests::sfenceCounter); + } +} \ No newline at end of file