Skip to content

Commit

Permalink
Revert "Fail build program in shared system USM + statefull access ca…
Browse files Browse the repository at this point in the history
…se OCL"

This reverts commit 9dabc2d.

Signed-off-by: Compute-Runtime-Validation <[email protected]>
  • Loading branch information
Compute-Runtime-Validation authored and Compute-Runtime-Automation committed Nov 5, 2021
1 parent a0243eb commit faea791
Show file tree
Hide file tree
Showing 12 changed files with 8 additions and 155 deletions.
10 changes: 2 additions & 8 deletions opencl/source/program/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ cl_int Program::build(
const char *buildOptions,
bool enableCaching) {
cl_int retVal = CL_SUCCESS;

std::string internalOptions;
initInternalOptions(internalOptions);
auto defaultClDevice = deviceVector[0];
UNRECOVERABLE_IF(defaultClDevice == nullptr);
auto &defaultDevice = defaultClDevice->getDevice();
Expand Down Expand Up @@ -68,9 +69,6 @@ cl_int Program::build(
} else if (this->createdFrom != CreatedFrom::BINARY) {
options = "";
}
std::string internalOptions;
initInternalOptions(internalOptions);

extractInternalOptions(options, internalOptions);
applyAdditionalOptions(internalOptions);

Expand Down Expand Up @@ -168,10 +166,6 @@ cl_int Program::build(
phaseReached[clDevice->getRootDeviceIndex()] = BuildPhase::BinaryProcessing;
}

if (containsStatefulAccess(defaultDevice.getRootDeviceIndex()) && forceToStatelessNeeded() && !isBuiltIn) {
retVal = CL_BUILD_PROGRAM_FAILURE;
}

if (retVal != CL_SUCCESS) {
break;
}
Expand Down
33 changes: 1 addition & 32 deletions opencl/source/program/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic
kernelDebugEnabled = clDevices[0]->isDebuggerActive();
}
void Program::initInternalOptions(std::string &internalOptions) const {

auto pClDevice = clDevices[0];
auto force32BitAddressess = pClDevice->getSharedDeviceInfo().force32BitAddressess;
internalOptions = getOclVersionCompilerInternalOption(pClDevice->getEnabledClVersion());
Expand All @@ -75,7 +74,7 @@ void Program::initInternalOptions(std::string &internalOptions) const {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::arch32bit);
}

if ((isBuiltIn && is32bit) || forceToStatelessNeeded() ||
if ((isBuiltIn && is32bit) || pClDevice->areSharedSystemAllocationsAllowed() ||
DebugManager.flags.DisableStatelessToStatefulOptimization.get()) {
CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::greaterThan4gbBuffersRequired);
}
Expand Down Expand Up @@ -137,20 +136,6 @@ Program::~Program() {
}
}

bool Program::forceToStatelessNeeded() const {
auto preferStateful = false;
if (auto it = options.find(NEO::CompilerOptions::smallerThan4gbBuffersOnly.data()); it != std::string::npos) {
preferStateful = true;
}

if (DebugManager.flags.UseSmallerThan4gbBuffersOnly.get() != -1) {
preferStateful = static_cast<bool>(DebugManager.flags.UseSmallerThan4gbBuffersOnly.get());
}

auto forceStateless = !preferStateful && clDevices[0]->areSharedSystemAllocationsAllowed();
return forceStateless;
}

cl_int Program::createProgramFromBinary(
const void *pBinary,
size_t binarySize, ClDevice &clDevice) {
Expand Down Expand Up @@ -504,22 +489,6 @@ cl_int Program::packDeviceBinary(ClDevice &clDevice) {
return CL_SUCCESS;
}

bool Program::containsStatefulAccess(uint32_t rootDeviceIndex) const {
auto &buildInfo = buildInfos[rootDeviceIndex];
for (const auto &kernelInfo : buildInfo.kernelInfoArray) {
for (const auto &arg : kernelInfo->kernelDescriptor.payloadMappings.explicitArgs) {
auto isStatefulAccess = arg.is<ArgDescriptor::ArgTPointer>() &&
(isValidOffset(arg.as<ArgDescPointer>().bindless) ||
isValidOffset(arg.as<ArgDescPointer>().bindful));
if (isStatefulAccess) {
return true;
}
}
}

return false;
}

void Program::setBuildStatus(cl_build_status status) {
for (auto &deviceBuildInfo : deviceBuildInfos) {
deviceBuildInfo.second.buildStatus = status;
Expand Down
5 changes: 0 additions & 5 deletions opencl/source/program/program.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,6 @@ class Program : public BaseObject<_cl_program> {
}

protected:
bool forceToStatelessNeeded() const;

MOCKABLE_VIRTUAL bool containsStatefulAccess(uint32_t rootDeviceIndex) const;

MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice);

cl_int packDeviceBinary(ClDevice &clDevice);
Expand Down Expand Up @@ -370,7 +366,6 @@ class Program : public BaseObject<_cl_program> {

bool isBuiltIn = false;
bool kernelDebugEnabled = false;
bool containsStatefulAccesses = false;
uint32_t maxRootDeviceIndex = std::numeric_limits<uint32_t>::max();
std::mutex lockMutex;
uint32_t exposedKernels = 0;
Expand Down
1 change: 0 additions & 1 deletion opencl/test/unit_test/mocks/mock_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ class MockProgram : public Program {
using Program::debugDataSize;
using Program::deviceBuildInfos;
using Program::extractInternalOptions;
using Program::forceToStatelessNeeded;
using Program::getKernelInfo;
using Program::internalOptionsToExtract;
using Program::irBinary;
Expand Down
99 changes: 0 additions & 99 deletions opencl/test/unit_test/program/program_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1653,105 +1653,6 @@ TEST_F(ProgramTests, WhenProgramIsCreatedThenCorrectOclVersionIsInOptions) {
}
}

TEST_F(ProgramTests, whenForceToStatelessNeededIsCalledThenCorrectResultIsReturned) {
DebugManagerStateRestore restorer;

class MyMockProgram : public Program {
public:
using Program::forceToStatelessNeeded;
using Program::options;
using Program::Program;
};

MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));

{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
program.options = "";
EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed());
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(-1);
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
EXPECT_FALSE(program.forceToStatelessNeeded());
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
program.options = "";
EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed());
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
EXPECT_EQ(program.forceToStatelessNeeded(), pClDevice->areSharedSystemAllocationsAllowed());
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
program.options = "";
EXPECT_FALSE(program.forceToStatelessNeeded());
}
{
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
program.options = "-cl-opt-smaller-than-4GB-buffers-only";
EXPECT_FALSE(program.forceToStatelessNeeded());
}
}

TEST_F(ProgramTests, givenStatefulAndStatelessAccessesWhenProgramBuildIsCalledThenCorrectResultIsReturned) {
DebugManagerStateRestore restorer;

class MyMockProgram : public Program {
public:
using Program::containsStatefulAccess;
using Program::createdFrom;
using Program::irBinary;
using Program::irBinarySize;
using Program::isBuiltIn;
using Program::options;
using Program::Program;
using Program::sourceCode;

bool containsStatefulAccess(uint32_t rootDeviceIndex) const override {
return hasStatefulAccess;
}

bool hasStatefulAccess = false;
};

MyMockProgram program(pContext, false, toClDeviceVector(*pClDevice));
program.isBuiltIn = false;
program.sourceCode = "test_kernel";
program.createdFrom = Program::CreatedFrom::SOURCE;

{
EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false));
}
{
program.hasStatefulAccess = true;
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(1);
EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false));
}
{
program.hasStatefulAccess = true;
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
if (pClDevice->areSharedSystemAllocationsAllowed()) {
EXPECT_EQ(CL_BUILD_PROGRAM_FAILURE, program.build(toClDeviceVector(*pClDevice), nullptr, false));
} else {
EXPECT_EQ(CL_SUCCESS, program.build(toClDeviceVector(*pClDevice), nullptr, false));
}
}
{
MyMockProgram programWithBuiltIn(pContext, true, toClDeviceVector(*pClDevice));
programWithBuiltIn.irBinary.reset(new char[16]);
programWithBuiltIn.irBinarySize = 16;

programWithBuiltIn.isBuiltIn = true;
programWithBuiltIn.hasStatefulAccess = true;
DebugManager.flags.UseSmallerThan4gbBuffersOnly.set(0);
EXPECT_EQ(CL_SUCCESS, programWithBuiltIn.build(toClDeviceVector(*pClDevice), nullptr, false));
}
}

TEST_F(ProgramTests, GivenForcedClVersionWhenProgramIsCreatedThenCorrectOclOptionIsPresent) {
std::pair<unsigned int, std::string> testedValues[] = {
{0, "-ocl-version=120"},
Expand Down
1 change: 0 additions & 1 deletion opencl/test/unit_test/test_files/igdrcl.config
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ ForceCsrFlushing = 0
ForceCsrReprogramming = 0
OmitTimestampPacketDependencies = 0
DisableStatelessToStatefulOptimization = 0
UseSmallerThan4gbBuffersOnly = -1
DisableConcurrentBlockExecution = 0
UseNoRingFlushesKmdMode = 1
DisableZeroCopyForUseHostPtr = 0
Expand Down
1 change: 0 additions & 1 deletion opencl/test/unit_test/test_files/kernel_num_args.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,4 @@
*/

__kernel void test(__global float *argGlobal, __read_only image3d_t argImg3D, __constant float *argConst) {
argGlobal[0] = argConst[0];
}
2 changes: 1 addition & 1 deletion opencl/test/unit_test/test_files/test_constant_memory.cl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ __constant uint constant_a[2] = {0xabcd5432u, 0xaabb5533u};
__kernel void test(__global uint *in, __global uint *out) {
int i = get_global_id(0);
int j = get_global_id(0) % (sizeof(constant_a) / sizeof(constant_a[0]));
in[0] = 0;

out[i] = constant_a[j];
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
namespace NEO {
namespace CompilerOptions {
static constexpr ConstStringRef greaterThan4gbBuffersRequired = "-cl-intel-greater-than-4GB-buffer-required";
static constexpr ConstStringRef smallerThan4gbBuffersOnly = "-cl-opt-smaller-than-4GB-buffers-only";
static constexpr ConstStringRef hasBufferOffsetArg = "-cl-intel-has-buffer-offset-arg";
static constexpr ConstStringRef kernelDebugEnable = "-cl-kernel-debug-enable";
static constexpr ConstStringRef arch32bit = "-m32";
Expand Down
1 change: 0 additions & 1 deletion shared/source/debug_settings/debug_variables_base.inl
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ DECLARE_DEBUG_VARIABLE(bool, DisableStatelessToStatefulOptimization, false, "Dis
DECLARE_DEBUG_VARIABLE(bool, DisableConcurrentBlockExecution, false, "disables concurrent block kernel execution")
DECLARE_DEBUG_VARIABLE(bool, UseNoRingFlushesKmdMode, true, "Windows only, passes flag to KMD that informs KMD to not emit any ring buffer flushes.")
DECLARE_DEBUG_VARIABLE(bool, DisableZeroCopyForUseHostPtr, false, "When active all buffer allocations created with CL_MEM_USE_HOST_PTR flag will not share memory with CPU.")
DECLARE_DEBUG_VARIABLE(int32_t, UseSmallerThan4gbBuffersOnly, -1, " -1: default, 0: disabled, 1: enabled. When enabled driver will not force stateless accesses when shared system USM is active")
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied")
Expand Down
4 changes: 4 additions & 0 deletions shared/source/helpers/compiler_hw_info_config_base.inl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
#include "shared/source/helpers/compiler_hw_info_config.h"

namespace NEO {
template <PRODUCT_FAMILY gfxProduct>
bool CompilerHwInfoConfigHw<gfxProduct>::isForceToStatelessRequired() const {
return false;
}

template <PRODUCT_FAMILY gfxProduct>
bool CompilerHwInfoConfigHw<gfxProduct>::isForceEmuInt32DivRemSPRequired() const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,4 @@ bool CompilerHwInfoConfigHw<gfxProduct>::isMidThreadPreemptionSupported(const Ha
return hwInfo.featureTable.ftrGpGpuMidThreadLevelPreempt;
}

template <PRODUCT_FAMILY gfxProduct>
bool CompilerHwInfoConfigHw<gfxProduct>::isForceToStatelessRequired() const {
return false;
}

} // namespace NEO

0 comments on commit faea791

Please sign in to comment.