Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,8 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
mProcessingSettings->clusterizerZSSanityCheck = mProcessingSettings->mergerSanityCheck = mProcessingSettings->outputSanityCheck = true;
}

static_cast<GPUSettingsProcessingScaling&>(*mMemoryScalers) = GetProcessingSettings().scaling;
mMemoryScalers->scalingFactor = GetProcessingSettings().memoryScalingFactor;
mMemoryScalers->conservative = GetProcessingSettings().conservativeMemoryEstimate;
mMemoryScalers->returnMaxVal = GetProcessingSettings().forceMaxMemScalers != 0;
if (GetProcessingSettings().forceMaxMemScalers > 1) {
mMemoryScalers->rescaleMaxMem(GetProcessingSettings().forceMaxMemScalers);
Expand Down
19 changes: 18 additions & 1 deletion GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory)
availableMemory = newAvailableMemory;
}

double GPUMemorySizeScalers::getScalingFactor()
float GPUMemorySizeScalers::getScalingFactor()
{
if (!doFuzzing) {
return scalingFactor;
Expand All @@ -62,3 +62,20 @@ void GPUMemorySizeScalers::fuzzScalingFactor(uint64_t seed)
fuzzSeed = seed;
doFuzzing = true;
}

// Turns a raw size estimate into the final buffer size.
//   maxVal: upper limit for the buffer
//   val:    raw estimate, before the global offset and the scaling factors are applied
// Returns maxVal unchanged when returnMaxVal is set. Otherwise returns
// min(maxVal, offset + val) multiplied by the scaling factor (the fuzzed one from
// getScalingFactor() when doFuzzing is set) and by temporaryFactor.
size_t GPUMemorySizeScalers::getValue(size_t maxVal, size_t val)
{
  // Keep the forced-maximum path integer-only. Inside one conditional expression the
  // floating-point branch would promote maxVal to float as well, and float cannot
  // represent every integer above 2^24, so large limits would come back rounded.
  if (returnMaxVal) {
    return maxVal;
  }
  // Do the arithmetic in double so that sizes above 2^24 are not rounded to float precision.
  const double factor = doFuzzing ? getScalingFactor() : scalingFactor;
  const size_t clamped = std::min<size_t>(maxVal, static_cast<double>(offset) + val);
  return static_cast<size_t>(clamped * factor * temporaryFactor);
}

// Size for TPC peaks: hitOffset + tpcDigits * tpcPeaksPerDigit passed through getValue().
// The limit is tpcMaxPeaks when perSector is set, GPUCA_NSECTORS * tpcMaxPeaks otherwise.
size_t GPUMemorySizeScalers::NTPCPeaks(size_t tpcDigits, bool perSector)
{
  const size_t peakLimit = perSector ? tpcMaxPeaks : (GPUCA_NSECTORS * tpcMaxPeaks);
  return getValue(peakLimit, hitOffset + tpcDigits * tpcPeaksPerDigit);
}
size_t GPUMemorySizeScalers::NTPCClusters(size_t tpcDigits, bool perSector) { return getValue(perSector ? tpcMaxSectorClusters : tpcMaxClusters, (conservativeMemoryEstimate ? 1.0 : tpcClustersPerPeak) * NTPCPeaks(tpcDigits, perSector)); }
// Size for TPC start hits: tpcHits * tpcStartHitsPerHit passed through getValue(),
// limited by tpcMaxStartHits.
size_t GPUMemorySizeScalers::NTPCStartHits(size_t tpcHits)
{
  return getValue(tpcMaxStartHits, tpcHits * tpcStartHitsPerHit);
}
// Size for per-row TPC start hits: NTPCStartHits() weighted by 20 (fewer than 30000000 hits)
// or 12, divided by GPUCA_ROW_COUNT, but at least tpcMinRowStartHits. The result is passed
// through getValue() with tpcMaxRowStartHits as limit.
size_t GPUMemorySizeScalers::NTPCRowStartHits(size_t tpcHits)
{
  const int rowWeight = (tpcHits < 30000000) ? 20 : 12;
  return getValue(tpcMaxRowStartHits,
                  std::max<size_t>(NTPCStartHits(tpcHits) * rowWeight / GPUCA_ROW_COUNT, tpcMinRowStartHits));
}
// Size for TPC tracklets: NTPCStartHits() times the tracklets-per-start-hit ratio
// (the low-field variant when lowField is set), limited by tpcMaxTracklets.
size_t GPUMemorySizeScalers::NTPCTracklets(size_t tpcHits, bool lowField)
{
  if (lowField) {
    return getValue(tpcMaxTracklets, NTPCStartHits(tpcHits) * tpcTrackletsPerStartHitLowField);
  }
  return getValue(tpcMaxTracklets, NTPCStartHits(tpcHits) * tpcTrackletsPerStartHit);
}
// Size for TPC tracklet hits: hitOffset + tpcHits times the tracklet-hits-per-hit ratio
// (the low-field variant when lowField is set), limited by tpcMaxTrackletHits.
size_t GPUMemorySizeScalers::NTPCTrackletHits(size_t tpcHits, bool lowField)
{
  if (lowField) {
    return getValue(tpcMaxTrackletHits, hitOffset + tpcHits * tpcTrackletHitsPerHitLowField);
  }
  return getValue(tpcMaxTrackletHits, hitOffset + tpcHits * tpcTrackletHitsPerHit);
}
// Size for TPC sector tracks: tpcHits * tpcSectorTracksPerHit passed through getValue(),
// limited by tpcMaxSectorTracks.
size_t GPUMemorySizeScalers::NTPCSectorTracks(size_t tpcHits)
{
  return getValue(tpcMaxSectorTracks, tpcHits * tpcSectorTracksPerHit);
}
// Size for TPC sector track hits: tpcHits times tpcSectorTrackHitsPerHitWithRejection when
// withRejection is non-zero, tpcSectorTrackHitsPerHit otherwise; limited by tpcMaxSectorTrackHits.
size_t GPUMemorySizeScalers::NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection)
{
  if (withRejection) {
    return getValue(tpcMaxSectorTrackHits, tpcHits * tpcSectorTrackHitsPerHitWithRejection);
  }
  return getValue(tpcMaxSectorTrackHits, tpcHits * tpcSectorTrackHitsPerHit);
}
size_t GPUMemorySizeScalers::NTPCMergedTracks(size_t tpcSectorTracks) { return getValue(tpcMaxMergedTracks, tpcSectorTracks * (conservativeMemoryEstimate ? 1.0 : tpcMergedTrackPerSectorTrack)); }
// Size for merged TPC track hits: tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit
// passed through getValue(), limited by tpcMaxMergedTrackHits.
size_t GPUMemorySizeScalers::NTPCMergedTrackHits(size_t tpcSectorTrackHitss)
{
  return getValue(tpcMaxMergedTrackHits, tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit);
}
// Returns 1024 when returnMaxVal or conservativeMemoryEstimate is set. Otherwise returns
// tpcCompressedUnattachedHitsBase1024[type] times the scaling factor (fuzzed when doFuzzing
// is set) and temporaryFactor, capped at 1024. No range check is performed on type here.
size_t GPUMemorySizeScalers::NTPCUnattachedHitsBase1024(int32_t type)
{
  if (returnMaxVal || conservativeMemoryEstimate) {
    return 1024;
  }
  const float factor = doFuzzing ? getScalingFactor() : scalingFactor;
  return std::min<size_t>(1024, tpcCompressedUnattachedHitsBase1024[type] * factor * temporaryFactor);
}
57 changes: 17 additions & 40 deletions GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,42 +16,23 @@
#define O2_GPU_GPUMEMORYSIZESCALERS_H

#include "GPUDef.h"
#include "GPUSettings.h"

namespace o2::gpu
{

struct GPUMemorySizeScalers {
struct GPUMemorySizeScalers : public GPUSettingsProcessingScaling {
// Input sizes
size_t nTPCdigits = 0;
size_t nTPCHits = 0;
size_t nTRDTracklets = 0;
size_t nITSTracks = 0;

// General scaling factor
double scalingFactor = 1;
float scalingFactor = 1;
uint64_t fuzzSeed = 0;
uint64_t fuzzLimit = 0;
double temporaryFactor = 1;
bool conservative = 0;

// Offset
double offset = 1000.;
double hitOffset = 20000;

// Scaling Factors
double tpcPeaksPerDigit = 0.2;
double tpcClustersPerPeak = 0.9;
double tpcStartHitsPerHit = 0.08;
double tpcTrackletsPerStartHit = 0.8;
double tpcTrackletsPerStartHitLowField = 0.85;
double tpcTrackletHitsPerHit = 5;
double tpcTrackletHitsPerHitLowField = 7;
double tpcSectorTracksPerHit = 0.02;
double tpcSectorTrackHitsPerHit = 0.8;
double tpcSectorTrackHitsPerHitWithRejection = 1.0;
double tpcMergedTrackPerSectorTrack = 1.0;
double tpcMergedTrackHitPerSectorHit = 1.1;
size_t tpcCompressedUnattachedHitsBase1024[3] = {900, 900, 500}; // No ratio, but integer fraction of 1024 for exact computation
float temporaryFactor = 1;

// Upper limits
size_t tpcMaxPeaks = 20000000;
Expand All @@ -71,24 +52,20 @@ struct GPUMemorySizeScalers {
bool doFuzzing = false;

void rescaleMaxMem(size_t newAvailableMemory);
double getScalingFactor();
float getScalingFactor();
void fuzzScalingFactor(uint64_t seed);
inline size_t getValue(size_t maxVal, size_t val)
{
return returnMaxVal ? maxVal : (std::min<size_t>(maxVal, offset + val) * (doFuzzing == 0 ? scalingFactor : getScalingFactor()) * temporaryFactor);
}

inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxPeaks : (GPUCA_NSECTORS * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); }
inline size_t NTPCClusters(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxSectorClusters : tpcMaxClusters, (conservative ? 1.0 : tpcClustersPerPeak) * NTPCPeaks(tpcDigits, perSector)); }
inline size_t NTPCStartHits(size_t tpcHits) { return getValue(tpcMaxStartHits, tpcHits * tpcStartHitsPerHit); }
inline size_t NTPCRowStartHits(size_t tpcHits) { return getValue(tpcMaxRowStartHits, std::max<size_t>(NTPCStartHits(tpcHits) * (tpcHits < 30000000 ? 20 : 12) / GPUCA_ROW_COUNT, tpcMinRowStartHits)); }
inline size_t NTPCTracklets(size_t tpcHits, bool lowField) { return getValue(tpcMaxTracklets, NTPCStartHits(tpcHits) * (lowField ? tpcTrackletsPerStartHitLowField : tpcTrackletsPerStartHit)); }
inline size_t NTPCTrackletHits(size_t tpcHits, bool lowField) { return getValue(tpcMaxTrackletHits, hitOffset + tpcHits * (lowField ? tpcTrackletHitsPerHitLowField : tpcTrackletHitsPerHit)); }
inline size_t NTPCSectorTracks(size_t tpcHits) { return getValue(tpcMaxSectorTracks, tpcHits * tpcSectorTracksPerHit); }
inline size_t NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection = 0) { return getValue(tpcMaxSectorTrackHits, tpcHits * (withRejection ? tpcSectorTrackHitsPerHitWithRejection : tpcSectorTrackHitsPerHit)); }
inline size_t NTPCMergedTracks(size_t tpcSectorTracks) { return getValue(tpcMaxMergedTracks, tpcSectorTracks * (conservative ? 1.0 : tpcMergedTrackPerSectorTrack)); }
inline size_t NTPCMergedTrackHits(size_t tpcSectorTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit); }
inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min<size_t>(1024, tpcCompressedUnattachedHitsBase1024[type] * (doFuzzing == 0 ? scalingFactor : getScalingFactor()) * temporaryFactor); }
size_t getValue(size_t maxVal, size_t val);
size_t NTPCPeaks(size_t tpcDigits, bool perSector = false);
size_t NTPCClusters(size_t tpcDigits, bool perSector = false);
size_t NTPCStartHits(size_t tpcHits);
size_t NTPCRowStartHits(size_t tpcHits);
size_t NTPCTracklets(size_t tpcHits, bool lowField);
size_t NTPCTrackletHits(size_t tpcHits, bool lowField);
size_t NTPCSectorTracks(size_t tpcHits);
size_t NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection = 0);
size_t NTPCMergedTracks(size_t tpcSectorTracks);
size_t NTPCMergedTrackHits(size_t tpcSectorTrackHitss);
size_t NTPCUnattachedHitsBase1024(int32_t type);
};

} // namespace o2::gpu
Expand Down
23 changes: 22 additions & 1 deletion GPU/GPUTracking/Definitions/GPUSettingsList.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,27 @@ AddOption(nnCCDBInteractionRate, std::string, "500", "", 0, "Distinguishes betwe
AddHelp("help", 'h')
EndConfig()

// Settings steering the scaling of memory size estimates (offsets, per-item ratios, conservative mode)
BeginSubConfig(GPUSettingsProcessingScaling, scaling, configStandalone.proc, "SCALING", 0, "Processing settings for memory size scaling", proc_scaling)
AddOption(offset, float, 1000., "", 0, "Scaling Factor: offset")
AddOption(hitOffset, float, 20000, "", 0, "Scaling Factor: hitOffset")
AddOption(tpcPeaksPerDigit, float, 0.2, "", 0, "Scaling Factor: tpcPeaksPerDigit")
AddOption(tpcClustersPerPeak, float, 0.9, "", 0, "Scaling Factor: tpcClustersPerPeak")
AddOption(tpcStartHitsPerHit, float, 0.08, "", 0, "Scaling Factor: tpcStartHitsPerHit")
AddOption(tpcTrackletsPerStartHit, float, 0.8, "", 0, "Scaling Factor: tpcTrackletsPerStartHit")
AddOption(tpcTrackletsPerStartHitLowField, float, 0.85, "", 0, "Scaling Factor: tpcTrackletsPerStartHitLowField")
AddOption(tpcTrackletHitsPerHit, float, 5, "", 0, "Scaling Factor: tpcTrackletHitsPerHit")
AddOption(tpcTrackletHitsPerHitLowField, float, 7, "", 0, "Scaling Factor: tpcTrackletHitsPerHitLowField")
AddOption(tpcSectorTracksPerHit, float, 0.02, "", 0, "Scaling Factor: tpcSectorTracksPerHit")
AddOption(tpcSectorTrackHitsPerHit, float, 0.8, "", 0, "Scaling Factor: tpcSectorTrackHitsPerHit")
AddOption(tpcSectorTrackHitsPerHitWithRejection, float, 1.0, "", 0, "Scaling Factor: tpcSectorTrackHitsPerHitWithRejection")
AddOption(tpcMergedTrackPerSectorTrack, float, 1.0, "", 0, "Scaling Factor: tpcMergedTrackPerSectorTrack")
AddOption(tpcMergedTrackHitPerSectorHit, float, 1.1, "", 0, "Scaling Factor: tpcMergedTrackHitPerSectorHit")
// Stored as integer fractions of 1024 rather than as float ratios
AddOptionArray(tpcCompressedUnattachedHitsBase1024, int32_t, 3, (900, 900, 500), "", 0, "Scaling Factor: tpcCompressedUnattachedHitsBase1024")
AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conservative defaults for larger buffers during TPC processing")
AddHelp("help", 'h')
EndConfig()

// Settings steering the processing once the device was selected, only available on the host
BeginSubConfig(GPUSettingsProcessing, proc, configStandalone, "PROC", 0, "Processing settings", proc)
AddOption(deviceNum, int32_t, -1, "gpuDevice", 0, "Set GPU device to use (-1: automatic, -2: for round-robin usage in timeslice-pipeline)")
Expand All @@ -323,7 +344,6 @@ AddOption(forceMemoryPoolSize, uint64_t, 1, "memSize", 0, "Force size of allocat
AddOption(forceHostMemoryPoolSize, uint64_t, 0, "hostMemSize", 0, "Force size of allocated host page locked host memory (overriding memSize)", min(0ul))
AddOption(memoryScalingFactor, float, 1.f, "", 0, "Factor to apply to all memory scalers")
AddOption(memoryScalingFuzz, uint64_t, 0, "", 0, "Fuzz the memoryScalingFactor (0 disable, 1 enable, >1 set seed", def(1))
AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conservative defaults for larger buffers during TPC processing")
AddOption(tpcInputWithClusterRejection, uint8_t, 0, "", 0, "Indicate whether the TPC input is CTF data with cluster rejection, to tune buffer estimations")
AddOption(forceMaxMemScalers, uint64_t, 0, "", 0, "Force using the maximum values for all buffers, Set a value n > 1 to rescale all maximums to a memory size of n")
AddOption(registerStandaloneInputMemory, bool, false, "registerInputMemory", 0, "Automatically register input memory buffers for the GPU")
Expand Down Expand Up @@ -390,6 +410,7 @@ AddSubConfig(GPUSettingsProcessingRTC, rtc)
AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech)
AddSubConfig(GPUSettingsProcessingParam, param)
AddSubConfig(GPUSettingsProcessingNNclusterizer, nn)
AddSubConfig(GPUSettingsProcessingScaling, scaling)
AddHelp("help", 'h')
EndConfig()
#endif // __OPENCL__
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#pragma link C++ class o2::gpu::internal::GPUConfigurableParamGPUSettingsProcessingRTC + ;
#pragma link C++ class o2::gpu::internal::GPUConfigurableParamGPUSettingsProcessingRTCtechnical + ;
#pragma link C++ class o2::gpu::internal::GPUConfigurableParamGPUSettingsProcessingNNclusterizer + ;
#pragma link C++ class o2::gpu::internal::GPUConfigurableParamGPUSettingsProcessingScaling + ;
#pragma link C++ class o2::gpu::internal::GPUConfigurableParamGPUSettingsDisplay + ;
#pragma link C++ class o2::gpu::internal::GPUConfigurableParamGPUSettingsDisplayLight + ;
#pragma link C++ class o2::gpu::internal::GPUConfigurableParamGPUSettingsDisplayHeavy + ;
Expand Down
2 changes: 1 addition & 1 deletion prodtests/full-system-test/dpl-workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ if [[ $SYNCRAWMODE == 1 ]]; then
TOF_CONFIG+=" --for-calib"
fi
if [[ $SYNCRAWMODE == 1 ]] || [[ $SYNCMODE == 0 && $CTFINPUT == 1 && $GPUTYPE != "CPU" ]]; then
GPU_CONFIG_KEY+="GPU_proc.conservativeMemoryEstimate=1;"
GPU_CONFIG_KEY+="GPU_proc_scaling.conservativeMemoryEstimate=1;"
fi

if [[ $SYNCMODE == 1 && "0${ED_NO_ITS_ROF_FILTER:-}" != "01" && $BEAMTYPE == "PbPb" ]] && has_detector ITS; then
Expand Down
Loading