From 45efcaaebac45d5c1669407d3d784a0de80b9feb Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 13 Mar 2026 10:05:02 +0100 Subject: [PATCH 1/5] initialize arrow compute --- .../include/Framework/IndexBuilderHelpers.h | 2 +- Framework/Core/src/IndexBuilderHelpers.cxx | 68 ++++++++++++------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/Framework/Core/include/Framework/IndexBuilderHelpers.h b/Framework/Core/include/Framework/IndexBuilderHelpers.h index 30754e62a8dc3..3a23d97a10d83 100644 --- a/Framework/Core/include/Framework/IndexBuilderHelpers.h +++ b/Framework/Core/include/Framework/IndexBuilderHelpers.h @@ -29,7 +29,7 @@ enum struct IndexKind : int { namespace o2::framework { -void cannotBuildAnArray(); +void cannotBuildAnArray(const char* reason); void cannotCreateIndexBuilder(); struct ChunkedArrayIterator { diff --git a/Framework/Core/src/IndexBuilderHelpers.cxx b/Framework/Core/src/IndexBuilderHelpers.cxx index 0943dea42169c..b70e4d01185e7 100644 --- a/Framework/Core/src/IndexBuilderHelpers.cxx +++ b/Framework/Core/src/IndexBuilderHelpers.cxx @@ -14,6 +14,7 @@ #include "Framework/CompilerBuiltins.h" #include "Framework/VariantHelpers.h" #include +#include #include #include #include @@ -21,9 +22,9 @@ namespace o2::framework { -void cannotBuildAnArray() +void cannotBuildAnArray(const char* reason) { - throw framework::runtime_error("Cannot finish an array"); + throw framework::runtime_error_f("Cannot finish an array: %s", reason); } void cannotCreateIndexBuilder() @@ -62,10 +63,10 @@ SelfBuilder::SelfBuilder(arrow::MemoryPool* pool) { auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); if (!status.ok()) { - throw framework::runtime_error("Cannot create array builder for the self-index!"); + throw framework::runtime_error_f("Cannot create array builder for the self-index: %s", status.ToString().c_str()); } } -// static_cast(this)->reset(pool); + void SelfBuilder::reset(std::shared_ptr) { mBuilder->Reset(); @@ -74,7 +75,10 @@ void SelfBuilder::reset(std::shared_ptr) void SelfBuilder::fill(int idx) { - (void)static_cast(mBuilder.get())->Append(idx); + auto status = static_cast(mBuilder.get())->Append(idx); + if (!status.ok()) { + throw framework::runtime_error_f("Cannot append to self-index array: %s", status.ToString().c_str()); + } } std::shared_ptr SelfBuilder::result() const @@ -82,7 +86,7 @@ std::shared_ptr SelfBuilder::result() const std::shared_ptr array; auto status = static_cast(mBuilder.get())->Finish(&array); if (!status.ok()) { - cannotBuildAnArray(); + cannotBuildAnArray(status.ToString().c_str()); } return std::make_shared(array); @@ -93,7 +97,7 @@ SingleBuilder::SingleBuilder(std::shared_ptr source, arrow: { auto status = arrow::MakeBuilder(pool, arrow::int32(), &mBuilder); if (!status.ok()) { - throw framework::runtime_error("Cannot create array builder for the single-valued index!"); + throw framework::runtime_error_f("Cannot create array builder for the single-valued index: %s", status.ToString().c_str()); } } @@ -126,10 +130,14 @@ bool SingleBuilder::find(int idx) void SingleBuilder::fill(int idx) { + arrow::Status status; if (mPosition < mSourceSize && valueAt(mPosition) == idx) { - (void)static_cast(mBuilder.get())->Append((int)mPosition); + status = static_cast(mBuilder.get())->Append((int)mPosition); } else { - (void)static_cast(mBuilder.get())->Append(-1); + status = static_cast(mBuilder.get())->Append(-1); + } + if (!status.ok()) { + throw framework::runtime_error_f("Cannot append to array: %s", status.ToString().c_str()); } } @@ -138,7 +146,7 @@ std::shared_ptr SingleBuilder::result() const std::shared_ptr array; auto status = static_cast(mBuilder.get())->Finish(&array); if (!status.ok()) { - cannotBuildAnArray(); + cannotBuildAnArray(status.ToString().c_str()); } return std::make_shared(array); } @@ -146,14 +154,15 @@ std::shared_ptr SingleBuilder::result() const SliceBuilder::SliceBuilder(std::shared_ptr source, arrow::MemoryPool* pool) : ChunkedArrayIterator{source} { - if (!preSlice().ok()) { - throw framework::runtime_error("Cannot pre-slice the source for slice-index building"); + auto status = preSlice(); + if (!status.ok()) { + throw framework::runtime_error_f("Cannot pre-slice the source for slice-index building: %s", status.ToString().c_str()); } std::unique_ptr builder; - auto status = arrow::MakeBuilder(pool, arrow::int32(), &builder); + status = arrow::MakeBuilder(pool, arrow::int32(), &builder); if (!status.ok()) { - throw framework::runtime_error("Cannot create array for the slice-index builder!"); + throw framework::runtime_error_f("Cannot create array for the slice-index builder: %s", status.ToString().c_str()); } mListBuilder = std::make_unique(pool, std::move(builder), 2); mValueBuilder = static_cast(mListBuilder.get())->value_builder(); @@ -166,8 +175,9 @@ void SliceBuilder::reset(std::shared_ptr source) mListBuilder->Reset(); mValuePos = 0; static_cast(this)->reset(source); - if (!preSlice().ok()) { - throw framework::runtime_error("Cannot pre-slice the source for slice-index building"); + auto status = preSlice(); + if (!status.ok()) { + throw framework::runtime_error_f("Cannot pre-slice the source for slice-index building: %s", status.ToString().c_str()); } } @@ -211,13 +221,17 @@ std::shared_ptr SliceBuilder::result() const std::shared_ptr array; auto status = static_cast(mListBuilder.get())->Finish(&array); if (!status.ok()) { - cannotBuildAnArray(); + cannotBuildAnArray(status.ToString().c_str()); } return std::make_shared(array); } arrow::Status SliceBuilder::SliceBuilder::preSlice() { + auto status = arrow::compute::Initialize(); + if (!status.ok()) { + throw framework::runtime_error_f("Cannot initialize arrow compute: %s", status.ToString().c_str()); + } arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {mSource}, &options)); @@ -230,14 +244,15 @@ arrow::Status SliceBuilder::SliceBuilder::preSlice() ArrayBuilder::ArrayBuilder(std::shared_ptr source, arrow::MemoryPool* pool) : ChunkedArrayIterator{source} { - if (!preFind().ok()) { - throw framework::runtime_error("Cannot pre-find in a source for array-index building"); + auto&& status = preFind(); + if (!status.ok()) { + throw framework::runtime_error_f("Cannot pre-find in a source for array-index building: %s", status.ToString().c_str()); } std::unique_ptr builder; - auto status = arrow::MakeBuilder(pool, arrow::int32(), &builder); + status = arrow::MakeBuilder(pool, arrow::int32(), &builder); if (!status.ok()) { - throw framework::runtime_error("Cannot create array for the array-index builder!"); + throw framework::runtime_error_f("Cannot create array for the array-index builder: %s", status.ToString().c_str()); } mListBuilder = std::make_unique(pool, std::move(builder)); mValueBuilder = static_cast(mListBuilder.get())->value_builder(); @@ -246,8 +261,9 @@ ArrayBuilder::ArrayBuilder(std::shared_ptr source, arrow::M void ArrayBuilder::reset(std::shared_ptr source) { static_cast(this)->reset(source); - if (!preFind().ok()) { - throw framework::runtime_error("Cannot pre-find in a source for array-index building"); + auto status = preFind(); + if (!status.ok()) { + throw framework::runtime_error_f("Cannot pre-find in a source for array-index building: %s", status.ToString().c_str()); } mValues.clear(); mIndices.clear(); @@ -274,13 +290,17 @@ std::shared_ptr ArrayBuilder::result() const std::shared_ptr array; auto status = static_cast(mListBuilder.get())->Finish(&array); if (!status.ok()) { - cannotBuildAnArray(); + cannotBuildAnArray(status.ToString().c_str()); } return std::make_shared(array); } arrow::Status ArrayBuilder::preFind() { + auto status = arrow::compute::Initialize(); + if (!status.ok()) { + throw framework::runtime_error_f("Cannot initialize arrow compute: %s", status.ToString().c_str()); + } arrow::Datum max; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(max, arrow::compute::CallFunction("max", {mSource}, &options)); From 2a3cf33f87f933143224c95a16d041f7217b7e22 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 13 Mar 2026 10:12:11 +0100 Subject: [PATCH 2/5] add compute target --- dependencies/O2Dependencies.cmake | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/dependencies/O2Dependencies.cmake b/dependencies/O2Dependencies.cmake index 8addb87a1a16f..60800cb140c23 100644 --- a/dependencies/O2Dependencies.cmake +++ b/dependencies/O2Dependencies.cmake @@ -92,7 +92,34 @@ if(NOT TARGET ArrowAcero::arrow_acero_shared) ) endif() -if (NOT TARGET Gandiva::gandiva_shared) +if(NOT TARGET ArrowCompute::arrow_compute_shared) + # ArrowCompute::arrow_compute_shared is linked for no reason to parquet + # so we cannot use it because we do not want to build parquet itself. + # For that reason at the moment we need to do the lookup by hand. + get_target_property(ARROW_SHARED_LOCATION Arrow::arrow_shared LOCATION) + get_filename_component(ARROW_SHARED_DIR ${ARROW_SHARED_LOCATION} DIRECTORY) + + find_library(ARROW_COMPUTE_SHARED arrow_compute + PATHS ${ARROW_SHARED_DIR} + NO_DEFAULT_PATH + ) + + if(ARROW_COMPUTE_SHARED) + message(STATUS + "Found arrow_compute_shared library at: ${ARROW_COMPUTE_SHARED}") + else() + message(FATAL_ERROR + "arrow_compute_shared library not found in ${ARROW_SHARED_DIR}") + endif() + + # Step 3: Create a target for ArrowCompute::arrow_compute_shared + add_library(ArrowCompute::arrow_compute_shared SHARED IMPORTED) + set_target_properties(ArrowCompute::arrow_compute_shared PROPERTIES + IMPORTED_LOCATION ${ARROW_COMPUTE_SHARED} + ) +endif() + +if(NOT TARGET Gandiva::gandiva_shared) add_library(Gandiva::gandiva_shared ALIAS gandiva_shared) endif() From f1834b2441008715a5bf30ba56b513449e58f59a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 13 Mar 2026 10:13:02 +0100 Subject: [PATCH 3/5] add compute dependency to framework lib --- Framework/Core/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index e6a8db1077136..3d98d789f18f7 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -178,6 +178,7 @@ o2_add_library(Framework RapidJSON::RapidJSON Arrow::arrow_shared ArrowDataset::arrow_dataset_shared + ArrowCompute::arrow_compute_shared Microsoft.GSL::GSL O2::FrameworkLogger Gandiva::gandiva_shared From b57ebf1b212975976dd1a557bb83d1844133aec7 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 13 Mar 2026 10:21:33 +0100 Subject: [PATCH 4/5] use initialize for arrow > 20 --- Framework/Core/src/IndexBuilderHelpers.cxx | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/IndexBuilderHelpers.cxx b/Framework/Core/src/IndexBuilderHelpers.cxx index b70e4d01185e7..0c06aea11ef2c 100644 --- a/Framework/Core/src/IndexBuilderHelpers.cxx +++ b/Framework/Core/src/IndexBuilderHelpers.cxx @@ -13,9 +13,12 @@ #include "Framework/IndexBuilderHelpers.h" #include "Framework/CompilerBuiltins.h" #include "Framework/VariantHelpers.h" -#include +#include +#if (ARROW_VERSION_MAJOR > 20) #include +#endif #include +#include #include #include #include @@ -228,10 +231,14 @@ std::shared_ptr SliceBuilder::result() const arrow::Status SliceBuilder::SliceBuilder::preSlice() { +#if (ARROW_VERSION_MAJOR > 20) auto status = arrow::compute::Initialize(); if (!status.ok()) { throw framework::runtime_error_f("Cannot initialize arrow compute: %s", status.ToString().c_str()); } +#else + arrow::Status status; +#endif arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, arrow::compute::CallFunction("value_counts", {mSource}, &options)); @@ -297,10 +304,14 @@ std::shared_ptr ArrayBuilder::result() const arrow::Status ArrayBuilder::preFind() { +#if (ARROW_VERSION_MAJOR > 20) auto status = arrow::compute::Initialize(); if (!status.ok()) { throw framework::runtime_error_f("Cannot initialize arrow compute: %s", status.ToString().c_str()); } +#else + arrow::Status status; +#endif arrow::Datum max; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(max, arrow::compute::CallFunction("max", {mSource}, &options)); From f6fd1b25420c50e31a760f5a6cb8804db692e2f4 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 13 Mar 2026 11:18:43 +0100 Subject: [PATCH 5/5] fix for missing target in arrow 20 --- Framework/Core/CMakeLists.txt | 3 +- dependencies/O2Dependencies.cmake | 49 ++++++++++++++++--------------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 3d98d789f18f7..c311ba980a20b 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -8,7 +8,6 @@ # In applying this license CERN does not waive the privileges and immunities # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. - o2_add_library(Framework SOURCES src/AnalysisHelpers.cxx src/AlgorithmSpec.cxx @@ -178,7 +177,7 @@ o2_add_library(Framework RapidJSON::RapidJSON Arrow::arrow_shared ArrowDataset::arrow_dataset_shared - ArrowCompute::arrow_compute_shared + $<$:ArrowCompute::arrow_compute_shared> Microsoft.GSL::GSL O2::FrameworkLogger Gandiva::gandiva_shared diff --git a/dependencies/O2Dependencies.cmake b/dependencies/O2Dependencies.cmake index 60800cb140c23..71e9d9907ac28 100644 --- a/dependencies/O2Dependencies.cmake +++ b/dependencies/O2Dependencies.cmake @@ -92,31 +92,34 @@ if(NOT TARGET ArrowAcero::arrow_acero_shared) ) endif() -if(NOT TARGET ArrowCompute::arrow_compute_shared) - # ArrowCompute::arrow_compute_shared is linked for no reason to parquet - # so we cannot use it because we do not want to build parquet itself. - # For that reason at the moment we need to do the lookup by hand. - get_target_property(ARROW_SHARED_LOCATION Arrow::arrow_shared LOCATION) - get_filename_component(ARROW_SHARED_DIR ${ARROW_SHARED_LOCATION} DIRECTORY) - - find_library(ARROW_COMPUTE_SHARED arrow_compute - PATHS ${ARROW_SHARED_DIR} - NO_DEFAULT_PATH - ) +string(REGEX MATCH "([0-9]+)\.*" ARROW_MAJOR "${ARROW_VERSION}") +if(${ARROW_MAJOR} GREATER 20) + if(NOT TARGET ArrowCompute::arrow_compute_shared) + # ArrowCompute::arrow_compute_shared is linked for no reason to parquet + # so we cannot use it because we do not want to build parquet itself. + # For that reason at the moment we need to do the lookup by hand. + get_target_property(ARROW_SHARED_LOCATION Arrow::arrow_shared LOCATION) + get_filename_component(ARROW_SHARED_DIR ${ARROW_SHARED_LOCATION} DIRECTORY) + + find_library(ARROW_COMPUTE_SHARED arrow_compute + PATHS ${ARROW_SHARED_DIR} + NO_DEFAULT_PATH + ) - if(ARROW_COMPUTE_SHARED) - message(STATUS - "Found arrow_compute_shared library at: ${ARROW_COMPUTE_SHARED}") - else() - message(FATAL_ERROR - "arrow_compute_shared library not found in ${ARROW_SHARED_DIR}") + if(ARROW_COMPUTE_SHARED) + message(STATUS + "Found arrow_compute_shared library at: ${ARROW_COMPUTE_SHARED}") + else() + message(FATAL_ERROR + "arrow_compute_shared library not found in ${ARROW_SHARED_DIR}") + endif() + + # Step 3: Create a target for ArrowCompute::arrow_compute_shared + add_library(ArrowCompute::arrow_compute_shared SHARED IMPORTED) + set_target_properties(ArrowCompute::arrow_compute_shared PROPERTIES + IMPORTED_LOCATION ${ARROW_COMPUTE_SHARED} + ) endif() - - # Step 3: Create a target for ArrowCompute::arrow_compute_shared - add_library(ArrowCompute::arrow_compute_shared SHARED IMPORTED) - set_target_properties(ArrowCompute::arrow_compute_shared PROPERTIES - IMPORTED_LOCATION ${ARROW_COMPUTE_SHARED} - ) endif() if(NOT TARGET Gandiva::gandiva_shared)