Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/Common/ProfileEvents.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,11 @@
M(IcebergTrivialCountOptimizationApplied, "Trivial count optimization applied while reading from Iceberg", ValueType::Number) \
M(IcebergVersionHintUsed, "Number of times version-hint.text has been used.", ValueType::Number) \
M(IcebergMinMaxIndexPrunedFiles, "Number of skipped files by using MinMax index in Iceberg", ValueType::Number) \
M(IcebergAvroFileParsing, "Number of times avro metadata files have been parsed.", ValueType::Number) \
M(IcebergAvroFileParsingMicroseconds, "Time spent for parsing avro metadata files for Iceberg tables.", ValueType::Microseconds) \
M(IcebergJsonFileParsing, "Number of times json metadata files have been parsed.", ValueType::Number) \
M(IcebergJsonFileParsingMicroseconds, "Time spent for parsing json metadata files for Iceberg tables.", ValueType::Microseconds) \
\
M(JoinBuildTableRowCount, "Total number of rows in the build table for a JOIN operation.", ValueType::Number) \
M(JoinProbeTableRowCount, "Total number of rows in the probe table for a JOIN operation.", ValueType::Number) \
M(JoinResultRowCount, "Total number of rows in the result of a JOIN operation.", ValueType::Number) \
Expand Down Expand Up @@ -580,7 +585,9 @@ The server successfully detected this situation and will download merged part fr
M(S3DeleteObjects, "Number of S3 API DeleteObject(s) calls.", ValueType::Number) \
M(S3CopyObject, "Number of S3 API CopyObject calls.", ValueType::Number) \
M(S3ListObjects, "Number of S3 API ListObjects calls.", ValueType::Number) \
M(S3ListObjectsMicroseconds, "Time of S3 API ListObjects execution.", ValueType::Microseconds) \
M(S3HeadObject, "Number of S3 API HeadObject calls.", ValueType::Number) \
M(S3HeadObjectMicroseconds, "Time of S3 API HeadObject execution.", ValueType::Microseconds) \
M(S3GetObjectAttributes, "Number of S3 API GetObjectAttributes calls.", ValueType::Number) \
M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.", ValueType::Number) \
M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.", ValueType::Number) \
Expand Down Expand Up @@ -634,6 +641,7 @@ The server successfully detected this situation and will download merged part fr
M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls", ValueType::Number) \
M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.", ValueType::Number) \
M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.", ValueType::Number) \
M(AzureListObjectsMicroseconds, "Time of Azure blob storage API ListObjects execution.", ValueType::Microseconds) \
M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.", ValueType::Number) \
M(AzureCreateContainer, "Number of Azure blob storage API CreateContainer calls.", ValueType::Number) \
\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageCommon.h>
#include <Disks/ObjectStorages/ObjectStorageIteratorAsync.h>
#include <Interpreters/Context.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>


namespace CurrentMetrics
Expand All @@ -25,6 +26,7 @@ namespace CurrentMetrics
namespace ProfileEvents
{
extern const Event AzureListObjects;
extern const Event AzureListObjectsMicroseconds;
extern const Event DiskAzureListObjects;
extern const Event AzureDeleteObjects;
extern const Event DiskAzureDeleteObjects;
Expand Down Expand Up @@ -76,6 +78,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
ProfileEvents::increment(ProfileEvents::AzureListObjects);
if (client->IsClientForDisk())
ProfileEvents::increment(ProfileEvents::DiskAzureListObjects);
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::AzureListObjectsMicroseconds);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps putting it in a scope will be more accurate and safer (e.g, future changes to this method that introduce slow operations could lead to wrong values)?

Example:

ListBlobsPagedResponse blob_list_response;

{
    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::AzureListObjectsMicroseconds);
    blob_list_response = client->ListBlobs(options);
}

The same comment applies to the other list object operations.

In any case, I see that it is already implemented without this "protection". So it is not a must to implement this 👍

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, I want to get time for S3/Azure communication, not for parsing for response.


chassert(batch.empty());
auto blob_list_response = client->ListBlobs(options);
Expand Down Expand Up @@ -183,7 +186,11 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
if (client_ptr->IsClientForDisk())
ProfileEvents::increment(ProfileEvents::DiskAzureListObjects);

blob_list_response = client_ptr->ListBlobs(options);
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::AzureListObjectsMicroseconds);
blob_list_response = client_ptr->ListBlobs(options);
}

const auto & blobs_list = blob_list_response.Blobs;

for (const auto & blob : blobs_list)
Expand Down
15 changes: 13 additions & 2 deletions src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@
#include <Common/logger_useful.h>
#include <Common/MultiVersion.h>
#include <Common/Macros.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>


namespace ProfileEvents
{
extern const Event S3ListObjects;
extern const Event S3ListObjectsMicroseconds;
extern const Event DiskS3DeleteObjects;
extern const Event DiskS3ListObjects;
}
Expand Down Expand Up @@ -137,7 +139,12 @@ class S3IteratorAsync final : public IObjectStorageIteratorAsync
ProfileEvents::increment(ProfileEvents::S3ListObjects);
ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);

auto outcome = client->ListObjectsV2(*request);
Aws::S3::Model::ListObjectsV2Outcome outcome;

{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::S3ListObjectsMicroseconds);
outcome = client->ListObjectsV2(*request);
}

/// Outcome failure will be handled on the caller side.
if (outcome.IsSuccess())
Expand Down Expand Up @@ -264,7 +271,11 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
ProfileEvents::increment(ProfileEvents::S3ListObjects);
ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);

outcome = client.get()->ListObjectsV2(request);
{
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::S3ListObjectsMicroseconds);
outcome = client.get()->ListObjectsV2(request);
}

throwIfError(outcome);

auto result = outcome.GetResult();
Expand Down
3 changes: 3 additions & 0 deletions src/IO/S3/getObjectInfo.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <optional>
#include <IO/S3/getObjectInfo.h>
#include <IO/Expect404ResponseScope.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>

#if USE_AWS_S3

Expand All @@ -15,6 +16,7 @@ namespace ProfileEvents
extern const Event S3GetObject;
extern const Event S3GetObjectAttributes;
extern const Event S3HeadObject;
extern const Event S3HeadObjectMicroseconds;
extern const Event DiskS3GetObject;
extern const Event DiskS3GetObjectAttributes;
extern const Event DiskS3HeadObject;
Expand All @@ -32,6 +34,7 @@ namespace
ProfileEvents::increment(ProfileEvents::S3HeadObject);
if (client.isClientForDisk())
ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::S3HeadObjectMicroseconds);

S3::HeadObjectRequest req;
req.SetBucket(bucket);
Expand Down
10 changes: 0 additions & 10 deletions src/IO/S3Common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,6 @@
#include <IO/S3/Requests.h>


namespace ProfileEvents
{
extern const Event S3GetObjectAttributes;
extern const Event S3GetObjectMetadata;
extern const Event S3HeadObject;
extern const Event DiskS3GetObjectAttributes;
extern const Event DiskS3GetObjectMetadata;
extern const Event DiskS3HeadObject;
}

namespace DB
{

Expand Down
5 changes: 3 additions & 2 deletions src/Interpreters/IcebergMetadataLog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <Processors/QueryPlan/ReadFromSystemNumbersStep.h>
#include <Storages/ObjectStorage/DataLakes/DataLakeConfiguration.h>
#include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.h>
#include <Storages/ObjectStorage/DataLakes/Iceberg/IcebergWrites.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/SelectQueryInfo.h>
#include <base/Decimal.h>
Expand Down Expand Up @@ -83,7 +84,7 @@ void IcebergMetadataLogElement::appendToBlock(MutableColumns & columns) const

void insertRowToLogTable(
const ContextPtr & local_context,
String row,
Copy link
Member

@Enmk Enmk Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That needs a comment that explains WHY we need NOT a value, but a function here... do I get it right that major reason is not to skew the the time measurements by getting metadata content?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make row string only when actually required. Serialization from JSON structure to string can take a lot of time, in my test select count() from iceberg.table reduced from 3.5 seconds to 0.2 only with this optimization. Several thousands parquet files, metadata.json near 5Mb.

std::function<String()> get_row,
IcebergMetadataLogLevel row_log_level,
const String & table_path,
const String & file_path,
Expand All @@ -103,7 +104,7 @@ void insertRowToLogTable(
.content_type = row_log_level,
.table_path = table_path,
.file_path = file_path,
.metadata_content = row,
.metadata_content = get_row(),
.row_in_file = row_in_file});
}
}
5 changes: 4 additions & 1 deletion src/Interpreters/IcebergMetadataLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <Interpreters/SystemLog.h>
#include <Storages/ColumnsDescription.h>
#include <Poco/JSON/Object.h>

namespace DB
{
Expand All @@ -23,9 +24,11 @@ struct IcebergMetadataLogElement
void appendToBlock(MutableColumns & columns) const;
};

/// Here `get_row` function is used instead `row` string to calculate string only when required.
/// Inside `insertRowToLogTable` code can exit immediately after `iceberg_metadata_log_level` setting check.
void insertRowToLogTable(
const ContextPtr & local_context,
String row,
std::function<String()> get_row,
IcebergMetadataLogLevel row_log_level,
const String & table_path,
const String & file_path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,20 @@
#include <Common/assert_cast.h>
#include <base/find_symbols.h>
#include <Processors/Formats/Impl/JSONObjectEachRowRowOutputFormat.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>

namespace DB::ErrorCodes
{
extern const int ICEBERG_SPECIFICATION_VIOLATION;
extern const int INCORRECT_DATA;
}

namespace ProfileEvents
{
extern const Event IcebergAvroFileParsing;
extern const Event IcebergAvroFileParsingMicroseconds;
}

namespace DB::Iceberg
{

Expand All @@ -30,6 +37,9 @@ try
: buffer(std::move(buffer_))
, manifest_file_path(manifest_file_path_)
{
ProfileEvents::increment(ProfileEvents::IcebergAvroFileParsing);
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::IcebergAvroFileParsingMicroseconds);

auto manifest_file_reader
= std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*buffer));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,9 +243,10 @@ bool IcebergMetadata::update(const ContextPtr & local_context)

updateState(local_context, metadata_object);

auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(metadata_object); };
insertRowToLogTable(
local_context,
dumpMetadataObjectToString(metadata_object),
dump_metadata,
DB::IcebergMetadataLogLevel::Metadata,
configuration_ptr->getRawPath().path,
metadata_file_path,
Expand Down Expand Up @@ -773,9 +774,10 @@ DataLakeMetadataPtr IcebergMetadata::create(

auto format_version = object->getValue<int>(f_format_version);

auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(object); };
insertRowToLogTable(
local_context,
dumpMetadataObjectToString(object),
dump_metadata,
DB::IcebergMetadataLogLevel::Metadata,
configuration_ptr->getRawPath().path,
metadata_file_path,
Expand Down
6 changes: 4 additions & 2 deletions src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,10 @@ ManifestFileContent::ManifestFileContent(
const String & path_to_manifest_file_)
: path_to_manifest_file(path_to_manifest_file_)
{
auto dump_metadata = [&]()->String { return manifest_file_deserializer.getMetadataContent(); };
insertRowToLogTable(
context,
manifest_file_deserializer.getMetadataContent(),
dump_metadata,
DB::IcebergMetadataLogLevel::ManifestFileMetadata,
common_path,
path_to_manifest_file,
Expand Down Expand Up @@ -230,9 +231,10 @@ ManifestFileContent::ManifestFileContent(

for (size_t i = 0; i < manifest_file_deserializer.rows(); ++i)
{
auto dump_row_metadata = [&]()->String { return manifest_file_deserializer.getContent(i); };
insertRowToLogTable(
context,
manifest_file_deserializer.getContent(i),
dump_row_metadata,
DB::IcebergMetadataLogLevel::ManifestFileEntry,
common_path,
path_to_manifest_file,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,10 @@ ManifestFileCacheKeys getManifestList(

ManifestFileCacheKeys manifest_file_cache_keys;

auto dump_metadata = [&]()->String { return manifest_list_deserializer.getMetadataContent(); };
insertRowToLogTable(
local_context,
manifest_list_deserializer.getMetadataContent(),
dump_metadata,
DB::IcebergMetadataLogLevel::ManifestListMetadata,
configuration_ptr->getRawPath().path,
filename,
Expand Down Expand Up @@ -185,9 +186,10 @@ ManifestFileCacheKeys getManifestList(
manifest_file_cache_keys.emplace_back(
manifest_file_name, added_sequence_number, added_snapshot_id.safeGet<Int64>(), content_type);

auto dump_row_metadata = [&]()->String { return manifest_list_deserializer.getContent(i); };
insertRowToLogTable(
local_context,
manifest_list_deserializer.getContent(i),
dump_row_metadata,
DB::IcebergMetadataLogLevel::ManifestListEntry,
configuration_ptr->getRawPath().path,
filename,
Expand Down
7 changes: 7 additions & 0 deletions src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
#include <Storages/ObjectStorage/Utils.h>

#include <Common/ElapsedTimeProfileEventIncrement.h>

using namespace DB;


Expand All @@ -65,6 +67,8 @@ namespace DB::DataLakeStorageSetting
namespace ProfileEvents
{
extern const Event IcebergVersionHintUsed;
extern const Event IcebergJsonFileParsing;
extern const Event IcebergJsonFileParsingMicroseconds;
}

namespace DB::Setting
Expand Down Expand Up @@ -310,6 +314,9 @@ Poco::JSON::Object::Ptr getMetadataJSONObject(
return json_str;
};

ProfileEvents::increment(ProfileEvents::IcebergJsonFileParsing);
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::IcebergJsonFileParsingMicroseconds);

String metadata_json_str;
if (cache_ptr)
metadata_json_str = cache_ptr->getOrSetTableMetadata(IcebergMetadataFilesCache::getKey(configuration_ptr, metadata_file_path), create_fn);
Expand Down
Loading