diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 610b732cafd2..119b25ba1d6f 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -299,6 +299,11 @@
     M(IcebergTrivialCountOptimizationApplied, "Trivial count optimization applied while reading from Iceberg", ValueType::Number) \
     M(IcebergVersionHintUsed, "Number of times version-hint.text has been used.", ValueType::Number) \
     M(IcebergMinMaxIndexPrunedFiles, "Number of skipped files by using MinMax index in Iceberg", ValueType::Number) \
+    M(IcebergAvroFileParsing, "Number of times Avro metadata files have been parsed.", ValueType::Number) \
+    M(IcebergAvroFileParsingMicroseconds, "Time spent parsing Avro metadata files for Iceberg tables.", ValueType::Microseconds) \
+    M(IcebergJsonFileParsing, "Number of times JSON metadata files have been parsed.", ValueType::Number) \
+    M(IcebergJsonFileParsingMicroseconds, "Time spent parsing JSON metadata files for Iceberg tables.", ValueType::Microseconds) \
+    \
     M(JoinBuildTableRowCount, "Total number of rows in the build table for a JOIN operation.", ValueType::Number) \
     M(JoinProbeTableRowCount, "Total number of rows in the probe table for a JOIN operation.", ValueType::Number) \
     M(JoinResultRowCount, "Total number of rows in the result of a JOIN operation.", ValueType::Number) \
@@ -580,7 +585,9 @@ The server successfully detected this situation and will download merged part fr
     M(S3DeleteObjects, "Number of S3 API DeleteObject(s) calls.", ValueType::Number) \
     M(S3CopyObject, "Number of S3 API CopyObject calls.", ValueType::Number) \
     M(S3ListObjects, "Number of S3 API ListObjects calls.", ValueType::Number) \
+    M(S3ListObjectsMicroseconds, "Time spent on S3 API ListObjects calls.", ValueType::Microseconds) \
     M(S3HeadObject, "Number of S3 API HeadObject calls.", ValueType::Number) \
+    M(S3HeadObjectMicroseconds, "Time spent on S3 API HeadObject calls.", ValueType::Microseconds) \
     M(S3GetObjectAttributes, "Number of S3 API GetObjectAttributes calls.", ValueType::Number) \
     M(S3CreateMultipartUpload, "Number of S3 API CreateMultipartUpload calls.", ValueType::Number) \
     M(S3UploadPartCopy, "Number of S3 API UploadPartCopy calls.", ValueType::Number) \
@@ -634,6 +641,7 @@ The server successfully detected this situation and will download merged part fr
     M(AzureCopyObject, "Number of Azure blob storage API CopyObject calls", ValueType::Number) \
     M(AzureDeleteObjects, "Number of Azure blob storage API DeleteObject(s) calls.", ValueType::Number) \
     M(AzureListObjects, "Number of Azure blob storage API ListObjects calls.", ValueType::Number) \
+    M(AzureListObjectsMicroseconds, "Time spent on Azure blob storage API ListObjects calls.", ValueType::Microseconds) \
     M(AzureGetProperties, "Number of Azure blob storage API GetProperties calls.", ValueType::Number) \
     M(AzureCreateContainer, "Number of Azure blob storage API CreateContainer calls.", ValueType::Number) \
     \
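Each instrumented operation in this change gets a pair of events: a Number event counting calls and a Microseconds event accumulating wall-clock time, fed by a scope-bound watch placed next to the existing count increment. A minimal sketch of that pattern, assuming ProfileEventTimeIncrement behaves like a RAII timer that adds the elapsed time to an event when it goes out of scope; the MicrosecondsWatch type and the plain atomic counters below are illustrative stand-ins, not ClickHouse's actual implementation:

#include <atomic>
#include <chrono>
#include <cstdint>

/// Illustrative stand-ins for the two profile events of one operation.
std::atomic<uint64_t> head_object_calls{0};
std::atomic<uint64_t> head_object_microseconds{0};

/// RAII watch: the destructor adds the elapsed time to the Microseconds event,
/// so early returns and exceptions inside the timed scope are still accounted for.
class MicrosecondsWatch
{
public:
    explicit MicrosecondsWatch(std::atomic<uint64_t> & event_)
        : event(event_), start(std::chrono::steady_clock::now()) {}

    ~MicrosecondsWatch()
    {
        auto elapsed = std::chrono::steady_clock::now() - start;
        event += std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count();
    }

private:
    std::atomic<uint64_t> & event;
    std::chrono::steady_clock::time_point start;
};

void headObject()
{
    head_object_calls += 1;                              /// the Number event
    MicrosecondsWatch watch(head_object_microseconds);   /// the Microseconds event
    /// ... issue the HeadObject request ...
}

Because the time is recorded in the destructor, the call sites in the diff only need to declare a watch rather than time each request by hand.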
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
index 4fe9c5c43117..e5e139b04c5c 100644
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include

 namespace CurrentMetrics
@@ -25,6 +26,7 @@ namespace CurrentMetrics
 namespace ProfileEvents
 {
     extern const Event AzureListObjects;
+    extern const Event AzureListObjectsMicroseconds;
     extern const Event DiskAzureListObjects;
     extern const Event AzureDeleteObjects;
     extern const Event DiskAzureDeleteObjects;
@@ -76,6 +78,7 @@ class AzureIteratorAsync final : public IObjectStorageIteratorAsync
         ProfileEvents::increment(ProfileEvents::AzureListObjects);
         if (client->IsClientForDisk())
             ProfileEvents::increment(ProfileEvents::DiskAzureListObjects);
+        ProfileEventTimeIncrement watch(ProfileEvents::AzureListObjectsMicroseconds);

         chassert(batch.empty());
         auto blob_list_response = client->ListBlobs(options);
@@ -183,7 +186,11 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
         if (client_ptr->IsClientForDisk())
             ProfileEvents::increment(ProfileEvents::DiskAzureListObjects);

-        blob_list_response = client_ptr->ListBlobs(options);
+        {
+            ProfileEventTimeIncrement watch(ProfileEvents::AzureListObjectsMicroseconds);
+            blob_list_response = client_ptr->ListBlobs(options);
+        }
+
         const auto & blobs_list = blob_list_response.Blobs;

         for (const auto & blob : blobs_list)
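Both the Azure change above and the S3 changes that follow wrap the client call in an extra block, with the response object declared outside it, so the watch's lifetime covers only the storage API request and not the processing of the returned listing. A sketch of the same idea with an explicit helper; timed(), listBucket() and the counter are hypothetical names, not part of ClickHouse or the cloud SDKs:

#include <chrono>
#include <cstdint>
#include <string>
#include <vector>

uint64_t list_call_microseconds = 0;

/// Run `fn`, add its elapsed wall time to `sink_microseconds`, and return its result.
/// This mirrors what the nested { } block with a watch achieves in the diff:
/// only the storage API request is timed.
template <typename Fn>
auto timed(uint64_t & sink_microseconds, Fn && fn)
{
    auto start = std::chrono::steady_clock::now();
    auto result = fn();
    sink_microseconds += std::chrono::duration_cast<std::chrono::microseconds>(
        std::chrono::steady_clock::now() - start).count();
    return result;
}

/// Stand-in for ListBlobs / ListObjectsV2.
std::vector<std::string> listBucket() { return {"key1", "key2"}; }

size_t listAndCount()
{
    auto listing = timed(list_call_microseconds, [] { return listBucket(); });
    return listing.size();  /// response handling happens outside the timed region
}

One difference worth noting: a helper like this records nothing if the call throws, whereas the RAII watch used in the diff still adds the elapsed time from its destructor.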
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index 320173bd7a49..5ae8da17fa93 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -30,11 +30,13 @@
 #include
 #include
 #include
+#include

 namespace ProfileEvents
 {
     extern const Event S3ListObjects;
+    extern const Event S3ListObjectsMicroseconds;
     extern const Event DiskS3DeleteObjects;
     extern const Event DiskS3ListObjects;
 }
@@ -137,7 +139,12 @@ class S3IteratorAsync final : public IObjectStorageIteratorAsync
         ProfileEvents::increment(ProfileEvents::S3ListObjects);
         ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);

-        auto outcome = client->ListObjectsV2(*request);
+        Aws::S3::Model::ListObjectsV2Outcome outcome;
+
+        {
+            ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds);
+            outcome = client->ListObjectsV2(*request);
+        }

         /// Outcome failure will be handled on the caller side.
         if (outcome.IsSuccess())
@@ -264,7 +271,11 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
         ProfileEvents::increment(ProfileEvents::S3ListObjects);
         ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);

-        outcome = client.get()->ListObjectsV2(request);
+        {
+            ProfileEventTimeIncrement watch(ProfileEvents::S3ListObjectsMicroseconds);
+            outcome = client.get()->ListObjectsV2(request);
+        }
+
         throwIfError(outcome);

         auto result = outcome.GetResult();
diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp
index b69f2a23a0dc..b85743e4c5fe 100644
--- a/src/IO/S3/getObjectInfo.cpp
+++ b/src/IO/S3/getObjectInfo.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include

 #if USE_AWS_S3

@@ -15,6 +16,7 @@ namespace ProfileEvents
     extern const Event S3GetObject;
     extern const Event S3GetObjectAttributes;
     extern const Event S3HeadObject;
+    extern const Event S3HeadObjectMicroseconds;
     extern const Event DiskS3GetObject;
     extern const Event DiskS3GetObjectAttributes;
     extern const Event DiskS3HeadObject;
@@ -32,6 +34,7 @@ namespace
         ProfileEvents::increment(ProfileEvents::S3HeadObject);
         if (client.isClientForDisk())
             ProfileEvents::increment(ProfileEvents::DiskS3HeadObject);
+        ProfileEventTimeIncrement watch(ProfileEvents::S3HeadObjectMicroseconds);

         S3::HeadObjectRequest req;
         req.SetBucket(bucket);
diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp
index f12de6a7b546..cc321ba7af8c 100644
--- a/src/IO/S3Common.cpp
+++ b/src/IO/S3Common.cpp
@@ -19,16 +19,6 @@
 #include

-namespace ProfileEvents
-{
-    extern const Event S3GetObjectAttributes;
-    extern const Event S3GetObjectMetadata;
-    extern const Event S3HeadObject;
-    extern const Event DiskS3GetObjectAttributes;
-    extern const Event DiskS3GetObjectMetadata;
-    extern const Event DiskS3HeadObject;
-}
-
 namespace DB
 {
diff --git a/src/Interpreters/IcebergMetadataLog.cpp b/src/Interpreters/IcebergMetadataLog.cpp
index ab8266dfdc30..316665eb2fc5 100644
--- a/src/Interpreters/IcebergMetadataLog.cpp
+++ b/src/Interpreters/IcebergMetadataLog.cpp
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -83,7 +84,7 @@ void IcebergMetadataLogElement::appendToBlock(MutableColumns & columns) const

 void insertRowToLogTable(
     const ContextPtr & local_context,
-    String row,
+    std::function<String()> get_row,
     IcebergMetadataLogLevel row_log_level,
     const String & table_path,
     const String & file_path,
@@ -103,7 +104,7 @@ void insertRowToLogTable(
         .content_type = row_log_level,
         .table_path = table_path,
         .file_path = file_path,
-        .metadata_content = row,
+        .metadata_content = get_row(),
         .row_in_file = row_in_file});
 }
 }
diff --git a/src/Interpreters/IcebergMetadataLog.h b/src/Interpreters/IcebergMetadataLog.h
index d4c65c4d6346..042615a20a43 100644
--- a/src/Interpreters/IcebergMetadataLog.h
+++ b/src/Interpreters/IcebergMetadataLog.h
@@ -2,6 +2,7 @@

 #include
 #include
+#include

 namespace DB
 {
@@ -23,9 +24,11 @@ struct IcebergMetadataLogElement
     void appendToBlock(MutableColumns & columns) const;
 };

+/// Here a `get_row` function is used instead of a `row` string so that the string is calculated only when required.
+/// Inside `insertRowToLogTable` the code can exit immediately after the `iceberg_metadata_log_level` setting check.
 void insertRowToLogTable(
     const ContextPtr & local_context,
-    String row,
+    std::function<String()> get_row,
     IcebergMetadataLogLevel row_log_level,
     const String & table_path,
     const String & file_path,
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp
index 5057b5df9d2d..a574b06f2eeb 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/AvroForIcebergDeserializer.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include

 namespace DB::ErrorCodes
 {
@@ -19,6 +20,12 @@ namespace DB::ErrorCodes
     extern const int INCORRECT_DATA;
 }

+namespace ProfileEvents
+{
+    extern const Event IcebergAvroFileParsing;
+    extern const Event IcebergAvroFileParsingMicroseconds;
+}
+
 namespace DB::Iceberg
 {
@@ -30,6 +37,9 @@ try
     : buffer(std::move(buffer_))
     , manifest_file_path(manifest_file_path_)
 {
+    ProfileEvents::increment(ProfileEvents::IcebergAvroFileParsing);
+    ProfileEventTimeIncrement watch(ProfileEvents::IcebergAvroFileParsingMicroseconds);
+
     auto manifest_file_reader = std::make_unique(std::make_unique(*buffer));
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
index 12fcb29b24ac..2fab3e3349f1 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/IcebergMetadata.cpp
@@ -243,9 +243,10 @@ bool IcebergMetadata::update(const ContextPtr & local_context)

     updateState(local_context, metadata_object);

+    auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(metadata_object); };
     insertRowToLogTable(
         local_context,
-        dumpMetadataObjectToString(metadata_object),
+        dump_metadata,
         DB::IcebergMetadataLogLevel::Metadata,
         configuration_ptr->getRawPath().path,
         metadata_file_path,
@@ -773,9 +774,10 @@ DataLakeMetadataPtr IcebergMetadata::create(

     auto format_version = object->getValue(f_format_version);

+    auto dump_metadata = [&]()->String { return dumpMetadataObjectToString(object); };
     insertRowToLogTable(
         local_context,
-        dumpMetadataObjectToString(object),
+        dump_metadata,
         DB::IcebergMetadataLogLevel::Metadata,
         configuration_ptr->getRawPath().path,
         metadata_file_path,
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp
index 1fdde4972ef0..f4260051ff8d 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/ManifestFile.cpp
@@ -155,9 +155,10 @@ ManifestFileContent::ManifestFileContent(
     const String & path_to_manifest_file_)
     : path_to_manifest_file(path_to_manifest_file_)
 {
+    auto dump_metadata = [&]()->String { return manifest_file_deserializer.getMetadataContent(); };
     insertRowToLogTable(
         context,
-        manifest_file_deserializer.getMetadataContent(),
+        dump_metadata,
         DB::IcebergMetadataLogLevel::ManifestFileMetadata,
         common_path,
         path_to_manifest_file,
@@ -230,9 +231,10 @@ ManifestFileContent::ManifestFileContent(
     for (size_t i = 0; i < manifest_file_deserializer.rows(); ++i)
     {
+        auto dump_row_metadata = [&]()->String { return manifest_file_deserializer.getContent(i); };
         insertRowToLogTable(
             context,
-            manifest_file_deserializer.getContent(i),
+            dump_row_metadata,
             DB::IcebergMetadataLogLevel::ManifestFileEntry,
             common_path,
             path_to_manifest_file,
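The header comment above explains the reasoning behind the insertRowToLogTable signature change: callers now pass a std::function<String()> (built from the dump_metadata / dump_row_metadata lambdas) instead of an already serialized string, so the potentially large metadata dump is produced only when the configured log level actually keeps the row. A minimal sketch of that deferral; LogLevel, configured_level and insertRow are hypothetical simplifications of the real iceberg_metadata_log_level check:

#include <functional>
#include <iostream>
#include <string>

enum class LogLevel { None, Metadata };

LogLevel configured_level = LogLevel::None;

/// The row text is produced lazily: if the configured level filters the row out,
/// get_row() is never invoked and the expensive serialization never happens.
void insertRow(LogLevel row_level, const std::function<std::string()> & get_row)
{
    if (row_level > configured_level)
        return;                        /// early exit, the string is never built
    std::cout << get_row() << '\n';    /// stand-in for appending to the log table
}

int main()
{
    auto dump_metadata = [&]() -> std::string
    {
        /// Imagine an expensive JSON dump of a whole metadata object here.
        return "{ ... metadata ... }";
    };
    insertRow(LogLevel::Metadata, dump_metadata);  /// skipped: configured level is None
}

The lambdas in the diff capture by reference, which is fine as long as insertRowToLogTable invokes the function synchronously, as the .cpp change above does.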
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp
index 9372a13e2074..eb7123b0d8e1 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/StatelessMetadataFileGetter.cpp
@@ -148,9 +148,10 @@ ManifestFileCacheKeys getManifestList(

     ManifestFileCacheKeys manifest_file_cache_keys;

+    auto dump_metadata = [&]()->String { return manifest_list_deserializer.getMetadataContent(); };
     insertRowToLogTable(
         local_context,
-        manifest_list_deserializer.getMetadataContent(),
+        dump_metadata,
         DB::IcebergMetadataLogLevel::ManifestListMetadata,
         configuration_ptr->getRawPath().path,
         filename,
@@ -185,9 +186,10 @@ ManifestFileCacheKeys getManifestList(
         manifest_file_cache_keys.emplace_back(
             manifest_file_name, added_sequence_number, added_snapshot_id.safeGet(), content_type);

+        auto dump_row_metadata = [&]()->String { return manifest_list_deserializer.getContent(i); };
         insertRowToLogTable(
             local_context,
-            manifest_list_deserializer.getContent(i),
+            dump_row_metadata,
             DB::IcebergMetadataLogLevel::ManifestListEntry,
             configuration_ptr->getRawPath().path,
             filename,
diff --git a/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp b/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp
index 260c50ea4080..644197a140fb 100644
--- a/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/Iceberg/Utils.cpp
@@ -40,6 +40,8 @@
 #include
 #include
+#include
+

 using namespace DB;

@@ -65,6 +67,8 @@ namespace DB::DataLakeStorageSetting
 namespace ProfileEvents
 {
     extern const Event IcebergVersionHintUsed;
+    extern const Event IcebergJsonFileParsing;
+    extern const Event IcebergJsonFileParsingMicroseconds;
 }

 namespace DB::Setting
@@ -310,6 +314,9 @@ Poco::JSON::Object::Ptr getMetadataJSONObject(
         return json_str;
     };

+    ProfileEvents::increment(ProfileEvents::IcebergJsonFileParsing);
+    ProfileEventTimeIncrement watch(ProfileEvents::IcebergJsonFileParsingMicroseconds);
+
     String metadata_json_str;
     if (cache_ptr)
         metadata_json_str = cache_ptr->getOrSetTableMetadata(IcebergMetadataFilesCache::getKey(configuration_ptr, metadata_file_path), create_fn);
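One detail of the Utils.cpp hunk above: the increment and the watch are placed before the cache branch, so as the diff reads, a lookup satisfied from IcebergMetadataFilesCache is still counted as a JSON file parsing and its (much shorter) duration still lands in IcebergJsonFileParsingMicroseconds. A self-contained sketch of that placement; the map-based cache, getMetadataJson and the plain counters are hypothetical, not ClickHouse's cache API:

#include <chrono>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>

uint64_t json_parsing_count = 0;
uint64_t json_parsing_microseconds = 0;

/// Hypothetical cache keyed by metadata file path.
std::unordered_map<std::string, std::string> metadata_cache;

std::string getMetadataJson(const std::string & key, const std::function<std::string()> & parse_fn)
{
    /// Count and time the whole lookup, mirroring how the watch in the diff is placed
    /// before the cache branch: cache hits contribute to both events as well.
    ++json_parsing_count;
    auto start = std::chrono::steady_clock::now();

    std::string result;
    if (auto it = metadata_cache.find(key); it != metadata_cache.end())
        result = it->second;                        /// cache hit: parse_fn is not called
    else
        result = metadata_cache[key] = parse_fn();  /// miss: parse and remember

    json_parsing_microseconds += std::chrono::duration_cast<std::chrono::microseconds>(
        std::chrono::steady_clock::now() - start).count();
    return result;
}

Moving the counter and watch inside create_fn would instead count only real parses; with the placement in the diff, the event also reflects cache hits.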