diff --git a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h index bd0a0b65f57f8..6c165c421b168 100644 --- a/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h +++ b/llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h @@ -1,4 +1,4 @@ -//===- BuiltinUnifiedCASDatabases.h -----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -42,7 +42,7 @@ enum class ValidationResult { /// marking the files for garbage collection. /// \param ForceValidation Whether to force validation to occur even if it /// should not be necessary. -/// \param LLVMCasBinary If provided, validation is performed out-of-process +/// \param LLVMCasBinaryPath If provided, validation is performed out-of-process /// using the given \c llvm-cas executable which protects against crashes /// during validation. Otherwise validation is performed in-process. /// @@ -52,7 +52,7 @@ enum class ValidationResult { /// in an invalid state because \p AllowRecovery is false. Expected validateOnDiskUnifiedCASDatabasesIfNeeded( StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation, - std::optional LLVMCasBinary); + std::optional LLVMCasBinaryPath); } // namespace llvm::cas diff --git a/llvm/include/llvm/CAS/ObjectStore.h b/llvm/include/llvm/CAS/ObjectStore.h index f451e613dccdd..a08f3982d3a19 100644 --- a/llvm/include/llvm/CAS/ObjectStore.h +++ b/llvm/include/llvm/CAS/ObjectStore.h @@ -5,6 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the ObjectStore class. 
+/// +//===----------------------------------------------------------------------===// #ifndef LLVM_CAS_OBJECTSTORE_H #define LLVM_CAS_OBJECTSTORE_H @@ -246,7 +251,7 @@ class ObjectStore { /// Set the size for limiting growth of on-disk storage. This has an effect /// for when the instance is closed. /// - /// Implementations may be not have this implemented. + /// Implementations may leave this unimplemented. virtual Error setSizeLimit(std::optional SizeLimit) { return Error::success(); } @@ -262,7 +267,7 @@ class ObjectStore { /// Prune local storage to reduce its size according to the desired size /// limit. Pruning can happen concurrently with other operations. /// - /// Implementations may be not have this implemented. + /// Implementations may leave this unimplemented. virtual Error pruneStorageData() { return Error::success(); } /// Validate the whole node tree. @@ -291,13 +296,9 @@ class ObjectStore { /// Reference to an abstract hierarchical node, with data and references. /// Reference is passed by value and is expected to be valid as long as the \a /// ObjectStore is. -/// -/// TODO: Expose \a ObjectStore::readData() and only call \a -/// ObjectStore::getDataString() when asked. class ObjectProxy { public: - const ObjectStore &getCAS() const { return *CAS; } - ObjectStore &getCAS() { return *CAS; } + ObjectStore &getCAS() const { return *CAS; } CASID getID() const { return CAS->getID(Ref); } ObjectRef getRef() const { return Ref; } size_t getNumReferences() const { return CAS->getNumRefs(H); } @@ -352,12 +353,13 @@ class ObjectProxy { ObjectHandle H; }; +/// Create an in memory CAS. std::unique_ptr createInMemoryCAS(); /// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled. bool isOnDiskCASEnabled(); -/// Gets or creates a persistent on-disk path at \p Path. +/// Create a persistent on-disk path at \p Path. 
Expected> createOnDiskCAS(const Twine &Path); /// Set \p Path to a reasonable default on-disk path for a persistent CAS for diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h b/llvm/include/llvm/CAS/OnDiskGraphDB.h index 1dfb0d38daa27..2c3ee5a9b6ab3 100644 --- a/llvm/include/llvm/CAS/OnDiskGraphDB.h +++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h @@ -341,13 +341,16 @@ class OnDiskGraphDB { /// \param HashByteSize Size for the object digest hash bytes. /// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes /// if they don't exist in the primary store. The upstream store is only used - /// for reading nodes, new nodes are only written to the primary store. + /// for reading nodes, new nodes are only written to the primary store. Users + /// need to make sure \p UpstreamDB outlives the current instance of + /// OnDiskGraphDB; the common usage is to have a \p UnifiedOnDiskCache + /// manage both. /// \param Policy If \p UpstreamDB is provided, controls how nodes are copied /// to primary store. This is recorded at creation time and subsequent opens /// need to pass the same policy otherwise the \p open will fail. static Expected> open(StringRef Path, StringRef HashName, unsigned HashByteSize, - std::unique_ptr UpstreamDB = nullptr, + OnDiskGraphDB *UpstreamDB = nullptr, std::shared_ptr Logger = nullptr, FaultInPolicy Policy = FaultInPolicy::FullTree); @@ -440,9 +443,8 @@ class OnDiskGraphDB { // Private constructor. OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, - OnDiskDataAllocator DataPool, - std::unique_ptr UpstreamDB, FaultInPolicy Policy, - std::shared_ptr Logger); + OnDiskDataAllocator DataPool, OnDiskGraphDB *UpstreamDB, + FaultInPolicy Policy, std::shared_ptr Logger); /// Mapping from hash to object reference. /// @@ -461,7 +463,7 @@ class OnDiskGraphDB { std::string RootPath; /// Optional on-disk store to be used for faulting-in nodes. 
- std::unique_ptr UpstreamDB; + OnDiskGraphDB* UpstreamDB = nullptr; /// The policy used to fault in data from upstream. FaultInPolicy FIPolicy; diff --git a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h index fb2aba4b1526e..0dfd0345cd142 100644 --- a/llvm/include/llvm/CAS/OnDiskKeyValueDB.h +++ b/llvm/include/llvm/CAS/OnDiskKeyValueDB.h @@ -19,6 +19,8 @@ namespace llvm::cas::ondisk { +class UnifiedOnDiskCache; + /// An on-disk key-value data store with the following properties: /// * Keys are fixed length binary hashes with expected normal distribution. /// * Values are buffers of the same size, specified at creation time. @@ -59,9 +61,13 @@ class OnDiskKeyValueDB { /// \param KeySize Size for the key hash bytes. /// \param ValueName Identifier name for the values. /// \param ValueSize Size for the value bytes. + /// \param UnifiedCache An optional UnifiedOnDiskCache that manages the size + /// and lifetime of the CAS instance; it must own the KeyValueDB being + /// opened once initialization completes. 
static Expected> open(StringRef Path, StringRef HashName, unsigned KeySize, StringRef ValueName, size_t ValueSize, + UnifiedOnDiskCache *UnifiedCache = nullptr, std::shared_ptr Logger = nullptr); using CheckValueT = @@ -71,11 +77,14 @@ class OnDiskKeyValueDB { Error validate(CheckValueT CheckValue) const; private: - OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache) - : ValueSize(ValueSize), Cache(std::move(Cache)) {} + OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache, + UnifiedOnDiskCache *UnifiedCache) + : ValueSize(ValueSize), Cache(std::move(Cache)), + UnifiedCache(UnifiedCache) {} const size_t ValueSize; OnDiskTrieRawHashMap Cache; + UnifiedOnDiskCache *UnifiedCache = nullptr; }; } // namespace llvm::cas::ondisk diff --git a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h index f00ab92dd29c7..e34919a60c278 100644 --- a/llvm/include/llvm/CAS/UnifiedOnDiskCache.h +++ b/llvm/include/llvm/CAS/UnifiedOnDiskCache.h @@ -1,4 +1,4 @@ -//===- UnifiedOnDiskCache.h -------------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -33,7 +33,7 @@ class OnDiskKeyValueDB; /// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open /// for a limited period of time, e.g. for the duration of a build operation. /// For long-living processes that need periodic access to a -/// \p UnifiedOnDiskCache, the client should device a scheme where access is +/// \p UnifiedOnDiskCache, the client should devise a scheme where access is /// performed within some defined period. 
For example, if a service is designed /// to continuously wait for requests that access a \p UnifiedOnDiskCache, it /// could keep the instance alive while new requests are coming in but close it @@ -43,28 +43,8 @@ class UnifiedOnDiskCache { /// The \p OnDiskGraphDB instance for the open directory. OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; } - /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. - /// - /// \param Key the hash bytes for the key. - /// \param Value the \p ObjectID value. - /// - /// \returns the \p ObjectID associated with the \p Key. It may be different - /// than \p Value if another value was already associated with this key. - Expected KVPut(ArrayRef Key, ObjectID Value); - - /// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key. - /// An \p ObjectID as a key is equivalent to its digest bytes. - /// - /// \param Key the \p ObjectID for the key. - /// \param Value the \p ObjectID value. - /// - /// \returns the \p ObjectID associated with the \p Key. It may be different - /// than \p Value if another value was already associated with this key. - Expected KVPut(ObjectID Key, ObjectID Value); - - /// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated - /// with the \p Key, or \p std::nullopt if the key does not exist. - Expected> KVGet(ArrayRef Key); + /// The \p OnDiskKeyValueDB instance for the open directory. + OnDiskKeyValueDB &getKeyValueDB() { return *PrimaryKVDB; } /// Open a \p UnifiedOnDiskCache instance for a directory. /// @@ -150,18 +130,23 @@ class UnifiedOnDiskCache { static Error collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger = nullptr); + /// Remove unused data from the current UnifiedOnDiskCache. Error collectGarbage(); - ~UnifiedOnDiskCache(); + /// Helper functions to convert between a value stored in KeyValueDB and an ObjectID. 
+ static ObjectID getObjectIDFromValue(ArrayRef Value); - Error validateActionCache(); + using ValueBytes = std::array; + static ValueBytes getValueFromObjectID(ObjectID ID); - OnDiskGraphDB *getUpstreamGraphDB() const { return UpstreamGraphDB; } + ~UnifiedOnDiskCache(); private: + friend class OnDiskGraphDB; + friend class OnDiskKeyValueDB; UnifiedOnDiskCache(); - Expected> + Expected>> faultInFromUpstreamKV(ArrayRef Key); /// \returns the storage size of the primary directory. @@ -175,7 +160,7 @@ class UnifiedOnDiskCache { std::atomic NeedsGarbageCollection; std::string PrimaryDBDir; - OnDiskGraphDB *UpstreamGraphDB = nullptr; + std::unique_ptr UpstreamGraphDB; std::unique_ptr PrimaryGraphDB; std::unique_ptr UpstreamKVDB; diff --git a/llvm/lib/CAS/ActionCaches.cpp b/llvm/lib/CAS/ActionCaches.cpp index 3097d811d6a59..5589ccc4f3a38 100644 --- a/llvm/lib/CAS/ActionCaches.cpp +++ b/llvm/lib/CAS/ActionCaches.cpp @@ -13,15 +13,13 @@ #include "BuiltinCAS.h" #include "llvm/ADT/TrieRawHashMap.h" #include "llvm/CAS/ActionCache.h" -#include "llvm/CAS/ObjectStore.h" #include "llvm/CAS/OnDiskCASLogger.h" -#include "llvm/CAS/OnDiskGraphDB.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Config/llvm-config.h" -#include "llvm/Support/Alignment.h" #include "llvm/Support/BLAKE3.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Path.h" #define DEBUG_TYPE "cas-action-caches" @@ -67,6 +65,7 @@ class InMemoryActionCache final : public ActionCache { InMemoryCacheT Cache; }; +/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB. class OnDiskActionCache final : public ActionCache { public: Error putImpl(ArrayRef ActionKey, const CASID &Result, @@ -87,6 +86,8 @@ class OnDiskActionCache final : public ActionCache { using DataT = CacheEntry; }; +/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide +/// access to its ActionCache. 
class UnifiedOnDiskActionCache final : public ActionCache { public: Error putImpl(ArrayRef ActionKey, const CASID &Result, @@ -118,7 +119,8 @@ static Error createResultCachePoisonedError(ArrayRef KeyHash, } Expected> -InMemoryActionCache::getImpl(ArrayRef Key, bool /*CanBeDistributed*/) const { +InMemoryActionCache::getImpl(ArrayRef Key, + bool /*CanBeDistributed*/) const { auto Result = Cache.find(Key); if (!Result) return std::nullopt; @@ -169,9 +171,9 @@ OnDiskActionCache::create(StringRef AbsPath) { ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger)) return std::move(E); std::unique_ptr DB; - if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(), - sizeof(HashType), getHashName(), - sizeof(DataT), std::move(Logger)) + if (Error E = ondisk::OnDiskKeyValueDB::open( + AbsPath, getHashName(), sizeof(HashType), getHashName(), + sizeof(DataT), /*UnifiedCache=*/nullptr, std::move(Logger)) .moveInto(DB)) return std::move(E); return std::unique_ptr( @@ -179,7 +181,8 @@ OnDiskActionCache::create(StringRef AbsPath) { } Expected> -OnDiskActionCache::getImpl(ArrayRef Key, bool /*CanBeDistributed*/) const { +OnDiskActionCache::getImpl(ArrayRef Key, + bool /*CanBeDistributed*/) const { std::optional> Val; if (Error E = DB->get(Key).moveInto(Val)) return std::move(E); @@ -218,13 +221,14 @@ UnifiedOnDiskActionCache::UnifiedOnDiskActionCache( Expected> UnifiedOnDiskActionCache::getImpl(ArrayRef Key, bool /*CanBeDistributed*/) const { - std::optional Val; - if (Error E = UniDB->KVGet(Key).moveInto(Val)) + std::optional> Val; + if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val)) return std::move(E); if (!Val) return std::nullopt; + auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val); return CASID::create(&getContext(), - toStringRef(UniDB->getGraphDB().getDigest(*Val))); + toStringRef(UniDB->getGraphDB().getDigest(ID))); } Error UnifiedOnDiskActionCache::putImpl(ArrayRef Key, @@ -233,20 +237,35 @@ Error 
UnifiedOnDiskActionCache::putImpl(ArrayRef Key, auto Expected = UniDB->getGraphDB().getReference(Result.getHash()); if (LLVM_UNLIKELY(!Expected)) return Expected.takeError(); - std::optional Observed; - if (Error E = UniDB->KVPut(Key, *Expected).moveInto(Observed)) + + auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected); + std::optional> Observed; + if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed)) return E; - if (*Expected == Observed) + auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed); + if (*Expected == ObservedID) return Error::success(); return createResultCachePoisonedError( - Key, getContext(), Result, - UniDB->getGraphDB().getDigest(*Observed)); + Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID)); } Error UnifiedOnDiskActionCache::validate() const { - return UniDB->validateActionCache(); + auto ValidateRef = [](FileOffset Offset, ArrayRef Value) -> Error { + auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value); + auto formatError = [&](Twine Msg) { + return createStringError( + llvm::errc::illegal_byte_sequence, + "bad record at 0x" + + utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + + Msg.str()); + }; + if (ID.getOpaqueData() == 0) + return formatError("zero is not a valid ref"); + return Error::success(); + }; + return UniDB->getKeyValueDB().validate(ValidateRef); } Expected> diff --git a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp index 40d898e4b7f56..f3f6fa043bc52 100644 --- a/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp +++ b/llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp @@ -1,4 +1,4 @@ -//===- BuiltinUnifiedCASDatabases.cpp ---------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -35,4 +35,4 @@ Expected cas::validateOnDiskUnifiedCASDatabasesIfNeeded( #else return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled"); #endif -} \ No newline at end of file +} diff --git a/llvm/lib/CAS/OnDiskCAS.cpp b/llvm/lib/CAS/OnDiskCAS.cpp index dee2f0ec76dea..ab5aec88ede60 100644 --- a/llvm/lib/CAS/OnDiskCAS.cpp +++ b/llvm/lib/CAS/OnDiskCAS.cpp @@ -43,8 +43,8 @@ class OnDiskCAS : public BuiltinCAS { static Expected> open(StringRef Path); - OnDiskCAS(std::shared_ptr UniDB_) - : UniDB(std::move(UniDB_)), DB(&UniDB->getGraphDB()) {} + OnDiskCAS(std::shared_ptr UniDB) + : UnifiedDB(std::move(UniDB)), DB(&UnifiedDB->getGraphDB()) {} private: ObjectHandle convertHandle(ondisk::ObjectHandle Node) const { @@ -67,10 +67,12 @@ class OnDiskCAS : public BuiltinCAS { auto RefsRange = DB->getObjectRefs(convertHandle(Node)); return std::distance(RefsRange.begin(), RefsRange.end()); } + ObjectRef readRef(ObjectHandle Node, size_t I) const final { auto RefsRange = DB->getObjectRefs(convertHandle(Node)); return convertRef(RefsRange.begin()[I]); } + Error forEachRef(ObjectHandle Node, function_ref Callback) const final; @@ -78,11 +80,11 @@ class OnDiskCAS : public BuiltinCAS { Expected> getStorageSize() const final; Error pruneStorageData() final; - OnDiskCAS(std::unique_ptr DB_) - : OwnedDB(std::move(DB_)), DB(OwnedDB.get()) {} + OnDiskCAS(std::unique_ptr GraphDB) + : OwnedDB(std::move(GraphDB)), DB(OwnedDB.get()) {} std::unique_ptr OwnedDB; - std::shared_ptr UniDB; + std::shared_ptr UnifiedDB; ondisk::OnDiskGraphDB *DB; }; @@ -99,8 +101,6 @@ Error OnDiskCAS::validate(bool CheckHash) const { if (auto E = DB->validate(CheckHash, Hasher)) return E; - if (UniDB && UniDB->getUpstreamGraphDB()) - return UniDB->getUpstreamGraphDB()->validate(CheckHash, Hasher); return Error::success(); } @@ -165,15 +165,15 @@ Error OnDiskCAS::forEachRef(ObjectHandle Node, } Error OnDiskCAS::setSizeLimit(std::optional SizeLimit) { - UniDB->setSizeLimit(SizeLimit); + 
UnifiedDB->setSizeLimit(SizeLimit); return Error::success(); } Expected> OnDiskCAS::getStorageSize() const { - return UniDB->getStorageSize(); + return UnifiedDB->getStorageSize(); } -Error OnDiskCAS::pruneStorageData() { return UniDB->collectGarbage(); } +Error OnDiskCAS::pruneStorageData() { return UnifiedDB->collectGarbage(); } Expected> OnDiskCAS::open(StringRef AbsPath) { std::shared_ptr Logger; diff --git a/llvm/lib/CAS/OnDiskGraphDB.cpp b/llvm/lib/CAS/OnDiskGraphDB.cpp index 43fe8dc363098..8e123ca028e51 100644 --- a/llvm/lib/CAS/OnDiskGraphDB.cpp +++ b/llvm/lib/CAS/OnDiskGraphDB.cpp @@ -907,6 +907,10 @@ int64_t DataRecordHandle::getDataRelOffset() const { } Error OnDiskGraphDB::validate(bool Deep, HashingFuncT Hasher) const { + if (UpstreamDB) { + if (auto E = UpstreamDB->validate(Deep, Hasher)) + return E; + } return Index.validate([&](FileOffset Offset, OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { @@ -1216,11 +1220,8 @@ OnDiskGraphDB::load(ObjectID ExternalRef) { return I.takeError(); TrieRecord::Data Object = I->Ref.load(); - if (Object.SK == TrieRecord::StorageKind::Unknown) { - if (!UpstreamDB) - return std::nullopt; + if (Object.SK == TrieRecord::StorageKind::Unknown) return faultInFromUpstream(ExternalRef); - } if (Object.SK == TrieRecord::StorageKind::DataPool) return ObjectHandle::fromFileOffset(Object.Offset); @@ -1300,8 +1301,10 @@ OnDiskGraphDB::getObjectPresence(ObjectID ExternalRef, TrieRecord::Data Object = I->Ref.load(); if (Object.SK != TrieRecord::StorageKind::Unknown) return ObjectPresence::InPrimaryDB; + if (!CheckUpstream || !UpstreamDB) return ObjectPresence::Missing; + std::optional UpstreamID = UpstreamDB->getExistingReference(getDigest(*I)); return UpstreamID.has_value() ? 
ObjectPresence::OnlyInUpstreamDB @@ -1563,10 +1566,11 @@ unsigned OnDiskGraphDB::getHardStorageLimitUtilization() const { return std::max(IndexPercent, DataPercent); } -Expected> OnDiskGraphDB::open( - StringRef AbsPath, StringRef HashName, unsigned HashByteSize, - std::unique_ptr UpstreamDB, - std::shared_ptr Logger, FaultInPolicy Policy) { +Expected> +OnDiskGraphDB::open(StringRef AbsPath, StringRef HashName, + unsigned HashByteSize, OnDiskGraphDB *UpstreamDB, + std::shared_ptr Logger, + FaultInPolicy Policy) { if (std::error_code EC = sys::fs::create_directories(AbsPath)) return createFileError(AbsPath, EC); @@ -1621,17 +1625,16 @@ Expected> OnDiskGraphDB::open( return std::unique_ptr( new OnDiskGraphDB(AbsPath, std::move(*Index), std::move(*DataPool), - std::move(UpstreamDB), Policy, std::move(Logger))); + UpstreamDB, Policy, std::move(Logger))); } OnDiskGraphDB::OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index, OnDiskDataAllocator DataPool, - std::unique_ptr UpstreamDB, - FaultInPolicy Policy, + OnDiskGraphDB *UpstreamDB, FaultInPolicy Policy, std::shared_ptr Logger) : Index(std::move(Index)), DataPool(std::move(DataPool)), - RootPath(RootPath.str()), UpstreamDB(std::move(UpstreamDB)), - FIPolicy(Policy), Logger(std::move(Logger)) { + RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy), + Logger(std::move(Logger)) { /// Lifetime for "big" objects not in DataPool. /// /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something @@ -1654,7 +1657,6 @@ Error OnDiskGraphDB::importFullTree(ObjectID PrimaryID, // against the process dying during importing and leaving the database with an // incomplete tree. Note that if the upstream has missing nodes then the tree // will be copied with missing nodes as well, it won't be considered an error. 
- struct UpstreamCursor { ObjectHandle Node; size_t RefsCount; @@ -1736,7 +1738,6 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID, // Copy the node data into the primary store. // FIXME: Use hard-link or cloning if the file-system supports it and data is // stored into a separate file. - auto Data = UpstreamDB->getObjectData(UpstreamNode); auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode); SmallVector Refs; @@ -1753,7 +1754,8 @@ Error OnDiskGraphDB::importSingleNode(ObjectID PrimaryID, Expected> OnDiskGraphDB::faultInFromUpstream(ObjectID PrimaryID) { - assert(UpstreamDB); + if (!UpstreamDB) + return std::nullopt; auto UpstreamID = UpstreamDB->getReference(getDigest(PrimaryID)); if (LLVM_UNLIKELY(!UpstreamID)) diff --git a/llvm/lib/CAS/OnDiskKeyValueDB.cpp b/llvm/lib/CAS/OnDiskKeyValueDB.cpp index 709c639fa7012..d67d4455bcdd8 100644 --- a/llvm/lib/CAS/OnDiskKeyValueDB.cpp +++ b/llvm/lib/CAS/OnDiskKeyValueDB.cpp @@ -20,6 +20,7 @@ #include "llvm/CAS/OnDiskKeyValueDB.h" #include "OnDiskCommon.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Errc.h" @@ -53,15 +54,21 @@ Expected>> OnDiskKeyValueDB::get(ArrayRef Key) { // Check the result cache. OnDiskTrieRawHashMap::ConstOnDiskPtr ActionP = Cache.find(Key); - if (!ActionP) + if (ActionP) { + assert(isAddrAligned(Align(8), ActionP->Data.data())); + return ActionP->Data; + } + if (!UnifiedCache || !UnifiedCache->UpstreamKVDB) return std::nullopt; - assert(isAddrAligned(Align(8), ActionP->Data.data())); - return ActionP->Data; + + // Try to fault in from upstream. 
+ return UnifiedCache->faultInFromUpstreamKV(Key); } Expected> OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, StringRef ValueName, size_t ValueSize, + UnifiedOnDiskCache *Cache, std::shared_ptr Logger) { if (std::error_code EC = sys::fs::create_directories(Path)) return createFileError(Path, EC); @@ -89,10 +96,14 @@ OnDiskKeyValueDB::open(StringRef Path, StringRef HashName, unsigned KeySize, return std::move(E); return std::unique_ptr( - new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache))); + new OnDiskKeyValueDB(ValueSize, std::move(*ActionCache), Cache)); } Error OnDiskKeyValueDB::validate(CheckValueT CheckValue) const { + if (UnifiedCache && UnifiedCache->UpstreamKVDB) { + if (auto E = UnifiedCache->UpstreamKVDB->validate(CheckValue)) + return E; + } return Cache.validate( [&](FileOffset Offset, OnDiskTrieRawHashMap::ConstValueProxy Record) -> Error { diff --git a/llvm/lib/CAS/UnifiedOnDiskCache.cpp b/llvm/lib/CAS/UnifiedOnDiskCache.cpp index ec5ada21912f1..d4b19c3cb91f3 100644 --- a/llvm/lib/CAS/UnifiedOnDiskCache.cpp +++ b/llvm/lib/CAS/UnifiedOnDiskCache.cpp @@ -1,4 +1,4 @@ -//===- UnifiedOnDiskCache.cpp -----------------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,60 +6,62 @@ // //===----------------------------------------------------------------------===// // -// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one -// directory while also restricting storage growth with a scheme of chaining the -// two most recent directories (primary & upstream), where the primary -// "faults-in" data from the upstream one. When the primary (most recent) -// directory exceeds its intended limit a new empty directory becomes the -// primary one. 
-// -// Within the top-level directory (the path that \p UnifiedOnDiskCache::open -// receives) there are directories named like this: -// -// 'v.' -// 'v..' -// ... -// -// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and -// the part after the dot is an increasing integer. The primary directory is the -// one with the highest integer and the upstream one is the directory before it. -// For example, if the sub-directories contained are: -// -// 'v1.5', 'v1.6', 'v1.7', 'v1.8' -// -// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are -// unused directories that can be safely deleted at any time and by any process. -// -// Contained within the top-level directory is a file named "lock" which is used -// for processes to take shared or exclusive locks for the contents of the top -// directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock for -// the top-level directory; when it closes, if the primary sub-directory -// exceeded its limit, it attempts to get an exclusive lock in order to create a -// new empty primary directory; if it can't get the exclusive lock it gives up -// and lets the next \p UnifiedOnDiskCache instance that closes to attempt -// again. -// -// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a -// directory, by any process, the storage size in that directory will keep -// growing unrestricted. But the major benefit is that garbage-collection can be -// triggered on a directory concurrently, at any time and by any process, -// without affecting any active readers/writers in the same process or other -// processes. -// -// The \c UnifiedOnDiskCache also provides validation and recovery on top of the -// underlying on-disk storage. The low-level storage is designed to remain -// coherent across regular process crashes, but may be invalid after power loss -// or similar system failures. 
\c UnifiedOnDiskCache::validateIfNeeded allows -// validating the contents once per boot and can recover by marking invalid -// data for garbage collection. -// -// The data recovery described above requires exclusive access to the CAS, and -// it is an error to attempt recovery if the CAS is open in any process/thread. -// In order to maximize backwards compatibility with tools that do not perform -// validation before opening the CAS, we do not attempt to get exclusive access -// until recovery is actually performed, meaning as long as the data is valid -// it will not conflict with concurrent use. +/// \file +/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one +/// directory while also restricting storage growth with a scheme of chaining +/// the two most recent directories (primary & upstream), where the primary +/// "faults-in" data from the upstream one. When the primary (most recent) +/// directory exceeds its intended limit a new empty directory becomes the +/// primary one. +/// +/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open +/// receives) there are directories named like this: +/// +/// 'v.' +/// 'v.' +/// 'v.' +/// ... +/// +/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and +/// the part after the dot is an increasing integer. The primary directory is +/// the one with the highest integer and the upstream one is the directory +/// before it. For example, if the sub-directories contained are: +/// +/// 'v1.5', 'v1.6', 'v1.7', 'v1.8' +/// +/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are +/// unused directories that can be safely deleted at any time and by any +/// process. +/// +/// Contained within the top-level directory is a file named "lock" which is +/// used for processes to take shared or exclusive locks for the contents of the +/// top directory. 
While a \p UnifiedOnDiskCache is open it keeps a shared lock +/// for the top-level directory; when it closes, if the primary sub-directory +/// exceeded its limit, it attempts to get an exclusive lock in order to create +/// a new empty primary directory; if it can't get the exclusive lock it gives +/// up and lets the next \p UnifiedOnDiskCache instance that closes to attempt +/// again. +/// +/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a +/// directory, by any process, the storage size in that directory will keep +/// growing unrestricted. But the major benefit is that garbage-collection can +/// be triggered on a directory concurrently, at any time and by any process, +/// without affecting any active readers/writers in the same process or other +/// processes. +/// +/// The \c UnifiedOnDiskCache also provides validation and recovery on top of +/// the underlying on-disk storage. The low-level storage is designed to remain +/// coherent across regular process crashes, but may be invalid after power loss +/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows +/// validating the contents once per boot and can recover by marking invalid +/// data for garbage collection. +/// +/// The data recovery described above requires exclusive access to the CAS, and +/// it is an error to attempt recovery if the CAS is open in any process/thread. +/// In order to maximize backwards compatibility with tools that do not perform +/// validation before opening the CAS, we do not attempt to get exclusive access +/// until recovery is actually performed, meaning as long as the data is valid +/// it will not conflict with concurrent use. 
// //===----------------------------------------------------------------------===// @@ -73,6 +75,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/CAS/OnDiskCASLogger.h" +#include "llvm/CAS/ActionCache.h" #include "llvm/CAS/OnDiskGraphDB.h" #include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/Support/Compiler.h" @@ -102,38 +105,22 @@ static constexpr StringLiteral DBDirPrefix = "v1."; static constexpr StringLiteral ValidationFilename = "v1.validation"; static constexpr StringLiteral CorruptPrefix = "corrupt."; -Expected UnifiedOnDiskCache::KVPut(ObjectID Key, ObjectID Value) { - return KVPut(PrimaryGraphDB->getDigest(Key), Value); +ObjectID UnifiedOnDiskCache::getObjectIDFromValue(ArrayRef Value) { + // little endian encoded. + assert(Value.size() == sizeof(uint64_t)); + return ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); } -Expected UnifiedOnDiskCache::KVPut(ArrayRef Key, - ObjectID Value) { - static_assert(sizeof(Value.getOpaqueData()) == sizeof(uint64_t), - "unexpected return opaque type"); - std::array ValBytes; - support::endian::write64le(ValBytes.data(), Value.getOpaqueData()); - Expected> Existing = PrimaryKVDB->put(Key, ValBytes); - if (!Existing) - return Existing.takeError(); - assert(Existing->size() == sizeof(uint64_t)); - return ObjectID::fromOpaqueData(support::endian::read64le(Existing->data())); +UnifiedOnDiskCache::ValueBytes +UnifiedOnDiskCache::getValueFromObjectID(ObjectID ID) { + // little endian encoded. 
+ UnifiedOnDiskCache::ValueBytes ValBytes; + static_assert(ValBytes.size() == sizeof(ID.getOpaqueData())); + support::endian::write64le(ValBytes.data(), ID.getOpaqueData()); + return ValBytes; } -Expected> -UnifiedOnDiskCache::KVGet(ArrayRef Key) { - std::optional> Value; - if (Error E = PrimaryKVDB->get(Key).moveInto(Value)) - return std::move(E); - if (!Value) { - if (UpstreamKVDB) - return faultInFromUpstreamKV(Key); - return std::nullopt; - } - assert(Value->size() == sizeof(uint64_t)); - return ObjectID::fromOpaqueData(support::endian::read64le(Value->data())); -} - -Expected> +Expected>> UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { assert(UpstreamGraphDB); assert(UpstreamKVDB); @@ -147,48 +134,24 @@ UnifiedOnDiskCache::faultInFromUpstreamKV(ArrayRef Key) { // The value is the \p ObjectID in the context of the upstream // \p OnDiskGraphDB instance. Translate it to the context of the primary // \p OnDiskGraphDB instance. - assert(UpstreamValue->size() == sizeof(uint64_t)); - ObjectID UpstreamID = ObjectID::fromOpaqueData( - support::endian::read64le(UpstreamValue->data())); + ObjectID UpstreamID = getObjectIDFromValue(*UpstreamValue); auto PrimaryID = PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID)); if (LLVM_UNLIKELY(!PrimaryID)) return PrimaryID.takeError(); - return KVPut(Key, *PrimaryID); -} - -Error UnifiedOnDiskCache::validateActionCache() { - auto ValidateRef = [&](FileOffset Offset, ArrayRef Value) -> Error { - assert(Value.size() == sizeof(uint64_t) && "should be validated already"); - auto ID = ObjectID::fromOpaqueData(support::endian::read64le(Value.data())); - auto formatError = [&](Twine Msg) { - return createStringError( - llvm::errc::illegal_byte_sequence, - "bad record at 0x" + - utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " + - Msg.str()); - }; - if (ID.getOpaqueData() == 0) - return formatError("zero is not a valid ref"); - return Error::success(); - }; - if (Error E = 
PrimaryKVDB->validate(ValidateRef)) - return E; - if (UpstreamKVDB) - return UpstreamKVDB->validate(ValidateRef); - return Error::success(); + return PrimaryKVDB->put(Key, getValueFromObjectID(*PrimaryID)); } /// \returns all the 'v.' names of sub-directories, sorted with /// ascending order of the integer after the dot. Corrupt directories, if /// included, will come first. -static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, - bool IncludeCorrupt = false) { +static Expected> +getAllDBDirs(StringRef Path, bool IncludeCorrupt = false) { struct DBDir { uint64_t Order; std::string Name; }; - SmallVector FoundDBDirs; + SmallVector FoundDBDirs; std::error_code EC; for (sys::fs::directory_iterator DirI(Path, EC), DirE; !EC && DirI != DirE; @@ -214,26 +177,28 @@ static Error getAllDBDirs(StringRef Path, SmallVectorImpl &DBDirs, llvm::sort(FoundDBDirs, [](const DBDir &LHS, const DBDir &RHS) -> bool { return LHS.Order <= RHS.Order; }); + + SmallVector DBDirs; for (DBDir &Dir : FoundDBDirs) DBDirs.push_back(std::move(Dir.Name)); - return Error::success(); + return DBDirs; } -static Error getAllGarbageDirs(StringRef Path, - SmallVectorImpl &DBDirs) { - if (Error E = getAllDBDirs(Path, DBDirs, /*IncludeCorrupt=*/true)) - return E; +static Expected> getAllGarbageDirs(StringRef Path) { + auto DBDirs = getAllDBDirs(Path, /*IncludeCorrupt=*/true); + if (!DBDirs) + return DBDirs.takeError(); // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure // out how to handle the leftover sub-directories of the previous version. 
- for (unsigned Keep = 2; Keep > 0 && !DBDirs.empty(); --Keep) { - StringRef Back(DBDirs.back()); + for (unsigned Keep = 2; Keep > 0 && !DBDirs->empty(); --Keep) { + StringRef Back(DBDirs->back()); if (Back.starts_with(CorruptPrefix)) break; - DBDirs.pop_back(); + DBDirs->pop_back(); } - return Error::success(); + return *DBDirs; } /// \returns Given a sub-directory named 'v.', it outputs the @@ -301,7 +266,8 @@ static Error validateInProcess(StringRef RootPath, StringRef HashName, auto CAS = builtin::createObjectStoreFromUnifiedOnDiskCache(UniDB); if (Error E = CAS->validate(CheckHash)) return E; - if (Error E = UniDB->validateActionCache()) + auto Cache = builtin::createActionCacheFromUnifiedOnDiskCache(UniDB); + if (Error E = Cache->validate()) return E; return Error::success(); } @@ -325,15 +291,14 @@ static Expected getBootTime() { return createFileError("/proc", EC); return Status.getLastModificationTime().time_since_epoch().count(); #else - llvm::report_fatal_error("unimplemented"); + llvm::report_fatal_error("getBootTime unimplemented"); #endif } -Expected -UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, - unsigned HashByteSize, bool CheckHash, - bool AllowRecovery, bool ForceValidation, - std::optional LLVMCasBinary) { +Expected UnifiedOnDiskCache::validateIfNeeded( + StringRef RootPath, StringRef HashName, unsigned HashByteSize, + bool CheckHash, bool AllowRecovery, bool ForceValidation, + std::optional LLVMCasBinaryPath) { if (std::error_code EC = sys::fs::create_directories(RootPath)) return createFileError(RootPath, EC); @@ -381,7 +346,8 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, return; Logger->log_UnifiedOnDiskCache_validateIfNeeded( RootPath, BootTime, ValidationBootTime, CheckHash, AllowRecovery, - ForceValidation, LLVMCasBinary, LogValidationError, Skipped, Recovered); + ForceValidation, LLVMCasBinaryPath, LogValidationError, Skipped, + Recovered); }); if (ValidationBootTime == 
BootTime && !ForceValidation) { @@ -392,8 +358,8 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, // Validate! bool NeedsRecovery = false; Error E = - LLVMCasBinary - ? validateOutOfProcess(*LLVMCasBinary, RootPath, CheckHash) + LLVMCasBinaryPath + ? validateOutOfProcess(*LLVMCasBinaryPath, RootPath, CheckHash) : validateInProcess(RootPath, HashName, HashByteSize, CheckHash); if (E) { if (Logger) @@ -425,11 +391,11 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, } auto UnlockFD = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); - SmallVector DBDirs; - if (Error E = getAllDBDirs(RootPath, DBDirs)) - return std::move(E); + auto DBDirs = getAllDBDirs(RootPath); + if (!DBDirs) + return DBDirs.takeError(); - for (StringRef DBDir : DBDirs) { + for (StringRef DBDir : *DBDirs) { sys::path::remove_filename(PathBuf); sys::path::append(PathBuf, DBDir); std::error_code EC; @@ -468,8 +434,7 @@ UnifiedOnDiskCache::validateIfNeeded(StringRef RootPath, StringRef HashName, return createFileError(PathBuf, OS.error()); } - return NeedsRecovery ? ValidationResult::Recovered - : ValidationResult::Valid; + return NeedsRecovery ? ValidationResult::Recovered : ValidationResult::Valid; } Expected> @@ -490,16 +455,15 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, // from creating a new chain (essentially while a \p UnifiedOnDiskCache // instance holds a shared lock the storage for the primary directory will // grow unrestricted). 
- if (std::error_code EC = lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared)) + if (std::error_code EC = + lockFileThreadSafe(LockFD, sys::fs::LockKind::Shared)) return createFileError(PathBuf, EC); - SmallVector DBDirs; - if (Error E = getAllDBDirs(RootPath, DBDirs)) - return std::move(E); - if (DBDirs.empty()) - DBDirs.push_back((Twine(DBDirPrefix) + "1").str()); - - assert(!DBDirs.empty()); + auto DBDirs = getAllDBDirs(RootPath); + if (!DBDirs) + return DBDirs.takeError(); + if (DBDirs->empty()) + DBDirs->push_back((Twine(DBDirPrefix) + "1").str()); std::shared_ptr Logger; if (Error E = @@ -510,10 +474,11 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, /// more directories, get the most recent directories and chain them, with the /// most recent being the primary one. The remaining directories are unused /// data than can be garbage-collected. + auto UniDB = std::unique_ptr(new UnifiedOnDiskCache()); std::unique_ptr UpstreamGraphDB; std::unique_ptr UpstreamKVDB; - if (DBDirs.size() > 1) { - StringRef UpstreamDir = *(DBDirs.end() - 2); + if (DBDirs->size() > 1) { + StringRef UpstreamDir = *(DBDirs->end() - 2); PathBuf = RootPath; sys::path::append(PathBuf, UpstreamDir); if (Error E = @@ -523,19 +488,19 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, return std::move(E); if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, /*ValueName=*/"objectid", - /*ValueSize=*/sizeof(uint64_t), Logger) + /*ValueSize=*/sizeof(uint64_t), + /*UnifiedCache=*/nullptr, Logger) .moveInto(UpstreamKVDB)) return std::move(E); } - OnDiskGraphDB *UpstreamGraphDBPtr = UpstreamGraphDB.get(); - StringRef PrimaryDir = *(DBDirs.end() - 1); + StringRef PrimaryDir = *(DBDirs->end() - 1); PathBuf = RootPath; sys::path::append(PathBuf, PrimaryDir); std::unique_ptr PrimaryGraphDB; if (Error E = OnDiskGraphDB::open(PathBuf, HashName, HashByteSize, - std::move(UpstreamGraphDB), Logger, FaultInPolicy) + UpstreamGraphDB.get(), 
Logger, FaultInPolicy) .moveInto(PrimaryGraphDB)) return std::move(E); std::unique_ptr PrimaryKVDB; @@ -543,17 +508,17 @@ UnifiedOnDiskCache::open(StringRef RootPath, std::optional SizeLimit, // including an extra translation step of the value during fault-in. if (Error E = OnDiskKeyValueDB::open(PathBuf, HashName, HashByteSize, /*ValueName=*/"objectid", - /*ValueSize=*/sizeof(uint64_t), Logger) + /*ValueSize=*/sizeof(uint64_t), + UniDB.get(), Logger) .moveInto(PrimaryKVDB)) return std::move(E); - auto UniDB = std::unique_ptr(new UnifiedOnDiskCache()); UniDB->RootPath = RootPath; UniDB->SizeLimit = SizeLimit.value_or(0); UniDB->LockFD = LockFD; - UniDB->NeedsGarbageCollection = DBDirs.size() > 2; + UniDB->NeedsGarbageCollection = DBDirs->size() > 2; UniDB->PrimaryDBDir = PrimaryDir; - UniDB->UpstreamGraphDB = UpstreamGraphDBPtr; + UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB); UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB); UniDB->UpstreamKVDB = std::move(UpstreamKVDB); UniDB->PrimaryKVDB = std::move(PrimaryKVDB); @@ -607,7 +572,7 @@ bool UnifiedOnDiskCache::hasExceededSizeLimit() const { Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { if (LockFD == -1) return Error::success(); // already closed. - auto _1 = make_scope_exit([&]() { + auto CloseLock = make_scope_exit([&]() { assert(LockFD >= 0); sys::fs::file_t LockFile = sys::fs::convertFDToNativeFile(LockFD); sys::fs::closeFile(LockFile); @@ -615,10 +580,10 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { }); bool ExceededSizeLimit = CheckSizeLimit ? hasExceededSizeLimit() : false; - PrimaryKVDB.reset(); UpstreamKVDB.reset(); + PrimaryKVDB.reset(); + UpstreamGraphDB.reset(); PrimaryGraphDB.reset(); - UpstreamGraphDB = nullptr; if (std::error_code EC = unlockFileThreadSafe(LockFD)) return createFileError(RootPath, EC); @@ -635,7 +600,7 @@ Error UnifiedOnDiskCache::close(bool CheckSizeLimit) { return Error::success(); // couldn't get exclusive lock, give up. 
return createFileError(RootPath, EC); } - auto _2 = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); + auto UnlockFile = make_scope_exit([&]() { unlockFileThreadSafe(LockFD); }); // Managed to get an exclusive lock which means there are no other open // \p UnifiedOnDiskCache instances for the same path, so we can safely start a @@ -661,12 +626,12 @@ UnifiedOnDiskCache::~UnifiedOnDiskCache() { consumeError(close()); } Error UnifiedOnDiskCache::collectGarbage(StringRef Path, ondisk::OnDiskCASLogger *Logger) { - SmallVector DBDirs; - if (Error E = getAllGarbageDirs(Path, DBDirs)) - return E; + auto DBDirs = getAllGarbageDirs(Path); + if (!DBDirs) + return DBDirs.takeError(); SmallString<256> PathBuf(Path); - for (StringRef UnusedSubDir : DBDirs) { + for (StringRef UnusedSubDir : *DBDirs) { sys::path::append(PathBuf, UnusedSubDir); if (Logger) Logger->log_UnifiedOnDiskCache_collectGarbage(PathBuf); diff --git a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp index bc60264bb58ba..726390a7acce5 100644 --- a/llvm/tools/libCASPluginTest/libCASPluginTest.cpp +++ b/llvm/tools/libCASPluginTest/libCASPluginTest.cpp @@ -13,6 +13,7 @@ #include "llvm-c/CAS/PluginAPI_functions.h" #include "llvm/CAS/BuiltinObjectHasher.h" #include "llvm/CAS/CASID.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" #include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Errc.h" @@ -318,13 +319,33 @@ Expected CASWrapper::downstreamNode(ObjectID Node) { return importNode(Node, FromDB, ToDB); } +static Expected cachePut(OnDiskKeyValueDB &DB, ArrayRef Key, + ObjectID ID) { + auto Value = UnifiedOnDiskCache::getValueFromObjectID(ID); + auto Result = DB.put(Key, Value); + if (!Result) + return Result.takeError(); + return UnifiedOnDiskCache::getObjectIDFromValue(*Result); +} + +static Expected> cacheGet(OnDiskKeyValueDB &DB, + ArrayRef Key) { + auto Result = DB.get(Key); + if (!Result) + return 
Result.takeError(); + if (!*Result) + return std::nullopt; + return UnifiedOnDiskCache::getObjectIDFromValue(**Result); +} + Error CASWrapper::upstreamKey(ArrayRef Key, ObjectID Value) { if (!UpstreamDB) return Error::success(); Expected UpstreamVal = upstreamNode(Value); if (!UpstreamVal) return UpstreamVal.takeError(); - Expected PutValue = UpstreamDB->KVPut(Key, *UpstreamVal); + Expected PutValue = + cachePut(UpstreamDB->getKeyValueDB(), Key, *UpstreamVal); if (!PutValue) return PutValue.takeError(); assert(*PutValue == *UpstreamVal); @@ -336,7 +357,8 @@ CASWrapper::downstreamKey(ArrayRef Key) { if (!UpstreamDB) return std::nullopt; std::optional UpstreamValue; - if (Error E = UpstreamDB->KVGet(Key).moveInto(UpstreamValue)) + if (Error E = + cacheGet(UpstreamDB->getKeyValueDB(), Key).moveInto(UpstreamValue)) return std::move(E); if (!UpstreamValue) return std::nullopt; @@ -345,7 +367,7 @@ CASWrapper::downstreamKey(ArrayRef Key) { UpstreamDB->getGraphDB().getDigest(*UpstreamValue)); if (!Value) return Value.takeError(); - Expected PutValue = DB->KVPut(Key, *Value); + Expected PutValue = cachePut(DB->getKeyValueDB(), Key, *Value); if (!PutValue) return PutValue.takeError(); assert(*PutValue == *Value); @@ -628,7 +650,7 @@ llcas_actioncache_get_for_digest(llcas_cas_t c_cas, llcas_digest_t c_key, auto &DB = *Wrap.DB; ArrayRef Key(c_key.data, c_key.size); std::optional Value; - if (Error E = DB.KVGet(Key).moveInto(Value)) + if (Error E = cacheGet(DB.getKeyValueDB(), Key).moveInto(Value)) return reportError(std::move(E), error, LLCAS_LOOKUP_RESULT_ERROR); if (!Value) { if (!globally) @@ -684,7 +706,7 @@ bool llcas_actioncache_put_for_digest(llcas_cas_t c_cas, llcas_digest_t c_key, auto &DB = *Wrap.DB; ObjectID Value = ObjectID::fromOpaqueData(c_value.opaque); ArrayRef Key(c_key.data, c_key.size); - Expected Ret = DB.KVPut(Key, Value); + Expected Ret = cachePut(DB.getKeyValueDB(), Key, Value); if (!Ret) return reportError(Ret.takeError(), error, true); if (*Ret != 
Value) diff --git a/llvm/unittests/CAS/OnDiskCommonUtils.h b/llvm/unittests/CAS/OnDiskCommonUtils.h index 89f93e08366c9..48a1830f9b219 100644 --- a/llvm/unittests/CAS/OnDiskCommonUtils.h +++ b/llvm/unittests/CAS/OnDiskCommonUtils.h @@ -12,6 +12,8 @@ #include "llvm/CAS/BuiltinObjectHasher.h" #include "llvm/CAS/OnDiskGraphDB.h" +#include "llvm/CAS/OnDiskKeyValueDB.h" +#include "llvm/CAS/UnifiedOnDiskCache.h" #include "llvm/Support/BLAKE3.h" #include "llvm/Testing/Support/Error.h" @@ -58,6 +60,25 @@ inline Expected store(OnDiskGraphDB &DB, StringRef Data, return ID; } +inline Expected cachePut(OnDiskKeyValueDB &DB, ArrayRef Key, + ObjectID ID) { + auto Value = UnifiedOnDiskCache::getValueFromObjectID(ID); + auto Result = DB.put(Key, Value); + if (!Result) + return Result.takeError(); + return UnifiedOnDiskCache::getObjectIDFromValue(*Result); +} + +inline Expected> cacheGet(OnDiskKeyValueDB &DB, + ArrayRef Key) { + auto Result = DB.get(Key); + if (!Result) + return Result.takeError(); + if (!*Result) + return std::nullopt; + return UnifiedOnDiskCache::getObjectIDFromValue(**Result); +} + inline Error printTree(OnDiskGraphDB &DB, ObjectID ID, raw_ostream &OS, unsigned Indent = 0) { std::optional Obj; diff --git a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp index 9c7c6a929fd39..68af80715cd8c 100644 --- a/llvm/unittests/CAS/OnDiskGraphDBTest.cpp +++ b/llvm/unittests/CAS/OnDiskGraphDBTest.cpp @@ -102,7 +102,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInSingleNode) { std::unique_ptr DB; ASSERT_THAT_ERROR( OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), - std::move(UpstreamDB), /*Logger=*/nullptr, + UpstreamDB.get(), /*Logger=*/nullptr, OnDiskGraphDB::FaultInPolicy::SingleNode) .moveInto(DB), Succeeded()); @@ -208,7 +208,7 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInFullTree) { unittest::TempDir Temp("ondiskcas", /*Unique=*/true); std::unique_ptr DB; ASSERT_THAT_ERROR(OnDiskGraphDB::open(Temp.path(), "blake3", 
sizeof(HashType), - std::move(UpstreamDB), + UpstreamDB.get(), /*Logger=*/nullptr, OnDiskGraphDB::FaultInPolicy::FullTree) .moveInto(DB), @@ -267,13 +267,13 @@ TEST_F(OnDiskCASTest, OnDiskGraphDBFaultInPolicyConflict) { std::unique_ptr DB; ASSERT_THAT_ERROR( OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), - std::move(UpstreamDB), /*Logger=*/nullptr, Policy1) + UpstreamDB.get(), /*Logger=*/nullptr, Policy1) .moveInto(DB), Succeeded()); DB.reset(); ASSERT_THAT_ERROR( OnDiskGraphDB::open(Temp.path(), "blake3", sizeof(HashType), - std::move(UpstreamDB), /*Logger=*/nullptr, Policy2) + UpstreamDB.get(), /*Logger=*/nullptr, Policy2) .moveInto(DB), Failed()); }; diff --git a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp index e25288a26eb92..09aebc2d4bc19 100644 --- a/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp +++ b/llvm/unittests/CAS/UnifiedOnDiskCacheTest.cpp @@ -82,14 +82,18 @@ TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) { Key1Hash = digest("key1"); std::optional Val; - ASSERT_THAT_ERROR(UniDB->KVPut(Key1Hash, *IDRoot).moveInto(Val), - Succeeded()); + ASSERT_THAT_ERROR( + cachePut(UniDB->getKeyValueDB(), Key1Hash, *IDRoot).moveInto(Val), + Succeeded()); EXPECT_EQ(IDRoot, Val); Key2Hash = digest("key2"); std::optional KeyID; ASSERT_THAT_ERROR(DB.getReference(Key2Hash).moveInto(KeyID), Succeeded()); - ASSERT_THAT_ERROR(UniDB->KVPut(*KeyID, *ID1).moveInto(Val), Succeeded()); + ASSERT_THAT_ERROR(cachePut(UniDB->getKeyValueDB(), + UniDB->getGraphDB().getDigest(*KeyID), *ID1) + .moveInto(Val), + Succeeded()); } auto checkTree = [&](const HashType &Digest, StringRef ExpectedTree) { @@ -108,7 +112,9 @@ TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) { auto checkKey = [&](const HashType &Key, StringRef ExpectedData) { OnDiskGraphDB &DB = UniDB->getGraphDB(); std::optional Val; - ASSERT_THAT_ERROR(UniDB->KVGet(Key).moveInto(Val), Succeeded()); + ASSERT_THAT_ERROR(cacheGet(UniDB->getKeyValueDB(), Key).moveInto(Val), + 
Succeeded()); + ASSERT_TRUE(Val.has_value()); std::optional Obj; ASSERT_THAT_ERROR(DB.load(*Val).moveInto(Obj), Succeeded()); @@ -185,7 +191,8 @@ TEST_F(OnDiskCASTest, UnifiedOnDiskCacheTest) { ASSERT_THAT_ERROR(DB.getReference(OtherHash).moveInto(ID), Succeeded()); EXPECT_FALSE(DB.containsObject(*ID)); std::optional Val; - ASSERT_THAT_ERROR(UniDB->KVGet(Key2Hash).moveInto(Val), Succeeded()); + ASSERT_THAT_ERROR(cacheGet(UniDB->getKeyValueDB(), Key2Hash).moveInto(Val), + Succeeded()); EXPECT_FALSE(Val.has_value()); } }