syntax = "proto3";
package buildbarn.configuration.blobstore;
import "github.com/buildbarn/bb-storage/pkg/proto/configuration/blockdevice/blockdevice.proto";
import "github.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/aws/aws.proto";
import "github.com/buildbarn/bb-storage/pkg/proto/configuration/cloud/gcp/gcp.proto";
import "github.com/buildbarn/bb-storage/pkg/proto/configuration/digest/digest.proto";
import "github.com/buildbarn/bb-storage/pkg/proto/configuration/grpc/grpc.proto";
import "github.com/buildbarn/bb-storage/pkg/proto/configuration/http/client/client.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
option go_package = "github.com/buildbarn/bb-storage/pkg/proto/configuration/blobstore";
// Storage configuration for Bazel Buildbarn.
message BlobstoreConfiguration {
// Storage configuration for the Content Addressable Storage (CAS).
BlobAccessConfiguration content_addressable_storage = 1;
// Storage configuration for the Action Cache (AC).
BlobAccessConfiguration action_cache = 2;
}
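// Illustrative sketch of a BlobstoreConfiguration message expressed as a
// Jsonnet/JSON value (not part of the schema; addresses are placeholders,
// field names follow the JSON mapping of this schema):
//
//   {
//     contentAddressableStorage: { grpc: { address: 'storage:8980' } },
//     actionCache: {
//       completenessChecking: {
//         backend: { grpc: { address: 'storage:8980' } },
//         maximumTotalTreeSizeBytes: 64 * 1024 * 1024,
//       },
//     },
//   }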
message BlobAccessConfiguration {
oneof backend {
// Cache reads from a slow remote storage backend into a fast
// local storage backend.
ReadCachingBlobAccessConfiguration read_caching = 4;
// Read objects from/write objects to a gRPC service that
// implements the remote execution protocol.
buildbarn.configuration.grpc.ClientConfiguration grpc = 7;
// Always fail with a fixed error response.
google.rpc.Status error = 8;
// Fan out requests across multiple storage backends to spread
// out load.
ShardingBlobAccessConfiguration sharding = 9;
// Store blobs in two backends. Blobs present in exactly one backend
// are automatically replicated to the other backend.
//
// This backend does not guarantee high availability, as it does not
// function in case one backend is unavailable. Crashed backends
// need to be replaced with functional empty instances. These will
// be refilled automatically.
MirroredBlobAccessConfiguration mirrored = 14;
// Store blobs on the local system.
LocalBlobAccessConfiguration local = 15;
// Cache knowledge of which blobs exist locally.
//
// Bazel doesn't have a client-side cache with knowledge of which
// objects are present inside a remote cache. This means that it
// will often call ContentAddressableStorage.FindMissingBlobs() with
// sets that have a strong overlap with what was requested
// previously.
//
// This decorator can be used to introduce such a cache server side.
// It is especially useful for multi-level storage setups. It can
// cause a reduction in load on storage nodes when this cache is
// enabled on frontend nodes.
//
// It only makes sense to use this decorator for the Content
// Addressable Storage, as FindMissingBlobs() is never called
// against the Action Cache. The storage backend must also be robust
// enough to guarantee that objects don't disappear shortly after
// calling ContentAddressableStorage.FindMissingBlobs(), as that
// would cause this decorator to cache invalid data.
ExistenceCachingBlobAccessConfiguration existence_caching = 16;
// Only return ActionResult messages for which all output files are
// present in the Content Addressable Storage (CAS). Certain
// clients, such as Bazel, require the use of this decorator. To
// reduce latency, it is advised that this decorator is used at the
// lowest level that has a full view of the entire CAS.
//
// This decorator must be placed on the Action Cache.
CompletenessCheckingBlobAccessConfiguration completeness_checking = 17;
// Fall back to reading data from a secondary backend when not found
// in the primary backend. Data is written to the primary backend only.
//
// This backend can be used to integrate external data sets into the
// system, e.g. by combining it with reference_expanding.
ReadFallbackBlobAccessConfiguration read_fallback = 18;
// Load Reference messages from an Indirect Content Addressable
// Storage (ICAS). Expand them by fetching the object from the
// location stored in the Reference message. This backend is only
// supported for the CAS.
//
// This backend can be used to integrate external data sets into the
// system by combining it with read_fallback.
ReferenceExpandingBlobAccessConfiguration reference_expanding = 19;
// Demultiplex requests across multiple storage backends, based on
// the instance name prefix.
//
// The logic for matching incoming requests and mutating the
// instance name in outgoing requests is identical to bb_storage's
// 'schedulers' configuration option.
DemultiplexingBlobAccessConfiguration demultiplexing = 20;
// Read objects using instance names in a hierarchical fashion. This
// means that if an object is written using instance name "foo/bar",
// it will be possible to read it using instance names "foo/bar",
// "foo/bar/baz", "foo/bar/baz/qux", but not instance names "",
// "foo", "foo/xyzzy". In other words, non-empty instance names will
// have contents inherited from their parent instance names. In case
// multiple instance names contain an object of a given digest, the
// one with the longest instance name is preferred.
//
// For the Action Cache (AC), it is recommended that this decorator
// is placed above CompletenessCheckingBlobAccess. This ensures that
// resolution continues, even if one or more instance names store an
// incomplete ActionResult.
//
// For every read operation, this decorator may generate a linear
// number of operations against the backend, based on the number of
// components in the instance name. This is acceptable for
// low-throughput data stores such as the Action Cache (AC) and
// Initial Size Class Cache (ISCC). For the Content Addressable
// Storage (CAS), this approach tends to be too inefficient. For the
// CAS, it would also be better to prefer the object with the
// shortest instance name, so that sharing of data between instance
// names is maximised. This is why this implementation does not
// allow enabling this option for the CAS. It is recommended that
// the LocalBlobAccessConfiguration.hierarchical_instance_names
// option is used instead.
BlobAccessConfiguration hierarchical_instance_names = 21;
// Hide ActionResult messages in the Action Cache (AC) where the
// 'worker_completed_timestamp' field in the ExecutedActionMetadata
// is too far in the past. This decorator can be used to ensure that
// all targets are rebuilt periodically.
ActionResultExpiringBlobAccessConfiguration action_result_expiring = 22;
// Send read traffic to a read-only replica, while sending write
// traffic to a source of truth. Read traffic may be sent to the
// source of truth if the replica is unavailable.
//
// By default, all requests are sent to the source. For read
// requests, this backend periodically sends a single canary request
// to the replica. Upon success, all subsequent read requests are
// sent to the replica as well. Upon failure, all requests will
// continue to go to the source.
//
// Only infrastructure errors (RPCs failing with INTERNAL,
// UNAVAILABLE and UNKNOWN) are considered failures.
ReadCanaryingBlobAccessConfiguration read_canarying = 23;
// Read objects from a ZIP file. Example use cases of this backend
// include the following:
//
// - When used in combination with ReadFallbackBlobAccess, it may be
// used to augment a data store with a set of objects that are
// guaranteed to remain present.
// - It may be used to access historical build actions that have
// been archived, so that they can be inspected or rerun.
//
// If this backend is used as a Content Addressable Storage (CAS),
// it will search for files named:
//
// ${digestFunction}-${hash}-${sizeBytes}
//
// For other storage types it will search for files named:
//
// ${digestFunction}-${hash}-${sizeBytes}-${instanceName}
ZIPBlobAccessConfiguration zip_reading = 24;
// Write objects to an uncompressed ZIP file. The resulting ZIP
// files can be read back using the 'zip_reading' option.
//
// This backend does not support reopening existing ZIP files. ZIP
// files will always be truncated upon startup. The trailing central
// directory is only written upon graceful termination, meaning that
// interrupting execution will create a malformed ZIP file.
ZIPBlobAccessConfiguration zip_writing = 25;
// Prevent repetition in the BlobAccess configuration by introducing
// one or more BlobAccess objects that can later be referred to
// using string labels.
//
// This option does not introduce a new kind of backend; it's
// merely present to allow creating BlobAccess setups that are DAG
// (Directed Acyclic Graph) shaped, as opposed to just trees.
WithLabelsBlobAccessConfiguration with_labels = 26;
// Refer to a BlobAccess object declared through 'with_labels'.
string label = 27;
// Sets the timeout of contexts passed to other backends to a known
// value. When gRPC calls time out, a `DEADLINE_EXCEEDED` error
// code will be returned.
DeadlineEnforcingBlobAccess deadline_enforcing = 28;
}
// Was 'redis'. Instead of using Redis, one may run a separate
// instance of bb_storage that uses the 'local' backend.
reserved 2;
// Was 'http'. Using HTTP as a transport for REv2 is suboptimal, as
// it does not provide any facilities for performing batch existence
// checks. Please use 'grpc' instead.
reserved 3;
// Was 'size_distinguishing'. This was mainly of use with the
// initial versions of Buildbarn, where the recommended approach for
// storing CAS objects was to store small objects in Redis and large
// objects in S3. Unlike the 'redis' backend, the 'local' backend is
// capable of storing objects of any size.
reserved 5;
// Was 'circular' (CircularBlobAccess). This backend has been replaced
// by 'local' (LocalBlobAccess).
reserved 6;
// Was 'cloud' (CloudBlobAccess for systems such as S3 and GCS). This
// backend has been removed for several reasons:
//
// - Compared to other storage backends, its time to first byte (TTFB)
// was relatively high, making it unattractive for storing
// everything but large Content Addressable Storage (CAS) objects.
// - The lack of efficient bulk operations meant that
// FindMissingBlobs() performance was very poor.
// - The consistency guarantees provided by many bucket
// implementations, most notably Amazon S3, are too weak for build
// clients to function properly.
//
// Users are instructed to migrate to LocalBlobAccess in combination
// with ShardingBlobAccess and MirroredBlobAccess. More details can be
// found in the following Architecture Decision Record (ADR):
//
// https://github.com/buildbarn/bb-adrs/blob/main/0002-storage.md
//
// If S3 was mainly used to integrate existing large corpora into the
// CAS, it may be sufficient to use ReferenceExpandingBlobAccess
// instead. More details about that can be found in this ADR:
//
// https://github.com/buildbarn/bb-adrs/blob/main/0004-icas.md
reserved 10;
}
message ReadCachingBlobAccessConfiguration {
// A remote storage backend that can only be accessed slowly. This
// storage backend is treated as the source of truth. Write
// operations are forwarded to this backend.
BlobAccessConfiguration slow = 1;
// A local storage backend that can be accessed quickly. This
// storage backend is treated as a cache. Objects will only be
// written into it when requested for reading.
BlobAccessConfiguration fast = 2;
// The replication strategy that should be used to copy objects from
// the slow backend to the fast backend.
BlobReplicatorConfiguration replicator = 3;
}
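// Illustrative sketch (placeholder values): a read-caching
// BlobAccessConfiguration where a central gRPC storage service is the
// source of truth and a local data store acts as the cache.
//
//   {
//     readCaching: {
//       slow: { grpc: { address: 'central-storage:8980' } },
//       fast: { local: { ... } },
//       replicator: { local: {} },
//     },
//   }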
message ShardingBlobAccessConfiguration {
message Shard {
// Storage backend that is used by this shard. Omitting this
// causes the implementation to assume this shard is drained.
// Requests to this shard will be spread out across the other
// shards.
BlobAccessConfiguration backend = 1;
// Non-zero ratio of how many keys are allocated to this shard.
// When all shards have equal specifications (i.e., capacity and
// bandwidth), every shard may have a weight of one.
//
// For the backend selection algorithm to run quickly, it is not
// advised to let the total weight of drained backends strongly
// exceed the total weight of undrained ones.
uint32 weight = 2;
}
message Legacy {
// Order of the shards for the legacy schema. Each element refers to a
// corresponding key in the 'shards' map, or is null for a drained
// backend.
repeated string shard_order = 1;
// Hash initialization seed used for legacy schema.
uint64 hash_initialization = 2;
}
// Was 'hash_initialization', a seed for the shards array. It has been
// made redundant by the 'shards' map. To run in legacy-compatible
// mode, set 'hash_initialization' in 'legacy' instead.
reserved 1;
// Shards identified by a key within the context of this sharding
// configuration. The key is a freeform string which describes the identity
// of the shard in the context of the current sharding configuration.
// Shards are chosen via Rendezvous hashing based on the digest, weight and
// key of the configuration.
//
// When removing a shard from the map it is guaranteed that only blobs
// which resolved to the removed shard will get a different shard. When
// adding shards there is a weight/total_weight probability that any given
// blob will be resolved to the new shards.
map<string, Shard> shards = 2;
// A temporary legacy mode which allows clients to use storage backends which
// are sharded with the old sharding topology implementation. Consumers are
// expected to migrate in a timely fashion and support for the legacy schema
// will be removed by 2025-12-31.
Legacy legacy = 3;
}
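// Illustrative sketch (placeholder keys and addresses): two active shards
// of equal weight and one drained shard whose backend has been omitted.
//
//   {
//     sharding: {
//       shards: {
//         'shard-a': { backend: { grpc: { address: 'storage-a:8980' } }, weight: 1 },
//         'shard-b': { backend: { grpc: { address: 'storage-b:8980' } }, weight: 1 },
//         'shard-c': { weight: 1 },  // Drained: requests spread across the others.
//       },
//     },
//   }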
message MirroredBlobAccessConfiguration {
// Primary backend.
BlobAccessConfiguration backend_a = 1;
// Secondary backend.
BlobAccessConfiguration backend_b = 2;
// The replication strategy that should be used to copy objects from
// the primary backend to the secondary backend in case of
// inconsistencies.
BlobReplicatorConfiguration replicator_a_to_b = 3;
// The replication strategy that should be used to copy objects from
// the secondary backend to the primary backend in case of
// inconsistencies.
BlobReplicatorConfiguration replicator_b_to_a = 4;
}
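// Illustrative sketch (placeholder addresses): a mirrored pair where
// replication in either direction is performed by the client itself.
//
//   {
//     mirrored: {
//       backendA: { grpc: { address: 'storage-a:8980' } },
//       backendB: { grpc: { address: 'storage-b:8980' } },
//       replicatorAToB: { local: {} },
//       replicatorBToA: { local: {} },
//     },
//   }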
// LocalBlobAccess stores all data onto disk inside blocks. A block can
// contain multiple blobs, but a blob cannot span multiple blocks. This
// means that a block needs to be at least as large as the maximum blob
// size you're willing to accept. For example, if you have a 512 GiB
// cache, having a total of 32 blocks means you can store objects up to
// 16 GiB in size. This means that for small instances of
// LocalBlobAccess, the number of blocks may need to be reduced.
//
// Blocks are the unit at which LocalBlobAccess performs garbage
// collection. If storage space in all used blocks is exhausted,
// LocalBlobAccess discards the contents of a block so that space may be
// reused. Therefore, the total number of blocks does not need to be
// very high. If the total number of blocks is 32, LocalBlobAccess will
// only discard 100% / 32 = 3.125% of its data at a time. This means
// that 96.875% of storage space remains in continuous use, which is
// more than adequate.
//
// Blocks are partitioned into three groups based on their creation
// time, named "old", "current" and "new". Blobs provided to Put() will
// always be stored in a block in the "new" group. When the oldest block
// in the "new" group becomes full, it is moved to the "current" group.
// This causes the oldest block in the "current" group to be displaced
// to the "old" group. The oldest block in the "old" group is discarded.
//
// The difference between the "current" group and the "old" group is
// that data is refreshed when accessed. Data in the "old" group is at
// risk of being removed in the near future, which is why it needs to
// be copied into the "new" group when requested to be retained. Data
// in the "current" group is assumed to remain present for the time
// being, which is why it is left in place.
//
// Below is an illustration of how the blocks of data may be laid out at
// a given point in time. Every column of █ characters corresponds to a
// single block. The number of characters indicates the amount of data
// stored within.
//
// ← Over time, blocks move from "new" to "current" to "old" ←
//
// Old Current New
// █ █ █ █ │ █ █ █ █ █ █ █ █ │
// █ █ █ █ │ █ █ █ █ █ █ █ █ │
// █ █ █ █ │ █ █ █ █ █ █ █ █ │
// █ █ █ █ │ █ █ █ █ █ █ █ █ │
// █ █ █ █ │ █ █ █ █ █ █ █ █ │ █
// █ █ █ █ │ █ █ █ █ █ █ █ █ │ █
// █ █ █ █ │ █ █ █ █ █ █ █ █ │ █ █
// █ █ █ █ │ █ █ █ █ █ █ █ █ │ █ █ █
// ↓ ↓ ↓ ↓ ↑ ↑ ↑ ↑
// └─┴─┴─┴─────────────────────┴─┴─┴─┘
// Data gets copied from "old" to "new" when requested.
//
// Blobs get stored in blocks in the "new" group with an inverse
// exponential probability. This is done to reduce the probability of
// multiple block rotations close after each other, as this might put
// excessive pressure on the garbage collector. Because the placement
// distribution decreases rapidly, having more than three or four "new"
// blocks would be wasteful. Having fewer is also not recommended, as
// that increases the chance of placing objects that are used together
// inside the same block. This may cause 'tidal waves' of I/O whenever
// such data ends up in the "old" group at once.
//
// After initialization, there will be fewer blocks in the "current"
// group than configured, due to there simply being no data. This is
// compensated by adding more blocks to the "new" group. Unlike the
// regular blocks in this group, these will have a uniform placement
// distribution that is twice as high as normal. This is done to ensure
// the "current" blocks are randomly seeded to reduce 'tidal waves'
// later on.
//
// The number of blocks in the "old" group should not be too low, as
// this would cause this storage backend to become a FIFO instead of
// being LRU-like. Setting it too high is also not recommended, as this
// would increase redundancy in the data stored. The "current" group
// should likely be two or three times as large as the "old" group.
message LocalBlobAccessConfiguration {
// Was 'digest_location_map_size'. This option has been moved to
// 'key_location_map_in_memory.entries'.
reserved 1;
message KeyLocationMapInMemory {
// The key-location map is a hash table that is used by this storage
// backend to resolve digests to locations where data is stored.
// This option determines the size of this hash table. Because
// entries are small (about 64 bytes in size), it is recommended to
// make this map relatively large to reduce collisions.
//
// Recommended value: between 2 and 10 times the expected number of
// objects stored.
int64 entries = 1;
}
// Data store for the metadata of objects. The following Prometheus
// queries may be used to determine whether insertion into the
// key-location map caused other entries to be displaced prematurely:
//
// buildbarn_blobstore_hashing_key_location_map_put_iterations_count{outcome="TooManyAttempts"}
// buildbarn_blobstore_hashing_key_location_map_put_too_many_iterations_total
//
// If these queries yield values greater than zero, you may need to
// increase this data store's size (or reduce the size of the blocks
// backend).
//
// Note that restarting bb_storage causes these metrics to be reset,
// meaning that you may need to run bb_storage for a longer amount of
// time to get better insight in whether objects are discarded
// prematurely.
oneof key_location_map_backend {
// Store the key-location map in memory.
KeyLocationMapInMemory key_location_map_in_memory = 11;
// Store the key-location map on a block device. The size of the
// block device determines the number of entries stored.
buildbarn.configuration.blockdevice.Configuration
key_location_map_on_block_device = 12;
}
// The number of indices a Get() call on the key-location map may
// attempt to access. The lower the utilization rate of the
// key-location map, the lower this value may be set. For example, if
// the size of the key-location map is set in such a way that it is
// only utilized by 10% (factor 0.1), setting this field to 16 means
// there is only a 0.1^16 chance that inserting an entry prematurely
// displaces another object from storage.
//
// Recommended value: 16
uint32 key_location_map_maximum_get_attempts = 2;
// The number of mutations that a Put() on the key-location map may
// perform. Because the key-location map uses a scheme similar to
// Robin Hood hashing, insertions may cause other entries to be
// displaced. Those entries may then cause even more entries to be
// displaced. Because of that, it is recommended to set this field to
// a small multiple of the maximum Get() attempts.
//
// Recommended value: 64
int64 key_location_map_maximum_put_attempts = 3;
// The number of blocks, where attempting to access any data stored
// within will cause it to be refreshed (i.e., copied into new
// blocks).
//
// Setting the number of old blocks too low may cause builds to fail,
// due to data disappearing prematurely. Setting the number of old
// blocks too high may cause an excessive amount of duplication in the
// data set. For example, if old_blocks == current_blocks + new_blocks,
// there may be a redundancy in the data set up to a factor of two.
//
// Recommended value: 8
int32 old_blocks = 5;
// The number of blocks, where attempting to access data stored within
// will not cause data to be refreshed immediately. The containing
// block will first need to become old for data to be eligible for
// refreshes.
//
// Recommended value: 24
int32 current_blocks = 6;
// The number of blocks where new data needs to be written. It is
// valid to set this to just 1. Setting it to a slightly higher value
// has the advantage that frequently used objects will over time get
// smeared out across the data set. This spreads out the cost of
// refreshing data from old to new blocks.
//
// Because the probability of storing objects in new blocks has an
// inverse exponential distribution, it is not recommended to set this
// to any value higher than 4. Whereas the first new block will at
// times be somewhere between 50% and 100% full, the fourth new block
// will only be between 6.25% and 12.5% full, which is wasteful.
//
// Setting this to any value other than 1 is only supported for the
// Content Addressable Storage (CAS). Other storage types such as the
// Action Cache (AC) need to support updates to existing objects,
// which can only be done reliably if new objects are written into a
// single block.
//
// Recommended value: 3 for the CAS, 1 for other storage types.
int32 new_blocks = 7;
// Was 'instances'. This field no longer needs to be provided, as this
// storage backend is now capable of storing entries for arbitrary
// instance names transparently.
reserved 8;
message BlocksInMemory {
// Data is stored in a list of blocks. The total number of blocks is
// constant over time, with small fluctuations to deal with lingering
// requests when removing a block. This option sets the size of an
// individual block.
//
// Recommended value: (total space available) /
// (old_blocks + current_blocks + new_blocks)
int64 block_size_bytes = 1;
}
message BlocksOnBlockDevice {
// The block device where data needs to be stored.
buildbarn.configuration.blockdevice.Configuration source = 1;
// To deal with lingering read requests, a small number of old
// blocks may need to be retained for a short period of time before
// being recycled to store new data. This option determines how many
// of such lingering blocks are allocated.
//
// Unlike in-memory storage, where the block size is configured
// explicitly, block device backed storage automatically infers an
// optimal block size. The block size is equal to:
//
// block_size = (size of block device) /
// (spare_blocks + old_blocks + current_blocks + new_blocks)
//
// Recommended value: 3
int32 spare_blocks = 2;
// When set, temporarily cache the integrity of data after it's been
// read from the block device. This is a requirement for being able
// to randomly access objects quickly.
//
// The disadvantage of enabling this option is that data corruption
// on the block device may not be detected. It is therefore
// recommended to set the cache duration to a limited value (e.g.,
// "4h").
buildbarn.configuration.digest.ExistenceCacheConfiguration
data_integrity_validation_cache = 3;
}
// Data store for the contents of objects. The following Prometheus
// query may be used to determine the worst-case retention of this
// data store in seconds:
//
// time() -
// buildbarn_blobstore_old_current_new_location_blob_map_last_removed_old_block_insertion_time_seconds
//
// If this query yields a value that is lower than desired, you may
// need to increase this data store's size.
//
// Note that restarting bb_storage causes this metric to be reset,
// meaning that you may need to run bb_storage for a longer amount of
// time to get better insight in the worst-case retention.
oneof blocks_backend {
// Store all data in memory. For larger setups, this may place a lot
// of pressure on Go's garbage collector. It may be necessary to
// reduce the value of GOGC to use this option reliably.
BlocksInMemory blocks_in_memory = 9;
// Store the blocks containing data on a block device.
BlocksOnBlockDevice blocks_on_block_device = 10;
}
message Persistent {
// Path to a directory on disk where metadata can be stored to be
// able to persist. This metadata needs to be reloaded on startup to
// be able to access previous data.
//
// This directory will hold a single file named "state", containing
// a Protobuf message of type
// buildbarn.blobstore.local.PersistentState. It is not recommended
// to use this directory for any purpose other than storing the
// persistent state file, as fsync() is called on it regularly.
string state_directory_path = 1;
// The amount of time between fsync() calls against the block device
// used to store blocks of data. Setting this option to a lower
// value reduces the amount of data that may get lost across
// restarts.
//
// This option acts as a lower bound on the amount of time between
// fsync() calls. No calls to fsync() are made if the system is
// idle, nor are multiple calls performed in parallel in case they
// take longer to complete than the configured interval.
//
// Care should be taken that this value is not set too low. Every
// epoch that still references valid data consumes 16 bytes of
// memory and increases the size of the state file by a similar
// amount. This means that if this option is set to '300s', epoch
// bookkeeping consumes up to 12*24*365*16 B = ~1.68 MB of space if
// the system were to operate for a full year without blocks being
// released. Setting this to '1s' blows this up by a factor 300.
//
// Recommended value: '300s'
google.protobuf.Duration minimum_epoch_interval = 2;
}
// When set, persist data across restarts. This feature is only
// available when both the key-location map and blocks are stored on a
// block device.
//
// When not set, data is not persisted. The data store will be empty
// every time the application is restarted. Existing entries in the
// key-location map and data in blocks will be ignored, even if their
// contents are valid.
Persistent persistent = 13;
// For all data stores except for the Content Addressable Storage
// (CAS), this storage backend always fully respects the REv2 instance
// name. This means that every instance name may store a separate copy
// of an object. Reads and writes are fully isolated.
//
// For the Content Addressable Storage, this option determines to what
// extent the instance name should be respected. When set to false,
// the instance name is completely ignored, meaning that all instance
// names share all objects. This is great from a performance point of
// view, as it means that users of multi-tenant setups need to upload
// objects less frequently, and that storage space usage is minimised.
// Unfortunately, it does mean that all tenants can access each
// other's objects once they get their hands on their digests.
//
// When this option is set to true, the instance name is respected in
// a hierarchical fashion. This means that if an object is written
// using instance name "foo/bar", it will be possible to read it using
// instance names "foo/bar", "foo/bar/baz", "foo/bar/baz/qux", but not
// instance names "", "foo", "foo/xyzzy". In other words, non-empty
// instance names will have Content Addressable Storage contents
// inherited from their parent instance names.
//
// This feature is implemented in such a way that object contents are
// still shared across all instance names. Enabling this option does
// not cause more data to be written into blocks, as uploads for
// objects that already exist under another instance name are treated
// as no-ops. It does cause at least a twofold increase in
// key-location map usage to track which instance name prefixes may
// access an object, proportional to the number of instance names
// used.
//
// This option is only supported for the Content Addressable Storage,
// as that is the only data store for which it is safe to provide such
// behaviour at the individual storage node level. For the Action
// Cache, you may only want to do hierarchical instance name matching
// at a higher level, e.g., on top of CompletenessCheckingBlobAccess.
// This can be achieved by using HierarchicalInstanceNamesBlobAccess.
bool hierarchical_instance_names = 14;
}
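// Illustrative sketch of a LocalBlobAccessConfiguration for the CAS,
// following the recommended values documented above. The block device
// configurations are elided with '...' (see blockdevice.proto); the state
// directory path is a placeholder.
//
//   {
//     local: {
//       keyLocationMapOnBlockDevice: { ... },
//       keyLocationMapMaximumGetAttempts: 16,
//       keyLocationMapMaximumPutAttempts: 64,
//       oldBlocks: 8,
//       currentBlocks: 24,
//       newBlocks: 3,
//       blocksOnBlockDevice: {
//         source: { ... },
//         spareBlocks: 3,
//       },
//       persistent: {
//         stateDirectoryPath: '/storage/persistent_state',
//         minimumEpochInterval: '300s',
//       },
//     },
//   }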
message ExistenceCachingBlobAccessConfiguration {
// The backend for which ContentAddressableStorage.FindMissingBlobs()
// results need to be cached.
BlobAccessConfiguration backend = 1;
// Parameters for the cache data structure that is used by this
// decorator.
buildbarn.configuration.digest.ExistenceCacheConfiguration existence_cache =
2;
}
message CompletenessCheckingBlobAccessConfiguration {
// The Action Cache (AC) backend from which ActionResult messages are
// loaded.
BlobAccessConfiguration backend = 1;
// The maximum combined size of Tree objects that may be referenced by
// the ActionResult message. ActionResults having output directories
// whose combined size exceeds this limit are suppressed
// (i.e., a NOT_FOUND error will be returned).
//
// This option places a limit on the amount of data that is read from
// the Content Addressable Storage (CAS) while processing a call to
// GetActionResult().
int64 maximum_total_tree_size_bytes = 2;
}
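// Illustrative sketch (placeholder address and limit): an Action Cache
// whose entries are only returned when all referenced outputs are present
// in the CAS.
//
//   {
//     completenessChecking: {
//       backend: { grpc: { address: 'storage:8980' } },
//       maximumTotalTreeSizeBytes: 64 * 1024 * 1024,
//     },
//   }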
message ReadFallbackBlobAccessConfiguration {
// Backend from which data is attempted to be read first, and to which
// data is written.
BlobAccessConfiguration primary = 1;
// Backend from which data is attempted to be read last.
BlobAccessConfiguration secondary = 2;
// The replication strategy that should be used to copy objects from
// the secondary backend to the primary backend. If unset, objects
// will not be copied.
BlobReplicatorConfiguration replicator = 3;
}
message ReferenceExpandingBlobAccessConfiguration {
// The Indirect Content Addressable Storage (ICAS) backend from which
// Reference objects are loaded.
BlobAccessConfiguration indirect_content_addressable_storage = 1;
// Optional: AWS access options and credentials for objects loaded
// from S3.
buildbarn.configuration.cloud.aws.SessionConfiguration aws_session = 2;
// Optional: Options to be used by the HTTP client.
buildbarn.configuration.http.client.Configuration http_client = 3;
// Optional: Google Cloud Platform (GCP) client options for objects
// loaded from GCS. Support for GCS is disabled if left unspecified.
buildbarn.configuration.cloud.gcp.ClientOptionsConfiguration
gcp_client_options = 4;
// Optional: Storage backend to use when Reference objects refer to
// objects stored in another Content Addressable Storage.
BlobAccessConfiguration content_addressable_storage = 5;
}
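// Illustrative sketch (placeholder addresses): the ICAS integration
// pattern described above, where 'read_fallback' consults a
// reference-expanding backend whenever an object is missing from the
// primary data store.
//
//   {
//     readFallback: {
//       primary: { grpc: { address: 'storage:8980' } },
//       secondary: {
//         referenceExpanding: {
//           indirectContentAddressableStorage: { grpc: { address: 'icas:8980' } },
//           httpClient: { ... },
//         },
//       },
//       replicator: { noop: {} },
//     },
//   }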
message BlobReplicatorConfiguration {
oneof mode {
// When blobs are only present in one backend, but not the other,
// they are copied by the client immediately.
//
// Because there is no orchestration between clients, this may for
// certain workloads cause multiple clients to all replicate the
// same objects. Especially for setups with many clients, this could
// put a lot of pressure on storage nodes.
//
// This strategy may still be acceptable for the Action Cache, even
// for larger setups. The Action Cache receives less load than the
// Content Addressable Storage. There is also a lower probability
// of clients requesting the same object at around the same time.
// Action Cache objects also tend to be relatively small, meaning
// that little space and bandwidth is wasted when replicating
// objects unnecessarily.
google.protobuf.Empty local = 1;
// Instruct an external gRPC service (bb_replicator) to perform
// replications. This is advised for setups with a larger number of
// clients, as a centralized replicator process may deduplicate
// replication actions. This reduces the load on storage nodes.
//
// This strategy is only supported for the Content Addressable
// Storage.
buildbarn.configuration.grpc.ClientConfiguration remote = 2;
// Queue and deduplicate all replication operations prior to
// executing them.
//
// In setups with a high volume of requests, it may normally be
// unsafe to restart a non-persistent storage node. Once the storage
// node would come back online, it would succumb to traffic
// generated by clients to replicate missing data.
//
// By executing all replication operations sequentially, the amount
// of pressure placed on storage nodes is bounded. By letting a
// dedicated bb_replicator instance use this strategy, replication
// throughput is bounded globally.
//
// TODO: This backend shares some overlap with 'deduplicating' and
// 'concurrency_limiting'. Should it be removed in favor of those?
// Right now this backend is more efficient for remote sinks,
// because it doesn't decompose requests for multiple objects.
QueuedBlobReplicatorConfiguration queued = 3;
// No replication will be performed. This can be useful when one
// or more of the backends have their contents managed externally.
google.protobuf.Empty noop = 4;
// Ensure that blobs are not replicated redundantly. Replication
// requests for the same blob are merged. To deal with potential
// race conditions, double check whether the sink already contains a
// blob before copying.
//
// In order to guarantee responsiveness for all callers, this
// replicator decomposes requests for multiple blobs into one
// request per blob. To prevent callers from stalling the
// replication process, it also doesn't stream data back to the
// caller as it is being replicated. This means that blobs are fully
// replicated from the source to the sink, prior to letting the
// caller read the data from the sink at its own pace.
//
// This replicator has been designed to reduce the amount of traffic
// against the source to an absolute minimum, at the cost of
// generating more traffic against the sink. It is recommended to
// use this replicator when the sink is an instance of
// LocalBlobAccess that is embedded into the same process, and blobs
// are expected to be consumed locally.
//
// This strategy is only supported for the Content Addressable
// Storage (CAS) and Indirect Content Addressable Storage (ICAS).
BlobReplicatorConfiguration deduplicating = 5;
// Ensure that the total number of concurrent replication requests
// remains bounded by a constant. By limiting the number of
// concurrent requests issued against a source, network starvation
// may be prevented.
//
// If this replicator is used in combination with 'deduplicating',
// it is recommended that 'deduplicating' is placed on the outside.
// More concretely:
//
// { deduplicating: { concurrencyLimiting: { ... } } }
//
// Otherwise, the concurrency limit will be applied against requests
// that haven't been deduplicated yet, leading to lower concurrency.
ConcurrencyLimitingBlobReplicatorConfiguration concurrency_limiting = 6;
}
}
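// Illustrative sketch (placeholder address and concurrency): the
// recommended nesting of 'deduplicating' around 'concurrency_limiting',
// forwarding replication to a remote bb_replicator service.
//
//   {
//     deduplicating: {
//       concurrencyLimiting: {
//         base: { remote: { address: 'bb-replicator:8980' } },
//         maximumConcurrency: 10,
//       },
//     },
//   }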
message QueuedBlobReplicatorConfiguration {
// Base replication strategy to which calls should be forwarded.
BlobReplicatorConfiguration base = 1;
// Parameters for the cache data structure that is used to deduplicate
// incoming replication operations.
buildbarn.configuration.digest.ExistenceCacheConfiguration existence_cache =
2;
}
message ConcurrencyLimitingBlobReplicatorConfiguration {
// Base replication strategy to which calls should be forwarded.
BlobReplicatorConfiguration base = 1;
// The maximum number of concurrent replication requests that are
// forwarded to the base replication strategy.
int64 maximum_concurrency = 2;
}
message DemultiplexingBlobAccessConfiguration {
// Map of storage backends, where the key corresponds to the instance
// name prefix to match. In case of multiple matches, the storage
// backend with the longest matching prefix is used. The matching
// prefix is removed from the resulting instance name.
//
// For example, if storage backends for instance name prefixes
// "acmecorp" and "acmecorp/rockets" are declared, requests for
// instance name "acmecorp/rockets/mars" will be forwarded to the
// latter. This storage backend will receive requests with instance
// name "mars".
//
// The empty string can be used to match all instance names, thereby
// causing all requests to be forwarded to a single storage backend.
map<string, DemultiplexedBlobAccessConfiguration> instance_name_prefixes = 1;
}
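// Illustrative sketch (placeholder addresses), following the example in
// the comment above: requests for instance name "acmecorp/rockets/mars"
// reach the second backend with instance name "mars".
//
//   {
//     demultiplexing: {
//       instanceNamePrefixes: {
//         'acmecorp': { backend: { grpc: { address: 'storage-acme:8980' } } },
//         'acmecorp/rockets': { backend: { grpc: { address: 'storage-rockets:8980' } } },
//       },
//     },
//   }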
message DemultiplexedBlobAccessConfiguration {
// The backend to which requests are forwarded.
BlobAccessConfiguration backend = 1;
// Add a prefix to the instance name of all requests forwarded to this
// backend.
string add_instance_name_prefix = 2;
}
message ActionResultExpiringBlobAccessConfiguration {
// The backend to which requests are forwarded.
BlobAccessConfiguration backend = 1;
// The minimum amount of time to pass before an ActionResult expires.
google.protobuf.Duration minimum_validity = 2;
// Maximum amount of jitter to be added to the expiration time. This
// ensures that actions that were built at around the same time don't
// also expire at around the same time, therefore amortizing the rate
// at which actions are rebuilt.
//
// The process for computing the jitter is deterministic, meaning that
// subsequent requests for the same ActionResult still yield the same
// expiration time.
google.protobuf.Duration maximum_validity_jitter = 3;
// The minimum value 'worker_completed_timestamp' should have for it
// to be considered valid. This can be used to fully invalidate the
// contents of the Action Cache (AC) in case its contents have become
// poisoned.
google.protobuf.Timestamp minimum_timestamp = 4;
}
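// Illustrative sketch (placeholder address and durations): expire cached
// action results after roughly one week, with up to a day of
// deterministic jitter.
//
//   {
//     actionResultExpiring: {
//       backend: { grpc: { address: 'storage-ac:8980' } },
//       minimumValidity: '604800s',
//       maximumValidityJitter: '86400s',
//     },
//   }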
message ReadCanaryingBlobAccessConfiguration {
// The backend that is the source of truth.
BlobAccessConfiguration source = 1;
// The backend that contains a read-only replica of the source.
BlobAccessConfiguration replica = 2;
// Size of the cache that is used to track the availability of the
// replica on a per REv2 instance name basis. This ensures that if the
// replica uses features such as authorization and demultiplexing
// based on instance names, availability is tracked accurately.
//
// Recommended value: 256
int32 maximum_cache_size = 3;
// The validity duration of cache entries. This controls how much time
// may pass without any read traffic before the backend falls back to
// the default state.
//
// Recommended value: 300s
google.protobuf.Duration maximum_cache_duration = 4;
}
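// Illustrative sketch (placeholder addresses), using the recommended
// cache settings documented above.
//
//   {
//     readCanarying: {
//       source: { grpc: { address: 'source-of-truth:8980' } },
//       replica: { grpc: { address: 'replica:8980' } },
//       maximumCacheSize: 256,
//       maximumCacheDuration: '300s',
//     },
//   }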
message ZIPBlobAccessConfiguration {
// Path of the ZIP file.
string path = 1;
// When set, temporarily cache the integrity of data after it's been
// read from the ZIP file. Once cached, uncompressed files in the ZIP
// file (i.e., ones stored with compression method STORE) may be
// randomly accessed quickly.
//
// The disadvantage of enabling this option is that data corruption in
// the ZIP file may not be detected. It is therefore recommended to
// set the cache duration to a limited value (e.g., "4h").
buildbarn.configuration.digest.ExistenceCacheConfiguration
data_integrity_validation_cache = 2;
}
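// Illustrative sketch (placeholder path; integrity cache elided): serving
// archived objects out of a ZIP file.
//
//   {
//     zipReading: {
//       path: '/archives/historical-cas.zip',
//       dataIntegrityValidationCache: { ... },
//     },
//   }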
message WithLabelsBlobAccessConfiguration {
// The backend that should be created, having access to the declared
// labels.
BlobAccessConfiguration backend = 1;
// A map of string labels to backends that can be referenced.
map<string, BlobAccessConfiguration> labels = 2;
}
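// Illustrative sketch (placeholder address): declare a backend once under
// a label and refer to it through the 'label' option. The same label may
// be referenced from multiple places, allowing DAG-shaped setups.
//
//   {
//     withLabels: {
//       backend: {
//         readCaching: {
//           slow: { label: 'central' },
//           fast: { local: { ... } },
//           replicator: { local: {} },
//         },
//       },
//       labels: {
//         'central': { grpc: { address: 'central-storage:8980' } },
//       },
//     },
//   }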
message DeadlineEnforcingBlobAccess {
// The duration to use for the timeout. After this duration, the
// context will be cancelled, so please ensure this is long enough
// for any operations you expect to finish.
google.protobuf.Duration timeout = 1;
// The backend to which all operations are delegated.
BlobAccessConfiguration backend = 2;
}
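// Illustrative sketch (placeholder timeout and address): fail calls with
// DEADLINE_EXCEEDED if the underlying backend takes longer than a minute.
//
//   {
//     deadlineEnforcing: {
//       timeout: '60s',
//       backend: { grpc: { address: 'storage:8980' } },
//     },
//   }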