Coverage Report

Created: 2024-12-20 00:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-config/src/stores.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use serde::{Deserialize, Serialize};
16
17
use crate::serde_utils::{
18
    convert_data_size_with_shellexpand, convert_duration_with_shellexpand,
19
    convert_numeric_with_shellexpand, convert_optional_string_with_shellexpand,
20
    convert_string_with_shellexpand, convert_vec_string_with_shellexpand,
21
};
22
23
/// Name of the store. This type will be used when referencing a store
24
/// by its key in the `CasConfig::stores` map.
25
pub type StoreRefName = String;
26
27
#[allow(non_camel_case_types)]
28
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
29
pub enum ConfigDigestHashFunction {
30
    /// Use the sha256 hash function.
31
    /// <https://en.wikipedia.org/wiki/SHA-2>
32
    sha256,
33
34
    /// Use the blake3 hash function.
35
    /// <https://en.wikipedia.org/wiki/BLAKE_(hash_function)>
36
    blake3,
37
}
38
39
#[allow(non_camel_case_types)]
40
#[derive(Serialize, Deserialize, Debug, Clone)]
41
pub enum StoreSpec {
42
    /// Memory store will store all data in a hashmap in memory.
43
    ///
44
    /// **Example JSON Config:**
45
    /// ```json
46
    /// "memory": {
47
    ///     "eviction_policy": {
48
    ///       // 10mb.
49
    ///       "max_bytes": 10000000,
50
    ///     }
51
    ///   }
52
    ///
53
    /// ```
54
    ///
55
    memory(MemorySpec),
56
57
    /// S3 store will use Amazon's S3 service as a backend to store
58
    /// the files. This configuration can be used to share files
59
    /// across multiple instances.
60
    ///
61
    /// This configuration will never delete files, so you are
62
    /// responsible for purging old files in other ways.
63
    ///
64
    /// **Example JSON Config:**
65
    /// ```json
66
    /// "experimental_s3_store": {
67
    ///   "region": "eu-north-1",
68
    ///   "bucket": "crossplane-bucket-af79aeca9",
69
    ///   "key_prefix": "test-prefix-index/",
70
    ///   "retry": {
71
    ///     "max_retries": 6,
72
    ///     "delay": 0.3,
73
    ///     "jitter": 0.5
74
    ///   },
75
    ///   "multipart_max_concurrent_uploads": 10
76
    /// }
77
    /// ```
78
    ///
79
    experimental_s3_store(S3Spec),
80
81
    /// Verify store is used to apply verifications to an underlying
82
    /// store implementation. It is strongly encouraged to validate
83
    /// as much data as you can before accepting data from a client,
84
    /// failing to do so may cause the data in the store to be
85
    /// populated with invalid data causing all kinds of problems.
86
    ///
87
    /// The suggested configuration is to have the CAS validate the
88
    /// hash and size and the AC validate nothing.
89
    ///
90
    /// **Example JSON Config:**
91
    /// ```json
92
    /// "verify": {
93
    ///   "memory": {
94
    ///     "eviction_policy": {
95
    ///       "max_bytes": 500000000 // 500mb.
96
    ///     }
97
    ///   },
98
    ///   "verify_size": true,
99
    ///   "verify_hash": true
100
    /// }
101
    /// ```
102
    ///
103
    verify(Box<VerifySpec>),
104
105
    /// Completeness checking store verifies if the
106
    /// output files & folders exist in the CAS before forwarding
107
    /// the request to the underlying store.
108
    /// Note: This store should only be used on AC stores.
109
    ///
110
    /// **Example JSON Config:**
111
    /// ```json
112
    /// "completeness_checking": {
113
    ///     "backend": {
114
    ///       "filesystem": {
115
    ///         "content_path": "~/.cache/nativelink/content_path-ac",
116
    ///         "temp_path": "~/.cache/nativelink/tmp_path-ac",
117
    ///         "eviction_policy": {
118
    ///           // 500mb.
119
    ///           "max_bytes": 500000000,
120
    ///         }
121
    ///       }
122
    ///     },
123
    ///     "cas_store": {
124
    ///       "ref_store": {
125
    ///         "name": "CAS_MAIN_STORE"
126
    ///       }
127
    ///     }
128
    ///   }
129
    /// ```
130
    ///
131
    completeness_checking(Box<CompletenessCheckingSpec>),
132
133
    /// A compression store that will compress the data inbound and
134
    /// outbound. There will be a non-trivial cost to compress and
135
    /// decompress the data, but in many cases if the final store is
136
    /// a store that requires network transport and/or storage space
137
    /// is a concern it is often faster and more efficient to use this
138
    /// store before those stores.
139
    ///
140
    /// **Example JSON Config:**
141
    /// ```json
142
    /// "compression": {
143
    ///     "compression_algorithm": {
144
    ///       "lz4": {}
145
    ///     },
146
    ///     "backend": {
147
    ///       "filesystem": {
148
    ///         "content_path": "/tmp/nativelink/data/content_path-cas",
149
    ///         "temp_path": "/tmp/nativelink/data/tmp_path-cas",
150
    ///         "eviction_policy": {
151
    ///           // 2gb.
152
    ///           "max_bytes": 2000000000,
153
    ///         }
154
    ///       }
155
    ///     }
156
    ///   }
157
    /// ```
158
    ///
159
    compression(Box<CompressionSpec>),
160
161
    /// A dedup store will take the inputs and run a rolling hash
162
    /// algorithm on them to slice the input into smaller parts then
163
    /// run a sha256 algorithm on the slice and if the object doesn't
164
    /// already exist, upload the slice to the `content_store` using
165
    /// a new digest of just the slice. Once all parts exist, an
166
    /// Action-Cache-like digest will be built and uploaded to the
167
    /// `index_store` which will contain a reference to each
168
    /// chunk/digest of the uploaded file. Downloading a request will
169
    /// first grab the index from the `index_store`, and forward the
170
    /// download content of each chunk as if it were one file.
171
    ///
172
    /// This store is exceptionally good when the following conditions
173
    /// are met:
174
    /// * Content is mostly the same (inserts, updates, deletes are ok)
175
    /// * Content is not compressed or encrypted
176
    /// * Uploading or downloading from `content_store` is the bottleneck.
177
    ///
178
    /// Note: This store pairs well when used with `CompressionSpec` as
179
    /// the `content_store`, but never put `DedupSpec` as the backend of
180
    /// `CompressionSpec` as it will negate all the gains.
181
    ///
182
    /// Note: When running `.has()` on this store, it will only check
183
    /// to see if the entry exists in the `index_store` and not check
184
    /// if the individual chunks exist in the `content_store`.
185
    ///
186
    /// **Example JSON Config:**
187
    /// ```json
188
    /// "dedup": {
189
    ///     "index_store": {
190
    ///       "memory_store": {
191
    ///         "max_size": 1000000000, // 1GB
192
    ///         "eviction_policy": "LeastRecentlyUsed"
193
    ///       }
194
    ///     },
195
    ///     "content_store": {
196
    ///       "compression": {
197
    ///         "compression_algorithm": {
198
    ///           "lz4": {}
199
    ///         },
200
    ///         "backend": {
201
    ///           "fast_slow": {
202
    ///             "fast": {
203
    ///               "memory_store": {
204
    ///                 "max_size": 500000000, // 500MB
205
    ///                 "eviction_policy": "LeastRecentlyUsed"
206
    ///               }
207
    ///             },
208
    ///             "slow": {
209
    ///               "filesystem": {
210
    ///                 "content_path": "/tmp/nativelink/data/content_path-content",
211
    ///                 "temp_path": "/tmp/nativelink/data/tmp_path-content",
212
    ///                 "eviction_policy": {
213
    ///                   "max_bytes": 2000000000 // 2gb.
214
    ///                 }
215
    ///               }
216
    ///             }
217
    ///           }
218
    ///         }
219
    ///       }
220
    ///     }
221
    ///   }
222
    /// ```
223
    ///
224
    dedup(Box<DedupSpec>),
225
226
    /// Existence store will wrap around another store and cache calls
227
    /// to `has` so that subsequent `has_with_results` calls will be
228
    /// faster. This is useful for cases when you have a store that
229
    /// is slow to respond to `has` calls.
230
    /// Note: This store should only be used on CAS stores.
231
    ///
232
    /// **Example JSON Config:**
233
    /// ```json
234
    /// "existence_cache": {
235
    ///     "backend": {
236
    ///       "memory": {
237
    ///         "eviction_policy": {
238
    ///           // 500mb.
239
    ///           "max_bytes": 500000000,
240
    ///         }
241
    ///       }
242
    ///     },
243
    ///     "cas_store": {
244
    ///       "ref_store": {
245
    ///         "name": "CAS_MAIN_STORE"
246
    ///       }
247
    ///     }
248
    ///   }
249
    /// ```
250
    ///
251
    existence_cache(Box<ExistenceCacheSpec>),
252
253
    /// `FastSlow` store will first try to fetch the data from the `fast`
254
    /// store and then if it does not exist try the `slow` store.
255
    /// When the object does exist in the `slow` store, it will copy
256
    /// the data to the `fast` store while returning the data.
257
    /// This store should be thought of as a store that "buffers"
258
    /// the data to the `fast` store.
259
    /// On uploads it will mirror data to both `fast` and `slow` stores.
260
    ///
261
    /// WARNING: If you need data to always exist in the `slow` store
262
    /// for something like remote execution, be careful because this
263
    /// store will never check to see if the objects exist in the
264
    /// `slow` store if it exists in the `fast` store (ie: it assumes
265
    /// that if an object exists in the `fast` store it will exist in the `slow`
266
    /// store).
267
    ///
268
    /// **Example JSON Config:**
269
    /// ```json
270
    /// "fast_slow": {
271
    ///     "fast": {
272
    ///       "filesystem": {
273
    ///         "content_path": "/tmp/nativelink/data/content_path-index",
274
    ///         "temp_path": "/tmp/nativelink/data/tmp_path-index",
275
    ///         "eviction_policy": {
276
    ///           // 500mb.
277
    ///           "max_bytes": 500000000,
278
    ///         }
279
    ///       }
280
    ///     },
281
    ///     "slow": {
282
    ///       "filesystem": {
283
    ///         "content_path": "/tmp/nativelink/data/content_path-index",
284
    ///         "temp_path": "/tmp/nativelink/data/tmp_path-index",
285
    ///         "eviction_policy": {
286
    ///           // 500mb.
287
    ///           "max_bytes": 500000000,
288
    ///         }
289
    ///       }
290
    ///     }
291
    ///   }
292
    /// ```
293
    ///
294
    fast_slow(Box<FastSlowSpec>),
295
296
    /// Shards the data to multiple stores. This is useful for cases
297
    /// when you want to distribute the load across multiple stores.
298
    /// The digest hash is used to determine which store to send the
299
    /// data to.
300
    ///
301
    /// **Example JSON Config:**
302
    /// ```json
303
    /// "shard": {
304
    ///     "stores": [
305
    ///         "memory": {
306
    ///             "eviction_policy": {
307
    ///                 // 10mb.
308
    ///                 "max_bytes": 10000000
309
    ///             },
310
    ///             "weight": 1
311
    ///         }
312
    ///     ]
313
    /// }
314
    /// ```
315
    ///
316
    shard(ShardSpec),
317
318
    /// Stores the data on the filesystem. This store is designed for
319
    /// local persistent storage. Restarts of this program should restore
320
    /// the previous state, meaning anything uploaded will be persistent
321
    /// as long as the filesystem integrity holds. This store uses the
322
    /// filesystem's `atime` (access time) to hold the last touched time
323
    /// of the file(s).
324
    ///
325
    /// **Example JSON Config:**
326
    /// ```json
327
    /// "filesystem": {
328
    ///     "content_path": "/tmp/nativelink/data-worker-test/content_path-cas",
329
    ///     "temp_path": "/tmp/nativelink/data-worker-test/tmp_path-cas",
330
    ///     "eviction_policy": {
331
    ///       // 10gb.
332
    ///       "max_bytes": 10000000000,
333
    ///     }
334
    /// }
335
    /// ```
336
    ///
337
    filesystem(FilesystemSpec),
338
339
    /// Store used to reference a store in the root store manager.
340
    /// This is useful for cases when you want to share a store in different
341
    /// nested stores. Example, you may want to share the same memory store
342
    /// used for the action cache, but use a `FastSlowSpec` and have the fast
343
    /// store also share the memory store for efficiency.
344
    ///
345
    /// **Example JSON Config:**
346
    /// ```json
347
    /// "ref_store": {
348
    ///     "name": "FS_CONTENT_STORE"
349
    /// }
350
    /// ```
351
    ///
352
    ref_store(RefSpec),
353
354
    /// Uses the size field of the digest to separate which store to send the
355
    /// data. This is useful for cases when you'd like to put small objects
356
    /// in one store and large objects in another store. This should only be
357
    /// used if the size field is the real size of the content, in other
358
    /// words, don't use on AC (Action Cache) stores. Any store where you can
359
    /// safely use `VerifySpec.verify_size = true`, this store should be safe
360
    /// to use (ie: CAS stores).
361
    ///
362
    /// **Example JSON Config:**
363
    /// ```json
364
    /// "size_partitioning": {
365
    ///     "size": 134217728, // 128mib.
366
    ///     "lower_store": {
367
    ///       "memory": {
368
    ///         "eviction_policy": {
369
    ///           "max_bytes": "${NATIVELINK_CAS_MEMORY_CONTENT_LIMIT:-100000000}"
370
    ///         }
371
    ///       }
372
    ///     },
373
    ///     "upper_store": {
374
    ///       // This store discards data that is 128mib or larger.
375
    ///       "noop": {}
376
    ///     }
377
    ///   }
378
    /// ```
379
    ///
380
    size_partitioning(Box<SizePartitioningSpec>),
381
382
    /// This store will pass-through calls to another GRPC store. This store
383
    /// is not designed to be used as a sub-store of another store, but it
384
    /// does satisfy the interface and will likely work.
385
    ///
386
    /// One major GOTCHA is that some stores use a special function on this
387
    /// store to get the size of the underlying object, which is only reliable
388
    /// when this store is serving a CAS store, not an AC store. If using
389
    /// this store directly without being a child of any store there are no
390
    /// side effects and is the most efficient way to use it.
391
    ///
392
    /// **Example JSON Config:**
393
    /// ```json
394
    /// "grpc": {
395
    ///     "instance_name": "main",
396
    ///     "endpoints": [
397
    ///       {"address": "grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"}
398
    ///     ],
399
    ///     "store_type": "ac"
400
    ///   }
401
    /// ```
402
    ///
403
    grpc(GrpcSpec),
404
405
    /// Stores data in any stores compatible with Redis APIs.
406
    ///
407
    /// Pairs well with `SizePartitioning` and/or `FastSlow` stores.
408
    /// Ideal for accepting small object sizes as most redis store
409
    /// services have a max file upload size of between 256MB and 512MB.
410
    ///
411
    /// **Example JSON Config:**
412
    /// ```json
413
    /// "redis_store": {
414
    ///     "addresses": [
415
    ///         "redis://127.0.0.1:6379/",
416
    ///     ]
417
    /// }
418
    /// ```
419
    ///
420
    redis_store(RedisSpec),
421
422
    /// Noop store is a store that sends streams into the void and all data
423
    /// retrieval will return 404 (`NotFound`). This can be useful for cases
424
    /// where you may need to partition your data and part of your data needs
425
    /// to be discarded.
426
    ///
427
    /// **Example JSON Config:**
428
    /// ```json
429
    /// "noop": {}
430
    /// ```
431
    ///
432
    noop(NoopSpec),
433
}
434
435
/// Configuration for an individual shard of the store.
436
#[derive(Serialize, Deserialize, Debug, Clone)]
437
#[serde(deny_unknown_fields)]
438
pub struct ShardConfig {
439
    /// Store to shard the data to.
440
    pub store: StoreSpec,
441
442
    /// The weight of the store. This is used to determine how much data
443
    /// should be sent to the store. The actual percentage is the individual
444
    /// store's weight divided by the sum of all the stores' weights.
445
    ///
446
    /// Default: 1
447
    pub weight: Option<u32>,
448
}
449
450
#[derive(Serialize, Deserialize, Debug, Clone)]
451
#[serde(deny_unknown_fields)]
452
pub struct ShardSpec {
453
    /// Stores to shard the data to.
454
    pub stores: Vec<ShardConfig>,
455
}
456
457
0
#[derive(Serialize, Deserialize, Debug, Clone)]
458
#[serde(deny_unknown_fields)]
459
pub struct SizePartitioningSpec {
460
    /// Size to partition the data on.
461
    #[serde(deserialize_with = "convert_data_size_with_shellexpand")]
462
    pub size: u64,
463
464
    /// Store to send data when object is < (less than) size.
465
    pub lower_store: StoreSpec,
466
467
    /// Store to send data when object is >= (greater than or equal to) size.
468
    pub upper_store: StoreSpec,
469
}
470
471
0
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
472
#[serde(deny_unknown_fields)]
473
pub struct RefSpec {
474
    /// Name of the store under the root "stores" config object.
475
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
476
    pub name: String,
477
}
478
479
0
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
480
#[serde(deny_unknown_fields)]
481
pub struct FilesystemSpec {
482
    /// Path on the system where to store the actual content. This is where
483
    /// the bulk of the data will be placed.
484
    /// On service bootup this folder will be scanned and all files will be
485
    /// added to the cache. In the event one of the files doesn't match the
486
    /// criteria, the file will be deleted.
487
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
488
    pub content_path: String,
489
490
    /// A temporary location of where files that are being uploaded or
491
    /// deleted will be placed while the content cannot be guaranteed to be
492
    /// accurate. This location must be on the same block device as
493
    /// `content_path` so atomic moves can happen (ie: move without copy).
494
    /// All files in this folder will be deleted on every startup.
495
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
496
    pub temp_path: String,
497
498
    /// Buffer size to use when reading files. Generally this should be left
499
    /// to the default value except for testing.
500
    /// Default: 32k.
501
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
502
    pub read_buffer_size: u32,
503
504
    /// Policy used to evict items out of the store. Failure to set this
505
    /// value will cause items to never be removed from the store causing
506
    /// infinite memory usage.
507
    pub eviction_policy: Option<EvictionPolicy>,
508
509
    /// The block size of the filesystem for the running machine. This
510
    /// value is used to determine an entry's actual size on disk consumed.
511
    /// For a 4KB block size filesystem, a 1B file actually consumes 4KB.
512
    /// Default: 4096
513
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
514
    pub block_size: u64,
515
}
516
517
#[derive(Serialize, Deserialize, Debug, Clone)]
518
#[serde(deny_unknown_fields)]
519
pub struct FastSlowSpec {
520
    /// Fast store that will be attempted to be contacted before reaching
521
    /// out to the `slow` store.
522
    pub fast: StoreSpec,
523
524
    /// If the object does not exist in the `fast` store it will try to
525
    /// get it from this store.
526
    pub slow: StoreSpec,
527
}
528
529
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
530
#[serde(deny_unknown_fields)]
531
pub struct MemorySpec {
532
    /// Policy used to evict items out of the store. Failure to set this
533
    /// value will cause items to never be removed from the store causing
534
    /// infinite memory usage.
535
    pub eviction_policy: Option<EvictionPolicy>,
536
}
537
538
0
#[derive(Serialize, Deserialize, Debug, Clone)]
539
#[serde(deny_unknown_fields)]
540
pub struct DedupSpec {
541
    /// Store used to store the index of each dedup slice. This store
542
    /// should generally be fast and small.
543
    pub index_store: StoreSpec,
544
545
    /// The store where the individual chunks will be uploaded. This
546
    /// store should generally be the slower & larger store.
547
    pub content_store: StoreSpec,
548
549
    /// Minimum size that a chunk will be when slicing up the content.
550
    /// Note: This setting can be increased to improve performance
551
    /// because it will actually not check this number of bytes when
552
    /// deciding where to partition the data.
553
    ///
554
    /// Default: 65536 (64k)
555
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
556
    pub min_size: u32,
557
558
    /// A best-effort attempt will be made to keep the average size
559
    /// of the chunks to this number. It is not a guarantee, but a
560
    /// slight attempt will be made.
561
    ///
562
    /// This value will also be about the threshold used to determine
563
    /// if we should even attempt to dedup the entry or just forward
564
    /// it directly to the `content_store` without an index. The actual
565
    /// value will be about `normal_size * 1.3` due to implementation
566
    /// details.
567
    ///
568
    /// Default: 262144 (256k)
569
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
570
    pub normal_size: u32,
571
572
    /// Maximum size a chunk is allowed to be.
573
    ///
574
    /// Default: 524288 (512k)
575
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
576
    pub max_size: u32,
577
578
    /// Due to implementation detail, we want to prefer to download
579
    /// the first chunks of the file so we can stream the content
580
    /// out and free up some of our buffers. This configuration
581
    /// will be used to restrict the number of concurrent chunk
582
    /// downloads at a time per `get()` request.
583
    ///
584
    /// This setting will also affect how much memory might be used
585
    /// per `get()` request. Estimated worst case memory per `get()`
586
    /// request is: `max_concurrent_fetch_per_get * max_size`.
587
    ///
588
    /// Default: 10
589
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
590
    pub max_concurrent_fetch_per_get: u32,
591
}
592
593
#[derive(Serialize, Deserialize, Debug, Clone)]
594
#[serde(deny_unknown_fields)]
595
pub struct ExistenceCacheSpec {
596
    /// The underlying store to wrap around. All content will first flow
597
    /// through self before forwarding to backend. In the event there
598
    /// is an error detected in self, the connection to the backend
599
    /// will be terminated, and early termination should always cause
600
    /// updates to fail on the backend.
601
    pub backend: StoreSpec,
602
603
    /// Policy used to evict items out of the store. Failure to set this
604
    /// value will cause items to never be removed from the store causing
605
    /// infinite memory usage.
606
    pub eviction_policy: Option<EvictionPolicy>,
607
}
608
609
#[derive(Serialize, Deserialize, Debug, Clone)]
610
#[serde(deny_unknown_fields)]
611
pub struct VerifySpec {
612
    /// The underlying store to wrap around. All content will first flow
613
    /// through self before forwarding to backend. In the event there
614
    /// is an error detected in self, the connection to the backend
615
    /// will be terminated, and early termination should always cause
616
    /// updates to fail on the backend.
617
    pub backend: StoreSpec,
618
619
    /// If set the store will verify the size of the data before accepting
620
    /// an upload of data.
621
    ///
622
    /// This should be set to false for AC, but true for CAS stores.
623
    #[serde(default)]
624
    pub verify_size: bool,
625
626
    /// If set, the data will be hashed to verify that the key matches the
627
    /// computed hash. The hash function is automatically determined based
628
    /// on the request and, if not set, will use the global default.
629
    ///
630
    /// This should be set to false for AC, but true for CAS stores.
631
    #[serde(default)]
632
    pub verify_hash: bool,
633
}
634
635
#[derive(Serialize, Deserialize, Debug, Clone)]
636
#[serde(deny_unknown_fields)]
637
pub struct CompletenessCheckingSpec {
638
    /// The underlying store that will have its results validated before sending to client.
639
    pub backend: StoreSpec,
640
641
    /// When a request is made, the results are decoded and all output digests/files are verified
642
    /// to exist in this CAS store before returning success.
643
    pub cas_store: StoreSpec,
644
}
645
646
0
#[derive(Serialize, Deserialize, Debug, Default, PartialEq, Clone, Copy)]
647
#[serde(deny_unknown_fields)]
648
pub struct Lz4Config {
649
    /// Size of the blocks to compress.
650
    /// Higher values require more ram, but might yield slightly better
651
    /// compression ratios.
652
    ///
653
    /// Default: 65536 (64k).
654
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
655
    pub block_size: u32,
656
657
    /// Maximum size allowed to attempt to deserialize data into.
658
    /// This is needed because the `block_size` is embedded into the data
659
    /// so if there was a bad actor, they could upload an extremely large
660
    /// `block_size`'ed entry and we'd allocate a large amount of memory
661
    /// when retrieving the data. To prevent this from happening, we
662
    /// allow you to specify the maximum that we'll attempt to deserialize.
663
    ///
664
    /// Default: value in `block_size`.
665
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
666
    pub max_decode_block_size: u32,
667
}
668
669
#[allow(non_camel_case_types)]
670
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
671
pub enum CompressionAlgorithm {
672
    /// LZ4 compression algorithm is extremely fast for compression and
673
    /// decompression, however does not perform very well in compression
674
    /// ratio. In most cases build artifacts are highly compressible, however
675
    /// lz4 is quite good at aborting early if the data is not deemed very
676
    /// compressible.
677
    ///
678
    /// see: <https://lz4.github.io/lz4/>
679
    lz4(Lz4Config),
680
}
681
682
#[derive(Serialize, Deserialize, Debug, Clone)]
683
#[serde(deny_unknown_fields)]
684
pub struct CompressionSpec {
685
    /// The underlying store to wrap around. All content will first flow
686
    /// through self before forwarding to backend. In the event there
687
    /// is an error detected in self, the connection to the backend
688
    /// will be terminated, and early termination should always cause
689
    /// updates to fail on the backend.
690
    pub backend: StoreSpec,
691
692
    /// The compression algorithm to use.
693
    pub compression_algorithm: CompressionAlgorithm,
694
}
695
696
/// Eviction policy always works on LRU (Least Recently Used). Any time an entry
697
/// is touched it updates the timestamp. Inserts and updates will execute the
698
/// eviction policy, removing any expired entries and/or the oldest entries
699
/// until the store size becomes smaller than `max_bytes`.
700
///
/// Every field is optional and falls back to `0` when omitted; unknown keys
/// are rejected during deserialization (`deny_unknown_fields`).
0
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
701
#[serde(deny_unknown_fields)]
702
pub struct EvictionPolicy {
703
    /// Maximum number of bytes before eviction takes place.
704
    /// Default: 0. Zero means never evict based on size.
705
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
706
    pub max_bytes: usize,
707
708
    /// When eviction starts based on hitting `max_bytes`, continue until
709
    /// `max_bytes - evict_bytes` is met to create a low watermark.  This stops
710
    /// operations from thrashing when the store is close to the limit.
711
    /// Default: 0
712
    #[serde(default, deserialize_with = "convert_data_size_with_shellexpand")]
713
    pub evict_bytes: usize,
714
715
    /// Maximum number of seconds for an entry to live since it was last
716
    /// accessed before it is evicted.
717
    /// Default: 0. Zero means never evict based on time.
718
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
719
    pub max_seconds: u32,
720
721
    /// Maximum size of the store, measured as a count, before an eviction
    /// takes place.
    /// NOTE(review): presumably the count is the number of entries — confirm.
722
    /// Default: 0. Zero means never evict based on count.
723
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
724
    pub max_count: u64,
725
}
726
727
0
/// Configuration for a store that uses an S3 bucket as its backend.
///
/// Unknown keys are rejected during deserialization (`deny_unknown_fields`).
///
/// NOTE(review): the `Option` fields below deserialize to `None` when omitted;
/// the non-`None` "Default:" values documented on them are presumably applied
/// by the code that consumes this spec — confirm.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
728
#[serde(deny_unknown_fields)]
729
pub struct S3Spec {
730
    /// S3 region. Usually us-east-1, us-west-2, af-south-1, etc.
731
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
732
    pub region: String,
733
734
    /// Bucket name to use as the backend.
735
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
736
    pub bucket: String,
737
738
    /// If you wish to prefix the location on s3. If None, no prefix will be used.
    /// Unlike `region` and `bucket`, no shell-expansion deserializer is
    /// attached to this field.
739
    #[serde(default)]
740
    pub key_prefix: Option<String>,
741
742
    /// Retry configuration to use when a network request fails.
743
    #[serde(default)]
744
    pub retry: Retry,
745
746
    /// If the number of seconds since the `last_modified` time of the object
747
    /// is greater than this value, the object will not be considered
748
    /// "existing". This allows for external tools to delete objects that
749
    /// have not been uploaded in a long time. If a client receives a `NotFound`
750
    /// the client should re-upload the object.
751
    ///
752
    /// There should be sufficient buffer time between how long the expiration
753
    /// configuration of the external tool is and this value. Keeping items
754
    /// around for a few days is generally a good idea.
755
    ///
756
    /// Default: 0. Zero means never consider an object expired.
757
    #[serde(default, deserialize_with = "convert_duration_with_shellexpand")]
758
    pub consider_expired_after_s: u32,
759
760
    /// The maximum buffer size to retain in case of a retryable error
761
    /// during upload. Setting this to zero will disable upload buffering;
762
    /// this means that in the event of a failure during upload, the entire
763
    /// upload will be aborted and the client will likely receive an error.
764
    ///
765
    /// Default: 5MB.
766
    pub max_retry_buffer_per_request: Option<usize>,
767
768
    /// Maximum number of concurrent `UploadPart` requests per `MultipartUpload`.
769
    ///
770
    /// Default: 10.
771
    pub multipart_max_concurrent_uploads: Option<usize>,
772
773
    /// Allow unencrypted HTTP connections. Only use this for local testing.
774
    ///
775
    /// Default: false
776
    #[serde(default)]
777
    pub insecure_allow_http: bool,
778
779
    /// Disable http/2 connections and only use http/1.1. Default client
780
    /// configuration will have http/1.1 and http/2 enabled for connection
781
    /// schemes. Http/2 should be disabled if environments have poor support
782
    /// or performance related to http/2. Safe to keep default unless
783
    /// underlying network environment or S3 API servers specify otherwise.
784
    ///
785
    /// Default: false
786
    #[serde(default)]
787
    pub disable_http2: bool,
788
}
789
790
/// The kind of store an upstream represents (see `GrpcSpec::store_type`).
// Variant names are intentionally lowercase: there is no serde rename on this
// enum, so the variant identifier is the name used in the serialized form.
#[allow(non_camel_case_types)]
791
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
792
pub enum StoreType {
793
    /// The store is content addressable storage.
794
    cas,
795
    /// The store is an action cache.
796
    ac,
797
}
798
799
0
/// TLS settings used when connecting to a remote endpoint as a client
/// (referenced from `GrpcEndpoint::tls_config`).
///
/// `ca_file` is required; `cert_file` and `key_file` are optional and
/// deserialize to `None` when omitted.
#[derive(Serialize, Deserialize, Debug, Clone)]
800
pub struct ClientTlsConfig {
801
    /// Path to the certificate authority to use to validate the remote.
802
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
803
    pub ca_file: String,
804
805
    /// Path to the certificate file for client authentication.
    ///
    /// Default: None.
    // `default` is required here: serde only treats a missing `Option` field
    // as `None` when no `deserialize_with` is set, so without it omitting this
    // field is a "missing field" error.
806
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
807
    pub cert_file: Option<String>,
808
809
    /// Path to the private key file for client authentication.
    ///
    /// Default: None.
    // `default` is required for the same reason as on `cert_file`.
810
    #[serde(default, deserialize_with = "convert_optional_string_with_shellexpand")]
811
    pub key_file: Option<String>,
812
}
813
814
0
/// A single gRPC endpoint to connect to, as listed in `GrpcSpec::endpoints`.
/// Unknown keys are rejected during deserialization (`deny_unknown_fields`).
#[derive(Serialize, Deserialize, Debug, Clone)]
815
#[serde(deny_unknown_fields)]
816
pub struct GrpcEndpoint {
817
    /// The endpoint address (e.g. grpc(s)://example.com:443).
818
    #[serde(deserialize_with = "convert_string_with_shellexpand")]
819
    pub address: String,
820
    /// The TLS configuration to use to connect to the endpoint (if grpcs).
821
    pub tls_config: Option<ClientTlsConfig>,
822
    /// The maximum concurrency to allow on this endpoint.
823
    pub concurrency_limit: Option<usize>,
824
}
825
826
0
/// Configuration for a store that forwards requests to upstream gRPC
/// endpoints. `endpoints` and `store_type` are required; the remaining fields
/// fall back to their type's default when omitted. Unknown keys are rejected
/// during deserialization (`deny_unknown_fields`).
#[derive(Serialize, Deserialize, Debug, Clone)]
827
#[serde(deny_unknown_fields)]
828
pub struct GrpcSpec {
829
    /// Instance name for GRPC calls. Proxy calls will have the `instance_name` changed to this.
830
    #[serde(default, deserialize_with = "convert_string_with_shellexpand")]
831
    pub instance_name: String,
832
833
    /// The endpoints of the grpc connection.
834
    pub endpoints: Vec<GrpcEndpoint>,
835
836
    /// The type of the upstream store, this ensures that the correct server calls are made.
837
    pub store_type: StoreType,
838
839
    /// Retry configuration to use when a network request fails.
840
    #[serde(default)]
841
    pub retry: Retry,
842
843
    /// Limit the number of simultaneous upstream requests to this many.  A
844
    /// value of zero is treated as unlimited.  If the limit is reached the
845
    /// request is queued.
846
    #[serde(default)]
847
    pub max_concurrent_requests: usize,
848
849
    /// The number of connections to make to each specified endpoint to balance
850
    /// the load over multiple TCP connections.  Default 1.
    /// NOTE(review): `#[serde(default)]` yields 0 when this key is omitted;
    /// presumably the consumer of this spec treats 0 as 1 — confirm.
851
    #[serde(default)]
852
    pub connections_per_endpoint: usize,
853
}
854
855
/// The possible error codes that might occur on an upstream request.
855
///
/// Each variant has an explicit discriminant, numbered 1 through 16.
/// NOTE(review): the names and numbers presumably mirror the gRPC status
/// codes (without `Ok = 0`) — confirm against nativelink-error.
856
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
857
pub enum ErrorCode {
858
    Cancelled = 1,
859
    Unknown = 2,
860
    InvalidArgument = 3,
861
    DeadlineExceeded = 4,
862
    NotFound = 5,
863
    AlreadyExists = 6,
864
    PermissionDenied = 7,
865
    ResourceExhausted = 8,
866
    FailedPrecondition = 9,
867
    Aborted = 10,
868
    OutOfRange = 11,
869
    Unimplemented = 12,
870
    Internal = 13,
871
    Unavailable = 14,
872
    DataLoss = 15,
873
    Unauthenticated = 16,
874
    // Note: This list is duplicated from nativelink-error/lib.rs; keep the
    // two lists in sync when adding or renumbering variants.
875
}
876
877
0
/// Configuration for a store backed by one or more Redis servers.
/// Only `addresses` is required; every other field falls back to its type's
/// default when omitted.
///
/// NOTE(review): fields marked `#[serde(default)]` deserialize to the type's
/// zero/empty value when omitted; the non-zero "Default:" values documented
/// below are presumably substituted by the code that consumes this spec —
/// confirm.
#[derive(Serialize, Deserialize, Debug, Clone)]
878
pub struct RedisSpec {
879
    /// The hostname or IP address of the Redis server.
880
    /// Ex: `["redis://username:password@redis-server-url:6380/99"]`
881
    /// 99 represents the database ID, 6380 represents the port.
882
    #[serde(deserialize_with = "convert_vec_string_with_shellexpand")]
883
    pub addresses: Vec<String>,
884
885
    /// The response timeout for the Redis connection in seconds.
886
    ///
887
    /// Default: 10
888
    #[serde(default)]
889
    pub response_timeout_s: u64,
890
891
    /// The connection timeout for the Redis connection in seconds.
892
    ///
893
    /// Default: 10
    /// NOTE(review): `connection_timeout_ms` below also configures a
    /// connection timeout, with a different documented default; how the two
    /// interact is not visible here — confirm.
894
    #[serde(default)]
895
    pub connection_timeout_s: u64,
896
897
    /// An optional and experimental Redis channel to publish write events to.
898
    ///
899
    /// If set, every time a write operation is made to a Redis node
900
    /// then an event will be published to a Redis channel with the given name.
901
    /// If unset, the writes will still be made,
902
    /// but the write events will not be published.
903
    ///
904
    /// Default: None (No Channel)
905
    #[serde(default)]
906
    pub experimental_pub_sub_channel: Option<String>,
907
908
    /// An optional prefix to prepend to all keys in this store.
909
    ///
910
    /// Setting this value can make it convenient to query or
911
    /// organize your data according to the shared prefix.
912
    ///
913
    /// Default: (Empty String / No Prefix)
914
    #[serde(default)]
915
    pub key_prefix: String,
916
917
    /// Set the mode Redis is operating in.
918
    ///
919
    /// Available options are "cluster" for
920
    /// [cluster mode](https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/),
921
    /// "sentinel" for [sentinel mode](https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/),
922
    /// or "standard" if Redis is operating in neither cluster nor sentinel mode.
923
    ///
924
    /// Default: standard
925
    #[serde(default)]
926
    pub mode: RedisMode,
927
928
    /// When using the pubsub interface, this is the maximum number of items
929
    /// to keep queued up before dropping old items.
930
    ///
931
    /// Default: 4096
932
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
933
    pub broadcast_channel_capacity: usize,
934
935
    /// The amount of time in milliseconds until the redis store considers the
936
    /// command to be timed out. This will trigger a retry of the command and
937
    /// potentially a reconnection to the redis server.
938
    ///
939
    /// Default: 10000 (10 seconds)
940
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
941
    pub command_timeout_ms: u64,
942
943
    /// The amount of time in milliseconds until the redis store considers the
944
    /// connection to be unresponsive. This will trigger a reconnection to the
945
    /// redis server.
946
    ///
947
    /// Default: 3000 (3 seconds)
948
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
949
    pub connection_timeout_ms: u64,
950
951
    /// The amount of data to read from the redis server at a time.
952
    /// This is used to limit the amount of memory used when reading
953
    /// large objects from the redis server as well as limiting the
954
    /// amount of time a single read operation can take.
955
    ///
956
    /// IMPORTANT: If this value is too high, the `command_timeout_ms`
957
    /// might be triggered if the latency or throughput to the redis
958
    /// server is too low.
959
    ///
960
    /// Default: 64KiB
961
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
962
    pub read_chunk_size: usize,
963
964
    /// The number of connections to keep open to the redis server(s).
965
    ///
966
    /// Default: 3
967
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
968
    pub connection_pool_size: usize,
969
970
    /// The maximum number of upload chunks to allow per update.
971
    /// This is used to limit the amount of memory used when uploading
972
    /// large objects to the redis server. A good rule of thumb is to
973
    /// think of the data as:
974
    /// `AVAIL_MEMORY / (read_chunk_size * max_chunk_uploads_per_update) = THEORETICAL_MAX_CONCURRENT_UPLOADS`
975
    /// (note: it is a good idea to divide `AVAIL_MEMORY` by ~10 to account for other memory usage)
976
    ///
977
    /// Default: 10
978
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
979
    pub max_chunk_uploads_per_update: usize,
980
981
    /// Retry configuration to use when a network request fails.
982
    /// See the `Retry` struct for more information.
983
    ///
984
    /// ```txt
985
    /// Default: Retry {
986
    ///   max_retries: 0, /* unlimited */
987
    ///   delay: 0.1, /* 100ms */
988
    ///   jitter: 0.5, /* 50% */
989
    ///   retry_on_errors: None, /* not used in redis store */
990
    /// }
991
    /// ```
992
    #[serde(default)]
993
    pub retry: Retry,
994
}
995
996
/// The mode the Redis deployment is operating in (see `RedisSpec::mode`).
///
/// Because of `rename_all = "lowercase"`, the variants are written as
/// "cluster", "sentinel" and "standard" in the serialized form.
#[derive(Debug, Default, Deserialize, Serialize, Clone, PartialEq, Eq)]
997
#[serde(rename_all = "lowercase")]
998
pub enum RedisMode {
999
    /// Redis is operating in cluster mode.
    Cluster,
1000
    /// Redis is operating in sentinel mode.
    Sentinel,
1001
    /// Redis is operating in neither cluster nor sentinel mode. This is the
    /// default variant.
    #[default]
1002
    Standard,
1003
}
1004
1005
/// Empty configuration struct: it has no fields, so it carries no options.
/// NOTE(review): presumably the spec for a no-op store — confirm against the
/// `StoreSpec` variant that uses it.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
1006
pub struct NoopSpec {}
1007
1008
/// Retry configuration. The delay grows exponentially, and on each iteration
1009
/// a jitter, given as a percentage of the calculated delay, is applied. For example:
1010
/// ```haskell
1011
/// Retry{
1012
///   max_retries: 7,
1013
///   delay: 0.1,
1014
///   jitter: 0.5,
1015
/// }
1016
/// ```
1017
/// will result in:
1018
/// Attempt - Delay
1019
/// 1         0ms
1020
/// 2         75ms - 125ms
1021
/// 3         150ms - 250ms
1022
/// 4         300ms - 500ms
1023
/// 5         600ms - 1s
1024
/// 6         1.2s - 2s
1025
/// 7         2.4s - 4s
1026
/// 8         4.8s - 8s
1027
/// Remember that the delays are additive, meaning that with the values above
1028
/// a single request would have a total delay of 9.525s - 15.875s.
1029
///
/// Every field is optional and falls back to its type's default when omitted;
/// unknown keys are rejected during deserialization (`deny_unknown_fields`).
0
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
1030
#[serde(deny_unknown_fields)]
1031
pub struct Retry {
1032
    /// Maximum number of retries until retrying stops.
1033
    /// Setting this to zero will always attempt 1 time, but not retry.
    /// Default: 0.
1034
    #[serde(default, deserialize_with = "convert_numeric_with_shellexpand")]
1035
    pub max_retries: usize,
1036
1037
    /// Delay in seconds for exponential back off.
    /// Default: 0.
1038
    #[serde(default)]
1039
    pub delay: f32,
1040
1041
    /// Amount of jitter to add as a percentage in decimal form. This will
1042
    /// change the formula like:
1043
    /// ```haskell
1044
    /// random(
1045
    ///    (2 ^ {attempt_number}) * {delay} * (1 - (jitter / 2)),
1046
    ///    (2 ^ {attempt_number}) * {delay} * (1 + (jitter / 2)),
1047
    /// )
1048
    /// ```
    /// Default: 0.
1049
    #[serde(default)]
1050
    pub jitter: f32,
1051
1052
    /// A list of error codes to retry on. If this is not set, then the default
1053
    /// error codes to retry on are used.  These default codes are the most
1054
    /// likely to be non-permanent.
1055
    ///  - `Unknown`
1056
    ///  - `Cancelled`
1057
    ///  - `DeadlineExceeded`
1058
    ///  - `ResourceExhausted`
1059
    ///  - `Aborted`
1060
    ///  - `Internal`
1061
    ///  - `Unavailable`
1062
    ///  - `DataLoss`
1063
    #[serde(default)]
1064
    pub retry_on_errors: Option<Vec<ErrorCode>>,
1065
}