Coverage Report

Created: 2024-12-20 00:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/build/source/nativelink-store/src/verify_store.rs
Line
Count
Source
1
// Copyright 2024 The NativeLink Authors. All rights reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//    http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
use std::pin::Pin;
16
use std::sync::Arc;
17
18
use async_trait::async_trait;
19
use nativelink_config::stores::VerifySpec;
20
use nativelink_error::{make_input_err, Error, ResultExt};
21
use nativelink_metric::MetricsComponent;
22
use nativelink_util::buf_channel::{
23
    make_buf_channel_pair, DropCloserReadHalf, DropCloserWriteHalf,
24
};
25
use nativelink_util::common::PackedHash;
26
use nativelink_util::digest_hasher::{
27
    default_digest_hasher_func, DigestHasher, ACTIVE_HASHER_FUNC,
28
};
29
use nativelink_util::health_utils::{default_health_status_indicator, HealthStatusIndicator};
30
use nativelink_util::metrics_utils::CounterWithTime;
31
use nativelink_util::origin_context::ActiveOriginContext;
32
use nativelink_util::store_trait::{Store, StoreDriver, StoreKey, StoreLike, UploadSizeInfo};
33
34
#[derive(MetricsComponent)]
35
pub struct VerifyStore {
36
    #[metric(group = "inner_store")]
37
    inner_store: Store,
38
    #[metric(help = "If the verification store is verifying the size of the data")]
39
    verify_size: bool,
40
    #[metric(help = "If the verification store is verifying the hash of the data")]
41
    verify_hash: bool,
42
43
    // Metrics.
44
    #[metric(help = "Number of failures the verification store had due to size mismatches")]
45
    size_verification_failures: CounterWithTime,
46
    #[metric(help = "Number of failures the verification store had due to hash mismatches")]
47
    hash_verification_failures: CounterWithTime,
48
}
49
50
impl VerifyStore {
51
10
    pub fn new(spec: &VerifySpec, inner_store: Store) -> Arc<Self> {
52
10
        Arc::new(VerifyStore {
53
10
            inner_store,
54
10
            verify_size: spec.verify_size,
55
10
            verify_hash: spec.verify_hash,
56
10
            size_verification_failures: CounterWithTime::default(),
57
10
            hash_verification_failures: CounterWithTime::default(),
58
10
        })
59
10
    }
60
61
10
    async fn inner_check_update<D: DigestHasher>(
62
10
        &self,
63
10
        mut tx: DropCloserWriteHalf,
64
10
        mut rx: DropCloserReadHalf,
65
10
        maybe_expected_digest_size: Option<u64>,
66
10
        original_hash: &PackedHash,
67
10
        mut maybe_hasher: Option<&mut D>,
68
10
    ) -> Result<(), Error> {
69
10
        let mut sum_size: u64 = 0;
70
        loop {
71
21
            let chunk = rx
72
21
                .recv()
73
21
                .await
74
21
                .err_tip(|| 
"Failed to read chunk in check_update in verify store"0
)
?0
;
75
21
            sum_size += chunk.len() as u64;
76
77
            // Ensure if a user sends us too much data we fail quickly.
78
21
            if let Some(
expected_size11
) = maybe_expected_digest_size {
  Branch (78:20): [True: 11, False: 10]
  Branch (78:20): [Folded - Ignored]
79
11
                match sum_size.cmp(&expected_size) {
80
                    std::cmp::Ordering::Greater => {
81
1
                        self.size_verification_failures.inc();
82
1
                        return Err(make_input_err!(
83
1
                            "Expected size {} but already received {} on insert",
84
1
                            expected_size,
85
1
                            sum_size
86
1
                        ));
87
                    }
88
                    std::cmp::Ordering::Equal => {
89
                        // Ensure our next chunk is the EOF chunk.
90
                        // If this was an error it'll be caught on the .recv()
91
                        // on next cycle.
92
6
                        if let Ok(eof_chunk) = rx.peek().await {
  Branch (92:32): [True: 6, False: 0]
  Branch (92:32): [Folded - Ignored]
93
6
                            if !eof_chunk.is_empty() {
  Branch (93:32): [True: 0, False: 6]
  Branch (93:32): [Folded - Ignored]
94
0
                                self.size_verification_failures.inc();
95
0
                                return Err(make_input_err!(
96
0
                                    "Expected EOF chunk when exact size was hit on insert in verify store - {}",
97
0
                                    expected_size,
98
0
                                ));
99
6
                            }
100
0
                        }
101
                    }
102
4
                    std::cmp::Ordering::Less => {}
103
                }
104
10
            }
105
106
            // If is EOF.
107
20
            if chunk.is_empty() {
  Branch (107:16): [True: 9, False: 11]
  Branch (107:16): [Folded - Ignored]
108
9
                if let Some(
expected_size4
) = maybe_expected_digest_size {
  Branch (108:24): [True: 4, False: 5]
  Branch (108:24): [Folded - Ignored]
109
4
                    if sum_size != expected_size {
  Branch (109:24): [True: 1, False: 3]
  Branch (109:24): [Folded - Ignored]
110
1
                        self.size_verification_failures.inc();
111
1
                        return Err(make_input_err!(
112
1
                            "Expected size {} but got size {} on insert",
113
1
                            expected_size,
114
1
                            sum_size
115
1
                        ));
116
3
                    }
117
5
                }
118
8
                if let Some(
hasher5
) = maybe_hasher.as_mut() {
  Branch (118:24): [True: 5, False: 3]
  Branch (118:24): [Folded - Ignored]
119
5
                    let digest = hasher.finalize_digest();
120
5
                    let hash_result = digest.packed_hash();
121
5
                    if original_hash != hash_result {
  Branch (121:24): [True: 2, False: 3]
  Branch (121:24): [Folded - Ignored]
122
2
                        self.hash_verification_failures.inc();
123
2
                        return Err(make_input_err!(
124
2
                            "Hashes do not match, got: {original_hash} but digest hash was {hash_result}",
125
2
                        ));
126
3
                    }
127
3
                }
128
6
                tx.send_eof().err_tip(|| 
"In verify_store::check_update"0
)
?0
;
129
6
                break;
130
11
            }
131
11
132
11
            // This will allows us to hash while sending data to another thread.
133
11
            let write_future = tx.send(chunk.clone());
134
135
11
            if let Some(
hasher5
) = maybe_hasher.as_mut() {
  Branch (135:20): [True: 5, False: 6]
  Branch (135:20): [Folded - Ignored]
136
5
                hasher.update(chunk.as_ref());
137
6
            }
138
139
11
            write_future
140
11
                .await
141
11
                .err_tip(|| 
"Failed to write chunk to inner store in verify store"0
)
?0
;
142
        }
143
6
        Ok(())
144
10
    }
145
}
146
147
#[async_trait]
148
impl StoreDriver for VerifyStore {
149
    async fn has_with_results(
150
        self: Pin<&Self>,
151
        digests: &[StoreKey<'_>],
152
        results: &mut [Option<u64>],
153
0
    ) -> Result<(), Error> {
154
0
        self.inner_store.has_with_results(digests, results).await
155
0
    }
156
157
    async fn update(
158
        self: Pin<&Self>,
159
        key: StoreKey<'_>,
160
        reader: DropCloserReadHalf,
161
        size_info: UploadSizeInfo,
162
10
    ) -> Result<(), Error> {
163
10
        let StoreKey::Digest(digest) = key else {
  Branch (163:13): [True: 10, False: 0]
  Branch (163:13): [Folded - Ignored]
164
0
            return Err(make_input_err!(
165
0
                "Only digests are supported in VerifyStore. Got {key:?}"
166
0
            ));
167
        };
168
10
        let digest_size = digest.size_bytes();
169
10
        if let UploadSizeInfo::ExactSize(expected_size) = size_info {
  Branch (169:16): [True: 10, False: 0]
  Branch (169:16): [Folded - Ignored]
170
10
            if self.verify_size && 
expected_size != digest_size5
{
  Branch (170:16): [True: 5, False: 5]
  Branch (170:36): [True: 0, False: 5]
  Branch (170:16): [Folded - Ignored]
  Branch (170:36): [Folded - Ignored]
171
0
                self.size_verification_failures.inc();
172
0
                return Err(make_input_err!(
173
0
                    "Expected size to match. Got {} but digest says {} on update",
174
0
                    expected_size,
175
0
                    digest_size
176
0
                ));
177
10
            }
178
0
        }
179
180
10
        let mut hasher = if self.verify_hash {
  Branch (180:29): [True: 5, False: 5]
  Branch (180:29): [Folded - Ignored]
181
            Some(
182
5
                ActiveOriginContext::get_value(&ACTIVE_HASHER_FUNC)
183
5
                    .err_tip(|| 
"In verify_store::update"0
)
?0
184
5
                    .map_or_else(default_digest_hasher_func, |v| 
*v2
)
185
5
                    .hasher(),
186
5
            )
187
        } else {
188
5
            None
189
        };
190
191
10
        let maybe_digest_size = if self.verify_size {
  Branch (191:36): [True: 5, False: 5]
  Branch (191:36): [Folded - Ignored]
192
5
            Some(digest_size)
193
        } else {
194
5
            None
195
        };
196
10
        let (tx, rx) = make_buf_channel_pair();
197
10
198
10
        let update_fut = self.inner_store.update(digest, rx, size_info);
199
10
        let check_fut = self.inner_check_update(
200
10
            tx,
201
10
            reader,
202
10
            maybe_digest_size,
203
10
            digest.packed_hash(),
204
10
            hasher.as_mut(),
205
10
        );
206
207
10
        let (update_res, check_res) = tokio::join!(update_fut, check_fut);
208
209
10
        update_res.merge(check_res)
210
20
    }
211
212
    async fn get_part(
213
        self: Pin<&Self>,
214
        key: StoreKey<'_>,
215
        writer: &mut DropCloserWriteHalf,
216
        offset: u64,
217
        length: Option<u64>,
218
0
    ) -> Result<(), Error> {
219
0
        self.inner_store.get_part(key, writer, offset, length).await
220
0
    }
221
222
0
    fn inner_store(&self, _digest: Option<StoreKey>) -> &'_ dyn StoreDriver {
223
0
        self
224
0
    }
225
226
0
    fn as_any<'a>(&'a self) -> &'a (dyn std::any::Any + Sync + Send + 'static) {
227
0
        self
228
0
    }
229
230
0
    fn as_any_arc(self: Arc<Self>) -> Arc<dyn std::any::Any + Sync + Send + 'static> {
231
0
        self
232
0
    }
233
}
234
235
default_health_status_indicator!(VerifyStore);