grafos_observe/metrics.rs
1//! Core metrics types for fabric observability.
2//!
3//! All types use atomics and work in `no_std` environments. The
4//! [`FabricMetrics`] singleton collects system-wide counters and gauges.
5
6use core::sync::atomic::{AtomicI64, AtomicU64, Ordering};
7
/// Atomic `u64` event counter.
///
/// Meant for running totals such as "operations completed" or "bytes
/// transferred": [`inc`](Self::inc) and [`add`](Self::add) only ever push
/// the value upward, while [`reset`](Self::reset) hands back the
/// accumulated total and starts again from zero. Every access uses
/// `Ordering::Relaxed`, so a counter can be shared between threads but
/// does not order any surrounding memory operations.
///
/// # Examples
///
/// ```
/// use grafos_observe::MetricCounter;
///
/// let c = MetricCounter::new();
/// c.inc();
/// c.add(10);
/// assert_eq!(c.get(), 11);
///
/// let prev = c.reset();
/// assert_eq!(prev, 11);
/// assert_eq!(c.get(), 0);
/// ```
pub struct MetricCounter {
    value: AtomicU64,
}

impl MetricCounter {
    /// Build a counter whose value is zero.
    ///
    /// This is a `const fn`, so it can initialise a `static`.
    pub const fn new() -> Self {
        MetricCounter {
            value: AtomicU64::new(0),
        }
    }

    /// Add `n` to the counter.
    ///
    /// The addition wraps around on `u64` overflow, as atomic `fetch_add`
    /// does.
    pub fn add(&self, n: u64) {
        self.value.fetch_add(n, Ordering::Relaxed);
    }

    /// Add one to the counter.
    pub fn inc(&self) {
        self.add(1);
    }

    /// Return the value currently stored in the counter.
    pub fn get(&self) -> u64 {
        self.value.load(Ordering::Relaxed)
    }

    /// Zero the counter and return the value it held just before.
    ///
    /// Read and clear happen as one atomic swap, so no increment can slip
    /// in between the two.
    pub fn reset(&self) -> u64 {
        self.value.swap(0, Ordering::Relaxed)
    }
}

impl Default for MetricCounter {
    /// Same as [`MetricCounter::new`]: a counter at zero.
    fn default() -> Self {
        MetricCounter::new()
    }
}
65
/// Atomic `i64` gauge holding a current value.
///
/// Unlike a counter, a gauge may move in both directions, which suits
/// quantities such as "active leases" or "connections open". Every access
/// uses `Ordering::Relaxed`.
///
/// # Examples
///
/// ```
/// use grafos_observe::MetricGauge;
///
/// let g = MetricGauge::new();
/// g.inc();
/// g.inc();
/// g.dec();
/// assert_eq!(g.get(), 1);
///
/// g.set(-5);
/// assert_eq!(g.get(), -5);
/// ```
pub struct MetricGauge {
    value: AtomicI64,
}

impl MetricGauge {
    /// Build a gauge whose value is zero.
    ///
    /// This is a `const fn`, so it can initialise a `static`.
    pub const fn new() -> Self {
        MetricGauge {
            value: AtomicI64::new(0),
        }
    }

    /// Move the gauge by `delta` (wrapping on `i64` overflow).
    fn shift(&self, delta: i64) {
        self.value.fetch_add(delta, Ordering::Relaxed);
    }

    /// Raise the gauge by one.
    pub fn inc(&self) {
        self.shift(1);
    }

    /// Lower the gauge by one.
    pub fn dec(&self) {
        self.shift(-1);
    }

    /// Overwrite the gauge with `val`, discarding the previous value.
    pub fn set(&self, val: i64) {
        self.value.store(val, Ordering::Relaxed);
    }

    /// Return the value currently stored in the gauge.
    pub fn get(&self) -> i64 {
        self.value.load(Ordering::Relaxed)
    }
}

impl Default for MetricGauge {
    /// Same as [`MetricGauge::new`]: a gauge at zero.
    fn default() -> Self {
        MetricGauge::new()
    }
}
123
/// Fixed-bucket latency histogram.
///
/// Records how observed values (typically durations in microseconds) are
/// distributed over a fixed set of upper bounds. The bucket array is stored
/// inline, so no allocator is required. There are
/// [`NUM_BUCKETS`](Self::NUM_BUCKETS) buckets with the bounds listed in
/// [`BUCKET_BOUNDS`](Self::BUCKET_BOUNDS): 100us, 500us, 1ms, 5ms, 10ms,
/// 50ms, 100ms, 500ms, 1s and +Inf.
///
/// Buckets are cumulative: the count stored for a bucket covers every
/// observation less than or equal to that bucket's bound, and therefore
/// includes everything counted by the lower buckets. This matches the
/// Prometheus histogram convention.
///
/// All updates and reads use `Ordering::Relaxed` on independent atomics, so
/// a reader racing with [`observe`](Self::observe) may briefly see the
/// total, sum and individual buckets out of step with one another.
///
/// # Examples
///
/// ```
/// use grafos_observe::MetricHistogram;
///
/// let h = MetricHistogram::new();
/// h.observe(50); // 50us — lands in the <=100us bucket
/// h.observe(2_000); // 2ms — lands in the <=5ms bucket
///
/// assert_eq!(h.count(), 2);
/// assert_eq!(h.sum(), 2050);
/// assert_eq!(h.bucket_count(0), 1); // <=100us: only the 50us observation
/// assert_eq!(h.bucket_count(3), 2); // <=5ms: both observations
/// ```
pub struct MetricHistogram {
    /// Bucket upper bounds in microseconds. The last bucket is +Inf (u64::MAX).
    bounds: [u64; Self::NUM_BUCKETS],
    /// Counts per bucket (cumulative — each bucket includes all lower buckets).
    counts: [AtomicU64; Self::NUM_BUCKETS],
    /// Sum of all observed values.
    sum: AtomicU64,
    /// Total number of observations.
    total: AtomicU64,
}

impl Default for MetricHistogram {
    /// Same as [`MetricHistogram::new`]: an empty histogram with the
    /// default bounds.
    fn default() -> Self {
        Self::new()
    }
}

impl MetricHistogram {
    /// Number of histogram buckets.
    pub const NUM_BUCKETS: usize = 10;

    /// Bucket upper bounds in microseconds, in ascending order. The final
    /// entry, `u64::MAX`, plays the role of +Inf.
    pub const BUCKET_BOUNDS: [u64; Self::NUM_BUCKETS] = [
        100,       // 100us
        500,       // 500us
        1_000,     // 1ms
        5_000,     // 5ms
        10_000,    // 10ms
        50_000,    // 50ms
        100_000,   // 100ms
        500_000,   // 500ms
        1_000_000, // 1s
        u64::MAX,  // +Inf
    ];

    /// Create an empty histogram with the default bucket boundaries.
    ///
    /// This is a `const fn`, so it can initialise a `static`.
    pub const fn new() -> Self {
        // `AtomicU64` is not `Copy`, so an array-repeat expression needs a
        // `const` item as its seed. Each array slot is a fresh evaluation of
        // the constant, i.e. an independent atomic. That is exactly the
        // pattern the lint warns about, hence the narrowly scoped allow.
        #[allow(clippy::declare_interior_mutable_const)]
        const ZERO: AtomicU64 = AtomicU64::new(0);

        Self {
            bounds: Self::BUCKET_BOUNDS,
            // Sized by `NUM_BUCKETS` so the array cannot drift out of step
            // with the bucket-count constant.
            counts: [ZERO; Self::NUM_BUCKETS],
            sum: AtomicU64::new(0),
            total: AtomicU64::new(0),
        }
    }

    /// Record an observation (value in microseconds).
    ///
    /// Adds `value_us` to the running sum (wrapping on `u64` overflow),
    /// bumps the observation total, and increments every bucket whose bound
    /// is `>= value_us` (cumulative histogram, matching the Prometheus
    /// convention). Because the last bound is `u64::MAX`, the final bucket
    /// is incremented for every observation.
    pub fn observe(&self, value_us: u64) {
        self.sum.fetch_add(value_us, Ordering::Relaxed);
        self.total.fetch_add(1, Ordering::Relaxed);
        for (slot, &bound) in self.counts.iter().zip(self.bounds.iter()) {
            if value_us <= bound {
                slot.fetch_add(1, Ordering::Relaxed);
            }
        }
    }

    /// Read the cumulative count for the bucket at `index`.
    ///
    /// Returns `0` when `index` is not a valid bucket index.
    pub fn bucket_count(&self, index: usize) -> u64 {
        self.counts
            .get(index)
            .map_or(0, |slot| slot.load(Ordering::Relaxed))
    }

    /// Read the upper bound (in microseconds) of the bucket at `index`.
    ///
    /// Returns `u64::MAX` when `index` is not a valid bucket index — the
    /// same value the final (+Inf) bucket reports.
    pub fn bucket_bound(&self, index: usize) -> u64 {
        self.bounds.get(index).copied().unwrap_or(u64::MAX)
    }

    /// Total number of observations recorded so far.
    pub fn count(&self) -> u64 {
        self.total.load(Ordering::Relaxed)
    }

    /// Sum of all observed values (in microseconds).
    pub fn sum(&self) -> u64 {
        self.sum.load(Ordering::Relaxed)
    }
}
248
249/// Global fabric metrics registry.
250///
251/// Tracks system-wide counters and gauges for lease lifecycles,
252/// data-plane operations, and graph rewrites. Access the process-wide
253/// singleton via [`FabricMetrics::global()`].
254///
255/// # Examples
256///
257/// ```
258/// use grafos_observe::FabricMetrics;
259///
260/// let m = FabricMetrics::global();
261/// m.leases_total.inc();
262/// m.leases_active.inc();
263/// m.ops_total.add(5);
264/// m.bytes_read.add(1024);
265/// m.op_latency.observe(300);
266/// ```
267pub struct FabricMetrics {
268 /// Currently active leases (gauge — goes up on acquire, down on drop/expire).
269 pub leases_active: MetricGauge,
270 /// Total leases ever created (counter).
271 pub leases_total: MetricCounter,
272 /// Total leases that expired (counter).
273 pub leases_expired: MetricCounter,
274 /// Total leases that entered fenced state (counter).
275 pub leases_fenced: MetricCounter,
276 /// Total data-plane operations completed (counter).
277 pub ops_total: MetricCounter,
278 /// Total data-plane operations that failed (counter).
279 pub ops_errors: MetricCounter,
280 /// Total bytes read across all data-plane operations (counter).
281 pub bytes_read: MetricCounter,
282 /// Total bytes written across all data-plane operations (counter).
283 pub bytes_written: MetricCounter,
284 /// Total graph rewrites initiated (counter).
285 pub rewrites_total: MetricCounter,
286 /// Histogram of operation latencies in microseconds.
287 pub op_latency: MetricHistogram,
288 /// Total leases explicitly revoked (counter) — distinct from expired.
289 pub leases_revoked: MetricCounter,
290 /// Histogram of lease bind latency in microseconds.
291 pub bind_latency: MetricHistogram,
292 /// Histogram of lease renewal latency in microseconds.
293 pub renew_latency: MetricHistogram,
294 /// Histogram of lease revocation latency in microseconds.
295 pub revoke_latency: MetricHistogram,
296 /// Histogram of teardown execution latency in microseconds.
297 pub teardown_latency: MetricHistogram,
298 /// Total authentication failures (counter).
299 pub auth_failures: MetricCounter,
300 /// Total anti-replay cache rejections (counter).
301 pub replay_rejections: MetricCounter,
302 /// Total capability token validations (counter).
303 pub token_validations: MetricCounter,
304 /// Total capability token validation failures (counter).
305 pub token_failures: MetricCounter,
306 /// Total stale access attempts after revoke/expiry (counter).
307 pub stale_access_rejections: MetricCounter,
308 /// Histogram of control-plane operation latencies in microseconds.
309 pub control_latency: MetricHistogram,
310 /// Histogram of dataplane operation latencies in microseconds.
311 pub dataplane_latency: MetricHistogram,
312 /// Histogram of tasklet submit (full dispatch) latencies in microseconds.
313 pub tasklet_submit_latency: MetricHistogram,
314 /// Histogram of tasklet execution-only latencies in microseconds.
315 pub tasklet_exec_latency: MetricHistogram,
316 /// Total tasklet submissions received (counter).
317 pub tasklet_submits: MetricCounter,
318 /// Total tasklet executions that completed successfully (counter).
319 pub tasklet_completions: MetricCounter,
320 /// Total tasklet executions that failed (counter).
321 pub tasklet_failures: MetricCounter,
322 /// Histogram of tasklet wall-clock duration in microseconds.
323 pub tasklet_duration: MetricHistogram,
324 /// Total module cache hits (counter).
325 pub module_cache_hits: MetricCounter,
326 /// Total module cache misses (counter).
327 pub module_cache_misses: MetricCounter,
328 /// Total module cache stores (counter).
329 pub module_cache_stores: MetricCounter,
330 /// Total module cache hash mismatches (counter).
331 pub module_cache_hash_mismatches: MetricCounter,
332 // ── Phase 219 / slice 20 — audit-chain emit counters ──
333 //
334 // Per-AuditEventKind counters that bump every time a daemon /
335 // scheduler emits an audit record. SREs scrape these via
336 // Prometheus to see lifecycle event RATES (emits/sec) without
337 // tailing the chain itself. The breakdown by kind lets a
338 // dashboard distinguish "lease churn" (lots of allocated +
339 // released) from "instability" (lots of fenced + expired).
340 /// Total `CapabilityIssued` audit records emitted (counter).
341 pub audit_capability_issued: MetricCounter,
342 /// Total `CapabilityRevoked` audit records emitted (counter).
343 pub audit_capability_revoked: MetricCounter,
344 /// Total `LeaseAllocated` audit records emitted (counter).
345 pub audit_lease_allocated: MetricCounter,
346 /// Total `LeaseRenewed` audit records emitted (counter).
347 pub audit_lease_renewed: MetricCounter,
348 /// Total `LeaseReleased` audit records emitted (counter).
349 pub audit_lease_released: MetricCounter,
350 /// Total `LeaseExpired` audit records emitted (counter).
351 pub audit_lease_expired: MetricCounter,
352 /// Total `LeaseFenced` audit records emitted (counter).
353 pub audit_lease_fenced: MetricCounter,
354 /// Total `LeaseTorndown` audit records emitted (counter).
355 pub audit_lease_torndown: MetricCounter,
356 /// Total `AdmissionDecided` audit records emitted (counter).
357 pub audit_admission_decided: MetricCounter,
358 /// Total `Preempted` audit records emitted (counter).
359 pub audit_preempted: MetricCounter,
360 /// Total `DrainInitiated` audit records emitted (counter).
361 pub audit_drain_initiated: MetricCounter,
362 /// Total `ChainAnchored` audit records emitted (counter). A
363 /// non-zero value indicates the chain has been anchored at
364 /// least once — operators monitor this for key-rotation
365 /// boundaries.
366 pub audit_chain_anchored: MetricCounter,
367 /// Total `SoftModeEnabled` audit records emitted (counter).
368 /// **Should normally be ZERO in production**. Any non-zero
369 /// value means a weaker-mode toggle was enabled and warrants
370 /// operator attention.
371 pub audit_soft_mode_enabled: MetricCounter,
372 /// Phase 218–222 / slice 77 — total `TenantCreated` audit
373 /// records emitted (counter). Tenant-CRUD typed kinds were
374 /// added in Stage 2a of the audit-surface migration; producers
375 /// dual-write to both this typed counter and the older
376 /// enterprise_audit log during Stage 2.
377 pub audit_tenant_created: MetricCounter,
378 /// Phase 218–222 / slice 77 — total `TenantDeleted` audit
379 /// records emitted (counter).
380 pub audit_tenant_deleted: MetricCounter,
381 /// Phase 218–222 / slice 77 — total `TenantQuotaUpdated` audit
382 /// records emitted (counter).
383 pub audit_tenant_quota_updated: MetricCounter,
384 /// Phase 218–222 / slice 79 — total `ProviderConformanceRecorded`
385 /// audit records emitted (counter). Stage 2b of the audit-surface
386 /// migration: the orchestrator dual-writes this typed counter
387 /// alongside the older enterprise_audit `kind=admin,
388 /// outcome="provider_conformance_recorded"` row.
389 pub audit_provider_conformance_recorded: MetricCounter,
390 /// Phase 218–222 / slice 79 — total
391 /// `ProviderBootstrapTokenIssued` audit records emitted.
392 pub audit_provider_bootstrap_token_issued: MetricCounter,
393 /// Phase 218–222 / slice 79 — total `ProviderBootstrapExchanged`
394 /// audit records emitted.
395 pub audit_provider_bootstrap_exchanged: MetricCounter,
396 /// Phase 218–222 / slice 79 — total `ProviderCellIdentityIssued`
397 /// audit records emitted.
398 pub audit_provider_cell_identity_issued: MetricCounter,
399 /// Phase 218–222 / slice 79 — total `ProviderCellIdentityRotated`
400 /// audit records emitted.
401 pub audit_provider_cell_identity_rotated: MetricCounter,
402 /// Phase 218–222 / slice 79 — total `ProviderCellIdentityRevoked`
403 /// audit records emitted.
404 pub audit_provider_cell_identity_revoked: MetricCounter,
405 /// Phase 218–222 / slice 80 — total `BearerTokenIssued` audit
406 /// records emitted (counter). Stage 2c of the audit-surface
407 /// migration: cell-side and orchestrator-side mint paths dual-
408 /// write this typed counter alongside the older enterprise_audit
409 /// `kind=token, outcome="created"` row. Distinct from
410 /// `audit_capability_issued`: bearer tokens are HTTP
411 /// Authorization-shaped admin/tenant API keys (not lease-bound
412 /// capability tokens). Operators monitoring token churn or
413 /// alerting on suspicious mint cadence read this counter.
414 pub audit_bearer_token_issued: MetricCounter,
415 /// Phase 218–222 / slice 80 — total `BearerTokenRevoked` audit
416 /// records emitted (counter). Distinct from
417 /// `audit_capability_revoked` (lease-bound capability revoke).
418 /// Operators monitoring token revocation rate or alerting on
419 /// mass-revoke incidents read this counter.
420 pub audit_bearer_token_revoked: MetricCounter,
421 /// Phase 218–222 / slice 81 — total `SchedulerPromoted` audit
422 /// records emitted (counter). Stage 2 mop-up of the audit-surface
423 /// migration: the cell-side `handle_promote` dual-writes this
424 /// typed counter alongside the older enterprise_audit
425 /// `kind=admin, outcome="promoted"` row. A non-zero value (and
426 /// especially a sustained rate) is operator-relevant: each emit
427 /// represents a leadership transition.
428 pub audit_scheduler_promoted: MetricCounter,
429 /// Phase 218–222 / slice 81 — total `BillingRateCardInstalled`
430 /// audit records emitted (counter). Stage 2 mop-up: the only
431 /// Billing-category producer. Lifecycle-axis, low-volume.
432 /// Operators alerting on rate-card swaps (a billing-impacting
433 /// admin action) read this counter.
434 pub audit_billing_rate_card_installed: MetricCounter,
435 /// Slice 261 (EdgeRecord audit-chain integration arc) — total
436 /// `EdgeRewritten` audit records emitted (counter). One emit per
437 /// affected edge per committed rewrite (producer wiring in slice
438 /// 262). High-volume relative to other audit kinds because a
439 /// rewrite typically touches multiple edges; operators reading
440 /// this counter expect throughput proportional to graph mutation
441 /// rate.
442 pub audit_edge_rewritten: MetricCounter,
443 /// Total audit records emitted across all kinds (counter).
444 /// Equal to the sum of the per-kind counters; exposed
445 /// separately for cheap "total emit rate" dashboards.
446 pub audit_records_emitted: MetricCounter,
447
448 /// Phase 219 slice 23 — total audit records persisted to the
449 /// JSONL sink (counter). Bumps once per successful
450 /// `grafos_audit::write_record` call. Difference from
451 /// `audit_records_emitted` indicates JSONL lag or the daemon
452 /// running without `--audit-jsonl-path`.
453 pub audit_jsonl_writes: MetricCounter,
454 /// Phase 219 slice 23 — total audit JSONL writes that failed
455 /// (counter). Bumps once per `write_record` Err. Operator
456 /// attention warranted; chain integrity is preserved but the
457 /// downstream collector is missing records.
458 pub audit_jsonl_write_failed: MetricCounter,
459}
460
461impl Default for FabricMetrics {
462 fn default() -> Self {
463 Self::new()
464 }
465}
466
467impl FabricMetrics {
468 /// Create a new metrics instance with all values at zero.
469 pub const fn new() -> Self {
470 Self {
471 leases_active: MetricGauge::new(),
472 leases_total: MetricCounter::new(),
473 leases_expired: MetricCounter::new(),
474 leases_fenced: MetricCounter::new(),
475 ops_total: MetricCounter::new(),
476 ops_errors: MetricCounter::new(),
477 bytes_read: MetricCounter::new(),
478 bytes_written: MetricCounter::new(),
479 rewrites_total: MetricCounter::new(),
480 op_latency: MetricHistogram::new(),
481 leases_revoked: MetricCounter::new(),
482 bind_latency: MetricHistogram::new(),
483 renew_latency: MetricHistogram::new(),
484 revoke_latency: MetricHistogram::new(),
485 teardown_latency: MetricHistogram::new(),
486 auth_failures: MetricCounter::new(),
487 replay_rejections: MetricCounter::new(),
488 token_validations: MetricCounter::new(),
489 token_failures: MetricCounter::new(),
490 stale_access_rejections: MetricCounter::new(),
491 control_latency: MetricHistogram::new(),
492 dataplane_latency: MetricHistogram::new(),
493 tasklet_submit_latency: MetricHistogram::new(),
494 tasklet_exec_latency: MetricHistogram::new(),
495 tasklet_submits: MetricCounter::new(),
496 tasklet_completions: MetricCounter::new(),
497 tasklet_failures: MetricCounter::new(),
498 tasklet_duration: MetricHistogram::new(),
499 module_cache_hits: MetricCounter::new(),
500 module_cache_misses: MetricCounter::new(),
501 module_cache_stores: MetricCounter::new(),
502 module_cache_hash_mismatches: MetricCounter::new(),
503 audit_capability_issued: MetricCounter::new(),
504 audit_capability_revoked: MetricCounter::new(),
505 audit_lease_allocated: MetricCounter::new(),
506 audit_lease_renewed: MetricCounter::new(),
507 audit_lease_released: MetricCounter::new(),
508 audit_lease_expired: MetricCounter::new(),
509 audit_lease_fenced: MetricCounter::new(),
510 audit_lease_torndown: MetricCounter::new(),
511 audit_admission_decided: MetricCounter::new(),
512 audit_preempted: MetricCounter::new(),
513 audit_drain_initiated: MetricCounter::new(),
514 audit_chain_anchored: MetricCounter::new(),
515 audit_soft_mode_enabled: MetricCounter::new(),
516 audit_tenant_created: MetricCounter::new(),
517 audit_tenant_deleted: MetricCounter::new(),
518 audit_tenant_quota_updated: MetricCounter::new(),
519 audit_provider_conformance_recorded: MetricCounter::new(),
520 audit_provider_bootstrap_token_issued: MetricCounter::new(),
521 audit_provider_bootstrap_exchanged: MetricCounter::new(),
522 audit_provider_cell_identity_issued: MetricCounter::new(),
523 audit_provider_cell_identity_rotated: MetricCounter::new(),
524 audit_provider_cell_identity_revoked: MetricCounter::new(),
525 audit_bearer_token_issued: MetricCounter::new(),
526 audit_bearer_token_revoked: MetricCounter::new(),
527 audit_scheduler_promoted: MetricCounter::new(),
528 audit_billing_rate_card_installed: MetricCounter::new(),
529 audit_edge_rewritten: MetricCounter::new(),
530 audit_records_emitted: MetricCounter::new(),
531 audit_jsonl_writes: MetricCounter::new(),
532 audit_jsonl_write_failed: MetricCounter::new(),
533 }
534 }
535
536 /// Phase 219 slice 20 — bump the per-kind audit-emit counter
537 /// for `kind`, plus the overall `audit_records_emitted` counter.
538 /// Callers (typically `fabricbiosd`'s emit_*_audit helpers and
539 /// the tick thread's inline emit) call this on every successful
540 /// `assemble_record` so the Prometheus dashboard sees the rate.
541 /// The match arms cover every `AuditEventKind` variant — adding
542 /// a new variant in `grafos-core::policy_vocab` MUST also add
543 /// the corresponding counter and an arm here, otherwise the
544 /// new kind's emit rate is invisible.
545 pub fn count_audit_emit(&self, kind: grafos_core::AuditEventKind) {
546 use grafos_core::AuditEventKind as K;
547 let counter = match kind {
548 K::CapabilityIssued => &self.audit_capability_issued,
549 K::CapabilityRevoked => &self.audit_capability_revoked,
550 K::LeaseAllocated => &self.audit_lease_allocated,
551 K::LeaseRenewed => &self.audit_lease_renewed,
552 K::LeaseReleased => &self.audit_lease_released,
553 K::LeaseExpired => &self.audit_lease_expired,
554 K::LeaseFenced => &self.audit_lease_fenced,
555 K::LeaseTorndown => &self.audit_lease_torndown,
556 K::AdmissionDecided => &self.audit_admission_decided,
557 K::Preempted => &self.audit_preempted,
558 K::DrainInitiated => &self.audit_drain_initiated,
559 K::ChainAnchored => &self.audit_chain_anchored,
560 K::SoftModeEnabled => &self.audit_soft_mode_enabled,
561 K::TenantCreated => &self.audit_tenant_created,
562 K::TenantDeleted => &self.audit_tenant_deleted,
563 K::TenantQuotaUpdated => &self.audit_tenant_quota_updated,
564 K::ProviderConformanceRecorded => &self.audit_provider_conformance_recorded,
565 K::ProviderBootstrapTokenIssued => &self.audit_provider_bootstrap_token_issued,
566 K::ProviderBootstrapExchanged => &self.audit_provider_bootstrap_exchanged,
567 K::ProviderCellIdentityIssued => &self.audit_provider_cell_identity_issued,
568 K::ProviderCellIdentityRotated => &self.audit_provider_cell_identity_rotated,
569 K::ProviderCellIdentityRevoked => &self.audit_provider_cell_identity_revoked,
570 K::BearerTokenIssued => &self.audit_bearer_token_issued,
571 K::BearerTokenRevoked => &self.audit_bearer_token_revoked,
572 K::SchedulerPromoted => &self.audit_scheduler_promoted,
573 K::BillingRateCardInstalled => &self.audit_billing_rate_card_installed,
574 K::EdgeRewritten => &self.audit_edge_rewritten,
575 };
576 counter.inc();
577 self.audit_records_emitted.inc();
578 }
579
580 /// Access the global metrics instance.
581 pub fn global() -> &'static FabricMetrics {
582 static INSTANCE: FabricMetrics = FabricMetrics::new();
583 &INSTANCE
584 }
585}
586
#[cfg(test)]
mod tests {
    use super::*;

    /// `inc` adds one and `add` adds an arbitrary amount.
    #[test]
    fn counter_inc_and_add() {
        let counter = MetricCounter::new();
        assert_eq!(counter.get(), 0);

        counter.inc();
        assert_eq!(counter.get(), 1);

        counter.add(10);
        assert_eq!(counter.get(), 11);
    }

    /// `reset` returns the old total and leaves the counter at zero.
    #[test]
    fn counter_reset() {
        let counter = MetricCounter::new();
        counter.add(42);

        assert_eq!(counter.reset(), 42);
        assert_eq!(counter.get(), 0);
    }

    /// A gauge moves up, down, and can be overwritten with any value.
    #[test]
    fn gauge_inc_dec_set() {
        let gauge = MetricGauge::new();
        assert_eq!(gauge.get(), 0);

        gauge.inc();
        gauge.inc();
        assert_eq!(gauge.get(), 2);

        gauge.dec();
        assert_eq!(gauge.get(), 1);

        for &target in &[100_i64, -5] {
            gauge.set(target);
            assert_eq!(gauge.get(), target);
        }
    }

    /// A single 50us observation is counted by the first bucket and,
    /// because buckets are cumulative, by the +Inf bucket as well.
    #[test]
    fn histogram_observe() {
        let hist = MetricHistogram::new();
        hist.observe(50);

        assert_eq!(hist.count(), 1);
        assert_eq!(hist.sum(), 50);
        // Bucket 0 is <=100us.
        assert_eq!(hist.bucket_count(0), 1);
        // Bucket 9 is +Inf.
        assert_eq!(hist.bucket_count(9), 1);
    }

    /// Several observations spread over the buckets accumulate upward.
    #[test]
    fn histogram_multiple_observations() {
        // 50 -> <=100us, 200 -> <=500us, 2_000 -> <=5ms, 999_999 -> <=1s.
        let samples: [u64; 4] = [50, 200, 2_000, 999_999];

        let hist = MetricHistogram::new();
        for &sample in &samples {
            hist.observe(sample);
        }

        assert_eq!(hist.count(), 4);
        assert_eq!(hist.sum(), 50 + 200 + 2_000 + 999_999);

        // (bucket index, expected cumulative count)
        let expected: [(usize, u64); 5] = [
            (0, 1), // <=100us: only the 50us observation
            (1, 2), // <=500us: 50 + 200
            (3, 3), // <=5ms: 50 + 200 + 2000
            (8, 4), // <=1s: all four
            (9, 4), // +Inf: all four
        ];
        for &(index, want) in &expected {
            assert_eq!(hist.bucket_count(index), want);
        }
    }

    /// Indices past the last bucket yield the documented fallbacks.
    #[test]
    fn histogram_out_of_bounds_index() {
        let hist = MetricHistogram::new();
        assert_eq!(hist.bucket_count(99), 0);
        assert_eq!(hist.bucket_bound(99), u64::MAX);
    }

    /// Two calls to `global` hand back the very same instance.
    #[test]
    fn fabric_metrics_global_is_singleton() {
        let first = FabricMetrics::global();
        let second = FabricMetrics::global();
        assert!(core::ptr::eq(first, second));
    }

    /// Smoke test: the individual fields of a fresh registry accept
    /// updates and report them back.
    #[test]
    fn fabric_metrics_fields() {
        let metrics = FabricMetrics::new();

        // First batch: lease, operation, byte and rewrite metrics.
        metrics.leases_active.inc();
        metrics.leases_total.inc();
        metrics.ops_total.add(5);
        metrics.bytes_read.add(1024);
        metrics.bytes_written.add(512);
        metrics.rewrites_total.inc();
        metrics.leases_expired.inc();
        metrics.leases_fenced.inc();
        metrics.ops_errors.inc();

        assert_eq!(metrics.leases_active.get(), 1);
        assert_eq!(metrics.leases_total.get(), 1);
        assert_eq!(metrics.ops_total.get(), 5);
        assert_eq!(metrics.bytes_read.get(), 1024);
        assert_eq!(metrics.bytes_written.get(), 512);
        assert_eq!(metrics.rewrites_total.get(), 1);
        assert_eq!(metrics.leases_expired.get(), 1);
        assert_eq!(metrics.leases_fenced.get(), 1);
        assert_eq!(metrics.ops_errors.get(), 1);

        // Second batch: revocation, auth/token counters and the lease
        // latency histograms.
        metrics.leases_revoked.inc();
        metrics.auth_failures.inc();
        metrics.replay_rejections.inc();
        metrics.token_validations.add(10);
        metrics.token_failures.inc();
        metrics.stale_access_rejections.inc();
        metrics.bind_latency.observe(100);
        metrics.renew_latency.observe(200);
        metrics.revoke_latency.observe(300);
        metrics.teardown_latency.observe(400);

        assert_eq!(metrics.leases_revoked.get(), 1);
        assert_eq!(metrics.auth_failures.get(), 1);
        assert_eq!(metrics.replay_rejections.get(), 1);
        assert_eq!(metrics.token_validations.get(), 10);
        assert_eq!(metrics.token_failures.get(), 1);
        assert_eq!(metrics.stale_access_rejections.get(), 1);
        assert_eq!(metrics.bind_latency.count(), 1);
        assert_eq!(metrics.renew_latency.count(), 1);
        assert_eq!(metrics.revoke_latency.count(), 1);
        assert_eq!(metrics.teardown_latency.count(), 1);
    }

    /// Phase 219 slice 20 — `count_audit_emit` must route every
    /// `AuditEventKind` to its own per-kind counter AND bump the overall
    /// `audit_records_emitted`. A regression in the match arms (for
    /// instance two arms pointing at the same counter) would silently
    /// mis-route emits; this test pins the mapping of each variant.
    #[test]
    fn count_audit_emit_routes_each_kind_to_correct_counter() {
        use grafos_core::AuditEventKind as K;

        type Reader = fn(&FabricMetrics) -> u64;

        let metrics = FabricMetrics::new();

        // One row per variant: the kind to emit and how to read back the
        // counter it is expected to bump. A variant added later without
        // a row here is not exercised by the loop below — the
        // compile-time exhaustiveness check on the match inside
        // `count_audit_emit` is the guard for that case; this table
        // verifies the runtime mapping of every existing variant.
        let cases: &[(K, Reader)] = &[
            (K::CapabilityIssued, |m| m.audit_capability_issued.get()),
            (K::CapabilityRevoked, |m| m.audit_capability_revoked.get()),
            (K::LeaseAllocated, |m| m.audit_lease_allocated.get()),
            (K::LeaseRenewed, |m| m.audit_lease_renewed.get()),
            (K::LeaseReleased, |m| m.audit_lease_released.get()),
            (K::LeaseExpired, |m| m.audit_lease_expired.get()),
            (K::LeaseFenced, |m| m.audit_lease_fenced.get()),
            (K::LeaseTorndown, |m| m.audit_lease_torndown.get()),
            (K::AdmissionDecided, |m| m.audit_admission_decided.get()),
            (K::Preempted, |m| m.audit_preempted.get()),
            (K::DrainInitiated, |m| m.audit_drain_initiated.get()),
            (K::ChainAnchored, |m| m.audit_chain_anchored.get()),
            (K::SoftModeEnabled, |m| m.audit_soft_mode_enabled.get()),
            (K::TenantCreated, |m| m.audit_tenant_created.get()),
            (K::TenantDeleted, |m| m.audit_tenant_deleted.get()),
            (K::TenantQuotaUpdated, |m| {
                m.audit_tenant_quota_updated.get()
            }),
            (K::ProviderConformanceRecorded, |m| {
                m.audit_provider_conformance_recorded.get()
            }),
            (K::ProviderBootstrapTokenIssued, |m| {
                m.audit_provider_bootstrap_token_issued.get()
            }),
            (K::ProviderBootstrapExchanged, |m| {
                m.audit_provider_bootstrap_exchanged.get()
            }),
            (K::ProviderCellIdentityIssued, |m| {
                m.audit_provider_cell_identity_issued.get()
            }),
            (K::ProviderCellIdentityRotated, |m| {
                m.audit_provider_cell_identity_rotated.get()
            }),
            (K::ProviderCellIdentityRevoked, |m| {
                m.audit_provider_cell_identity_revoked.get()
            }),
            (K::BearerTokenIssued, |m| m.audit_bearer_token_issued.get()),
            (K::BearerTokenRevoked, |m| {
                m.audit_bearer_token_revoked.get()
            }),
            (K::SchedulerPromoted, |m| m.audit_scheduler_promoted.get()),
            (K::BillingRateCardInstalled, |m| {
                m.audit_billing_rate_card_installed.get()
            }),
            (K::EdgeRewritten, |m| m.audit_edge_rewritten.get()),
        ];

        for (kind, read) in cases.iter() {
            let expected = read(&metrics) + 1;
            metrics.count_audit_emit(*kind);
            assert_eq!(
                read(&metrics),
                expected,
                "kind {:?} did not bump its counter",
                kind
            );
        }

        // Exactly one emit was made per row, so the overall counter
        // must equal the number of rows.
        assert_eq!(metrics.audit_records_emitted.get(), cases.len() as u64);
    }
}