// grafos_std/gpu.rs

1//! GPU resource module.
2//!
3//! Provides typed access to fabric GPU resources via the persistent
4//! v1 session API ([`GpuSession`]). Programs acquire a [`GpuLease`],
5//! build a [`GpuSession`] on top of it, and drive device memory,
6//! module loads, launches, and synchronisation via the
7//! `fabricbios_gpu_v1` host ABI.
8//!
9//! The legacy one-shot v0 `gpu_submit` SDK builder has been removed;
10//! see `docs/grafos/gpu-bridge-deferred-waves.md` §3 for context.
11
12extern crate alloc;
13use alloc::vec::Vec;
14
15use crate::error::Result;
16use crate::host;
17use crate::lease::{self, LeaseInfo, LeaseStatus, SharedLeaseState};
18
19#[cfg(feature = "observe")]
20const LEASE_TAG_GPU: u8 = 0x05;
21
22/// A GPU lease that auto-frees on drop.
23///
24/// Anchors session-based GPU work: a [`GpuSession`] built from this
25/// lease drives persistent device memory, module loads, launches, and
26/// synchronisation via the `fabricbios_gpu_v1` ABI. Created via
27/// [`GpuBuilder::acquire`].
pub struct GpuLease {
    // Shared bookkeeping (id, timestamps, status). Cloned into
    // `GpuSession` and the RAII handles so they can observe lease state
    // (e.g. to suppress frees after expiry).
    state: SharedLeaseState,
}
31
32impl GpuLease {
33    /// Lease metadata snapshot (id, creation time, expiry, and status).
34    pub fn info(&self) -> LeaseInfo {
35        lease::info(&self.state)
36    }
37
38    /// Unique lease identifier.
39    pub fn lease_id(&self) -> u128 {
40        lease::lease_id(&self.state)
41    }
42
43    /// Lease creation timestamp (unix seconds).
44    pub fn created_at_unix_secs(&self) -> u64 {
45        lease::created_at_unix_secs(&self.state)
46    }
47
48    /// Lease expiry timestamp (unix seconds).
49    pub fn expires_at_unix_secs(&self) -> u64 {
50        lease::expires_at_unix_secs(&self.state)
51    }
52
53    /// Current lease status.
54    ///
55    /// Queries the host for the authoritative lease status, falling
56    /// back to local tracking on error.
57    pub fn status(&self) -> LeaseStatus {
58        if let Ok(s) = host::gpu_lease_query(lease::lease_id(&self.state)) {
59            match s {
60                0 => LeaseStatus::Active,
61                1 => LeaseStatus::Expired,
62                2 => LeaseStatus::Revoked,
63                _ => LeaseStatus::Revoked,
64            }
65        } else {
66            lease::status(&self.state)
67        }
68    }
69
70    /// Renew the lease TTL by `duration_secs`.
71    pub fn renew(&self, duration_secs: u64) -> Result<()> {
72        host::gpu_lease_renew(lease::lease_id(&self.state), duration_secs as u32)?;
73        lease::renew(&self.state, duration_secs)
74    }
75
76    /// Explicitly revoke/free this lease.
77    pub fn free(&self) {
78        let _ = host::gpu_lease_free(lease::lease_id(&self.state));
79        lease::free(&self.state);
80    }
81}
82
83impl Drop for GpuLease {
84    fn drop(&mut self) {
85        #[cfg(feature = "observe")]
86        crate::observe_hooks::on_lease_dropped(LEASE_TAG_GPU, lease::lease_id(&self.state));
87        let _ = host::gpu_lease_free(lease::lease_id(&self.state));
88        lease::free(&self.state);
89    }
90}
91
/// Builder for acquiring a fabric GPU lease.
///
/// Allows specifying minimum VRAM requirements.
///
/// # Examples
///
/// ```rust
/// use grafos_std::gpu::GpuBuilder;
///
/// # grafos_std::host::reset_mock();
/// let lease = GpuBuilder::new().min_vram(1024).acquire()?;
/// // Build a `GpuSession` on the lease for device work (the legacy
/// // v0 one-shot submit path has been removed; see the module docs).
/// # Ok::<(), grafos_std::FabricError>(())
/// ```
pub struct GpuBuilder {
    // Minimum VRAM in bytes; 0 = unconstrained.
    _min_vram: u64,
    // Lease TTL in seconds (default 300; floored at 1 by `lease_secs`).
    _lease_secs: u32,
    // Requested exclusivity class; `None` = daemon default applies.
    _exclusivity: Option<GpuExclusivityClass>,
    // Placement (anti-)affinity constraints.
    _affinities: Vec<crate::affinity::Affinity>,
}
112
113/// Per-lease GPU exclusivity class.
114///
115/// Mirrors `fabricbios_core::lease_gpu_exclusivity::GpuExclusivityClass` at
116/// the SDK layer.  See `docs/spec/gpu-exclusivity-wire-format.md` for the
117/// wire format and `docs/spec/resource-isolation-and-exclusivity.md` §5.3
118/// for the semantics.
119///
120/// When passed to [`GpuBuilder::exclusivity`], the runtime emits
121/// `TLV_LEASE_GPU_EXCLUSIVITY` (0x0903) on the `LEASE_ALLOC` request.
122/// Unsupported classes fail closed — the node rejects the lease.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuExclusivityClass {
    /// Device may multiplex other tenants.
    Shared,
    /// Exclusive residency for the session lifetime.
    SessionExclusive,
    /// Whole device for the lease lifetime.
    DeviceExclusive,
    /// Reserved for future MIG/partition isolation.
    PartitionExclusive,
    // NOTE(review): presumably the wire encoding depends on this enum
    // staying variant-for-variant in sync with the `fabricbios_core`
    // counterpart — confirm before adding or reordering variants.
}
134
impl GpuBuilder {
    /// Create a new builder with no VRAM constraint and no exclusivity
    /// preference (daemon default applies).
    pub fn new() -> Self {
        GpuBuilder {
            _min_vram: 0,
            _lease_secs: 300,
            _exclusivity: None,
            _affinities: Vec::new(),
        }
    }

    /// Set the minimum VRAM required in bytes.
    pub fn min_vram(mut self, n: u64) -> Self {
        self._min_vram = n;
        self
    }

    /// Request a specific GPU exclusivity class for this lease.
    ///
    /// When set, the daemon emits `TLV_LEASE_GPU_EXCLUSIVITY` (0x0903) on the
    /// `LEASE_ALLOC` request.  When omitted, the daemon's `--gpu-share-mode`
    /// default applies.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use grafos_std::gpu::{GpuBuilder, GpuExclusivityClass};
    ///
    /// # grafos_std::host::reset_mock();
    /// let lease = GpuBuilder::new()
    ///     .min_vram(1024)
    ///     .exclusivity(GpuExclusivityClass::DeviceExclusive)
    ///     .acquire()?;
    /// # Ok::<(), grafos_std::FabricError>(())
    /// ```
    pub fn exclusivity(mut self, class: GpuExclusivityClass) -> Self {
        self._exclusivity = Some(class);
        self
    }

    /// Add an affinity constraint (toward the target).
    pub fn affinity(mut self, a: crate::affinity::Affinity) -> Self {
        self._affinities.push(a);
        self
    }

    /// Add an anti-affinity constraint (away from the target).
    pub fn anti_affinity(
        mut self,
        strength: crate::affinity::Strength,
        target: crate::affinity::Target,
    ) -> Self {
        self._affinities
            .push(crate::affinity::Affinity::anti(strength, target));
        self
    }

    /// Set the lease TTL in seconds.
    ///
    /// Floored at 1 second so a zero TTL cannot be requested.
    pub fn lease_secs(mut self, secs: u32) -> Self {
        self._lease_secs = secs.max(1);
        self
    }

    /// Acquire a GPU lease.
    ///
    /// On wasm32, allocates via the `gpu_lease_alloc` host import.
    /// On native, uses the local mock lease allocator.
    ///
    /// # Errors
    ///
    /// Returns [`crate::error::FabricError::CapacityExceeded`] or
    /// [`crate::error::FabricError::Disconnected`] if the host cannot satisfy the request.
    pub fn acquire(self) -> Result<GpuLease> {
        // resource_id 0 = unspecified / any available GPU
        //
        // NOTE(review): only `_lease_secs` is forwarded on this hostcall;
        // `_min_vram`, `_exclusivity`, and `_affinities` are recorded on
        // the builder but not transmitted here — presumably the TLV
        // emission happens at a lower layer. Confirm against the daemon's
        // LEASE_ALLOC path.
        let host_lease_id = host::gpu_lease_alloc(0, self._lease_secs)?;
        let created_at = host::unix_time_secs();
        // Saturating add guards against overflow for absurd TTLs.
        let expires_at = created_at.saturating_add(self._lease_secs as u64);
        let state = lease::new_shared_lease_from_parts(
            host_lease_id,
            created_at,
            expires_at,
            LeaseStatus::Active,
        );
        let lease = GpuLease { state };
        #[cfg(feature = "observe")]
        crate::observe_hooks::on_lease_acquired(LEASE_TAG_GPU, lease.lease_id(), "host", 0);
        Ok(lease)
    }
}
225
impl Default for GpuBuilder {
    /// Equivalent to [`GpuBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
231
232// ---------------------------------------------------------------------------
233// GPU Session — persistent context tied to lease lifetime
234// ---------------------------------------------------------------------------
235
236/// RAII handle to a device memory allocation within a GPU session.
237///
238/// # Ownership model (SDK polish wave, post-Phase 48.15)
239///
240/// `GpuMemHandle` is a RAII handle. It is [`Clone`] but **not** `Copy`.
241/// When the last clone is dropped it automatically calls
242/// `gpu_session_mem_free` for the underlying device allocation, unless
243/// you explicitly freed it first via [`GpuSession::mem_free`].
244///
245/// The intended pattern is:
246///
247/// ```text
248/// let h = sess.mem_alloc(1024)?;
249/// sess.mem_write(&h, 0, &data)?;
250/// // either explicitly free:
251/// sess.mem_free(h)?;
252/// // ...or just let `h` go out of scope; Drop frees it.
253/// ```
254///
255/// **Migration note:** prior to the SDK polish wave this type was
256/// `Copy`, which meant a `?` between `mem_alloc` and `mem_free` could
257/// silently leak the device allocation until lease expiry. Removing
258/// `Copy` is what makes the RAII guarantee work — if you previously
259/// relied on copying handles around, clone them explicitly. Cloned
260/// handles share the same underlying allocation and the same `freed`
261/// state; only the *last* clone to drop will issue the `mem_free`.
262///
263/// Drop is best-effort: it silently swallows hostcall errors and never
264/// panics. If the lease has already expired, the daemon's
265/// W3.5-validated lease-expiry teardown chain has already freed the
266/// device memory, so Drop suppresses the call entirely in that case.
267///
268/// The raw u64 device pointer is exposed via [`GpuMemHandle::raw`] for
269/// callers who need to pack it into kernel argument buffers manually.
270/// Prefer [`KernelArgs::push_buffer`] which handles the marshalling.
#[derive(Debug, Clone)]
pub struct GpuMemHandle {
    // Shared (handle, freed) cell — clones share it; only the last
    // `Rc` to drop issues the host free.
    inner: alloc::rc::Rc<core::cell::Cell<GpuHandleInner>>,
    // Lease state observed by Drop to suppress frees after expiry.
    lease_state: SharedLeaseState,
}
276
// Shared inner state for `GpuMemHandle` and `GpuModule` clones.
#[derive(Debug, Clone, Copy)]
struct GpuHandleInner {
    // Raw device pointer / module id as returned by the bridge.
    handle: u64,
    // Set once the resource has been explicitly freed/unloaded, so
    // Drop never double-frees.
    freed: bool,
}
282
283impl GpuMemHandle {
284    /// Raw device pointer value as returned by the bridge
285    /// (`cuMemAlloc` on CUDA, `hipMalloc` on HIP).
286    pub fn raw(&self) -> u64 {
287        self.inner.get().handle
288    }
289
290    fn mark_freed(&self) {
291        let mut v = self.inner.get();
292        v.freed = true;
293        self.inner.set(v);
294    }
295}
296
impl PartialEq for GpuMemHandle {
    // Equality is by raw device pointer only; the `freed` flag and the
    // lease are not compared, so clones of one allocation compare equal.
    fn eq(&self, other: &Self) -> bool {
        self.raw() == other.raw()
    }
}
impl Eq for GpuMemHandle {}
303
304impl Drop for GpuMemHandle {
305    fn drop(&mut self) {
306        // Only the *last* clone of the inner Rc should issue the free.
307        if alloc::rc::Rc::strong_count(&self.inner) > 1 {
308            return;
309        }
310        let inner = self.inner.get();
311        if inner.freed {
312            return;
313        }
314        // If the lease is no longer active the daemon's lease-expiry
315        // teardown has already freed device memory; suppress the call
316        // entirely. Best-effort, never panic.
317        if lease::status(&self.lease_state) != LeaseStatus::Active {
318            return;
319        }
320        let lease_id = lease::lease_id(&self.lease_state);
321        let _ = host::gpu_session_mem_free(lease_id, inner.handle);
322    }
323}
324
325/// RAII handle to a loaded GPU module within a GPU session.
326///
327/// # Ownership model (Phase 48.16 SDK polish)
328///
329/// `GpuModule` is [`Clone`] but **not** `Copy`. When the last clone is
330/// dropped it automatically calls `gpu_session_module_unload` for the
331/// underlying CUDA module, unless you explicitly unloaded it first via
332/// [`GpuSession::module_unload`].
333///
334/// The intended pattern is symmetric with [`GpuMemHandle`]:
335///
336/// ```text
337/// let m = sess.module_load(&ptx)?;
338/// sess.launch(&m, "k", [1,1,1], [1,1,1], &args, &sizes)?;
339/// // either explicitly unload:
340/// sess.module_unload(m)?;
341/// // ...or just let `m` go out of scope; Drop unloads it.
342/// ```
343///
344/// Drop is best-effort: it silently swallows hostcall errors and never
345/// panics. If the lease has already expired, the daemon's
346/// W3.5-validated lease-expiry teardown chain has already unloaded the
347/// module, so Drop suppresses the call entirely in that case.
348///
349/// **Migration note:** prior to the SDK polish wave this type was
350/// `Copy`. Cloned handles share the same underlying CUmodule and the
351/// same `freed` state; only the *last* clone to drop will issue the
352/// `module_unload`.
#[derive(Debug, Clone)]
pub struct GpuModule {
    // Shared (module id, freed) cell — clones share it; only the last
    // `Rc` to drop issues the host unload.
    inner: alloc::rc::Rc<core::cell::Cell<GpuHandleInner>>,
    // Lease state observed by Drop to suppress unloads after expiry.
    lease_state: SharedLeaseState,
}
358
359impl GpuModule {
360    /// Raw module id as returned by the bridge.
361    pub fn raw(&self) -> u64 {
362        self.inner.get().handle
363    }
364
365    fn mark_freed(&self) {
366        let mut v = self.inner.get();
367        v.freed = true;
368        self.inner.set(v);
369    }
370}
371
impl PartialEq for GpuModule {
    // Equality is by raw module id only; the `freed` flag and the lease
    // are not compared, so clones of one module compare equal.
    fn eq(&self, other: &Self) -> bool {
        self.raw() == other.raw()
    }
}
impl Eq for GpuModule {}
378
379impl Drop for GpuModule {
380    fn drop(&mut self) {
381        // Only the *last* clone of the inner Rc should issue the unload.
382        if alloc::rc::Rc::strong_count(&self.inner) > 1 {
383            return;
384        }
385        let inner = self.inner.get();
386        if inner.freed {
387            return;
388        }
389        // If the lease is no longer active the daemon's lease-expiry
390        // teardown has already unloaded the module; suppress the call
391        // entirely. Best-effort, never panic.
392        if lease::status(&self.lease_state) != LeaseStatus::Active {
393            return;
394        }
395        let lease_id = lease::lease_id(&self.lease_state);
396        let _ = host::gpu_session_module_unload(lease_id, inner.handle);
397    }
398}
399
/// Persistent GPU session backed by a lease's CUDA context.
///
/// `GpuSession` provides persistent device memory and loaded modules for
/// multi-kernel workloads. The session is tied to the [`GpuLease`] and
/// all resources are freed when the lease expires.
pub struct GpuSession {
    // Cloned from the lease; session_id == lease_id.
    lease_state: SharedLeaseState,
}
408
409// Phase 48.15 W1 — `GpuSession` is source-portable across host and wasm32.
410//
411// On native host targets the `host::gpu_session_*` family is mock-backed
412// (see `crates/grafos-std/src/host.rs`). On wasm32 targets the same Rust
413// signatures lower to the `fabricbios_gpu_v1` extern block and translate
414// the i32 wire status into `FabricError` / `FabricError::GpuSessionFailed`.
415//
416// Because both targets expose the identical Rust API, every method below
417// is on the Phase 48.14 **safe-list** and needs no per-target rustdoc tag.
418// The P3.1 (`c078c502`) wasm32 cfg-gate has been removed.
impl GpuSession {
    /// Create a session handle from a GPU lease.
    ///
    /// # Canonical lifecycle example
    ///
    /// This doctest exercises the full session lifecycle against the
    /// host mock so it runs on every `cargo test` build, with no GPU
    /// hardware required.
    ///
    /// ```rust
    /// use grafos_std::gpu::{GpuBuilder, GpuSession, KernelArgs};
    ///
    /// # grafos_std::host::reset_mock();
    /// // 1. Acquire a GPU lease.
    /// let lease = GpuBuilder::new().min_vram(1024).acquire()?;
    ///
    /// // 2. Open a persistent session on the lease.
    /// let mut sess = GpuSession::new(&lease);
    ///
    /// // 3. Allocate device memory and write input data.
    /// let buf = sess.mem_alloc(1024)?;
    /// sess.mem_write(&buf, 0, &[1u8, 2, 3, 4])?;
    ///
    /// // 4. Load a module (PTX/cubin bytes — placeholder under the mock).
    /// let module = sess.module_load(&[0x7f, 0x45, 0x4c, 0x46])?;
    ///
    /// // 5. Launch a kernel using the typed argument builder.
    /// let args = KernelArgs::new()
    ///     .push_u32(42)
    ///     .push_buffer(&buf);
    /// sess.launch_with_args(&module, "vector_add", [256, 1, 1], [64, 1, 1], args)?;
    ///
    /// // 6. Synchronize and read results back.
    /// sess.sync()?;
    /// let _out = sess.mem_read(&buf, 0, 4)?;
    ///
    /// // 7. `buf` and `module` drop here. Both are RAII-freed (the
    /// //    daemon calls `cuMemFree` / `cuModuleUnload` on real
    /// //    hardware). The lease itself drops at end of scope.
    /// # Ok::<(), grafos_std::FabricError>(())
    /// ```
    pub fn new(lease: &GpuLease) -> Self {
        GpuSession {
            lease_state: lease.state.clone(),
        }
    }

    /// Lease ID for this session (session_id == lease_id).
    pub fn lease_id(&self) -> u128 {
        lease::lease_id(&self.lease_state)
    }

    // Fail fast with a lease error before issuing any hostcall on an
    // inactive (expired/revoked/freed) lease.
    fn ensure_active(&self) -> Result<()> {
        lease::ensure_active(&self.lease_state)
    }

    /// Allocate device memory.
    ///
    /// Returns a RAII [`GpuMemHandle`] that frees the allocation on
    /// last drop (see the handle's ownership docs).
    pub fn mem_alloc(&mut self, size: u64) -> Result<GpuMemHandle> {
        self.ensure_active()?;
        let handle = host::gpu_session_mem_alloc(self.lease_id(), size)?;
        Ok(GpuMemHandle {
            inner: alloc::rc::Rc::new(core::cell::Cell::new(GpuHandleInner {
                handle,
                freed: false,
            })),
            lease_state: self.lease_state.clone(),
        })
    }

    /// Write data to device memory.
    ///
    /// NOTE(review): `offset` is presumably bytes from the start of the
    /// allocation — confirm against the bridge docs.
    pub fn mem_write(&mut self, handle: &GpuMemHandle, offset: u64, data: &[u8]) -> Result<()> {
        self.ensure_active()?;
        host::gpu_session_mem_write(self.lease_id(), handle.raw(), offset, data)
    }

    /// Read data from device memory.
    pub fn mem_read(&mut self, handle: &GpuMemHandle, offset: u64, size: u32) -> Result<Vec<u8>> {
        self.ensure_active()?;
        host::gpu_session_mem_read(self.lease_id(), handle.raw(), offset, size)
    }

    /// Free a device memory allocation explicitly.
    ///
    /// Consumes the handle. After this returns successfully (or even
    /// on error), the handle's `Drop` will be a no-op — explicit free
    /// and Drop are both safe; the latter never double-frees.
    pub fn mem_free(&mut self, handle: GpuMemHandle) -> Result<()> {
        self.ensure_active()?;
        let raw = handle.raw();
        // Mark freed first so Drop (running at end of this fn) is a no-op,
        // even if the hostcall errors.
        handle.mark_freed();
        host::gpu_session_mem_free(self.lease_id(), raw)
    }

    /// Load a GPU module (PTX/cubin).
    ///
    /// Returns a RAII [`GpuModule`] that unloads on last drop.
    pub fn module_load(&mut self, binary: &[u8]) -> Result<GpuModule> {
        self.ensure_active()?;
        let module_id = host::gpu_session_module_load(self.lease_id(), binary)?;
        Ok(GpuModule {
            inner: alloc::rc::Rc::new(core::cell::Cell::new(GpuHandleInner {
                handle: module_id,
                freed: false,
            })),
            lease_state: self.lease_state.clone(),
        })
    }

    /// Unload a GPU module explicitly (Phase 48.16 SDK polish).
    ///
    /// Consumes the handle. After this returns successfully (or even
    /// on error), the handle's `Drop` will be a no-op — explicit
    /// unload and Drop are both safe; the latter never double-unloads.
    pub fn module_unload(&mut self, module: GpuModule) -> Result<()> {
        self.ensure_active()?;
        let raw = module.raw();
        // Mark freed first so Drop (running at end of this fn) is a no-op,
        // even if the hostcall errors.
        module.mark_freed();
        host::gpu_session_module_unload(self.lease_id(), raw)
    }

    /// Launch a kernel from a loaded module (async — does not wait).
    ///
    /// `args` is the concatenated per-argument bytes and `arg_sizes`
    /// the parallel per-argument byte sizes — the pair produced by
    /// [`KernelArgs::build`]. Prefer [`GpuSession::launch_with_args`].
    pub fn launch(
        &mut self,
        module: &GpuModule,
        kernel: &str,
        grid: [u32; 3],
        block: [u32; 3],
        args: &[u8],
        arg_sizes: &[u32],
    ) -> Result<()> {
        self.ensure_active()?;
        host::gpu_session_launch(
            self.lease_id(),
            module.raw(),
            kernel,
            grid,
            block,
            args,
            arg_sizes,
        )
    }

    /// Launch a kernel using a [`KernelArgs`] builder.
    ///
    /// Convenience over [`GpuSession::launch`]: builds the
    /// `(args, arg_sizes)` byte/size pair from the typed builder and
    /// forwards to the same underlying hostcall.
    pub fn launch_with_args(
        &mut self,
        module: &GpuModule,
        kernel: &str,
        grid: [u32; 3],
        block: [u32; 3],
        args: KernelArgs,
    ) -> Result<()> {
        let (bytes, sizes) = args.build();
        self.launch(module, kernel, grid, block, &bytes, &sizes)
    }

    /// Synchronize: wait for all outstanding launches to complete.
    pub fn sync(&mut self) -> Result<()> {
        self.ensure_active()?;
        host::gpu_session_sync(self.lease_id())
    }
}
586
587// ---------------------------------------------------------------------------
588// KernelArgs — typed launch argument builder
589// ---------------------------------------------------------------------------
590
591/// Typed builder for GPU kernel launch arguments.
592///
593/// `KernelArgs` packs scalar values and device-buffer pointers into the
594/// `(args, arg_sizes)` byte/size pair that the
595/// [`fabricbios_gpu_v1`] bridge expects. Each `push_*` records the
596/// raw bytes and their per-arg size so the daemon can build a CUDA
597/// `kernelParams` array with one pointer per argument.
598///
599/// # Example
600///
601/// ```rust
602/// use grafos_std::gpu::{GpuBuilder, GpuSession, KernelArgs};
603///
604/// # grafos_std::host::reset_mock();
605/// let lease = GpuBuilder::new().acquire()?;
606/// let mut sess = GpuSession::new(&lease);
607/// let buf = sess.mem_alloc(64)?;
608/// let module = sess.module_load(&[0u8; 8])?;
609/// let args = KernelArgs::new()
610///     .push_u32(7)
611///     .push_f32(2.5)
612///     .push_buffer(&buf);
613/// sess.launch_with_args(&module, "k", [1, 1, 1], [1, 1, 1], args)?;
614/// # Ok::<(), grafos_std::FabricError>(())
615/// ```
616///
617/// # Wire format
618///
619/// - Scalars are written little-endian.
620/// - Buffer args are written as 8-byte little-endian device pointers
621///   (the raw `cuMemAlloc` return value carried by [`GpuMemHandle`]).
622/// - The daemon's launch path packs these into a contiguous byte
623///   buffer and walks `arg_sizes` to compute per-arg pointer offsets
624///   for `kernelParams`. See
625///   `crates/fabricbios-platform-linux/src/gpu.rs::launch`.
#[derive(Debug, Default, Clone)]
pub struct KernelArgs {
    // Concatenated little-endian argument bytes, in push order.
    bytes: Vec<u8>,
    // Per-argument byte sizes; parallel to the segments of `bytes`.
    sizes: Vec<u32>,
}
631
632impl KernelArgs {
633    /// Create an empty argument list.
634    pub fn new() -> Self {
635        Self::default()
636    }
637
638    /// Push a `u32` argument.
639    pub fn push_u32(mut self, v: u32) -> Self {
640        self.bytes.extend_from_slice(&v.to_le_bytes());
641        self.sizes.push(4);
642        self
643    }
644
645    /// Push a `u64` argument.
646    pub fn push_u64(mut self, v: u64) -> Self {
647        self.bytes.extend_from_slice(&v.to_le_bytes());
648        self.sizes.push(8);
649        self
650    }
651
652    /// Push an `i32` argument.
653    pub fn push_i32(mut self, v: i32) -> Self {
654        self.bytes.extend_from_slice(&v.to_le_bytes());
655        self.sizes.push(4);
656        self
657    }
658
659    /// Push an `i64` argument.
660    pub fn push_i64(mut self, v: i64) -> Self {
661        self.bytes.extend_from_slice(&v.to_le_bytes());
662        self.sizes.push(8);
663        self
664    }
665
666    /// Push an `f32` argument.
667    pub fn push_f32(mut self, v: f32) -> Self {
668        self.bytes.extend_from_slice(&v.to_le_bytes());
669        self.sizes.push(4);
670        self
671    }
672
673    /// Push an `f64` argument.
674    pub fn push_f64(mut self, v: f64) -> Self {
675        self.bytes.extend_from_slice(&v.to_le_bytes());
676        self.sizes.push(8);
677        self
678    }
679
680    /// Push a device buffer argument (the raw 8-byte device pointer).
681    ///
682    /// This is the typed equivalent of manually appending
683    /// `handle.raw().to_le_bytes()` with size 8. The daemon will pass
684    /// the underlying device pointer to the kernel as the
685    /// corresponding `kernelParams` entry.
686    pub fn push_buffer(mut self, handle: &GpuMemHandle) -> Self {
687        self.bytes.extend_from_slice(&handle.raw().to_le_bytes());
688        self.sizes.push(8);
689        self
690    }
691
692    /// Push a value of arbitrary type by reinterpreting its bytes.
693    ///
694    /// # Safety
695    ///
696    /// `T` must satisfy all of the following:
697    /// - `Copy` and trivially-destructible.
698    /// - No interior padding bytes whose value matters to the kernel.
699    ///   For C-ABI structs, this generally means `#[repr(C)]` plus
700    ///   manual layout review.
701    /// - No interior pointers, references, or non-`'static` lifetimes.
702    /// - Endianness matches what the kernel expects (host order is
703    ///   little-endian on every platform fabricBIOS targets).
704    ///
705    /// In short: the value must be a "POD" (plain-old-data) blob that
706    /// can be byte-copied into a CUDA `kernelParams` slot. Prefer the
707    /// typed `push_*` helpers for primitive scalars; reach for
708    /// `push_raw` only when packing a `#[repr(C)]` struct argument.
709    pub unsafe fn push_raw<T: Copy>(mut self, value: &T) -> Self {
710        let size = core::mem::size_of::<T>();
711        let ptr = value as *const T as *const u8;
712        // SAFETY: caller upholds the contract above; the slice
713        // is read-only and lives for the duration of this call.
714        let slice = core::slice::from_raw_parts(ptr, size);
715        self.bytes.extend_from_slice(slice);
716        self.sizes.push(size as u32);
717        self
718    }
719
720    /// Consume the builder and return the raw `(bytes, sizes)` pair
721    /// suitable for [`GpuSession::launch`].
722    pub fn build(self) -> (Vec<u8>, Vec<u32>) {
723        (self.bytes, self.sizes)
724    }
725}
726
727#[cfg(test)]
728mod tests {
729    use super::*;
730    use crate::host;
731
    #[test]
    fn gpu_builder_acquires() {
        host::reset_mock();
        // Smoke test: builder with a VRAM hint acquires against the mock.
        let lease = GpuBuilder::new().min_vram(1024).acquire();
        assert!(lease.is_ok());
    }
738
    #[test]
    fn gpu_lease_lifecycle_alloc_status_renew_free() {
        // Happy-path lifecycle: alloc → status → renew → explicit free.
        host::reset_mock();
        let lease = GpuBuilder::new().lease_secs(60).acquire().expect("acquire");
        let id = lease.lease_id();
        assert_ne!(id, 0);

        // Status should be Active right after acquisition.
        assert_eq!(lease.status(), LeaseStatus::Active);

        // Renew should succeed for an active lease.
        assert!(lease.renew(30).is_ok());

        // Free and verify host-side status becomes expired.
        lease.free();
        let status = host::gpu_lease_query(id).expect("query");
        assert_eq!(status, 1); // expired (removed from active set)
    }
757
    #[test]
    fn gpu_lease_drop_frees_host_lease() {
        host::reset_mock();
        let id;
        {
            let lease = GpuBuilder::new().acquire().expect("acquire");
            id = lease.lease_id();
            assert_eq!(host::gpu_lease_query(id).unwrap(), 0); // active
            // `lease` drops here, triggering the RAII host free.
        }
        // After drop, lease should be freed on host side.
        assert_eq!(host::gpu_lease_query(id).unwrap(), 1); // expired
    }
770
771    // ── Phase 48.16 SDK polish: GpuMemHandle RAII tests ──────────────
772
    #[test]
    fn mem_handle_explicit_free_works() {
        host::reset_mock();
        let lease = GpuBuilder::new().acquire().expect("acquire");
        let mut sess = GpuSession::new(&lease);
        let h = sess.mem_alloc(1024).expect("alloc");
        // Count host frees around the explicit call to prove exactly one
        // is issued (the handle's Drop inside mem_free must not add one).
        let before = host::test_mock::_gpu_session_mem_free_count();
        sess.mem_free(h).expect("explicit free");
        let after = host::test_mock::_gpu_session_mem_free_count();
        assert_eq!(
            after,
            before + 1,
            "explicit mem_free issues exactly one host call"
        );
    }
788
    #[test]
    fn mem_handle_drop_frees_on_early_return() {
        host::reset_mock();
        let lease = GpuBuilder::new().acquire().expect("acquire");
        let mut sess = GpuSession::new(&lease);
        let before = host::test_mock::_gpu_session_mem_free_count();
        // Simulate the early-return-after-alloc path: alloc then drop.
        // RAII must issue the host free with no explicit mem_free call.
        {
            let _h = sess.mem_alloc(1024).expect("alloc");
            // _h drops here.
        }
        let after = host::test_mock::_gpu_session_mem_free_count();
        assert_eq!(after, before + 1, "Drop on GpuMemHandle issues mem_free");
    }
803
    #[test]
    fn mem_handle_double_free_is_noop() {
        host::reset_mock();
        let lease = GpuBuilder::new().acquire().expect("acquire");
        let mut sess = GpuSession::new(&lease);
        let h = sess.mem_alloc(1024).expect("alloc");
        let before = host::test_mock::_gpu_session_mem_free_count();
        sess.mem_free(h).expect("explicit free");
        // The handle was consumed by mem_free; nothing else to drop.
        // Verify exactly one free call was issued — i.e. Drop did not
        // issue a second free during the in-scope drop of `h` inside
        // `mem_free`.
        let after = host::test_mock::_gpu_session_mem_free_count();
        assert_eq!(
            after,
            before + 1,
            "explicit free + Drop = exactly one host free"
        );
    }
823
    #[test]
    fn mem_handle_drop_after_lease_expiry_is_silent() {
        host::reset_mock();
        // Pin mock time so the 5-second lease expiry is deterministic.
        host::mock_set_unix_time_secs(5_000);
        let lease = GpuBuilder::new().lease_secs(5).acquire().expect("acquire");
        let mut sess = GpuSession::new(&lease);
        let h = sess.mem_alloc(1024).expect("alloc");
        // Advance past lease expiry — Drop should observe the expired
        // lease and skip the hostcall entirely (the daemon's
        // lease-expiry teardown owns cleanup at that point).
        host::mock_advance_time_secs(10);
        let before = host::test_mock::_gpu_session_mem_free_count();
        drop(h); // must not panic, must not log, must not call free.
        let after = host::test_mock::_gpu_session_mem_free_count();
        assert_eq!(after, before, "Drop after lease expiry suppresses mem_free");
    }
840
    #[test]
    fn mem_handle_clone_only_last_drops() {
        host::reset_mock();
        let lease = GpuBuilder::new().acquire().expect("acquire");
        let mut sess = GpuSession::new(&lease);
        let h = sess.mem_alloc(1024).expect("alloc");
        // Clones share the allocation and compare equal by raw pointer.
        let h2 = h.clone();
        assert_eq!(h, h2);
        let before = host::test_mock::_gpu_session_mem_free_count();
        drop(h);
        let mid = host::test_mock::_gpu_session_mem_free_count();
        assert_eq!(mid, before, "non-last clone does not free");
        drop(h2);
        let after = host::test_mock::_gpu_session_mem_free_count();
        assert_eq!(after, before + 1, "last clone frees exactly once");
    }
857
858    // ── Phase 48.16 SDK polish: GpuModule RAII tests ─────────────────
859
860    #[test]
861    fn module_explicit_unload_works() {
862        host::reset_mock();
863        let lease = GpuBuilder::new().acquire().expect("acquire");
864        let mut sess = GpuSession::new(&lease);
865        let m = sess.module_load(&[0u8; 8]).expect("module_load");
866        let before = host::test_mock::_gpu_session_module_unload_count();
867        sess.module_unload(m).expect("explicit unload");
868        let after = host::test_mock::_gpu_session_module_unload_count();
869        assert_eq!(
870            after,
871            before + 1,
872            "explicit module_unload issues exactly one host call"
873        );
874    }
875
876    #[test]
877    fn module_drop_unloads_on_early_return() {
878        host::reset_mock();
879        let lease = GpuBuilder::new().acquire().expect("acquire");
880        let mut sess = GpuSession::new(&lease);
881        let before = host::test_mock::_gpu_session_module_unload_count();
882        {
883            let _m = sess.module_load(&[0u8; 8]).expect("module_load");
884            // _m drops here.
885        }
886        let after = host::test_mock::_gpu_session_module_unload_count();
887        assert_eq!(after, before + 1, "Drop on GpuModule issues module_unload");
888    }
889
890    #[test]
891    fn module_double_unload_is_noop() {
892        host::reset_mock();
893        let lease = GpuBuilder::new().acquire().expect("acquire");
894        let mut sess = GpuSession::new(&lease);
895        let m = sess.module_load(&[0u8; 8]).expect("module_load");
896        let before = host::test_mock::_gpu_session_module_unload_count();
897        sess.module_unload(m).expect("explicit unload");
898        let after = host::test_mock::_gpu_session_module_unload_count();
899        assert_eq!(
900            after,
901            before + 1,
902            "explicit unload + Drop = exactly one host unload"
903        );
904    }
905
906    #[test]
907    fn module_drop_after_lease_expiry_is_silent() {
908        host::reset_mock();
909        host::mock_set_unix_time_secs(5_000);
910        let lease = GpuBuilder::new().lease_secs(5).acquire().expect("acquire");
911        let mut sess = GpuSession::new(&lease);
912        let m = sess.module_load(&[0u8; 8]).expect("module_load");
913        host::mock_advance_time_secs(10);
914        let before = host::test_mock::_gpu_session_module_unload_count();
915        drop(m); // must not panic, must not call unload.
916        let after = host::test_mock::_gpu_session_module_unload_count();
917        assert_eq!(
918            after, before,
919            "Drop after lease expiry suppresses module_unload"
920        );
921    }
922
923    #[test]
924    fn module_clone_only_last_drops() {
925        host::reset_mock();
926        let lease = GpuBuilder::new().acquire().expect("acquire");
927        let mut sess = GpuSession::new(&lease);
928        let m = sess.module_load(&[0u8; 8]).expect("module_load");
929        let m2 = m.clone();
930        assert_eq!(m, m2);
931        let before = host::test_mock::_gpu_session_module_unload_count();
932        drop(m);
933        let mid = host::test_mock::_gpu_session_module_unload_count();
934        assert_eq!(mid, before, "non-last clone does not unload");
935        drop(m2);
936        let after = host::test_mock::_gpu_session_module_unload_count();
937        assert_eq!(after, before + 1, "last clone unloads exactly once");
938    }
939
940    // ── KernelArgs tests ─────────────────────────────────────────────
941
942    #[test]
943    fn kernel_args_typed_push_layout() {
944        host::reset_mock();
945        let lease = GpuBuilder::new().acquire().expect("acquire");
946        let mut sess = GpuSession::new(&lease);
947        let buf = sess.mem_alloc(1024).expect("alloc");
948        let raw = buf.raw();
949
950        let (bytes, sizes) = KernelArgs::new()
951            .push_u32(0x1122_3344)
952            .push_f32(2.5)
953            .push_buffer(&buf)
954            .build();
955
956        // Sizes: u32=4, f32=4, buffer=8.
957        assert_eq!(sizes, vec![4, 4, 8]);
958        // Bytes: little-endian concatenation.
959        let mut expected = Vec::new();
960        expected.extend_from_slice(&0x1122_3344u32.to_le_bytes());
961        expected.extend_from_slice(&2.5f32.to_le_bytes());
962        expected.extend_from_slice(&raw.to_le_bytes());
963        assert_eq!(bytes, expected);
964
965        // explicit free so the test doesn't lean on Drop semantics.
966        sess.mem_free(buf).expect("free");
967    }
968
969    #[test]
970    fn kernel_args_launch_with_args_round_trip() {
971        host::reset_mock();
972        let lease = GpuBuilder::new().acquire().expect("acquire");
973        let mut sess = GpuSession::new(&lease);
974        let module = sess.module_load(&[0u8; 8]).expect("module_load");
975        let buf = sess.mem_alloc(64).expect("alloc");
976        let args = KernelArgs::new().push_u64(99).push_buffer(&buf);
977        sess.launch_with_args(&module, "k", [1, 1, 1], [1, 1, 1], args)
978            .expect("launch");
979        sess.mem_free(buf).expect("free");
980    }
981}