// sled/pagecache/pagetable.rs

1//! A simple wait-free, grow-only pagetable, assumes a dense keyspace.
2#![allow(unsafe_code)]
3
4use std::{
5    alloc::{alloc_zeroed, Layout},
6    convert::TryFrom,
7    mem::{align_of, size_of},
8    sync::atomic::Ordering::{Acquire, Relaxed, Release},
9};
10
11use crossbeam_epoch::{pin, Atomic, Guard, Owned, Shared};
12
13use crate::{
14    debug_delay,
15    pagecache::{constants::MAX_PID_BITS, Page, PageView},
16    Measure, M,
17};
18
#[allow(unused)]
#[doc(hidden)]
/// Size in bytes of the root `Node1`, exposed for diagnostics.
pub const PAGETABLE_NODE_SZ: usize = size_of::<Node1>();

// The tree has exactly two levels: the root `Node1` fans out to
// lazily-allocated `Node2` nodes, each holding the actual `Page`
// slots. Together they cover `MAX_PID_BITS` bits of key space.
//
// Number of low-order key bits resolved by a `Node2`.
const NODE2_FAN_FACTOR: usize = 18;
// Number of `Node2` children under the root (high-order bits).
const NODE1_FAN_OUT: usize = 1 << (MAX_PID_BITS - NODE2_FAN_FACTOR);
// Number of `Page` slots per `Node2` (low-order bits).
const NODE2_FAN_OUT: usize = 1 << NODE2_FAN_FACTOR;
// Mask extracting the low `NODE2_FAN_FACTOR` bits of a `PageId`.
const FAN_MASK: u64 = (NODE2_FAN_OUT - 1) as u64;

/// A logical page identifier; assumed to be allocated densely from 0.
pub type PageId = u64;
29
/// Root level of the radix tree: one atomic slot per possible
/// high-order key prefix, each lazily pointing at a `Node2`.
struct Node1 {
    children: [Atomic<Node2>; NODE1_FAN_OUT],
}
33
/// Leaf level of the radix tree: one atomic `Page` slot per
/// low-order key suffix.
struct Node2 {
    children: [Atomic<Page>; NODE2_FAN_OUT],
}
37
38impl Node1 {
39    fn new() -> Owned<Self> {
40        let size = size_of::<Self>();
41        let align = align_of::<Self>();
42
43        unsafe {
44            let layout = Layout::from_size_align_unchecked(size, align);
45
46            #[allow(clippy::cast_ptr_alignment)]
47            let ptr = alloc_zeroed(layout) as *mut Self;
48
49            Owned::from_raw(ptr)
50        }
51    }
52}
53
54impl Node2 {
55    fn new() -> Owned<Node2> {
56        let size = size_of::<Self>();
57        let align = align_of::<Self>();
58
59        unsafe {
60            let layout = Layout::from_size_align_unchecked(size, align);
61
62            #[allow(clippy::cast_ptr_alignment)]
63            let ptr = alloc_zeroed(layout) as *mut Self;
64
65            Owned::from_raw(ptr)
66        }
67    }
68}
69
impl Drop for Node1 {
    fn drop(&mut self) {
        // Reclaim every allocated `Node2` child; `drop_iter` stops at
        // the first null slot, relying on the dense-keyspace
        // assumption that slots are populated left-to-right.
        drop_iter(self.children.iter());
    }
}
75
impl Drop for Node2 {
    fn drop(&mut self) {
        // Reclaim every installed `Page`; `drop_iter` stops at the
        // first null slot, relying on the dense-keyspace assumption
        // that slots are populated left-to-right.
        drop_iter(self.children.iter());
    }
}
81
82fn drop_iter<T>(iter: core::slice::Iter<'_, Atomic<T>>) {
83    let guard = pin();
84    for child in iter {
85        let shared_child = child.load(Relaxed, &guard);
86        if shared_child.is_null() {
87            // this does not leak because the PageTable is
88            // assumed to be dense.
89            break;
90        }
91        unsafe {
92            drop(shared_child.into_owned());
93        }
94    }
95}
96
/// A simple lock-free radix tree.
pub struct PageTable {
    // Root node; its `Node2` children (and their `Page` slots) are
    // allocated lazily on first access.
    head: Atomic<Node1>,
}
101
102impl Default for PageTable {
103    fn default() -> Self {
104        let head = Node1::new();
105        Self { head: Atomic::from(head) }
106    }
107}
108
impl PageTable {
    /// Atomically installs `item` at `pid`, returning a view of the
    /// freshly-inserted page. The slot for `pid` must be empty.
    ///
    /// # Panics
    ///
    /// will panic if the item is not null already,
    /// which represents a serious failure to
    /// properly handle lifecycles of pages in the
    /// using system.
    pub(crate) fn insert<'g>(
        &self,
        pid: PageId,
        item: Page,
        guard: &'g Guard,
    ) -> PageView<'g> {
        debug_delay();
        let tip = self.traverse(pid, guard);

        // Release publication: readers that Acquire-load this slot
        // (see `get`) observe the fully-initialized `Page`.
        let shared = Owned::new(item).into_shared(guard);
        let old = tip.swap(shared, Release, guard);
        assert!(old.is_null());

        PageView { read: shared, entry: tip }
    }

    /// Try to get a value from the tree.
    pub(crate) fn get<'g>(
        &self,
        pid: PageId,
        guard: &'g Guard,
    ) -> Option<PageView<'g>> {
        let _measure = Measure::new(&M.get_pagetable);
        debug_delay();
        let tip = self.traverse(pid, guard);

        debug_delay();
        // Acquire pairs with the Release swap in `insert`.
        let res = tip.load(Acquire, guard);
        if res.is_null() {
            None
        } else {
            let page_view = PageView { read: res, entry: tip };

            Some(page_view)
        }
    }

    /// Walks the two-level tree to the slot for key `k`, allocating
    /// the intermediate `Node2` on the way down if it does not exist
    /// yet. The `debug_delay` calls are interleaving points for
    /// concurrency testing.
    fn traverse<'g>(&self, k: PageId, guard: &'g Guard) -> &'g Atomic<Page> {
        let (l1k, l2k) = split_fanout(k);

        debug_delay();
        let head = self.head.load(Acquire, guard);

        debug_delay();
        // `head` is never null: it is set in `Default` and only
        // reclaimed in `Drop`.
        let l1 = unsafe { &head.deref().children };

        debug_delay();
        let mut l2_ptr = l1[l1k].load(Acquire, guard);

        if l2_ptr.is_null() {
            // Lazily allocate the second-level node. Several threads
            // may race here; exactly one CAS wins.
            let next_child = Node2::new();

            debug_delay();
            let ret = l1[l1k].compare_and_set(
                Shared::null(),
                next_child,
                Release,
                guard,
            );

            l2_ptr = match ret {
                Ok(next_child) => next_child,
                Err(returned) => {
                    // Lost the race: free our speculative allocation
                    // and use the node the winner installed.
                    drop(returned.new);
                    returned.current
                }
            };
        }

        debug_delay();
        // Non-null by construction: either loaded non-null above or
        // just installed/observed via the CAS.
        let l2 = unsafe { &l2_ptr.deref().children };

        &l2[l2k]
    }
}
191
192#[inline]
193fn split_fanout(id: PageId) -> (usize, usize) {
194    // right shift 32 on 32-bit pointer systems panics
195    #[cfg(target_pointer_width = "64")]
196    assert!(
197        id <= 1 << MAX_PID_BITS,
198        "trying to access key of {}, which is \
199         higher than 2 ^ {}",
200        id,
201        MAX_PID_BITS,
202    );
203
204    let left = id >> NODE2_FAN_FACTOR;
205    let right = id & FAN_MASK;
206
207    (safe_usize(left), safe_usize(right))
208}
209
210#[inline]
211fn safe_usize(value: PageId) -> usize {
212    usize::try_from(value).unwrap()
213}
214
215impl Drop for PageTable {
216    fn drop(&mut self) {
217        let guard = pin();
218        let head = self.head.load(Relaxed, &guard);
219        unsafe {
220            drop(head.into_owned());
221        }
222    }
223}
224
#[test]
fn test_split_fanout() {
    // 2^18 - 1: every bit lands in the level-2 index.
    assert_eq!(split_fanout(0x3_FFFF), (0, 0x3_FFFF));
    // One bit above the fan-out boundary moves into level 1.
    assert_eq!(split_fanout(0x7_FFFF), (1, 0x3_FFFF));
}