Skip to content

Commit

Permalink
perf: Add new efficient APIs read_unsafe and read_to_vec (#248)
Browse files Browse the repository at this point in the history
I found that a source of significant performance loss is the read method
of `Memory`. The `read` method takes a mutable buffer which it fills
with values read from the stable memory. According to Rust rules, the
buffer passed to read must be initialized before it's passed to read
(buffers containing uninitialized values are unsound and can cause UB).

The usual pattern is to create a properly sized Vec, eg. by using
`vec![0; size]` or `vec.resize(size, 0)` and pass that to `read`.
However, initializing the bytes with values that get overwritten by
`read` is only necessary in order to be sound and requires significant
number of instructions.

This PR introduces a new method `read_unsafe` which allows passing in a
raw pointer and a `count` parameter. Implementations can be more
efficient by reading directly and skipping initialization. This can lead
to instruction reductions of up to 40%.

The PR also introduces a helper method `read_to_vec` which is a safe
wrapper around `read_unsafe` for the most common use-case: reading into
a `Vec`. Clients can for example pass an empty `Vec` and profit from the
extra efficiency without having to call unsafe methods.

---------

Co-authored-by: Andriy Berestovskyy <[email protected]>
  • Loading branch information
frankdavid and berestovskyy authored Nov 22, 2024
1 parent 0b510c0 commit 2129c23
Show file tree
Hide file tree
Showing 13 changed files with 305 additions and 160 deletions.
212 changes: 106 additions & 106 deletions canbench_results.yml

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions src/base_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
//! bytes required to represent integers up to that max size.
use crate::storable::{bounds, bytes_to_store_size_bounded};
use crate::{
read_u32, read_u64, safe_write, write, write_u32, write_u64, Address, GrowFailed, Memory,
Storable,
read_to_vec, read_u32, read_u64, safe_write, write, write_u32, write_u64, Address, GrowFailed,
Memory, Storable,
};
use std::borrow::{Borrow, Cow};
use std::cmp::min;
Expand Down Expand Up @@ -245,11 +245,10 @@ impl<T: Storable, M: Memory> BaseVec<T, M> {
}

/// Reads the item at the specified index without any bound checks.
fn read_entry_to(&self, index: u64, buf: &mut std::vec::Vec<u8>) {
fn read_entry_to(&self, index: u64, buf: &mut Vec<u8>) {
let offset = DATA_OFFSET + slot_size::<T>() as u64 * index;
let (data_offset, data_size) = self.read_entry_size(offset);
buf.resize(data_size, 0);
self.memory.read(data_offset, &mut buf[..]);
read_to_vec(&self.memory, data_offset.into(), buf, data_size);
}

/// Sets the vector's length.
Expand Down
13 changes: 9 additions & 4 deletions src/btreemap/node.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
btreemap::Allocator,
read_struct, read_u32, read_u64,
read_struct, read_to_vec, read_u32, read_u64,
storable::Storable,
types::{Address, Bytes},
write, write_struct, write_u32, Memory,
Expand Down Expand Up @@ -190,7 +190,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
value.take_or_load(|offset| self.load_value_from_memory(offset, memory))
}

/// Loads a value from stable memory at the given offset.
/// Loads a value from stable memory at the given offset of this node.
fn load_value_from_memory<M: Memory>(&self, offset: Bytes, memory: &M) -> Vec<u8> {
let reader = NodeReader {
address: self.address,
Expand All @@ -200,8 +200,13 @@ impl<K: Storable + Ord + Clone> Node<K> {
};

let value_len = read_u32(&reader, Address::from(offset.get())) as usize;
let mut bytes = vec![0; value_len];
reader.read((offset + U32_SIZE).get(), &mut bytes);
let mut bytes = vec![];
read_to_vec(
&reader,
Address::from((offset + U32_SIZE).get()),
&mut bytes,
value_len,
);

bytes
}
Expand Down
32 changes: 22 additions & 10 deletions src/btreemap/node/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,21 @@ pub struct NodeReader<'a, M: Memory> {
// Note: The `Memory` interface is implemented so that helper methods such `read_u32`,
// `read_struct`, etc. can be used with a `NodeReader` directly.
impl<'a, M: Memory> Memory for NodeReader<'a, M> {
fn read(&self, offset: u64, dst: &mut [u8]) {
unsafe fn read_unsafe(&self, offset: u64, dst: *mut u8, count: usize) {
// If the read is only in the initial page, then read it directly in one go.
// This is a performance enhancement to avoid the cost of creating a `NodeIterator`.
if (offset + dst.len() as u64) < self.page_size.get() as u64 {
self.memory.read(self.address.get() + offset, dst);
if (offset + count as u64) < self.page_size.get() as u64 {
self.memory
.read_unsafe(self.address.get() + offset, dst, count);
return;
}

// The read is split across several pages. Create a `NodeIterator` to to read from
// The read is split across several pages. Create a `NodeIterator` to read from
// each of the individual pages.
let iter = NodeIterator::new(
VirtualSegment {
address: Address::from(offset),
length: Bytes::from(dst.len() as u64),
length: Bytes::from(count as u64),
},
Bytes::from(self.page_size.get()),
);
Expand All @@ -43,22 +44,33 @@ impl<'a, M: Memory> Memory for NodeReader<'a, M> {
length,
} in iter
{
// SAFETY: read_unsafe() is safe to call iff bytes_read + length <= count since the
// caller guarantees that we can write `count` number of bytes to `dst`.
assert!(bytes_read + length.get() as usize <= count);
if page_idx == 0 {
self.memory.read(
self.memory.read_unsafe(
(self.address + offset).get(),
&mut dst[bytes_read as usize..(bytes_read + length.get()) as usize],
dst.add(bytes_read),
length.get() as usize,
);
} else {
self.memory.read(
self.memory.read_unsafe(
(self.overflows[page_idx - 1] + offset).get(),
&mut dst[bytes_read as usize..(bytes_read + length.get()) as usize],
dst.add(bytes_read),
length.get() as usize,
);
}

bytes_read += length.get();
bytes_read += length.get() as usize;
}
}

#[inline]
fn read(&self, offset: u64, dst: &mut [u8]) {
// SAFETY: since dst is dst.len() long, it fulfills the safety requirements of read_unsafe.
unsafe { self.read_unsafe(offset, dst.as_mut_ptr(), dst.len()) }
}

fn write(&self, _: u64, _: &[u8]) {
unreachable!("NodeReader does not support write")
}
Expand Down
7 changes: 3 additions & 4 deletions src/btreemap/node/v1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,14 @@ impl<K: Storable + Ord + Clone> Node<K> {
// Load the entries.
let mut keys_encoded_values = Vec::with_capacity(header.num_entries as usize);
let mut offset = NodeHeader::size();
let mut buf = Vec::with_capacity(max_key_size.max(max_value_size) as usize);
let mut buf = vec![];
for _ in 0..header.num_entries {
// Read the key's size.
let key_size = read_u32(memory, address + offset);
offset += U32_SIZE;

// Read the key.
buf.resize(key_size as usize, 0);
memory.read((address + offset).get(), &mut buf);
read_to_vec(memory, address + offset, &mut buf, key_size as usize);
offset += Bytes::from(max_key_size);
let key = K::from_bytes(Cow::Borrowed(&buf));
// Values are loaded lazily. Store a reference and skip loading it.
Expand All @@ -86,7 +85,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
let mut children = vec![];
if header.node_type == INTERNAL_NODE_TYPE {
// The number of children is equal to the number of entries + 1.
children.reserve(header.num_entries as usize + 1);
children.reserve_exact(header.num_entries as usize + 1);
for _ in 0..header.num_entries + 1 {
let child = Address::from(read_u64(memory, address + offset));
offset += Address::size();
Expand Down
4 changes: 2 additions & 2 deletions src/btreemap/node/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
let mut children = vec![];
if node_type == NodeType::Internal {
// The number of children is equal to the number of entries + 1.
children.reserve_exact(num_entries + 1);
for _ in 0..num_entries + 1 {
let child = Address::from(read_u64(&reader, offset));
offset += Address::size();
Expand All @@ -164,8 +165,7 @@ impl<K: Storable + Ord + Clone> Node<K> {
};

// Load the key.
buf.resize(key_size as usize, 0);
reader.read(offset.get(), &mut buf);
read_to_vec(&reader, offset, &mut buf, key_size as usize);
let key = K::from_bytes(Cow::Borrowed(&buf));
offset += Bytes::from(key_size);
keys_encoded_values.push((key, Value::by_ref(Bytes::from(0usize))));
Expand Down
6 changes: 3 additions & 3 deletions src/cell.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! A serializable value stored in the stable memory.
use crate::storable::Storable;
use crate::{Memory, WASM_PAGE_SIZE};
use crate::{read_to_vec, Memory, WASM_PAGE_SIZE};
use std::borrow::{Borrow, Cow};
use std::fmt;

Expand Down Expand Up @@ -132,8 +132,8 @@ impl<T: Storable, M: Memory> Cell<T, M> {
///
/// PRECONDITION: memory is large enough to contain the value.
fn read_value(memory: &M, len: u32) -> T {
let mut buf = vec![0; len as usize];
memory.read(HEADER_V1_SIZE, &mut buf);
let mut buf = vec![];
read_to_vec(memory, HEADER_V1_SIZE.into(), &mut buf, len as usize);
T::from_bytes(Cow::Owned(buf))
}

Expand Down
6 changes: 6 additions & 0 deletions src/ic0_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ impl Memory for Ic0StableMemory {
unsafe { stable64_read(dst.as_ptr() as u64, offset, dst.len() as u64) }
}

#[inline]
unsafe fn read_unsafe(&self, offset: u64, dst: *mut u8, count: usize) {
// SAFETY: This is safe because of the ic0 api guarantees.
stable64_read(dst as u64, offset, count as u64);
}

#[inline]
fn write(&self, offset: u64, src: &[u8]) {
// SAFETY: This is safe because of the ic0 api guarantees.
Expand Down
79 changes: 61 additions & 18 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub use file_mem::FileMemory;
pub use ic0_memory::Ic0StableMemory;
use std::error;
use std::fmt::{Display, Formatter};
use std::mem::MaybeUninit;
pub use storable::Storable;
use types::Address;
pub use vec_mem::VectorMemory;
Expand All @@ -46,42 +47,81 @@ pub trait Memory {
/// pages. (One WebAssembly page is 64Ki bytes.)
fn size(&self) -> u64;

/// Tries to grow the memory by new_pages many pages containing
/// Tries to grow the memory by `pages` many pages containing
/// zeroes. If successful, returns the previous size of the
/// memory (in pages). Otherwise, returns -1.
fn grow(&self, pages: u64) -> i64;

/// Copies the data referred to by offset out of the stable memory
/// and replaces the corresponding bytes in dst.
/// Copies the data referred to by `offset` out of the stable memory
/// and replaces the corresponding bytes in `dst`.
fn read(&self, offset: u64, dst: &mut [u8]);

/// Copies the data referred to by src and replaces the
/// Copies `count` number of bytes of the data starting from `offset` out of the stable memory
/// into the buffer starting at `dst`.
///
/// This method is an alternative to `read` which does not require initializing a buffer and may
/// therefore be faster.
///
/// # Safety
///
/// Callers must guarantee that
/// * it is valid to write `count` number of bytes starting from `dst`,
/// * `dst..dst + count` does not overlap with `self`.
///
/// Implementations must guarantee that before the method returns, `count` number of bytes
/// starting from `dst` will be initialized.
#[inline]
unsafe fn read_unsafe(&self, offset: u64, dst: *mut u8, count: usize) {
// Initialize the buffer to make the slice valid.
std::ptr::write_bytes(dst, 0, count);
let slice = std::slice::from_raw_parts_mut(dst, count);
self.read(offset, slice)
}

/// Copies the data referred to by `src` and replaces the
/// corresponding segment starting at offset in the stable memory.
fn write(&self, offset: u64, src: &[u8]);
}

// A helper function that reads a single 32bit integer encoded as
// little-endian from the specified memory at the specified offset.
/// Copies `count` bytes of data starting from `addr` out of the stable memory into `dst`.
///
/// Callers are allowed to pass vectors in any state (e.g. empty vectors).
/// After the method returns, `dst.len() == count`.
/// This method is an alternative to `read` which does not require initializing a buffer and may
/// therefore be faster.
#[inline]
fn read_to_vec<M: Memory>(m: &M, addr: Address, dst: &mut std::vec::Vec<u8>, count: usize) {
dst.clear();
dst.reserve_exact(count);
unsafe {
m.read_unsafe(addr.get(), dst.as_mut_ptr(), count);
// SAFETY: read_unsafe guarantees to initialize the first `count` bytes
dst.set_len(count);
}
}

/// A helper function that reads a single 32bit integer encoded as
/// little-endian from the specified memory at the specified offset.
fn read_u32<M: Memory>(m: &M, addr: Address) -> u32 {
let mut buf: [u8; 4] = [0; 4];
m.read(addr.get(), &mut buf);
u32::from_le_bytes(buf)
}

// A helper function that reads a single 64bit integer encoded as
// little-endian from the specified memory at the specified offset.
/// A helper function that reads a single 64bit integer encoded as
/// little-endian from the specified memory at the specified offset.
fn read_u64<M: Memory>(m: &M, addr: Address) -> u64 {
let mut buf: [u8; 8] = [0; 8];
m.read(addr.get(), &mut buf);
u64::from_le_bytes(buf)
}

// Writes a single 32-bit integer encoded as little-endian.
/// Writes a single 32-bit integer encoded as little-endian.
fn write_u32<M: Memory>(m: &M, addr: Address, val: u32) {
write(m, addr.get(), &val.to_le_bytes());
}

// Writes a single 64-bit integer encoded as little-endian.
/// Writes a single 64-bit integer encoded as little-endian.
fn write_u64<M: Memory>(m: &M, addr: Address, val: u64) {
write(m, addr.get(), &val.to_le_bytes());
}
Expand Down Expand Up @@ -148,17 +188,20 @@ fn write<M: Memory>(memory: &M, offset: u64, bytes: &[u8]) {
}
}

// Reads a struct from memory.
/// Reads a struct from memory.
fn read_struct<T, M: Memory>(addr: Address, memory: &M) -> T {
let mut t: T = unsafe { core::mem::zeroed() };
let t_slice = unsafe {
core::slice::from_raw_parts_mut(&mut t as *mut _ as *mut u8, core::mem::size_of::<T>())
};
memory.read(addr.get(), t_slice);
t
let mut value = MaybeUninit::<T>::uninit();
unsafe {
memory.read_unsafe(
addr.get(),
value.as_mut_ptr() as *mut u8,
core::mem::size_of::<T>(),
);
value.assume_init()
}
}

// Writes a struct to memory.
/// Writes a struct to memory.
fn write_struct<T, M: Memory>(t: &T, addr: Address, memory: &M) {
let slice = unsafe {
core::slice::from_raw_parts(t as *const _ as *const u8, core::mem::size_of::<T>())
Expand Down
5 changes: 2 additions & 3 deletions src/log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
//! ----------------------------------------
//! Unallocated space
//! ```
use crate::{read_u64, safe_write, write_u64, Address, GrowFailed, Memory, Storable};
use crate::{read_to_vec, read_u64, safe_write, write_u64, Address, GrowFailed, Memory, Storable};
use std::borrow::Cow;
use std::cell::RefCell;
use std::fmt;
Expand Down Expand Up @@ -331,8 +331,7 @@ impl<T: Storable, INDEX: Memory, DATA: Memory> Log<T, INDEX, DATA> {
/// ignores the result.
pub fn read_entry(&self, idx: u64, buf: &mut Vec<u8>) -> Result<(), NoSuchEntry> {
let (offset, len) = self.entry_meta(idx).ok_or(NoSuchEntry)?;
buf.resize(len, 0);
self.data_memory.read(HEADER_OFFSET + offset, buf);
read_to_vec(&self.data_memory, (HEADER_OFFSET + offset).into(), buf, len);
Ok(())
}

Expand Down
15 changes: 10 additions & 5 deletions src/memory_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
//! assert_eq!(bytes, vec![4, 5, 6]);
//! ```
use crate::{
read_struct,
read_struct, read_to_vec,
types::{Address, Bytes},
write, write_struct, Memory, WASM_PAGE_SIZE,
};
Expand Down Expand Up @@ -239,9 +239,9 @@ impl<M: Memory> MemoryManagerInner<M> {
}

// Check if the magic in the memory corresponds to this object.
let mut dst = vec![0; 3];
let mut dst = [0; 3];
memory.read(0, &mut dst);
if dst != MAGIC {
if &dst != MAGIC {
// No memory manager found. Create a new instance.
MemoryManagerInner::new(memory, bucket_size_in_pages)
} else {
Expand Down Expand Up @@ -277,8 +277,13 @@ impl<M: Memory> MemoryManagerInner<M> {
assert_eq!(&header.magic, MAGIC, "Bad magic.");
assert_eq!(header.version, LAYOUT_VERSION, "Unsupported version.");

let mut buckets = vec![0; MAX_NUM_BUCKETS as usize];
memory.read(bucket_allocations_address(BucketId(0)).get(), &mut buckets);
let mut buckets = vec![];
read_to_vec(
&memory,
bucket_allocations_address(BucketId(0)),
&mut buckets,
MAX_NUM_BUCKETS as usize,
);

let mut memory_buckets = BTreeMap::new();
for (bucket_idx, memory) in buckets.into_iter().enumerate() {
Expand Down
Loading

0 comments on commit 2129c23

Please sign in to comment.