introduce Table and use for interned values
nikomatsakis committed Aug 8, 2024
1 parent 0c1d8b6 commit 9a3111c
Showing 7 changed files with 222 additions and 73 deletions.
18 changes: 9 additions & 9 deletions components/salsa-macro-rules/src/setup_interned_struct.rs
@@ -52,7 +52,7 @@ macro_rules! setup_interned_struct {
$(#[$attr])*
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
$vis struct $Struct<$db_lt>(
- std::ptr::NonNull<salsa::plumbing::interned::Value < $Struct<'static> >>,
+ salsa::Id,
std::marker::PhantomData < & $db_lt salsa::plumbing::interned::Value < $Struct<'static> > >
);

@@ -66,11 +66,11 @@ macro_rules! setup_interned_struct {
const DEBUG_NAME: &'static str = stringify!($Struct);
type Data<$db_lt> = ($($field_ty,)*);
type Struct<$db_lt> = $Struct<$db_lt>;
- unsafe fn struct_from_raw<'db>(ptr: std::ptr::NonNull<$zalsa_struct::Value<Self>>) -> Self::Struct<'db> {
- $Struct(ptr, std::marker::PhantomData)
+ fn struct_from_id<'db>(id: salsa::Id) -> Self::Struct<'db> {
+ $Struct(id, std::marker::PhantomData)
}
- fn deref_struct(s: Self::Struct<'_>) -> &$zalsa_struct::Value<Self> {
- unsafe { s.0.as_ref() }
+ fn deref_struct(s: Self::Struct<'_>) -> salsa::Id {
+ s.0
}
}

@@ ... @@

impl $zalsa::AsId for $Struct<'_> {
fn as_id(&self) -> salsa::Id {
- unsafe { self.0.as_ref() }.as_id()
+ self.0
}
}

impl<$db_lt> $zalsa::LookupId<$db_lt> for $Struct<$db_lt> {
fn lookup_id(id: salsa::Id, db: &$db_lt dyn $zalsa::Database) -> Self {
- $Configuration::ingredient(db).interned_value(id)
+ Self(id, std::marker::PhantomData)
}
}

@@ -144,7 +144,7 @@ macro_rules! setup_interned_struct {
// FIXME(rust-lang/rust#65991): The `db` argument *should* have the type `dyn Database`
$Db: ?Sized + $zalsa::Database,
{
- let fields = $Configuration::ingredient(db).fields(self);
+ let fields = $Configuration::ingredient(db).fields(db.as_dyn_database(), self);
$zalsa::maybe_clone!(
$field_option,
$field_ty,
@@ -156,7 +156,7 @@
/// Default debug formatting for this struct (may be useful if you define your own `Debug` impl)
pub fn default_debug_fmt(this: Self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
$zalsa::with_attached_database(|db| {
- let fields = $Configuration::ingredient(db).fields(this);
+ let fields = $Configuration::ingredient(db).fields(db.as_dyn_database(), this);
let mut f = f.debug_struct(stringify!($Struct));
$(
let f = f.field(stringify!($field_id), &fields.$field_index);
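After this change an interned handle is just a `salsa::Id` plus a lifetime marker, rather than a pointer into the ingredient. A minimal hand-written sketch of that pattern (the `MyInterned` type and its methods are illustrative names, not the actual macro expansion):

use std::marker::PhantomData;

#[derive(Copy, Clone, PartialEq, Eq, Hash)]
struct MyInterned<'db>(salsa::Id, PhantomData<&'db ()>);

impl<'db> MyInterned<'db> {
    // Mirrors `struct_from_id`: wrap the id and tie the handle to the `'db` lifetime.
    fn from_id(id: salsa::Id) -> Self {
        MyInterned(id, PhantomData)
    }

    // Mirrors `deref_struct` / `as_id`: recover the id without touching the database.
    fn id(self) -> salsa::Id {
        self.0
    }
}

Because the handle no longer stores a pointer, `LookupId::lookup_id` above can rebuild it from the id alone instead of asking the ingredient for an interned value.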
14 changes: 7 additions & 7 deletions components/salsa-macro-rules/src/setup_tracked_fn.rs
@@ -96,7 +96,7 @@ macro_rules! setup_tracked_fn {
if $needs_interner {
#[derive(Copy, Clone)]
struct $InternedData<$db_lt>(
- std::ptr::NonNull<$zalsa::interned::Value<$Configuration>>,
+ salsa::Id,
std::marker::PhantomData<&$db_lt $zalsa::interned::Value<$Configuration>>,
);

@@ ... @@

type Struct<$db_lt> = $InternedData<$db_lt>;

- unsafe fn struct_from_raw<$db_lt>(
- ptr: std::ptr::NonNull<$zalsa::interned::Value<Self>>,
+ fn struct_from_id<$db_lt>(
+ id: salsa::Id,
) -> Self::Struct<$db_lt> {
- $InternedData(ptr, std::marker::PhantomData)
+ $InternedData(id, std::marker::PhantomData)
}

- fn deref_struct(s: Self::Struct<'_>) -> &$zalsa::interned::Value<Self> {
- unsafe { s.0.as_ref() }
+ fn deref_struct(s: Self::Struct<'_>) -> salsa::Id {
+ s.0
}
}
} else {
@@ -191,7 +191,7 @@
fn id_to_input<$db_lt>(db: &$db_lt Self::DbView, key: salsa::Id) -> Self::Input<$db_lt> {
$zalsa::macro_if! {
if $needs_interner {
- $Configuration::intern_ingredient(db).data(key).clone()
+ $Configuration::intern_ingredient(db).data(db, key).clone()
} else {
$zalsa::LookupId::lookup_id(key, db.as_dyn_database())
}
77 changes: 20 additions & 57 deletions src/interned.rs
@@ -1,10 +1,7 @@
- use crossbeam::atomic::AtomicCell;
use std::fmt;
use std::hash::Hash;
use std::marker::PhantomData;
- use std::ptr::NonNull;

- use crate::alloc::Alloc;
use crate::durability::Durability;
use crate::id::AsId;
use crate::ingredient::fmt_index;
@@ -27,24 +24,16 @@ pub trait Configuration: Sized + 'static {
/// The end user struct
type Struct<'db>: Copy;

- /// Create an end-user struct from the underlying raw pointer.
+ /// Create an end-user struct from the salsa id
///
/// This call is an "end-step" to the tracked struct lookup/creation
/// process in a given revision: it occurs only when the struct is newly
/// created or, if a struct is being reused, after we have updated its
/// fields (or confirmed it is green and no updates are required).
- ///
- /// # Safety
- ///
- /// Requires that `ptr` represents a "confirmed" value in this revision,
- /// which means that it will remain valid and immutable for the remainder of this
- /// revision, represented by the lifetime `'db`.
- unsafe fn struct_from_raw<'db>(ptr: NonNull<Value<Self>>) -> Self::Struct<'db>;
-
- /// Deref the struct to yield the underlying value struct.
- /// Since we are still part of the `'db` lifetime in which the struct was created,
- /// this deref is safe, and the value-struct fields are immutable and verified.
- fn deref_struct(s: Self::Struct<'_>) -> &Value<Self>;
+ fn struct_from_id<'db>(id: Id) -> Self::Struct<'db>;
+
+ /// Deref the struct to yield the underlying id.
+ fn deref_struct(s: Self::Struct<'_>) -> Id;
}

pub trait InternedData: Sized + Eq + Hash + Clone {}
@@ -66,14 +55,6 @@ pub struct IngredientImpl<C: Configuration> {
/// Deadlock requirement: We access `value_map` while holding lock on `key_map`, but not vice versa.
key_map: FxDashMap<C::Data<'static>, Id>,

- /// Maps from an interned id to its data.
- ///
- /// Deadlock requirement: We access `value_map` while holding lock on `key_map`, but not vice versa.
- value_map: FxDashMap<Id, Alloc<Value<C>>>,
-
- /// counter for the next id.
- counter: AtomicCell<u32>,
-
/// Stores the revision when this interned ingredient was last cleared.
/// You can clear an interned table at any point, deleting all its entries,
/// but that will make anything dependent on those entries dirty and in need
@@ -112,8 +93,6 @@ where
Self {
ingredient_index,
key_map: Default::default(),
- value_map: Default::default(),
- counter: AtomicCell::default(),
reset_at: Revision::start(),
}
}
@@ -122,6 +101,10 @@
unsafe { std::mem::transmute(data) }
}

+ unsafe fn from_internal_data<'db>(&'db self, data: &C::Data<'static>) -> &C::Data<'db> {
+ unsafe { std::mem::transmute(data) }
+ }
+
pub fn intern_id<'db>(
&'db self,
db: &'db dyn crate::Database,
@@ -149,66 +132,46 @@
if let Some(guard) = self.key_map.get(&internal_data) {
let id = *guard;
drop(guard);
- return self.interned_value(id);
+ return C::struct_from_id(id);
}

match self.key_map.entry(internal_data.clone()) {
// Data has been interned by a racing call, use that ID instead
dashmap::mapref::entry::Entry::Occupied(entry) => {
let id = *entry.get();
drop(entry);
- self.interned_value(id)
+ C::struct_from_id(id)
}

// We won any races so should intern the data
dashmap::mapref::entry::Entry::Vacant(entry) => {
- let next_id = self.counter.fetch_add(1);
- let next_id = crate::id::Id::from_u32(next_id);
- let value = self.value_map.entry(next_id).or_insert(Alloc::new(Value {
- id: next_id,
- fields: internal_data,
- }));
- let value_raw = value.as_raw();
- drop(value);
+ let zalsa = db.zalsa();
+ let table = zalsa.table();
+ let next_id = zalsa_local.allocate(table, self.ingredient_index, internal_data);
entry.insert(next_id);
- // SAFETY: Items are only removed from the `value_map` with an `&mut self` reference.
- unsafe { C::struct_from_raw(value_raw) }
+ C::struct_from_id(next_id)
}
}
}

- pub fn interned_value(&self, id: Id) -> C::Struct<'_> {
- let r = self.value_map.get(&id).unwrap();
-
- // SAFETY: Items are only removed from the `value_map` with an `&mut self` reference.
- unsafe { C::struct_from_raw(r.as_raw()) }
- }
-
/// Lookup the data for an interned value based on its id.
/// Rarely used since end-users generally carry a struct with a pointer directly
/// to the interned item.
- pub fn data(&self, id: Id) -> &C::Data<'_> {
- C::deref_struct(self.interned_value(id)).data()
+ pub fn data<'db>(&'db self, db: &'db dyn Database, id: Id) -> &'db C::Data<'db> {
+ let internal_data = db.zalsa().table().get::<C::Data<'static>>(id);
+ unsafe { self.from_internal_data(internal_data) }
}

/// Lookup the fields from an interned struct.
/// Note that this is not "leaking" since no dependency edge is required.
- pub fn fields<'db>(&'db self, s: C::Struct<'db>) -> &'db C::Data<'db> {
- C::deref_struct(s).data()
- }
-
- /// Variant of `data` that takes a (unnecessary) database argument.
- /// This exists because tracked functions sometimes use true interning and sometimes use
- /// [`IdentityInterner`][], which requires the database argument.
- pub fn data_with_db<'db, DB: ?Sized>(&'db self, id: Id, _db: &'db DB) -> &'db C::Data<'db> {
- self.data(id)
+ pub fn fields<'db>(&'db self, db: &'db dyn Database, s: C::Struct<'db>) -> &'db C::Data<'db> {
+ self.data(db, C::deref_struct(s))
}

pub fn reset(&mut self, revision: Revision) {
assert!(revision > self.reset_at);
self.reset_at = revision;
self.key_map.clear();
- self.value_map.clear();
}
}

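The interning path keeps its previous shape: a fast-path read of `key_map`, then a dashmap `entry` call to settle races, with only the slot allocation moving from the per-ingredient `value_map` and `counter` to the database-wide table. A standalone sketch of that double-check pattern, using a plain `DashMap<String, u32>` and an atomic counter in place of the salsa ingredient (all names here are illustrative):

use std::sync::atomic::{AtomicU32, Ordering};

use dashmap::DashMap;

// Return the existing id for `data`, or allocate a fresh one.
fn intern(map: &DashMap<String, u32>, next_id: &AtomicU32, data: String) -> u32 {
    // Fast path: the value may already be interned.
    if let Some(id) = map.get(&data) {
        return *id;
    }
    match map.entry(data) {
        // A racing thread interned the value between our read and the `entry` call.
        dashmap::mapref::entry::Entry::Occupied(entry) => *entry.get(),
        // We won the race: allocate an id and record the mapping.
        dashmap::mapref::entry::Entry::Vacant(entry) => {
            let id = next_id.fetch_add(1, Ordering::Relaxed);
            entry.insert(id);
            id
        }
    }
}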
1 change: 1 addition & 0 deletions src/lib.rs
@@ -22,6 +22,7 @@ mod revision;
mod runtime;
mod salsa_struct;
mod storage;
+ mod table;
mod tracked_struct;
mod update;
mod views;
133 changes: 133 additions & 0 deletions src/table.rs
@@ -0,0 +1,133 @@
use std::{any::Any, mem::MaybeUninit};

use append_only_vec::AppendOnlyVec;
use crossbeam::atomic::AtomicCell;
use parking_lot::Mutex;

use crate::{Id, IngredientIndex};

const PAGE_LEN_BITS: usize = 10;
const PAGE_LEN_MASK: usize = PAGE_LEN - 1;
const PAGE_LEN: usize = 1 << PAGE_LEN_BITS;

pub struct Table {
pages: AppendOnlyVec<Box<dyn Any + Send + Sync>>,
}

pub struct Page<T: Any + Send + Sync> {
/// The ingredient for elements on this page.
#[allow(dead_code)] // pretty sure we'll need this eventually
ingredient: IngredientIndex,

/// Number of elements of `data` that are initialized.
allocated: AtomicCell<usize>,

/// The "allocation lock" is held when we allocate a new entry.
///
/// It ensures that we can load the index, initialize it, and then update the length atomically
/// with respect to other allocations.
///
/// We could avoid it if we wanted, we'd just have to be a bit fancier in our reasoning
/// (for example, the bounds check in `Page::get` no longer suffices to truly guarantee
/// that the data is initialized).
allocation_lock: Mutex<()>,

/// Vector with data. This is always created with the capacity/length of `PAGE_LEN`
/// and uninitialized data. As we initialize new entries, we increment `allocated`.
data: Vec<MaybeUninit<T>>,
}

#[derive(Copy, Clone)]
pub struct PageIndex(usize);

#[derive(Copy, Clone)]
pub struct SlotIndex(usize);

impl Default for Table {
fn default() -> Self {
Self {
pages: AppendOnlyVec::new(),
}
}
}

impl Table {
pub fn get<T: Any + Send + Sync>(&self, id: Id) -> &T {
let (page, slot) = split_id(id);
let page_ref = self.page::<T>(page);
page_ref.get(slot)
}

pub fn page<T: Any + Send + Sync>(&self, page: PageIndex) -> &Page<T> {
self.pages[page.0].downcast_ref::<Page<T>>().unwrap()
}

pub fn push_page<T: Any + Send + Sync>(&self, ingredient: IngredientIndex) -> PageIndex {
let page = Box::new(<Page<T>>::new(ingredient));
PageIndex(self.pages.push(page))
}
}

impl<T: Any + Send + Sync> Page<T> {
fn new(ingredient: IngredientIndex) -> Self {
let mut data = Vec::with_capacity(PAGE_LEN);
unsafe {
data.set_len(PAGE_LEN);
}
Self {
ingredient,
allocated: Default::default(),
allocation_lock: Default::default(),
data,
}
}

pub(crate) fn get(&self, slot: SlotIndex) -> &T {
let len = self.allocated.load();
assert!(slot.0 < len);
unsafe { self.data[slot.0].assume_init_ref() }
}

pub(crate) fn allocate(&self, page: PageIndex, value: T) -> Result<Id, T> {
let guard = self.allocation_lock.lock();
let index = self.allocated.load();
if index == PAGE_LEN {
return Err(value);
}

let data = &self.data[index];
let data = data.as_ptr() as *mut T;
unsafe { std::ptr::write(data, value) };

self.allocated.store(index + 1);
drop(guard);

Ok(make_id(page, SlotIndex(index)))
}
}

impl<T: Any + Send + Sync> Drop for Page<T> {
fn drop(&mut self) {
let mut data = std::mem::replace(&mut self.data, vec![]);
let len = self.allocated.load();
unsafe {
data.set_len(len);
drop(std::mem::transmute::<Vec<MaybeUninit<T>>, Vec<T>>(data));
}
}
}

fn make_id(page: PageIndex, slot: SlotIndex) -> Id {
assert!(slot.0 < PAGE_LEN);
assert!(page.0 < (1 << (32 - PAGE_LEN_BITS)));
let page = page.0 as u32;
let slot = slot.0 as u32;
Id::from_u32(page << PAGE_LEN_BITS | slot)
}

fn split_id(id: Id) -> (PageIndex, SlotIndex) {
let id = id.as_u32() as usize;
let slot = id & PAGE_LEN_MASK;
let page = id >> PAGE_LEN_BITS;
(PageIndex(page), SlotIndex(slot))
}
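For reference, the id layout produced by `make_id` and consumed by `split_id`: the low PAGE_LEN_BITS (10) bits select the slot within a page and the remaining high bits select the page, so each page holds exactly 1,024 entries and a 32-bit `Id` can address roughly 4 million pages (the bound asserted in `make_id`). A small standalone illustration of the arithmetic, not part of this commit:

const PAGE_LEN_BITS: usize = 10;
const PAGE_LEN: usize = 1 << PAGE_LEN_BITS; // 1024 slots per page
const PAGE_LEN_MASK: usize = PAGE_LEN - 1;  // low 10 bits

fn main() {
    let (page, slot) = (3usize, 5usize);
    assert!(slot < PAGE_LEN);                // same bound `make_id` asserts
    let id = page << PAGE_LEN_BITS | slot;   // 3 * 1024 + 5 = 3077
    assert_eq!(id, 3077);
    assert_eq!(id >> PAGE_LEN_BITS, page);   // high bits give back the page
    assert_eq!(id & PAGE_LEN_MASK, slot);    // low bits give back the slot
}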
(2 additional changed files not shown)
