From 5dc91d691b801a816f594c76753da5308c2f3ffc Mon Sep 17 00:00:00 2001 From: missing Date: Wed, 11 May 2022 13:26:53 -0500 Subject: [PATCH] Initial commit --- .gitignore | 15 ++ Cargo.toml | 8 + rust-toolchain | 1 + src/lib.rs | 621 +++++++++++++++++++++++++++++++++++++++++++++++++ src/prelude.rs | 1 + src/test.rs | 123 ++++++++++ 6 files changed, 769 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 rust-toolchain create mode 100644 src/lib.rs create mode 100644 src/prelude.rs create mode 100644 src/test.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..61ae5aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +# ---> Rust +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..c6c6ec1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "dyn_vec" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/rust-toolchain b/rust-toolchain new file mode 100644 index 0000000..632a270 --- /dev/null +++ b/rust-toolchain @@ -0,0 +1 @@ +nightly-2022-01-21-x86_64-apple-darwin \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..5b688da --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,621 @@ +#![feature(ptr_metadata)] +#![feature(layout_for_ptr)] +#![feature(coerce_unsized)] + +#[cfg(test)] +mod test; + +pub mod prelude; + +use core::panic; +use std::{ptr::{NonNull, Pointee, self, drop_in_place, metadata}, 
marker::PhantomData, alloc::{alloc, Layout, dealloc}, mem::{size_of, size_of_val, align_of_val, self, size_of_val_raw}, slice, fmt::Debug, ops::{CoerceUnsized, Index, IndexMut}}; + +/// Alias for metadata of a pointer to `T`. +pub type Meta<T: ?Sized> = <T as Pointee>::Metadata; + +/// Copy `size` bytes of memory from `src` to `dst`. +/// +/// # Safety +/// +/// `src` must be valid for reads, `dst` must be valid for writes, etc, you get the idea. +// TODO: inline me! i didnt realize it was available as `copy_from` until the code was mostly complete. +unsafe fn memcpy(src: *const u8, dst: *mut u8, size: usize) { + dst.copy_from(src, size); +} + +fn align_up<T: ?Sized>(ptr: *const T, align: usize) -> *const T { + let (mut data, meta) = ptr.to_raw_parts(); + data = ((data as usize + align - 1) & !(align - 1)) as _; + ptr::from_raw_parts(data, meta) +} + +fn align_up_mut<T: ?Sized>(ptr: *mut T, align: usize) -> *mut T { + align_up(ptr as _, align) as _ +} + +/// A heap allocated, dynamically sized collection of `?Sized` elements. +/// +/// See [`::alloc::vec::Vec`] (the standard library `Vec` type) for more information. 
+pub struct Vec { + ptr: NonNull, + len: usize, + capacity: usize, + end_ptr: NonNull, + _phantom: PhantomData +} + +impl std::fmt::Debug for Vec { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +// Vec == Vec +impl, U: ?Sized> PartialEq> for Vec { + fn eq(&self, other: &Vec) -> bool { + if self.len != other.len { return false } + for (el, el2) in self.iter().zip(other.iter()) { + if el != el2 { return false } + } + true + } +} + +impl Eq for Vec {} + +// Vec == &[U] +impl, U> PartialEq<&[U]> for Vec { + fn eq(&self, other: &&[U]) -> bool { + if self.len != other.len() { return false } + for (el, el2) in self.iter().zip(other.iter()) { + if el != el2 { return false } + } + true + } +} + +// &[U] == Vec +impl, U> PartialEq> for &[U] { + fn eq(&self, other: &Vec) -> bool { + other == self + } +} + +// Vec == [U; N] +impl, U, const N: usize> PartialEq<[U; N]> for Vec { + fn eq(&self, other: &[U; N]) -> bool { + *self == &other[..] + } +} + +// [U; N] == Vec +impl, U, const N: usize> PartialEq> for [U; N] { + fn eq(&self, other: &Vec) -> bool { + other == self + } +} + +impl Vec { + /// Creates a new, empty `Vec`. + pub fn new() -> Self { + let ptr = NonNull::dangling(); + Self { + ptr, + len: 0, + capacity: 0, + end_ptr: ptr, + _phantom: PhantomData + } + } + + /// Appends an element to the end of the `Vec`. + pub fn push(&mut self, v: T) where T: Sized { + unsafe { self.push_raw(&v) } + mem::forget(v); + } + + /// Appends an (possibly unsized) boxed element to the end of the `Vec`. + pub fn push_box(&mut self, v: Box) { + let ptr = Box::into_raw(v); + let layout = unsafe { Layout::for_value_raw(ptr) }; + unsafe { + self.push_raw(ptr); + dealloc(ptr.cast(), layout); + } + } + + /// Appends a sized element of type `U` to the end of the `Vec`, given that it can be coerced to an unsized `T`. 
+ pub fn push_unsize<U>(&mut self, v: U) where for<'a> &'a U: CoerceUnsized<&'a T> { + let v_unsized: &T = &v; + unsafe { self.push_raw(v_unsized) }; + mem::forget(v); + } + + unsafe fn push_raw(&mut self, v: *const T) { + let size = size_of_val(&*v); + + if !self.will_fit(&*v) { + // oh no! allocation too small! + + // make sure we have enough space for a new element, but also space for future elements + // this bit is tricky, we must make sure we have enough space for padding too, so its probably UB somehow + // FIXME: ^^^ + let new_alloc_size = self.capacity * 2 + size * 2 + size_of::<*const T>(); + self.realloc(new_alloc_size); + } + + self.push_raw_unchecked(v); + } + + /// Given an element, returns a pointer to where it would be written if it was pushed, assuming no reallocation is needed. + /// + /// The pointer will be aligned, but writing to it may overwrite data belonging to the Vec. + /// To check for this, call `will_fit`. + pub fn get_next_elem_ptr(&self, v: &T) -> *mut u8 { + align_up_mut(self.end_ptr.as_ptr(), align_of_val(v)) + } + + /// Checks if a given element will fit in the `Vec` without reallocations. 
+ pub fn will_fit(&self, v: &T) -> bool { + let remaining_space = self.get_ptr_to_ptr(self.len) as usize - self.end_ptr.as_ptr() as usize; + let needed_space = size_of_val(v) + size_of::<*const T>(); + remaining_space >= needed_space + } + + unsafe fn push_raw_unchecked(&mut self, v: *const T) { + let size = size_of_val(&*v); + let dest = self.get_next_elem_ptr(&*v); // this is mentioned by the `// SAFETY:` in `as_slice_flatten` + + memcpy(v.cast(), dest, size); + + let new_ptr = ptr::from_raw_parts::(dest.cast(), metadata(v)); + self.get_ptr_to_ptr(self.len + 1).write(new_ptr); + + self.end_ptr = NonNull::new_unchecked(dest.wrapping_add(size)); + self.len += 1; + } + + unsafe fn realloc(&mut self, size: usize) { + let layout = Layout::from_size_align_unchecked(size, 8).pad_to_align(); + if self.capacity == 0 { + // will panic if OOM + self.ptr = NonNull::new(alloc(layout)).unwrap(); + + self.end_ptr = self.ptr; + } else { + // cannot use realloc here + + let new_alloc = NonNull::new(alloc(layout)).unwrap(); + + // data + let mut ptr = new_alloc.as_ptr(); + for i in 0..self.len { + let v = self.get_unchecked(i); + + let size = size_of_val(v); + ptr = align_up_mut(ptr, align_of_val(v)); + memcpy(v as *const _ as _, ptr, size); + let meta = self.get_ptr(i).to_raw_parts().1; + self.get_ptr_to_ptr(i + 1).write(ptr::from_raw_parts(ptr.cast(), meta)); + ptr = ptr.wrapping_add(size); + } + self.end_ptr = NonNull::new_unchecked(ptr); + + // metadata + let meta_src = self.get_ptr_to_ptr(self.len); + let meta_dst = { + let current_alloc_end = self.ptr.as_ptr().wrapping_add(self.capacity); + let new_alloc_end = new_alloc.as_ptr().wrapping_add(layout.size()); + let meta_len = current_alloc_end as usize - meta_src as usize; + new_alloc_end.wrapping_sub(meta_len) + }; + let meta_size = self.len * size_of::<*const T>(); + memcpy(meta_src.cast(), meta_dst, meta_size); + + dealloc(self.ptr.as_ptr(), Layout::from_size_align_unchecked(self.capacity, 8)); + + self.ptr = new_alloc; + } 
+ + self.capacity = layout.size(); + } + + /// for internal use + /// + /// NOTE: 1-indexed, to allow getting a pointer to the end of the alloc easily + fn get_ptr_to_ptr(&self, index: usize) -> *mut *const T { + self.ptr.as_ptr() + .wrapping_add(self.capacity) + .cast::<*const T>() + .wrapping_sub(index) + } + + /// for internal use + unsafe fn get_ptr(&self, index: usize) -> *const T { + *self.get_ptr_to_ptr(index + 1) + } + + pub fn get(&self, index: usize) -> Option<&T> { + if index < self.len { + Some(unsafe { self.get_unchecked(index) }) + } else { + None + } + } + + pub unsafe fn get_unchecked(&self, index: usize) -> &T { + &*self.get_ptr(index) + } + + pub fn get_mut(&mut self, index: usize) -> Option<&mut T> { + if index < self.len { + Some(unsafe { self.get_unchecked_mut(index) }) + } else { + None + } + } + + pub unsafe fn get_unchecked_mut(&mut self, index: usize) -> &mut T { + &mut *(self.get_ptr(index) as *mut _) + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn capacity(&self) -> usize { + self.capacity + } + + pub fn as_ptr(&self) -> *const u8 { + self.ptr.as_ptr() + } + + pub fn as_mut_ptr(&mut self) -> *mut u8 { + self.ptr.as_ptr() + } + + pub fn iter(&self) -> Iter { + Iter::new(self) + } + + pub fn iter_mut(&mut self) -> IterMut { + IterMut::new(self) + } + + pub fn unsize(self) -> Vec where for<'a> &'a T: CoerceUnsized<&'a U> { + let new_vec = Vec:: { + ptr: self.ptr, + len: self.len, + capacity: self.capacity, + end_ptr: self.end_ptr, + _phantom: PhantomData, + }; + + println!("sizeof(*const U) = {}, sizeof(*const T) = {}", size_of::<*const U>(), size_of::<*const T>()); + + if size_of::<*const U>() > size_of::<*const T>() { + // new meta larger than old meta, must go from back to front + + // 1 indexed moment + for i in (1..=self.len).rev() { + let current = unsafe { &*self.get_ptr_to_ptr(i).read() }; + unsafe { new_vec.get_ptr_to_ptr(i).write(current as &U) } + } + } else { + // net meta smaller or same size as old meta, must go 
from front to back + + // 1 indexed moment + for i in 1..=self.len { + let current = unsafe { &*self.get_ptr_to_ptr(i).read() }; + unsafe { new_vec.get_ptr_to_ptr(i).write(current as &U) } + } + } + + mem::forget(self); + new_vec + } +} + +impl<T> Vec<[T]> { + pub fn as_slice_flatten(&self) -> &[T] { + assert!(self.len > 0); + + // SAFETY: the slices should be contiguous by the logic of `push_raw_unchecked` + unsafe { + slice::from_raw_parts(self.get_ptr(0).to_raw_parts().0 as _, { + let start = self.get_ptr(0).to_raw_parts().0 as usize; + let end = self.end_ptr.as_ptr() as usize; + (end - start) / size_of::<T>() // integer division! + }) + } + } + + pub fn as_mut_slice_flatten(&mut self) -> &mut [T] { + assert!(self.len > 0); + + // SAFETY: the slices should be contiguous by the logic of `push_raw_unchecked` + unsafe { + slice::from_raw_parts_mut(self.get_ptr(0).to_raw_parts().0 as _, { + let start = self.get_ptr(0).to_raw_parts().0 as usize; + let end = self.end_ptr.as_ptr() as usize; + (end - start) / size_of::<T>() // integer division! 
+ }) + } + } +} + +impl Drop for Vec { + fn drop(&mut self) { + unsafe { + for i in 0..self.len { + drop_in_place(self.get_unchecked_mut(i)); + } + + dealloc(self.ptr.as_ptr(), Layout::from_size_align_unchecked(self.capacity, 8)); + } + } +} + + +// Iteration +struct BaseIter { + ptr: *const *mut T, + ptr_end: *const *mut T +} + +impl BaseIter { + fn new(vec: &Vec) -> Self { + Self { ptr: vec.get_ptr_to_ptr(vec.len).cast(), ptr_end: vec.get_ptr_to_ptr(0).cast() } + } +} + +impl Iterator for BaseIter { + type Item = *mut T; + + fn next(&mut self) -> Option { + if self.ptr == self.ptr_end { + return None + } + + self.ptr_end = self.ptr_end.wrapping_sub(1); + Some(unsafe { self.ptr_end.read() }) + } +} + +impl DoubleEndedIterator for BaseIter { + fn next_back(&mut self) -> Option { + if self.ptr == self.ptr_end { + return None + } + + let el = unsafe { self.ptr_end.read() }; + self.ptr = self.ptr.wrapping_add(1); + Some(el) + } +} + + +// By-ref iteration +impl<'a, T: ?Sized> IntoIterator for &'a Vec { + type Item = &'a T; + + type IntoIter = Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +pub struct Iter<'a, T: ?Sized> { + base: BaseIter, + _phantom: PhantomData<&'a T> +} + +impl<'a, T: ?Sized> Iter<'a, T> { + pub fn new(vec: &'a Vec) -> Self { + Self { base: BaseIter::new(vec), _phantom: PhantomData } + } +} + +impl<'a, T: ?Sized> Iterator for Iter<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + unsafe { self.base.next().map(|v| &*v) } + } +} + +impl<'a, T: ?Sized> DoubleEndedIterator for Iter<'a, T> { + fn next_back(&mut self) -> Option { + unsafe { self.base.next_back().map(|v| &*v) } + } +} + + +// By-mut iteration +impl<'a, T: ?Sized> IntoIterator for &'a mut Vec { + type Item = &'a mut T; + + type IntoIter = IterMut<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +pub struct IterMut<'a, T: ?Sized> { + base: BaseIter, + _phantom: PhantomData<&'a mut T> +} + +impl<'a, T: ?Sized> 
IterMut<'a, T> { + pub fn new(vec: &'a mut Vec) -> Self { + Self { base: BaseIter::new(vec), _phantom: PhantomData } + } +} + +impl<'a, T: ?Sized> Iterator for IterMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + unsafe { self.base.next().map(|v| &mut *v) } + } +} + +impl<'a, T: ?Sized> DoubleEndedIterator for IterMut<'a, T> { + fn next_back(&mut self) -> Option { + unsafe { self.base.next_back().map(|v| &mut *v) } + } +} + + +// By-value iteration +impl IntoIterator for Vec { + type Item = Box; + + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter::new(self) + } +} + +pub struct IntoIter { + ptr: NonNull, + capacity: usize, + base: BaseIter +} + +impl IntoIter { + pub fn new(vec: Vec) -> Self { + let this = Self { + ptr: vec.ptr, + capacity: vec.capacity, + base: BaseIter::new(&vec) + }; + mem::forget(vec); + this + } +} + +impl Iterator for IntoIter { + type Item = Box; + + fn next(&mut self) -> Option { + let ptr = self.base.next()?; + unsafe { + let alloc = alloc(Layout::for_value_raw(ptr)); + memcpy(ptr.cast(), alloc, size_of_val_raw(ptr)); + Some(Box::from_raw(ptr::from_raw_parts_mut(alloc.cast(), metadata(ptr)))) + } + } +} + +impl DoubleEndedIterator for IntoIter { + fn next_back(&mut self) -> Option { + let ptr = self.base.next_back()?; + unsafe { + let alloc = alloc(Layout::for_value_raw(ptr)); + memcpy(ptr.cast(), alloc, size_of_val_raw(ptr)); + Some(Box::from_raw(ptr::from_raw_parts_mut(alloc.cast(), metadata(ptr)))) + } + } +} + +impl Drop for IntoIter { + fn drop(&mut self) { + unsafe { dealloc(self.ptr.as_ptr(), Layout::from_size_align_unchecked(self.capacity, 8)) } + } +} + + +// // this implementation will collect *while unsizing*, and would conflict with the other +// impl FromIterator for Vec where for<'a> &'a U: CoerceUnsized<&'a T> { +// fn from_iter>(iter: I) -> Self { +// let mut vec = Vec::new(); + +// for item in iter.into_iter() { +// vec.push_unsize(item); +// } + +// vec +// } +// } 
+ +impl FromIterator for Vec { + fn from_iter>(iter: I) -> Self { + let mut vec = Vec::new(); + + for item in iter.into_iter() { + vec.push(item); + } + + vec + } +} + + +impl Index for Vec { + type Output = T; + + #[track_caller] + fn index(&self, index: usize) -> &Self::Output { + match self.get(index) { + Some(v) => v, + None => panic!("index out of bounds: the len is {} but the index is {}", self.len, index), + } + } +} + +impl IndexMut for Vec { + #[track_caller] + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + let len = self.len; + match self.get_mut(index) { + Some(v) => v, + None => panic!("index out of bounds: the len is {} but the index is {}", len, index), + } + } +} + + +/// Creates a [`Vec`]. +/// +/// # Examples +/// +/// ``` +/// # use dyn_vec::prelude::{vec, Vec}; +/// # use std::fmt::Debug; +/// let vec1: Vec = vec![1, 2, 3].unsize(); +/// let vec2: Vec = vec![box: +/// Box::new(1) as _, +/// Box::new(String::from("foo")) as _, +/// Box::new(true) as _ +/// ]; +/// let vec3: Vec = vec![unsized: 1, String::from("foo"), true]; +/// ``` +#[macro_export] +macro_rules! vec { + () => { + $crate::Vec::new(); + }; + (box: $($elem:expr),+ $(,)?) => {{ + let mut vec = $crate::Vec::new(); + $(vec.push_box($elem);)+ + vec + }}; + (unsized: $($elem:expr),+ $(,)?) => {{ + let mut vec = $crate::Vec::new(); + $(vec.push_unsize($elem);)+ + vec + }}; + ($elem:expr; $n:expr) => { + unimplemented!("vec![T; N] is currently not supported"); + }; + ($($elem:expr),+ $(,)?) 
=> {{ + let mut vec = $crate::Vec::new(); + $(vec.push($elem);)+ + vec + }}; +} \ No newline at end of file diff --git a/src/prelude.rs b/src/prelude.rs new file mode 100644 index 0000000..d6eaf78 --- /dev/null +++ b/src/prelude.rs @@ -0,0 +1 @@ +pub use super::{Vec, vec}; \ No newline at end of file diff --git a/src/test.rs b/src/test.rs new file mode 100644 index 0000000..a7eb99f --- /dev/null +++ b/src/test.rs @@ -0,0 +1,123 @@ +use super::prelude::{Vec, vec}; +use std::{fmt::Debug, sync::atomic::{AtomicBool, Ordering}}; + +trait DebugExt: Debug { + fn debug(&self) -> String { + format!("{:?}", self) + } +} + +impl DebugExt for T {} + +#[test] +fn basic_push() { + let mut vec: Vec = Vec::new(); + vec.push(3); + vec.push(5); + vec.push(7); + assert_eq!(vec, [3, 5, 7]); +} + +#[test] +fn box_push() { + let mut vec: Vec = Vec::new(); + vec.push_box(Box::new(1)); + vec.push_box(Box::new(String::from("foo"))); + vec.push_box(Box::new(true)); + assert_eq!(vec.debug(), "[1, \"foo\", true]"); +} + +#[test] +fn unsize_push() { + let mut vec: Vec = Vec::new(); + vec.push_unsize(1); + vec.push_unsize(String::from("foo")); + vec.push_unsize(true); + assert_eq!(vec.debug(), "[1, \"foo\", true]"); +} + +#[test] +fn all_macro() { + let vec: Vec = vec![3, 5, 7]; + assert_eq!(vec, [3, 5, 7]); + + let vec2: Vec = vec![box: + Box::new(1) as _, + Box::new(String::from("foo")) as _, + Box::new(true) as _, + ]; + let vec3: Vec = vec![unsized: 1, String::from("foo"), true]; + // assert_eq!(vec2, vec3); // doesnt compile, but would theoretically work + assert_eq!(vec2.debug(), vec3.debug()); +} + +#[test] +fn dropped() { + static DROPPED: AtomicBool = AtomicBool::new(false); + + #[derive(Debug)] // for dyn Debug + struct FunkyDrop; + impl Drop for FunkyDrop { + fn drop(&mut self) { + DROPPED.store(true, Ordering::SeqCst); + } + } + + let vec: Vec = vec![unsized: 1, FunkyDrop, true]; + + assert_eq!(DROPPED.load(Ordering::SeqCst), false); + + drop(vec); + + 
assert_eq!(DROPPED.load(Ordering::SeqCst), true); +} + +#[test] +fn get() { + let vec: Vec = vec![3, 5, 7]; + assert_eq!(vec.get(0).copied(), Some(3)); + assert_eq!(vec.get(1).copied(), Some(5)); + assert_eq!(vec.get(2).copied(), Some(7)); + assert_eq!(vec.get(3).copied(), None); +} + +#[test] +#[should_panic = "index out of bounds: the len is 3 but the index is 3"] +fn index() { + let vec: Vec = vec![3, 5, 7]; + assert_eq!(vec[0], 3); + assert_eq!(vec[1], 5); + assert_eq!(vec[2], 7); + vec[3]; +} + +#[test] +fn slice_flatten() { + let mut vec: Vec<[i32]> = vec![unsized: [1, 2, 3], [4, 5], [6, 7, 8, 9]]; + assert_eq!(vec.as_slice_flatten(), [1, 2, 3, 4, 5, 6, 7, 8, 9]); + vec.as_mut_slice_flatten()[4] = 10; + assert_eq!(vec[1], [4, 10]); +} + +#[test] +fn iteration() { + let mut vec: Vec = vec![unsized: 1, String::from("foo"), true]; + + let mut iter = vec.iter(); + assert_eq!(iter.next().unwrap().debug(), "1"); + assert_eq!(iter.next().unwrap().debug(), "\"foo\""); + assert_eq!(iter.next().unwrap().debug(), "true"); + assert_eq!(iter.next().map(|_|()), None); + + let mut iter = vec.iter_mut(); // TODO: create a trait to properly test this + assert_eq!(iter.next().unwrap().debug(), "1"); + assert_eq!(iter.next().unwrap().debug(), "\"foo\""); + assert_eq!(iter.next().unwrap().debug(), "true"); + assert_eq!(iter.next().map(|_|()), None); + + let mut debugs = Vec::new(); // using custom vec instead of std vec >:) + for item in vec { + debugs.push(item.debug()); + } + assert_eq!(debugs, ["1", "\"foo\"", "true"]); +} \ No newline at end of file