commit e99bd05378bbc6790016eee8389eff2ed50ee0d5
Author: missing
Date:   Tue Dec 20 22:55:43 2022 -0600

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..9a30d08
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "stringish"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..9b95767
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "stringish"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/src/impls.rs b/src/impls.rs
new file mode 100644
index 0000000..4b04d94
--- /dev/null
+++ b/src/impls.rs
@@ -0,0 +1,106 @@
+use std::{
+    borrow::Cow,
+    fmt::{Debug, Display},
+    hash::Hash,
+};
+
+use crate::Stringish;
+
+impl Debug for Stringish {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if f.alternate() {
+            if self.cap == 0 {
+                f.write_str("Borrowed(")?;
+            } else {
+                f.write_str("Owned(")?;
+            }
+        }
+
+        Debug::fmt(&self[..], f)?;
+
+        if f.alternate() {
+            f.write_str(")")?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Display for Stringish {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        Display::fmt(&self[..], f)
+    }
+}
+
+impl PartialEq for Stringish {
+    fn eq(&self, other: &Self) -> bool {
+        self[..] == other[..]
+    }
+}
+
+impl Eq for Stringish {}
+
+impl PartialOrd for Stringish {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        self[..].partial_cmp(&other[..])
+    }
+}
+
+impl Ord for Stringish {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self[..].cmp(&other[..])
+    }
+}
+
+impl Hash for Stringish {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self[..].hash(state);
+    }
+}
+
+impl Clone for Stringish {
+    fn clone(&self) -> Self {
+        // Branch on `cap` rather than `is_borrowed()`: an empty string that still owns a
+        // buffer (`len == 0`, `cap > 0`) must not be copied bitwise, or both values would
+        // later rebuild a `String` from the same allocation and free it twice.
+        if self.cap == 0 {
+            // Borrowed or unallocated: nothing is owned, so a bitwise copy is sound.
+            Self { ..*self }
+        } else {
+            // Owned: copy the contents into a fresh, independent allocation.
+            Self::new_owned(String::from(&self[..]))
+        }
+    }
+}
+
+macro_rules! impl_eq_ord {
+    ($($other:ty),* $(,)?) => {
+        $(
+            impl<'a> PartialEq<$other> for Stringish {
+                fn eq(&self, other: &$other) -> bool {
+                    self[..] == other[..]
+                }
+            }
+
+            impl<'a> PartialEq<Stringish> for $other {
+                fn eq(&self, other: &Stringish) -> bool {
+                    self[..] == other[..]
+                }
+            }
+
+            impl<'a> PartialOrd<$other> for Stringish {
+                fn partial_cmp(&self, other: &$other) -> Option<std::cmp::Ordering> {
+                    self[..].partial_cmp(&other[..])
+                }
+            }
+
+            impl<'a> PartialOrd<Stringish> for $other {
+                fn partial_cmp(&self, other: &Stringish) -> Option<std::cmp::Ordering> {
+                    self[..].partial_cmp(&other[..])
+                }
+            }
+        )*
+    };
+}
+
+impl_eq_ord!(str, &'a str, String, Cow<'a, str>);
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..8f4ee44
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,348 @@
+#![allow(unknown_lints)] // `warn(rustdoc::all)` triggers this for weird nightly reasons
+#![warn(rustdoc::all)]
+#![warn(clippy::all)]
+#![warn(clippy::pedantic)]
+
+//! A smaller [`Cow<'static, str>`](Cow).
+//!
+//! See more at the type documentation for [`Stringish`].
+
+mod impls;
+#[cfg(test)]
+mod test;
+
+use std::{
+    borrow::Cow,
+    mem::{self, ManuallyDrop},
+    ops::{Deref, DerefMut},
+    ptr::NonNull,
+    slice, str,
+};
+
+/// A smaller [`Cow<'static, str>`](Cow).
+///
+/// This type can represent either an [`&'static str`](prim@str), or a [`String`], and fits within 3 words
+/// (the same size as `String`). It does this by storing a capacity of `0` when it is borrowed.
+/// However, this does mean that it is impossible to distinguish between borrowed and owned when
+/// `capacity == len == 0`, but in that case it really doesn't matter much anyway since an owned
+/// `String` with capacity equal to zero has not yet allocated.
+///
+/// When doing non-mutating actions such as [`Deref::deref`], the string remains in whatever state it
+/// was in. When doing mutating actions such as [`DerefMut::deref_mut`], the string becomes owned.
+///
+/// ## Mutation as a `String`
+///
+/// Since `Stringish` does not store a `String` internally, it cannot implement
+/// `DerefMut<Target = String>`. As a result, [`Stringish::mutate`] exists instead. When `mutate`
+/// is called, `self` is replaced with `Stringish::new()`, and the original value is converted to
+/// a `String` and stored in the returned `MutationGuard`. When the guard is dropped, that `String`
+/// is converted back into a `Stringish` and put back into where it came from. This allows clients
+/// to mutate a `Stringish` as if it were a `String`.
+pub struct Stringish {
+    ptr: NonNull<u8>,
+    len: usize,
+    cap: usize,
+}
+
+impl Drop for Stringish {
+    fn drop(&mut self) {
+        // A nonzero capacity means a heap buffer is owned, even when `len == 0`. Checking
+        // `is_owned()` here would leak that buffer, since it reports `None` for any empty string.
+        if self.cap != 0 {
+            drop(mem::take(self).into_owned());
+        }
+    }
+}
+
+impl Default for Stringish {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Stringish {
+    //////////////
+    // Creation //
+    //////////////
+
+    /// Creates a new empty `Stringish`.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            ptr: NonNull::dangling(),
+            len: 0,
+            cap: 0,
+        }
+    }
+
+    /// Creates a new borrowed `Stringish` from the provided [`&'static str`](prim@str).
+    ///
+    /// Alternatively, use the [`Ish::ish`] method.
+    #[must_use]
+    pub fn new_borrowed(s: &'static str) -> Self {
+        Self {
+            // SAFETY: `s.as_ptr()` is never null
+            ptr: unsafe { NonNull::new_unchecked(s.as_ptr() as *mut u8) },
+            len: s.len(),
+            cap: 0,
+        }
+    }
+
+    /// Creates a new owned `Stringish` from the provided [`String`].
+    ///
+    /// Alternatively, use the [`Ish::ish`] method.
+    #[must_use]
+    pub fn new_owned(s: String) -> Self {
+        let mut v = ManuallyDrop::new(s.into_bytes());
+        Self {
+            // SAFETY: `v.as_mut_ptr()` is never null
+            ptr: unsafe { NonNull::new_unchecked(v.as_mut_ptr()) },
+            len: v.len(),
+            cap: v.capacity(),
+        }
+    }
+
+    /// Creates a new `Stringish` from the provided [`Cow<'static, str>`](Cow).
+    ///
+    /// If the given `Cow` is [`Cow::Borrowed`], the returned `Stringish` is borrowed. Otherwise,
+    /// it is owned.
+    ///
+    /// Alternatively, use the [`Ish::ish`] method.
+    #[must_use]
+    pub fn from_cow(cow: Cow<'static, str>) -> Self {
+        match cow {
+            Cow::Borrowed(s) => Self::new_borrowed(s),
+            Cow::Owned(s) => Self::new_owned(s),
+        }
+    }
+
+    ///////////////////
+    // Borrowed-ness //
+    ///////////////////
+
+    /// Checks if the `Stringish` is borrowed.
+    ///
+    /// If the `Stringish` is empty, returns [`None`]. Otherwise, returns `true` if the `Stringish`
+    /// is borrowed and `false` otherwise.
+    #[must_use]
+    pub fn is_borrowed(&self) -> Option<bool> {
+        if self.len == 0 {
+            None
+        } else if self.cap == 0 {
+            Some(true)
+        } else {
+            Some(false)
+        }
+    }
+
+    /// Checks if the `Stringish` is owned.
+    ///
+    /// If the `Stringish` is empty, returns [`None`]. Otherwise, returns `true` if the `Stringish`
+    /// is owned and `false` otherwise.
+    #[must_use]
+    pub fn is_owned(&self) -> Option<bool> {
+        self.is_borrowed().map(|v| !v)
+    }
+
+    /// Converts a borrowed `Stringish` into an owned `Stringish`, in-place.
+    ///
+    /// If the `Stringish` is borrowed, the data is copied into a new allocation and the
+    /// `Stringish` becomes owned. Otherwise, nothing happens.
+    pub fn make_owned(&mut self) {
+        if let None | Some(true) = self.is_owned() {
+            return;
+        }
+
+        let mut s = String::with_capacity(self.len);
+        s.push_str(self);
+        *self = Stringish::new_owned(s);
+    }
+
+    /// Converts a `Stringish` into an owned [`String`].
+    ///
+    /// Calls [`Stringish::make_owned`] and then converts into a `String`.
+    #[must_use = "`self` will be dropped if the result is not used"]
+    pub fn into_owned(mut self) -> String {
+        self.make_owned();
+
+        let this = ManuallyDrop::new(self);
+
+        // SAFETY: after `make_owned`, we basically act just like a `String` and uphold the same invariants
+        unsafe { String::from_raw_parts(this.ptr.as_ptr(), this.len, this.cap) }
+    }
+
+    // Reborrowing
+
+    /// Returns a byte slice of this `Stringish`'s contents.
+    #[must_use]
+    pub fn as_bytes(&self) -> &[u8] {
+        // SAFETY: `self.ptr` always points to `self.len` valid bytes
+        unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
+    }
+
+    /// Returns a mutable byte slice of this `Stringish`'s contents.
+    ///
+    /// If the `Stringish` is borrowed, it is made owned.
+    ///
+    /// # Safety
+    ///
+    /// Mutating the bytes such that they are no longer valid UTF-8 is unsound.
+    #[must_use]
+    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
+        self.make_owned();
+
+        // SAFETY: `self.ptr` always points to `self.len` valid bytes
+        unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) }
+    }
+
+    /// Returns a string slice of this `Stringish`'s contents.
+    #[must_use]
+    pub fn as_str(&self) -> &str {
+        // SAFETY: `self.ptr` always points to valid UTF-8
+        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
+    }
+
+    /// Returns a mutable string slice of this `Stringish`'s contents.
+    ///
+    /// If the `Stringish` is borrowed, it is made owned.
+    #[must_use]
+    pub fn as_mut_str(&mut self) -> &mut str {
+        // SAFETY: `self.ptr` always points to valid UTF-8 and we never mutate the bytes into invalid UTF-8
+        unsafe { str::from_utf8_unchecked_mut(self.as_bytes_mut()) }
+    }
+}
+
+/// Creates a new borrowed `Stringish` from the given [`&'static str`](prim@str).
+///
+/// See also: [`Stringish::new_borrowed`], [`<&'static str>::ish`]
+impl From<&'static str> for Stringish {
+    fn from(s: &'static str) -> Self {
+        Self::new_borrowed(s)
+    }
+}
+
+/// Creates a new owned `Stringish` from the given [`String`].
+///
+/// See also: [`Stringish::new_owned`], [`String::ish`]
+impl From<String> for Stringish {
+    fn from(s: String) -> Self {
+        Self::new_owned(s)
+    }
+}
+
+/// Creates a new `Stringish` from the given [`Cow<'static, str>`](Cow).
+///
+/// See also: [`Stringish::from_cow`], [`Cow<'static, str>::ish`]
+impl From<Cow<'static, str>> for Stringish {
+    fn from(cow: Cow<'static, str>) -> Self {
+        Self::from_cow(cow)
+    }
+}
+
+impl Deref for Stringish {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        self.as_str()
+    }
+}
+
+impl DerefMut for Stringish {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.as_mut_str()
+    }
+}
+
+impl Stringish {
+    // Mutation as a `String`
+
+    /// Allows mutation of this `Stringish` as a `String` using a guard.
+    ///
+    /// Returns a [`MutationGuard`] that implements [`DerefMut<Target = String>`](DerefMut).
+    ///
+    /// See more in [Mutation as a `String`](Stringish#mutation-as-a-string).
+    #[must_use = "call `make_owned` if you do not use the returned `MutationGuard`"]
+    pub fn mutate(&mut self) -> MutationGuard<'_> {
+        let this = mem::take(self);
+        MutationGuard {
+            stringish: self,
+            string: ManuallyDrop::new(this.into_owned()),
+        }
+    }
+
+    /// Allows mutation of this `Stringish` as a `String` using a closure.
+    ///
+    /// See more in [Mutation as a `String`](Stringish#mutation-as-a-string).
+    pub fn mutate_with(&mut self, f: impl FnOnce(&mut String)) {
+        let mut guard = self.mutate();
+        f(&mut guard);
+        drop(guard); // explicitly drop guard for clarity
+    }
+}
+
+/// Updates the borrowed [`Stringish`] when dropped with the [`String`] stored inside.
+///
+/// Implements [`DerefMut<Target = String>`](DerefMut) to allow mutating the `Stringish` as a `String`.
+///
+/// See more in [Mutation as a `String`](Stringish#mutation-as-a-string).
+pub struct MutationGuard<'a> {
+    stringish: &'a mut Stringish,
+    string: ManuallyDrop<String>,
+}
+
+impl Deref for MutationGuard<'_> {
+    type Target = String;
+
+    fn deref(&self) -> &Self::Target {
+        &self.string
+    }
+}
+
+impl DerefMut for MutationGuard<'_> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.string
+    }
+}
+
+impl Drop for MutationGuard<'_> {
+    fn drop(&mut self) {
+        // SAFETY: `self.string.value` is never used again since we are being dropped
+        let string = unsafe { ManuallyDrop::take(&mut self.string) };
+        *self.stringish = Stringish::new_owned(string);
+    }
+}
+
+mod sealant {
+    use std::borrow::Cow;
+
+    pub trait Sealed {}
+    impl Sealed for &'static str {}
+    impl Sealed for String {}
+    impl Sealed for Cow<'static, str> {}
+}
+
+/// Converts something string-ish to a [`Stringish`].
+///
+/// Implemented for [`&'static str`](prim@str), [`String`], and [`Cow<'static, str>`](Cow).
+pub trait Ish: sealant::Sealed {
+    /// Converts `self` into a [`Stringish`].
+    fn ish(self) -> Stringish;
+}
+
+impl Ish for &'static str {
+    fn ish(self) -> Stringish {
+        Stringish::new_borrowed(self)
+    }
+}
+
+impl Ish for String {
+    fn ish(self) -> Stringish {
+        Stringish::new_owned(self)
+    }
+}
+
+impl Ish for Cow<'static, str> {
+    fn ish(self) -> Stringish {
+        Stringish::from_cow(self)
+    }
+}
diff --git a/src/test.rs b/src/test.rs
new file mode 100644
index 0000000..ae6b18a
--- /dev/null
+++ b/src/test.rs
@@ -0,0 +1,115 @@
+use std::{borrow::Cow, collections::HashMap};
+
+use crate::{Ish, Stringish};
+
+#[test]
+fn construction() {
+    let a = "Hello, world!".ish();
+    let b = String::from("abracadabra").ish();
+    let c = Stringish::new();
+
+    assert_eq!(a, "Hello, world!");
+    assert_eq!(b, "abracadabra");
+    assert!(c.is_empty());
+}
+
+#[test]
+fn equality() {
+    let a = "foobar".ish();
+    let b = String::from("foobar").ish();
+
+    assert_eq!(a, b);
+    assert_eq!(a, "foobar");
+    assert_eq!(b, "foobar");
+    assert_eq!(a, *"foobar");
+    assert_eq!(b, *"foobar");
+    assert_eq!(a, String::from("foobar"));
+    assert_eq!(b, String::from("foobar"));
+    assert_eq!(a, Cow::Borrowed("foobar"));
+    assert_eq!(b, Cow::Borrowed("foobar"));
+    assert_eq!(a, Cow::Owned(String::from("foobar")));
+    assert_eq!(b, Cow::Owned(String::from("foobar")));
+}
+
+#[test]
+fn clone_hash() {
+    let a = "sdyajshdiask".ish();
+    let b = String::from("iujhioasd").ish();
+    let c = a.clone();
+    let d = b.clone();
+
+    assert_eq!(a, c);
+    assert_eq!(a.as_ptr(), c.as_ptr());
+    assert_eq!(b, d);
+    assert_ne!(b.as_ptr(), d.as_ptr());
+
+    let mut map = HashMap::new();
+    map.insert(a, 1);
+    map.insert(b, 2);
+
+    assert_eq!(map[&c], 1);
+    assert_eq!(map[&d], 2);
+}
+
+#[test]
+fn borrowedness() {
+    let mut a = "abcdef".ish();
+    let mut b = String::from("abcdef").ish();
+    let mut c = "".ish();
+    let mut d = String::new().ish();
+
+    assert_eq!(a.is_borrowed(), Some(true));
+    assert_eq!(b.is_owned(), Some(true));
+    assert_eq!(c.is_borrowed(), None);
+    assert_eq!(d.is_owned(), None);
+
+    a.make_owned();
+    b.make_owned();
+    c.make_owned();
+    d.make_owned();
+
+    assert_eq!(a.is_borrowed(), Some(false));
+    assert_eq!(b.is_owned(), Some(true));
+    assert_eq!(c.is_borrowed(), None);
+    assert_eq!(d.is_owned(), None);
+
+    a = "abcdef".ish();
+    c = "".ish();
+
+    assert_eq!(a.into_owned(), "abcdef");
+    assert_eq!(b.into_owned(), "abcdef");
+    assert_eq!(c.into_owned(), "");
+    assert_eq!(d.into_owned(), "");
+}
+
+#[test]
+fn mutation() {
+    let mut a = "Hello".ish();
+    assert_eq!(a, "Hello");
+
+    a.mutate().push_str(", world!");
+    assert_eq!(a, "Hello, world!");
+
+    a.mutate_with(|s| {
+        s.truncate(1);
+        s.push('i');
+        s.make_ascii_lowercase();
+    });
+    assert_eq!(a, "hi");
+}
+
+#[test]
+fn empty_with_capacity() {
+    // Regression: an empty string that still owns a buffer must clone into an independent
+    // value and release its allocation exactly once.
+    let a = String::with_capacity(16).ish();
+    let b = a.clone();
+
+    assert!(a.into_owned().capacity() >= 16);
+    assert_eq!(b.into_owned(), "");
+
+    let mut c = "abc".ish();
+    c.mutate().clear();
+    assert!(c.is_empty());
+    drop(c.clone());
+}