stringish/src/lib.rs

406 lines
12 KiB
Rust

#![allow(unknown_lints)] // `warn(rustdoc::all)` triggers this for some reason
#![warn(rustdoc::all)]
#![warn(clippy::all)]
#![warn(clippy::pedantic)]
//! A smaller [`Cow<'static, str>`](Cow).
//!
//! See more at the type documentation for [`Stringish`].
mod impls;
#[cfg(test)]
mod test;
use std::{
borrow::Cow,
mem::{self, ManuallyDrop},
ops::{Deref, DerefMut},
ptr::NonNull,
slice, str,
};
/// A smaller [`Cow<'static, str>`](Cow).
///
/// This type can represent either an [`&'static str`](prim@str), or a [`String`], and fits within
/// 3 words (the same size as `String`). It does this by storing a capacity of `0` when it is
/// borrowed. However, this does mean that it is impossible to distinguish between borrowed and
/// owned when `capacity == len == 0`, but in that case it really doesn't matter much anyway since
/// an owned `String` with capacity equal to zero has not yet allocated.
///
/// An *empty* string that still owns a buffer (`len == 0`, `capacity != 0`, e.g. one built from
/// [`String::with_capacity`]) is owned: its buffer is kept by [`Stringish::make_owned`] and
/// [`Stringish::into_owned`], and released when the `Stringish` is dropped.
///
/// Note that this method of distinguishing a borrowed string from an owned string still allows
/// storing a [`NonNull`] pointer, so this type is *still* 3 words when in an [`Option`].
///
/// When doing non-mutating actions such as [`Deref::deref`], the string remains in whatever state
/// it was in. When doing mutating actions such as [`DerefMut::deref_mut`], the string is made owned.
///
/// ## Mutation as a `String`
///
/// Since `Stringish` does not store a `String` internally, it cannot implement
/// `DerefMut<Target = String>`. As a result, [`Stringish::mutate`] exists instead. When `mutate`
/// is called, `self` is replaced with `Stringish::new()`, and the original value is converted to
/// a `String` and stored in the returned `MutationGuard`. When the guard is dropped, that `String`
/// is converted back into a `Stringish` and put back into where it came from. This allows clients
/// to mutate a `Stringish` as if it were a `String`.
pub struct Stringish {
    /// Start of the string bytes. When `cap != 0` this is the heap buffer taken from a `String`;
    /// when `cap == 0` it is a `&'static str`'s pointer or a dangling, never-allocated pointer.
    ptr: NonNull<u8>,
    /// Number of initialized UTF-8 bytes behind `ptr`.
    len: usize,
    /// Capacity of the owned heap buffer, or `0` when no buffer is owned.
    cap: usize,
}

// SAFETY: this can be `Send + Sync` for the same reasons `String` can be: the value is either a
// `&'static str` or the raw parts of a `String`, and the bytes are only written through `&mut self`.
unsafe impl Send for Stringish {}
unsafe impl Sync for Stringish {}

impl Drop for Stringish {
    /// Frees the heap buffer, if one is owned.
    ///
    /// Ownership is decided by `cap != 0` rather than by [`Stringish::is_owned`]: `is_owned`
    /// reports `None` for every empty string, which would leak the buffer of an empty string that
    /// still has a non-zero capacity.
    fn drop(&mut self) {
        if self.cap != 0 {
            // SAFETY: within this file a non-zero `cap` is only ever stored by `new_owned`, which
            // takes `ptr`/`len`/`cap` from a live `String` without freeing it, and `into_owned`
            // wraps `self` in `ManuallyDrop` before rebuilding that `String`, so the buffer is
            // released exactly once. `self` is never touched again after this point.
            drop(unsafe { String::from_raw_parts(self.ptr.as_ptr(), self.len, self.cap) });
        }
    }
}

impl Default for Stringish {
    /// Returns an empty `Stringish`; same as [`Stringish::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Stringish {
    //////////////
    // Creation //
    //////////////

    /// Creates a new empty `Stringish`.
    ///
    /// No allocation is performed; the pointer is dangling and both `len` and `cap` are `0`.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            ptr: NonNull::dangling(),
            len: 0,
            cap: 0,
        }
    }

    /// Creates a new borrowed `Stringish` from the provided [`&'static str`](prim@str).
    ///
    /// The string data is not copied; a capacity of `0` marks the value as borrowed.
    ///
    /// Alternatively, use the [`Ish::ish`] method.
    #[must_use]
    pub const fn new_borrowed(s: &'static str) -> Self {
        Self {
            // SAFETY: `s.as_ptr()` is never null
            ptr: unsafe { NonNull::new_unchecked(s.as_ptr() as *mut u8) },
            len: s.len(),
            cap: 0,
        }
    }

    /// Creates a new owned `Stringish` from the provided [`String`].
    ///
    /// The `String`'s buffer is taken over as-is (no copy, capacity preserved).
    ///
    /// Alternatively, use the [`Ish::ish`] method.
    #[must_use]
    pub fn new_owned(s: String) -> Self {
        // converting to a `Vec` is necessary since `s.as_mut_ptr()` would call `str::as_mut_ptr`
        // through deref coercion, which would only give us a pointer with provenance to the
        // initialized part of the `String`, which is not enough to deallocate.
        //
        // `ManuallyDrop` keeps the buffer alive; `Stringish` is now responsible for freeing it.
        let mut v = ManuallyDrop::new(s.into_bytes());
        Self {
            // SAFETY: `v.as_mut_ptr()` is never null
            ptr: unsafe { NonNull::new_unchecked(v.as_mut_ptr()) },
            len: v.len(),
            cap: v.capacity(),
        }
    }

    /// Creates a new `Stringish` from the provided [`Cow<'static, str>`](Cow).
    ///
    /// If the given `Cow` is [`Cow::Borrowed`], the returned `Stringish` is borrowed. Otherwise,
    /// it is owned.
    ///
    /// Alternatively, use the [`Ish::ish`] method.
    #[must_use]
    pub fn from_cow(cow: Cow<'static, str>) -> Self {
        match cow {
            Cow::Borrowed(s) => Self::new_borrowed(s),
            Cow::Owned(s) => Self::new_owned(s),
        }
    }

    ///////////////////
    // Borrowed-ness //
    ///////////////////

    /// Checks if the `Stringish` is borrowed.
    ///
    /// If the `Stringish` is empty, returns [`None`] (this includes an empty string that owns a
    /// buffer). Otherwise, returns `true` if the `Stringish` is borrowed and `false` otherwise.
    #[must_use]
    pub const fn is_borrowed(&self) -> Option<bool> {
        if self.len == 0 {
            None
        } else {
            Some(self.cap == 0)
        }
    }

    /// Checks if the `Stringish` is owned.
    ///
    /// If the `Stringish` is empty, returns [`None`] (this includes an empty string that owns a
    /// buffer). Otherwise, returns `true` if the `Stringish` is owned and `false` otherwise.
    #[must_use]
    pub const fn is_owned(&self) -> Option<bool> {
        if self.len == 0 {
            None
        } else {
            Some(self.cap != 0)
        }
    }

    /// Converts a borrowed `Stringish` into an owned `Stringish`, in-place.
    ///
    /// If the `Stringish` is borrowed, the data is copied into a new allocation and the
    /// `Stringish` becomes owned. Otherwise, nothing happens; in particular an empty string that
    /// already owns a buffer keeps that buffer and its capacity.
    pub fn make_owned(&mut self) {
        // `cap == 0` is the only state that does not own a heap buffer. Testing the capacity
        // directly (instead of going through `as_static_str`) keeps an empty-but-allocated
        // string from being swapped for a fresh `String::new()`.
        if self.cap == 0 {
            let copy = String::from(self.as_str());
            *self = Self::new_owned(copy);
        }
    }

    /// Converts a `Stringish` into an owned [`String`].
    ///
    /// Calls [`Stringish::make_owned`] and then converts into a `String`. The buffer of an
    /// already-owned `Stringish` is reused, so its capacity carries over.
    #[must_use = "`self` will be dropped if the result is not used"]
    pub fn into_owned(mut self) -> String {
        self.make_owned();
        // The returned `String` takes over the buffer, so `Stringish::drop` must not run.
        let this = ManuallyDrop::new(self);
        // SAFETY: after `make_owned`, `ptr`/`len`/`cap` are exactly the raw parts `new_owned`
        // took from a `String`, so we act just like a `String` and uphold the same invariants
        unsafe { String::from_raw_parts(this.ptr.as_ptr(), this.len, this.cap) }
    }

    /// Returns the inner [`&'static str`](prim@str) if the `Stringish` is borrowed.
    ///
    /// An empty `Stringish` always yields `Some("")`, since borrowed and owned cannot be told
    /// apart by [`Stringish::is_borrowed`] in that case. A non-empty owned `Stringish` yields
    /// [`None`].
    #[must_use]
    pub fn as_static_str(&self) -> Option<&'static str> {
        if self.cap == 0 {
            // SAFETY: with `cap == 0` we either borrow a `&'static str`, so lifetime extension is
            // safe, or we are empty and the slice covers zero bytes
            Some(unsafe { &*(self.as_str() as *const str) })
        } else if self.len == 0 {
            // Empty but owning a buffer: hand out a real static string instead of a `'static`
            // reference into a heap allocation that will be freed later.
            Some("")
        } else {
            None
        }
    }

    /// Converts the `Stringish` into a [`Cow<'static, str>`](Cow).
    ///
    /// This is the inverse of [`Stringish::from_cow`]. Whenever [`Stringish::as_static_str`]
    /// returns a string (which includes every empty `Stringish`), the result is
    /// [`Cow::Borrowed`]; otherwise it is [`Cow::Owned`].
    #[must_use]
    pub fn into_cow(self) -> Cow<'static, str> {
        if let Some(s) = self.as_static_str() {
            Cow::Borrowed(s)
        } else {
            Cow::Owned(self.into_owned())
        }
    }

    /// Consumes and leaks the `Stringish`, returning an immutable reference to the contents.
    ///
    /// If the `Stringish` is borrowed, no allocation needs to be created since it already contains
    /// a [`&'static str`](prim@str).
    ///
    /// If you wish to get a mutable reference, see [`String::leak`].
    #[must_use]
    pub fn leak(self) -> &'static str {
        if let Some(s) = self.as_static_str() {
            s
        } else {
            // SAFETY: the leaked bytes come straight from a `String`, so they are valid UTF-8
            // (workaround for `String::leak` being unstable)
            unsafe { str::from_utf8_unchecked(self.into_owned().into_bytes().leak()) }
        }
    }

    /////////////////
    // Reborrowing //
    /////////////////

    /// Returns a byte slice of this `Stringish`'s contents.
    #[must_use]
    pub const fn as_bytes(&self) -> &[u8] {
        // SAFETY: `self.ptr` always points to `self.len` valid bytes
        unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) }
    }

    /// Returns a mutable byte slice of this `Stringish`'s contents.
    ///
    /// If the `Stringish` is borrowed, it is made owned first.
    ///
    /// # Safety
    ///
    /// Mutating the bytes such that they are no longer valid UTF-8 is unsound.
    #[must_use]
    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
        // Static data must never be written to, so switch to an owned copy first.
        self.make_owned();
        // SAFETY: `self.ptr` always points to `self.len` valid bytes
        unsafe { slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) }
    }

    /// Returns a string slice of this `Stringish`'s contents.
    #[must_use]
    pub const fn as_str(&self) -> &str {
        // SAFETY: `self.ptr` always points to valid UTF-8
        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
    }

    /// Returns a mutable string slice of this `Stringish`'s contents.
    ///
    /// If the `Stringish` is borrowed, it is made owned first.
    #[must_use]
    pub fn as_mut_str(&mut self) -> &mut str {
        // SAFETY: `self.ptr` always points to valid UTF-8 and we never mutate the bytes into invalid UTF-8
        unsafe { str::from_utf8_unchecked_mut(self.as_bytes_mut()) }
    }
}
/// Creates a new borrowed `Stringish` from the given [`&'static str`](prim@str).
///
/// See also: [`Stringish::new_borrowed`], [`<&'static str>::ish`]
impl From<&'static str> for Stringish {
fn from(s: &'static str) -> Self {
Self::new_borrowed(s)
}
}
/// Creates a new owned `Stringish` from the given [`String`].
///
/// See also: [`Stringish::new_owned`], [`String::ish`]
impl From<String> for Stringish {
fn from(s: String) -> Self {
Self::new_owned(s)
}
}
/// Creates a new `Stringish` from the given [`Cow<'static, str>`](Cow).
///
/// See also: [`Stringish::from_cow`], [`Cow<'static, str>::ish`]
impl From<Cow<'static, str>> for Stringish {
fn from(cow: Cow<'static, str>) -> Self {
Self::from_cow(cow)
}
}
/// Converts a `Stringish` into a [`Cow<'static, str>`](Cow).
///
/// See also: [`Stringish::into_cow`]
impl From<Stringish> for Cow<'static, str> {
fn from(stringish: Stringish) -> Self {
stringish.into_cow()
}
}
impl Deref for Stringish {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl DerefMut for Stringish {
fn deref_mut(&mut self) -> &mut Self::Target {
self.as_mut_str()
}
}
impl Stringish {
    ////////////////////////////
    // Mutation as a `String` //
    ////////////////////////////

    /// Hands out a guard through which this `Stringish` can be edited as a `String`.
    ///
    /// `self` is emptied (via `mem::take`) and its previous contents, converted with
    /// [`Stringish::into_owned`], are stored in the returned [`MutationGuard`], which implements
    /// [`DerefMut<Target = String>`](DerefMut).
    ///
    /// See more in [Mutation as a `String`](Stringish#mutation-as-a-string).
    #[must_use = "call `make_owned` if you do not use the returned `MutationGuard`"]
    pub fn mutate(&mut self) -> MutationGuard<'_> {
        // Take the contents out first; `self` holds the `Default` value while the guard is alive.
        let contents = mem::take(self).into_owned();
        MutationGuard {
            stringish: self,
            string: ManuallyDrop::new(contents),
        }
    }

    /// Runs `f` on this `Stringish`'s contents viewed as a `String`.
    ///
    /// Closure-based counterpart of [`Stringish::mutate`]: a guard is created, passed to `f`, and
    /// dropped once `f` returns.
    ///
    /// See more in [Mutation as a `String`](Stringish#mutation-as-a-string).
    pub fn mutate_with(&mut self, f: impl FnOnce(&mut String)) {
        let mut guard = self.mutate();
        let target: &mut String = &mut guard;
        f(target);
        // `guard` goes out of scope here, which ends the mutation
    }
}
/// Guard returned by [`Stringish::mutate`]; on drop, its [`String`] is written back into the
/// [`Stringish`] it mutably borrows.
///
/// Implements [`DerefMut<Target = String>`](DerefMut) to allow mutating the `Stringish` as a `String`.
///
/// See more in [Mutation as a `String`](Stringish#mutation-as-a-string).
pub struct MutationGuard<'a> {
    /// Destination that receives the edited string when the guard is dropped.
    stringish: &'a mut Stringish,
    /// The contents being edited; `ManuallyDrop` so they can be moved out inside `Drop::drop`.
    string: ManuallyDrop<String>,
}
/// Shared access to the `String` held by the guard.
impl Deref for MutationGuard<'_> {
    type Target = String;

    fn deref(&self) -> &String {
        let inner: &String = &self.string;
        inner
    }
}
/// Mutable access to the `String` held by the guard.
impl DerefMut for MutationGuard<'_> {
    fn deref_mut(&mut self) -> &mut String {
        let inner: &mut String = &mut self.string;
        inner
    }
}
/// Writes the edited `String` back into the borrowed `Stringish`.
impl Drop for MutationGuard<'_> {
    fn drop(&mut self) {
        // SAFETY: the guard is being dropped, so `self.string` is never read again after its
        // value has been moved out here
        let edited = unsafe { ManuallyDrop::take(&mut self.string) };
        // Store the result as an owned `Stringish`, replacing whatever the target currently holds.
        *self.stringish = Stringish::new_owned(edited);
    }
}
/// Private module holding the marker trait used as the supertrait of `Ish`.
mod sealant {
    /// Marker trait implemented only for the three string-like types listed below.
    pub trait Sealed {}

    impl Sealed for &'static str {}
    impl Sealed for String {}
    impl Sealed for std::borrow::Cow<'static, str> {}
}
/// Extension trait that turns a string-like value into a [`Stringish`].
///
/// Implemented for [`&'static str`](prim@str), [`String`], and [`Cow<'static, str>`](Cow).
/// The `sealant::Sealed` supertrait lives in a private module.
pub trait Ish: sealant::Sealed {
    /// Consumes `self` and returns the corresponding [`Stringish`].
    fn ish(self) -> Stringish;
}
/// A static string slice becomes a borrowed [`Stringish`].
impl Ish for &'static str {
    fn ish(self) -> Stringish {
        let text: &'static str = self;
        Stringish::new_borrowed(text)
    }
}
/// A `String` becomes an owned [`Stringish`].
impl Ish for String {
    fn ish(self) -> Stringish {
        let text: String = self;
        Stringish::new_owned(text)
    }
}
/// A `Cow` keeps its variant: borrowed stays borrowed, owned stays owned.
impl Ish for Cow<'static, str> {
    fn ish(self) -> Stringish {
        // Same dispatch that `Stringish::from_cow` performs, written out inline.
        match self {
            Cow::Borrowed(text) => Stringish::new_borrowed(text),
            Cow::Owned(text) => Stringish::new_owned(text),
        }
    }
}