From 9b540c4299a6567d0e7642a4664dc76291dc8cbc Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Thu, 22 Feb 2024 16:26:40 +0100 Subject: [PATCH] gccrs: format-args: Start storing string in Rust memory gcc/rust/ChangeLog: * ast/rust-fmt.cc (ffi::RustHamster::to_string): New. (Pieces::collect): Adapt to use new handle API. (Pieces::~Pieces): Likewise. (Pieces::Pieces): Likewise. (Pieces::operator=): Likewise. * ast/rust-fmt.h (struct RustString): Add members. (struct FormatArgsHandle): New. (clone_pieces): Adapt for new FFI API. (destroy_pieces): Likewise. (struct Pieces): Store new FormatArgsHandle type. * expand/rust-expand-format-args.cc (expand_format_args): Use proper namespace. * resolve/rust-ast-resolve-base.cc (ResolverBase::visit): FormatArgs nodes are already resolved, so do nothing. libgrust/ChangeLog: * libformat_parser/src/lib.rs: Use new Handle struct and expose it. --- gcc/rust/ast/rust-fmt.cc | 34 ++++--- gcc/rust/ast/rust-fmt.h | 46 ++++++--- gcc/rust/expand/rust-expand-format-args.cc | 4 +- gcc/rust/resolve/rust-ast-resolve-base.cc | 5 +- libgrust/libformat_parser/src/lib.rs | 105 ++++++++++++++++----- 5 files changed, 134 insertions(+), 60 deletions(-) diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc index b82e089fc41..fda02e530fd 100644 --- a/gcc/rust/ast/rust-fmt.cc +++ b/gcc/rust/ast/rust-fmt.cc @@ -22,44 +22,48 @@ namespace Rust { namespace Fmt { -Pieces -Pieces::collect (std::string &&to_parse, bool append_newline) +std::string +ffi::RustHamster::to_string () const { - auto piece_slice = collect_pieces (to_parse.c_str (), append_newline); + return std::string (ptr, len); +} + +Pieces +Pieces::collect (const std::string &to_parse, bool append_newline) +{ + auto handle = ffi::collect_pieces (to_parse.c_str (), append_newline); // this performs multiple copies, can we avoid them maybe? // TODO: Instead of just creating a vec of, basically, `ffi::Piece`s, we // should transform them into the proper C++ type which we can work with. so // transform all the strings into C++ strings? all the Option into // tl::optional? - auto pieces = std::vector (piece_slice.base_ptr, - piece_slice.base_ptr + piece_slice.len); + auto pieces_vector = std::vector (handle.piece_slice.base_ptr, + handle.piece_slice.base_ptr + + handle.piece_slice.len); - return Pieces (std::move (pieces), piece_slice, std::move (to_parse)); + return Pieces (handle, std::move (pieces_vector)); } -Pieces::~Pieces () { destroy_pieces (slice); } +Pieces::~Pieces () { ffi::destroy_pieces (handle); } -Pieces::Pieces (const Pieces &other) - : pieces_vector (other.pieces_vector), to_parse (other.to_parse) +Pieces::Pieces (const Pieces &other) : pieces_vector (other.pieces_vector) { - slice = clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap); + handle = ffi::clone_pieces (other.handle); } Pieces & Pieces::operator= (const Pieces &other) { - slice = clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap); - to_parse = other.to_parse; + handle = ffi::clone_pieces (other.handle); + pieces_vector = other.pieces_vector; return *this; } Pieces::Pieces (Pieces &&other) : pieces_vector (std::move (other.pieces_vector)), - slice ( - clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap)), - to_parse (std::move (other.to_parse)) + handle (clone_pieces (other.handle)) {} } // namespace Fmt diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h index ba412f9958c..6a0c116b841 100644 --- a/gcc/rust/ast/rust-fmt.h +++ b/gcc/rust/ast/rust-fmt.h @@ -20,15 +20,21 @@ #define RUST_FMT_H #include "rust-system.h" +#include // FIXME: How to encode Option? namespace Rust { namespace Fmt { +namespace ffi { + struct RustHamster { - // hehe + const char *ptr; + size_t len; + + std::string to_string () const; }; /// Enum of alignments which are supported. @@ -240,21 +246,36 @@ struct PieceSlice size_t cap; }; +struct RustString +{ + const unsigned char *ptr; + size_t len; + size_t cap; +}; + +struct FormatArgsHandle +{ + PieceSlice piece_slice; + RustString rust_string; +}; + extern "C" { -PieceSlice +FormatArgsHandle collect_pieces (const char *input, bool append_newline); -PieceSlice -clone_pieces (const Piece *base_ptr, size_t len, size_t cap); +FormatArgsHandle +clone_pieces (const FormatArgsHandle &); -void destroy_pieces (PieceSlice); +void destroy_pieces (FormatArgsHandle); } // extern "C" +} // namespace ffi + struct Pieces { - static Pieces collect (std::string &&to_parse, bool append_newline); + static Pieces collect (const std::string &to_parse, bool append_newline); ~Pieces (); Pieces (const Pieces &other); @@ -262,7 +283,7 @@ struct Pieces Pieces (Pieces &&other); - const std::vector &get_pieces () const { return pieces_vector; } + const std::vector &get_pieces () const { return pieces_vector; } // { // slice = clone_pieces (&other.slice); @@ -272,19 +293,16 @@ struct Pieces // } private: - Pieces (std::vector &&pieces_vector, PieceSlice slice, - std::string &&to_parse) - : pieces_vector (std::move (pieces_vector)), slice (slice), - to_parse (std::move (to_parse)) + Pieces (ffi::FormatArgsHandle handle, std::vector &&pieces_vector) + : pieces_vector (std::move (pieces_vector)), handle (handle) {} - std::vector pieces_vector; + std::vector pieces_vector; // this memory is held for FFI reasons - it needs to be released and cloned // precisely, so try to not access it/modify it if possible. you should // instead work with `pieces_vector` - PieceSlice slice; - std::string to_parse; + ffi::FormatArgsHandle handle; }; } // namespace Fmt diff --git a/gcc/rust/expand/rust-expand-format-args.cc b/gcc/rust/expand/rust-expand-format-args.cc index 52249cb17e3..276ffd58c50 100644 --- a/gcc/rust/expand/rust-expand-format-args.cc +++ b/gcc/rust/expand/rust-expand-format-args.cc @@ -28,10 +28,10 @@ expand_format_args (AST::FormatArgs &fmt) { switch (node.tag) { - case Fmt::Piece::Tag::String: + case Fmt::ffi::Piece::Tag::String: // rust_debug ("[ARTHUR]: %s", node.string._0.c_str ()); - case Fmt::Piece::Tag::NextArgument: + case Fmt::ffi::Piece::Tag::NextArgument: rust_debug ("[ARTHUR]: NextArgument"); break; } diff --git a/gcc/rust/resolve/rust-ast-resolve-base.cc b/gcc/rust/resolve/rust-ast-resolve-base.cc index 1939a2056e4..fa1be197941 100644 --- a/gcc/rust/resolve/rust-ast-resolve-base.cc +++ b/gcc/rust/resolve/rust-ast-resolve-base.cc @@ -648,10 +648,7 @@ ResolverBase::visit (AST::FunctionParam &) void ResolverBase::visit (AST::FormatArgs &fmt) -{ - rust_sorry_at (fmt.get_locus (), "%s:%u: unimplemented FormatArgs visitor", - __FILE__, __LINE__); -} +{} } // namespace Resolver } // namespace Rust diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs index 6bf78c4f2a8..84fac38e224 100644 --- a/libgrust/libformat_parser/src/lib.rs +++ b/libgrust/libformat_parser/src/lib.rs @@ -27,6 +27,23 @@ where mod ffi { use super::IntoFFI; + // FIXME: We need to ensure we deal with memory properly - whether it's owned by the C++ side or the Rust side + #[derive(Copy, Clone, PartialEq, Eq, Debug)] + #[repr(C)] + pub struct RustHamster { + ptr: *const u8, + len: usize, + } + + impl<'a> From<&'a str> for RustHamster { + fn from(s: &'a str) -> RustHamster { + RustHamster { + ptr: s.as_ptr(), + len: s.len(), + } + } + } + // Note: copied from rustc_span /// Range inside of a `Span` used for diagnostics when we only have access to relative positions. #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -81,7 +98,7 @@ mod ffi { #[repr(C)] pub enum Piece<'a> { /// A literal string which should directly be emitted - String(&'a str), + String(RustHamster), /// This describes that formatting should process the next argument (as /// specified inside) for emission. // do we need a pointer here? we're doing big cloning anyway @@ -201,7 +218,7 @@ mod ffi { impl<'a> From> for Piece<'a> { fn from(old: generic_format_parser::Piece<'a>) -> Self { match old { - generic_format_parser::Piece::String(x) => Piece::String(x), + generic_format_parser::Piece::String(x) => Piece::String(x.into()), generic_format_parser::Piece::NextArgument(x) => { // FIXME: This is problematic - if we do this, then we probably run into the issue that the Box // is freed at the end of the call to collect_pieces. if we just .leak() it, then we have @@ -336,53 +353,91 @@ pub struct PieceSlice { cap: usize, } -#[no_mangle] -pub extern "C" fn collect_pieces(input: *const libc::c_char, append_newline: bool) -> PieceSlice { - dbg!(input); +#[repr(C)] +// FIXME: we should probably use FFIString here +pub struct RustString { + ptr: *const u8, + len: usize, + cap: usize, +} +#[repr(C)] +pub struct FormatArgsHandle(PieceSlice, RustString); + +#[no_mangle] +pub extern "C" fn collect_pieces( + input: *const libc::c_char, + append_newline: bool, +) -> FormatArgsHandle { // FIXME: Add comment let str = unsafe { CStr::from_ptr(input) }; + let str = str.to_str().unwrap().to_owned(); + + // we are never going to free this string here (we leak it later on), so we can extend its lifetime + // to send it across an FFI boundary. + // FIXME: Is that correct? + let s = &str; + let s = unsafe { std::mem::transmute::<&'_ str, &'static str>(s) }; // FIXME: No unwrap - let pieces: Vec> = - rust::collect_pieces(str.to_str().unwrap(), None, None, append_newline) - .into_iter() - .map(Into::into) - .collect(); + let pieces: Vec> = rust::collect_pieces(s, None, None, append_newline) + .into_iter() + .map(Into::into) + .collect(); - println!("[ARTHUR]: debug: {:?}, {:?}", pieces.as_ptr(), pieces.len()); - - PieceSlice { + let piece_slice = PieceSlice { len: pieces.len(), cap: pieces.capacity(), base_ptr: pieces.leak().as_mut_ptr(), - } + }; + let rust_string = RustString { + len: str.len(), + cap: str.capacity(), + ptr: str.leak().as_ptr(), + }; + + FormatArgsHandle(piece_slice, rust_string) } #[no_mangle] -pub unsafe extern "C" fn destroy_pieces(PieceSlice { base_ptr, len, cap }: PieceSlice) { - eprintln!("[ARTHUR] destroying pieces: {base_ptr:?} {len} {cap}"); +pub unsafe extern "C" fn destroy_pieces(FormatArgsHandle(piece_slice, s): FormatArgsHandle) { + let PieceSlice { base_ptr, len, cap } = piece_slice; drop(Vec::from_raw_parts(base_ptr, len, cap)); + + let RustString { ptr, len, cap } = s; + drop(String::from_raw_parts(ptr as *mut u8, len, cap)); } #[no_mangle] pub extern "C" fn clone_pieces( - base_ptr: *mut ffi::Piece<'static>, - len: usize, - cap: usize, -) -> PieceSlice { - eprintln!("[ARTHUR] cloning pieces: {base_ptr:?} {len} {cap}"); + FormatArgsHandle(piece_slice, s): &FormatArgsHandle, +) -> FormatArgsHandle { + let PieceSlice { base_ptr, len, cap } = *piece_slice; let v = unsafe { Vec::from_raw_parts(base_ptr, len, cap) }; - let cloned_v = v.clone(); // FIXME: Add documentation v.leak(); - PieceSlice { + let piece_slice = PieceSlice { len: cloned_v.len(), cap: cloned_v.capacity(), - base_ptr: dbg!(cloned_v.leak().as_mut_ptr()), - } + base_ptr: cloned_v.leak().as_mut_ptr(), + }; + + let RustString { ptr, len, cap } = *s; + let s = unsafe { String::from_raw_parts(ptr as *mut u8, len, cap) }; + let cloned_s = s.clone(); + + // FIXME: Documentation + s.leak(); + + let rust_string = RustString { + len: cloned_s.len(), + cap: cloned_s.capacity(), + ptr: cloned_s.leak().as_ptr(), + }; + + FormatArgsHandle(piece_slice, rust_string) }