gccrs: format-args: Start storing string in Rust memory

gcc/rust/ChangeLog:

	* ast/rust-fmt.cc (ffi::RustHamster::to_string): New.
	(Pieces::collect): Adapt to use new handle API.
	(Pieces::~Pieces): Likewise.
	(Pieces::Pieces): Likewise.
	(Pieces::operator=): Likewise.
	* ast/rust-fmt.h (struct RustString): Add members.
	(struct FormatArgsHandle): New.
	(clone_pieces): Adapt for new FFI API.
	(destroy_pieces): Likewise.
	(struct Pieces): Store new FormatArgsHandle type.
	* expand/rust-expand-format-args.cc (expand_format_args): Use proper
	namespace.
	* resolve/rust-ast-resolve-base.cc (ResolverBase::visit): FormatArgs
	nodes are already resolved, so do nothing.

libgrust/ChangeLog:

	* libformat_parser/src/lib.rs: Use new Handle struct and expose it.
This commit is contained in:
Arthur Cohen 2024-02-22 16:26:40 +01:00
parent 5ed71ad2a2
commit 9b540c4299
5 changed files with 134 additions and 60 deletions

View File

@ -22,44 +22,48 @@
namespace Rust { namespace Rust {
namespace Fmt { namespace Fmt {
// RustHamster::to_string (new side): builds a std::string from the `len`
// bytes starting at `ptr`.
//
// Pieces::collect: passes `to_parse` to the Rust parser via collect_pieces (),
// copies the returned piece slice [base_ptr, base_ptr + len) into a
// std::vector, and wraps the result in a `Pieces`. The old side also moved
// `to_parse` into the result; the new side takes it by const reference and
// stores the whole handle returned by the FFI call instead.
Pieces std::string
Pieces::collect (std::string &&to_parse, bool append_newline) ffi::RustHamster::to_string () const
{ {
auto piece_slice = collect_pieces (to_parse.c_str (), append_newline); return std::string (ptr, len);
}
Pieces
Pieces::collect (const std::string &to_parse, bool append_newline)
{
auto handle = ffi::collect_pieces (to_parse.c_str (), append_newline);
// this performs multiple copies, can we avoid them maybe? // this performs multiple copies, can we avoid them maybe?
// TODO: Instead of just creating a vec of, basically, `ffi::Piece`s, we // TODO: Instead of just creating a vec of, basically, `ffi::Piece`s, we
// should transform them into the proper C++ type which we can work with. so // should transform them into the proper C++ type which we can work with. so
// transform all the strings into C++ strings? all the Option<T> into // transform all the strings into C++ strings? all the Option<T> into
// tl::optional<T>? // tl::optional<T>?
auto pieces = std::vector<Piece> (piece_slice.base_ptr, auto pieces_vector = std::vector<ffi::Piece> (handle.piece_slice.base_ptr,
piece_slice.base_ptr + piece_slice.len); handle.piece_slice.base_ptr
+ handle.piece_slice.len);
return Pieces (std::move (pieces), piece_slice, std::move (to_parse)); return Pieces (handle, std::move (pieces_vector));
} }
// Releases the FFI-side data by passing the stored slice (old side) or handle
// (new side) to destroy_pieces ().
Pieces::~Pieces () { destroy_pieces (slice); } Pieces::~Pieces () { ffi::destroy_pieces (handle); }
// Copy constructor: copies `pieces_vector` element-wise from `other` and
// obtains an independent FFI allocation through clone_pieces ().
// NOTE(review): the copied vector elements are copies of `other`'s pieces,
// not of the freshly cloned slice - confirm that any pointers they hold stay
// valid once `other` is destroyed.
Pieces::Pieces (const Pieces &other) Pieces::Pieces (const Pieces &other) : pieces_vector (other.pieces_vector)
: pieces_vector (other.pieces_vector), to_parse (other.to_parse)
{ {
slice = clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap); handle = ffi::clone_pieces (other.handle);
} }
// Copy assignment: replaces the stored slice/handle with a clone of
// `other`'s. The old side then copied `to_parse`; the new side copies
// `pieces_vector` instead.
// NOTE(review): the previous slice/handle is overwritten without a call to
// destroy_pieces (), and there is no self-assignment check - looks like the
// old allocation is never released on this path; confirm.
Pieces & Pieces &
Pieces::operator= (const Pieces &other) Pieces::operator= (const Pieces &other)
{ {
slice = clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap); handle = ffi::clone_pieces (other.handle);
to_parse = other.to_parse; pieces_vector = other.pieces_vector;
return *this; return *this;
} }
// Move constructor: moves `pieces_vector` out of `other`, but the FFI
// slice/handle is cloned rather than transferred, so `other` still holds its
// own slice/handle afterwards.
// NOTE(review): a true move would avoid the extra clone, but would then need
// `other` to be left in a state its destructor can handle - confirm intent.
Pieces::Pieces (Pieces &&other) Pieces::Pieces (Pieces &&other)
: pieces_vector (std::move (other.pieces_vector)), : pieces_vector (std::move (other.pieces_vector)),
slice ( handle (clone_pieces (other.handle))
clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap)),
to_parse (std::move (other.to_parse))
{} {}
} // namespace Fmt } // namespace Fmt

View File

@ -20,15 +20,21 @@
#define RUST_FMT_H #define RUST_FMT_H
#include "rust-system.h" #include "rust-system.h"
#include <cstddef>
// FIXME: How to encode Option? // FIXME: How to encode Option?
namespace Rust { namespace Rust {
namespace Fmt { namespace Fmt {
namespace ffi {
// Pointer-plus-length view of string data. The old side was an empty
// placeholder; the new side adds the `ptr`/`len` members and a `to_string ()`
// conversion to std::string.
struct RustHamster struct RustHamster
{ {
// hehe const char *ptr;
size_t len;
std::string to_string () const;
}; };
/// Enum of alignments which are supported. /// Enum of alignments which are supported.
@ -240,21 +246,36 @@ struct PieceSlice
size_t cap; size_t cap;
}; };
// Raw parts (pointer, length, capacity) of a string passed across the FFI
// boundary.
struct RustString
{
const unsigned char *ptr;
size_t len;
size_t cap;
};
// Bundle returned by collect_pieces () / clone_pieces () and consumed by
// destroy_pieces (): the slice of parsed pieces together with the raw parts
// of a string.
struct FormatArgsHandle
{
PieceSlice piece_slice;
RustString rust_string;
};
// C-linkage entry points for collecting, cloning and destroying pieces. The
// new side exchanges a whole FormatArgsHandle instead of a bare PieceSlice:
// clone_pieces () now takes the handle by const reference, and
// destroy_pieces () takes it by value.
extern "C" { extern "C" {
PieceSlice FormatArgsHandle
collect_pieces (const char *input, bool append_newline); collect_pieces (const char *input, bool append_newline);
PieceSlice FormatArgsHandle
clone_pieces (const Piece *base_ptr, size_t len, size_t cap); clone_pieces (const FormatArgsHandle &);
void destroy_pieces (PieceSlice); void destroy_pieces (FormatArgsHandle);
} // extern "C" } // extern "C"
} // namespace ffi
struct Pieces struct Pieces
{ {
static Pieces collect (std::string &&to_parse, bool append_newline); static Pieces collect (const std::string &to_parse, bool append_newline);
~Pieces (); ~Pieces ();
Pieces (const Pieces &other); Pieces (const Pieces &other);
@ -262,7 +283,7 @@ struct Pieces
Pieces (Pieces &&other); Pieces (Pieces &&other);
const std::vector<Piece> &get_pieces () const { return pieces_vector; } const std::vector<ffi::Piece> &get_pieces () const { return pieces_vector; }
// { // {
// slice = clone_pieces (&other.slice); // slice = clone_pieces (&other.slice);
@ -272,19 +293,16 @@ struct Pieces
// } // }
private: private:
Pieces (std::vector<Piece> &&pieces_vector, PieceSlice slice, Pieces (ffi::FormatArgsHandle handle, std::vector<ffi::Piece> &&pieces_vector)
std::string &&to_parse) : pieces_vector (std::move (pieces_vector)), handle (handle)
: pieces_vector (std::move (pieces_vector)), slice (slice),
to_parse (std::move (to_parse))
{} {}
std::vector<Piece> pieces_vector; std::vector<ffi::Piece> pieces_vector;
// this memory is held for FFI reasons - it needs to be released and cloned // this memory is held for FFI reasons - it needs to be released and cloned
// precisely, so try to not access it/modify it if possible. you should // precisely, so try to not access it/modify it if possible. you should
// instead work with `pieces_vector` // instead work with `pieces_vector`
PieceSlice slice; ffi::FormatArgsHandle handle;
std::string to_parse;
}; };
} // namespace Fmt } // namespace Fmt

View File

@ -28,10 +28,10 @@ expand_format_args (AST::FormatArgs &fmt)
{ {
switch (node.tag) switch (node.tag)
{ {
case Fmt::Piece::Tag::String: case Fmt::ffi::Piece::Tag::String:
// rust_debug ("[ARTHUR]: %s", node.string._0.c_str ()); // rust_debug ("[ARTHUR]: %s", node.string._0.c_str ());
case Fmt::Piece::Tag::NextArgument: case Fmt::ffi::Piece::Tag::NextArgument:
rust_debug ("[ARTHUR]: NextArgument"); rust_debug ("[ARTHUR]: NextArgument");
break; break;
} }

View File

@ -648,10 +648,7 @@ ResolverBase::visit (AST::FunctionParam &)
// Resolver visitor for FormatArgs nodes. The old side reported an
// "unimplemented" sorry diagnostic at the node's location; the new side has an
// empty body, so visiting such a node does nothing.
// NOTE(review): `fmt` is no longer used on the new side but keeps its name.
void void
ResolverBase::visit (AST::FormatArgs &fmt) ResolverBase::visit (AST::FormatArgs &fmt)
{ {}
rust_sorry_at (fmt.get_locus (), "%s:%u: unimplemented FormatArgs visitor",
__FILE__, __LINE__);
}
} // namespace Resolver } // namespace Resolver
} // namespace Rust } // namespace Rust

View File

@ -27,6 +27,23 @@ where
mod ffi { mod ffi {
use super::IntoFFI; use super::IntoFFI;
// FIXME: We need to ensure we deal with memory properly - whether it's owned by the C++ side or the Rust side
/// FFI-friendly view of string data: a raw pointer to the first byte plus the
/// length in bytes.
///
/// The struct carries no lifetime, so nothing ties it to the `&str` it was
/// created from; whoever holds it must keep the backing memory alive.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub struct RustHamster {
    /// Address of the first byte of the string data.
    ptr: *const u8,
    /// Number of bytes of string data starting at `ptr`.
    len: usize,
}

impl From<&str> for RustHamster {
    /// Records the address and byte length of `text`; no data is copied.
    fn from(text: &str) -> Self {
        let (ptr, len) = (text.as_ptr(), text.len());
        Self { ptr, len }
    }
}
// Note: copied from rustc_span // Note: copied from rustc_span
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions. /// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
#[derive(Copy, Clone, PartialEq, Eq, Debug)] #[derive(Copy, Clone, PartialEq, Eq, Debug)]
@ -81,7 +98,7 @@ mod ffi {
#[repr(C)] #[repr(C)]
pub enum Piece<'a> { pub enum Piece<'a> {
/// A literal string which should directly be emitted /// A literal string which should directly be emitted
String(&'a str), String(RustHamster),
/// This describes that formatting should process the next argument (as /// This describes that formatting should process the next argument (as
/// specified inside) for emission. /// specified inside) for emission.
// do we need a pointer here? we're doing big cloning anyway // do we need a pointer here? we're doing big cloning anyway
@ -201,7 +218,7 @@ mod ffi {
impl<'a> From<generic_format_parser::Piece<'a>> for Piece<'a> { impl<'a> From<generic_format_parser::Piece<'a>> for Piece<'a> {
fn from(old: generic_format_parser::Piece<'a>) -> Self { fn from(old: generic_format_parser::Piece<'a>) -> Self {
match old { match old {
generic_format_parser::Piece::String(x) => Piece::String(x), generic_format_parser::Piece::String(x) => Piece::String(x.into()),
generic_format_parser::Piece::NextArgument(x) => { generic_format_parser::Piece::NextArgument(x) => {
// FIXME: This is problematic - if we do this, then we probably run into the issue that the Box // FIXME: This is problematic - if we do this, then we probably run into the issue that the Box
// is freed at the end of the call to collect_pieces. if we just .leak() it, then we have // is freed at the end of the call to collect_pieces. if we just .leak() it, then we have
@ -336,53 +353,91 @@ pub struct PieceSlice {
cap: usize, cap: usize,
} }
// `RustString` holds the raw parts (pointer, length, capacity) of a `String`;
// `FormatArgsHandle` pairs the slice of parsed pieces with such a string.
//
// `collect_pieces` parses the NUL-terminated `input` into format pieces and
// returns raw parts for the caller to hold. On the new side the input is
// first copied into an owned `String`, the pieces are parsed from that copy,
// and both the pieces vector and the string are leaked into the returned
// handle. The `unwrap()` panics if `input` is not valid UTF-8.
#[no_mangle] #[repr(C)]
pub extern "C" fn collect_pieces(input: *const libc::c_char, append_newline: bool) -> PieceSlice { // FIXME: we should probably use FFIString here
dbg!(input); pub struct RustString {
ptr: *const u8,
len: usize,
cap: usize,
}
#[repr(C)]
pub struct FormatArgsHandle(PieceSlice, RustString);
#[no_mangle]
pub extern "C" fn collect_pieces(
input: *const libc::c_char,
append_newline: bool,
) -> FormatArgsHandle {
// FIXME: Add comment // FIXME: Add comment
let str = unsafe { CStr::from_ptr(input) }; let str = unsafe { CStr::from_ptr(input) };
let str = str.to_str().unwrap().to_owned();
// we are never going to free this string here (we leak it later on), so we can extend its lifetime
// to send it across an FFI boundary.
// FIXME: Is that correct?
let s = &str;
let s = unsafe { std::mem::transmute::<&'_ str, &'static str>(s) };
// FIXME: No unwrap // FIXME: No unwrap
let pieces: Vec<ffi::Piece<'_>> = let pieces: Vec<ffi::Piece<'_>> = rust::collect_pieces(s, None, None, append_newline)
rust::collect_pieces(str.to_str().unwrap(), None, None, append_newline) .into_iter()
.into_iter() .map(Into::into)
.map(Into::into) .collect();
.collect();
println!("[ARTHUR]: debug: {:?}, {:?}", pieces.as_ptr(), pieces.len()); let piece_slice = PieceSlice {
PieceSlice {
// `len` and `cap` are read before `base_ptr` because `leak()` consumes the Vec.
len: pieces.len(), len: pieces.len(),
cap: pieces.capacity(), cap: pieces.capacity(),
base_ptr: pieces.leak().as_mut_ptr(), base_ptr: pieces.leak().as_mut_ptr(),
} };
// Same ordering for the string: `leak()` consumes it, so it comes last.
let rust_string = RustString {
len: str.len(),
cap: str.capacity(),
ptr: str.leak().as_ptr(),
};
FormatArgsHandle(piece_slice, rust_string)
} }
// Frees the memory described by the argument by rebuilding the owning
// container(s) from raw parts and dropping them: the pieces `Vec` on both
// sides, plus the `String` on the new side.
// NOTE(review): presumably the raw parts must be exactly those handed out by
// `collect_pieces` / `clone_pieces` and must not be destroyed twice - confirm
// against the callers.
#[no_mangle]
pub unsafe extern "C" fn destroy_pieces(PieceSlice { base_ptr, len, cap }: PieceSlice) { pub unsafe extern "C" fn destroy_pieces(FormatArgsHandle(piece_slice, s): FormatArgsHandle) {
eprintln!("[ARTHUR] destroying pieces: {base_ptr:?} {len} {cap}"); let PieceSlice { base_ptr, len, cap } = piece_slice;
drop(Vec::from_raw_parts(base_ptr, len, cap)); drop(Vec::from_raw_parts(base_ptr, len, cap));
let RustString { ptr, len, cap } = s;
drop(String::from_raw_parts(ptr as *mut u8, len, cap));
}
// Produces an independent copy of the data described by the argument and
// returns the raw parts of the copy. Each container is temporarily rebuilt
// from its raw parts so it can be cloned, then leaked again so the original
// allocation is not freed when the temporary goes out of scope.
// NOTE(review): on the new side the pieces are cloned separately from the
// string; if a piece stores a raw pointer into the original string, the
// cloned pieces would still point at the original allocation rather than at
// `cloned_s` - confirm.
#[no_mangle]
pub extern "C" fn clone_pieces( pub extern "C" fn clone_pieces(
base_ptr: *mut ffi::Piece<'static>, FormatArgsHandle(piece_slice, s): &FormatArgsHandle,
len: usize, ) -> FormatArgsHandle {
cap: usize, let PieceSlice { base_ptr, len, cap } = *piece_slice;
) -> PieceSlice {
eprintln!("[ARTHUR] cloning pieces: {base_ptr:?} {len} {cap}");
let v = unsafe { Vec::from_raw_parts(base_ptr, len, cap) }; let v = unsafe { Vec::from_raw_parts(base_ptr, len, cap) };
let cloned_v = v.clone(); let cloned_v = v.clone();
// FIXME: Add documentation // FIXME: Add documentation
v.leak(); v.leak();
PieceSlice { let piece_slice = PieceSlice {
len: cloned_v.len(), len: cloned_v.len(),
cap: cloned_v.capacity(), cap: cloned_v.capacity(),
base_ptr: dbg!(cloned_v.leak().as_mut_ptr()), base_ptr: cloned_v.leak().as_mut_ptr(),
} };
let RustString { ptr, len, cap } = *s;
let s = unsafe { String::from_raw_parts(ptr as *mut u8, len, cap) };
let cloned_s = s.clone();
// FIXME: Documentation
s.leak();
let rust_string = RustString {
len: cloned_s.len(),
cap: cloned_s.capacity(),
ptr: cloned_s.leak().as_ptr(),
};
FormatArgsHandle(piece_slice, rust_string)
} }