gccrs: format-args: Fix Rust interface and add input parsing.

gcc/rust/ChangeLog:

	* ast/rust-ast.cc: Make FormatArgs inherit from AST::Expr.
	* ast/rust-builtin-ast-nodes.h: Improve FormatArg* nodes and helpers.
	* ast/rust-fmt.cc (Pieces::collect): Fix interface to match FFI function.
	* ast/rust-fmt.h (collect_pieces): Likewise.
	(struct Pieces): Add append_newline parameter.
	* expand/rust-macro-builtins.cc: Add proper parsing of format_args
	input.
	* hir/rust-ast-lower-base.cc: Include diagnostics header.

libgrust/ChangeLog:

	* libformat_parser/src/lib.rs: Switch interface to use more parser
	parameters.
	* libformat_parser/src/bin.rs: Use new interface.
This commit is contained in:
Arthur Cohen 2024-02-16 18:27:22 +01:00
parent d9fa4153c8
commit 68cb878c1d
8 changed files with 416 additions and 78 deletions

View File

@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see
#include "rust-ast.h"
#include "optional.h"
#include "rust-builtin-ast-nodes.h"
#include "rust-system.h"
#include "rust-ast-full.h"
#include "rust-diagnostics.h"
@ -5054,6 +5055,56 @@ FormatArgs::accept_vis (ASTVisitor &vis)
vis.visit (*this);
}
// Textual description of this node. For now this is only the fixed node
// name; neither the template string nor the arguments are printed.
std::string
FormatArgs::as_string () const
{
  // FIXME(Arthur): Improve
  return std::string ("FormatArgs");
}
// Location of the `format_args!()` invocation this node was built from, as
// given to the constructor. Returning the stored location (instead of
// aborting) lets generic Expr handling query the locus of this node like
// that of any other expression.
location_t
FormatArgs::get_locus () const
{
  return loc;
}
// Expr interface: always reports false for a FormatArgs node.
// NOTE(review): other expression kinds without a block presumably return
// true here - confirm that `false` is the intended answer for this node.
bool
FormatArgs::is_expr_without_block () const
{
return false;
}
// Expr interface: flag this node as stripped by setting `marked_for_strip`.
// The flag is never reset by this class.
void
FormatArgs::mark_for_strip ()
{
marked_for_strip = true;
}
// Expr interface: report whether mark_for_strip () has been called on this
// node (the flag starts out false).
bool
FormatArgs::is_marked_for_strip () const
{
return marked_for_strip;
}
// Expr interface: not implemented for FormatArgs - calling this hits
// rust_unreachable (). The class stores no attribute vector to return.
// NOTE(review): presumably no pass is expected to ask a FormatArgs node for
// its outer attributes - confirm against the AST visitors.
std::vector<Attribute> &
FormatArgs::get_outer_attrs ()
{
rust_unreachable ();
}
// Expr interface: not implemented for FormatArgs - the attributes passed in
// are ignored and the call hits rust_unreachable ().
void FormatArgs::set_outer_attrs (std::vector<Attribute>)
{
rust_unreachable ();
}
// Expr interface: polymorphic copy of this node. The returned pointer is a
// freshly allocated FormatArgs built with the copy constructor; ownership
// passes to the caller.
Expr *
FormatArgs::clone_expr_impl () const
{
  return new FormatArgs (*this);
}
} // namespace AST
std::ostream &

View File

@ -59,17 +59,10 @@ namespace AST {
// └─┘ └─┘
// positions (could be names, numbers, empty, or `*`)
// FIXME: Merge with the class below this one?
class FormatArgumentKind
{
public:
Identifier &get_ident ()
{
rust_assert (kind == Kind::Captured || kind == Kind::Named);
return ident.value ();
}
private:
enum class Kind
{
Normal,
@ -77,17 +70,97 @@ private:
Captured,
} kind;
Identifier &get_ident ()
{
rust_assert (kind == Kind::Captured || kind == Kind::Named);
return ident.value ();
}
FormatArgumentKind (Kind kind, tl::optional<Identifier> ident)
: kind (kind), ident (ident)
{}
FormatArgumentKind (const FormatArgumentKind &other)
{
kind = other.kind;
ident = other.ident;
}
FormatArgumentKind operator= (const FormatArgumentKind &other)
{
kind = other.kind;
ident = other.ident;
return *this;
}
private:
tl::optional<Identifier> ident;
};
// One argument passed to `format_args!()`: the expression to format together
// with the kind of argument it is (Normal, Named or Captured) and, for the
// latter two, the identifier attached to it.
//
// Instances are created through the `normal`, `named` and `captured`
// factories. Copying deep-clones the owned expression; moving transfers it.
class FormatArgument
{
public:
  // Argument of kind Normal: an expression with no identifier attached.
  static FormatArgument normal (std::unique_ptr<Expr> expr)
  {
    return FormatArgument (FormatArgumentKind::Kind::Normal, tl::nullopt,
                           std::move (expr));
  }

  // Argument of kind Named: `ident` is stored alongside the expression.
  static FormatArgument named (Identifier ident, std::unique_ptr<Expr> expr)
  {
    return FormatArgument (FormatArgumentKind::Kind::Named, ident,
                           std::move (expr));
  }

  // Argument of kind Captured: `ident` is stored alongside the expression.
  static FormatArgument captured (Identifier ident, std::unique_ptr<Expr> expr)
  {
    return FormatArgument (FormatArgumentKind::Kind::Captured, ident,
                           std::move (expr));
  }

  // Deep copy: the expression of `other` is cloned, so `other.expr` must not
  // be null.
  FormatArgument (const FormatArgument &other)
    : kind (other.kind), expr (other.expr->clone_expr ())
  {}

  // Declaring the copy constructor suppresses the implicit move operations,
  // which would turn every `std::move` of an argument into a deep clone of
  // its expression. Bring them back explicitly.
  FormatArgument (FormatArgument &&) = default;
  FormatArgument &operator= (FormatArgument &&) = default;

  // Deep-copying assignment. Returns a reference to `*this` (returning by
  // value would clone the expression a second time for the discarded
  // result) and skips the clone entirely on self-assignment.
  FormatArgument &operator= (const FormatArgument &other)
  {
    if (this != &other)
      {
        kind = other.kind;
        expr = other.expr->clone_expr ();
      }

    return *this;
  }

private:
  FormatArgument (FormatArgumentKind::Kind kind, tl::optional<Identifier> ident,
                  std::unique_ptr<Expr> expr)
    : kind (FormatArgumentKind (kind, ident)), expr (std::move (expr))
  {}

  FormatArgumentKind kind;
  std::unique_ptr<Expr> expr;
};
// Ordered collection of the arguments given to a `format_args!()`
// invocation. Arguments are appended with push () and kept in insertion
// order.
class FormatArguments
{
public:
  FormatArguments () {}
  FormatArguments (FormatArguments &&) = default;

  // Element-wise copy: every argument of `other` is copy-constructed into
  // the new container.
  FormatArguments (const FormatArguments &other) : args (other.args) {}

  FormatArguments &operator= (const FormatArguments &other) = default;

  // Append `elt` at the end of the argument list.
  void push (FormatArgument &&elt) { args.emplace_back (std::move (elt)); }

private:
  std::vector<FormatArgument> args;
};
@ -100,7 +173,7 @@ class FormatArguments
// format_args!("result: {}", some_result))` -> `format_args!("heyo result: {}",
// some_result)`
// FIXME: Move to rust-macro.h
class FormatArgs : public Visitable
class FormatArgs : public Expr
{
public:
enum class Newline
@ -109,18 +182,56 @@ public:
No
};
FormatArgs (location_t loc, Fmt::PieceSlice template_str,
FormatArguments arguments)
FormatArgs (location_t loc, Fmt::Pieces &&template_str,
FormatArguments &&arguments)
: loc (loc), template_str (std::move (template_str)),
arguments (std::move (arguments))
{}
void accept_vis (AST::ASTVisitor &vis);
FormatArgs (FormatArgs &&other)
: loc (std::move (other.loc)),
template_str (std::move (other.template_str)),
arguments (std::move (other.arguments))
{
std::cerr << "[ARTHUR] moving FormatArgs" << std::endl;
}
// FIXME: This might be invalid - we are reusing the same memory allocated
// on the Rust side for `other`. This is probably valid as long as we only
// ever read that memory and never write to it.
FormatArgs (const FormatArgs &other)
: loc (other.loc), template_str (other.template_str),
arguments (other.arguments)
{
std::cerr << "[ARTHUR] copying FormatArgs" << std::endl;
}
// FormatArgs &operator= (const FormatArgs &other) = default;
// : template_str (other.template_str), arguments (other.arguments)
// {}
void accept_vis (AST::ASTVisitor &vis) override;
private:
location_t loc;
Fmt::PieceSlice template_str;
// FIXME: This probably needs to be a separate type - it is one in rustc's
// expansion of format_args!(). There is extra handling associated with it.
// we can maybe do that in rust-fmt.cc? in collect_pieces()? like do the
// transformation into something we can handle better
Fmt::Pieces template_str;
FormatArguments arguments;
bool marked_for_strip = false;
protected:
virtual std::string as_string () const override;
virtual location_t get_locus () const override;
virtual bool is_expr_without_block () const override;
virtual void mark_for_strip () override;
virtual bool is_marked_for_strip () const override;
virtual std::vector<Attribute> &get_outer_attrs () override;
virtual void set_outer_attrs (std::vector<Attribute>) override;
virtual Expr *clone_expr_impl () const override;
};
} // namespace AST

View File

@ -23,9 +23,9 @@ namespace Rust {
namespace Fmt {
Pieces
Pieces::collect (std::string &&to_parse)
Pieces::collect (std::string &&to_parse, bool append_newline)
{
auto piece_slice = collect_pieces (to_parse.c_str ());
auto piece_slice = collect_pieces (to_parse.c_str (), append_newline);
rust_debug ("[ARTHUR] %p, %lu", (const void *) piece_slice.base_ptr,
piece_slice.len);
@ -37,7 +37,39 @@ Pieces::collect (std::string &&to_parse)
return Pieces (piece_slice, std::move (to_parse));
}
Pieces::~Pieces () { destroy_pieces (slice); }
// Hand the slice back to the Rust side, which owns the allocation and is the
// only one able to free it.
Pieces::~Pieces ()
{
  destroy_pieces (slice);
}
// Copy constructor: asks the Rust side for an independent clone of the
// parsed pieces, so that each Pieces instance destroys its own slice, and
// copies the source string.
//
// NOTE(review): the pieces were parsed out of `other.to_parse`; if the
// cloned pieces still borrow from that buffer rather than from our own copy
// of the string, they must not outlive `other` - confirm on the Rust side.
Pieces::Pieces (const Pieces &other)
  : slice (
    clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap)),
    to_parse (other.to_parse)
{}
// Copy assignment: replaces our pieces with a clone of `other`'s and copies
// its source string.
//
// The slice we currently hold was allocated on the Rust side and nothing
// else references it, so it has to be released with destroy_pieces ()
// before being overwritten - otherwise it leaks. The clone is taken first so
// that we never hold a destroyed slice, and self-assignment is a no-op.
Pieces &
Pieces::operator= (const Pieces &other)
{
  if (this == &other)
    return *this;

  auto cloned
    = clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap);

  destroy_pieces (slice);
  slice = cloned;
  to_parse = other.to_parse;

  return *this;
}
// Move constructor. Only the source string is actually moved: the pieces are
// cloned rather than stolen, which leaves `other` with a slice of its own
// for its destructor to release.
//
// NOTE(review): the pieces were parsed out of `other.to_parse`; moving a
// std::string is not guaranteed to keep its character buffer at the same
// address, so check that the cloned pieces do not end up referencing a
// buffer that `other` frees.
Pieces::Pieces (Pieces &&other)
  : slice (
    clone_pieces (other.slice.base_ptr, other.slice.len, other.slice.cap)),
    to_parse (std::move (other.to_parse))
{}
} // namespace Fmt
} // namespace Rust

View File

@ -222,7 +222,7 @@ struct Piece
struct NextArgument_Body
{
const Argument *_0;
Argument _0;
};
Tag tag;
@ -243,7 +243,10 @@ struct PieceSlice
extern "C" {
PieceSlice
collect_pieces (const char *input);
collect_pieces (const char *input, bool append_newline);
PieceSlice
clone_pieces (const Piece *base_ptr, size_t len, size_t cap);
void destroy_pieces (PieceSlice);
@ -251,9 +254,21 @@ void destroy_pieces (PieceSlice);
struct Pieces
{
static Pieces collect (std::string &&to_parse);
static Pieces collect (std::string &&to_parse, bool append_newline);
~Pieces ();
Pieces (const Pieces &other);
Pieces &operator= (const Pieces &other);
Pieces (Pieces &&other);
// {
// slice = clone_pieces (&other.slice);
// to_parse = other.to_parse;
// return *this;
// }
private:
Pieces (PieceSlice slice, std::string &&to_parse)
: slice (slice), to_parse (std::move (to_parse))

View File

@ -16,6 +16,7 @@
// along with GCC; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#include "expected.h"
#include "libproc_macro_internal/tokenstream.h"
#include "rust-ast-full-decls.h"
#include "rust-builtin-ast-nodes.h"
@ -35,6 +36,7 @@
#include "rust-session-manager.h"
#include "rust-attribute-values.h"
#include "rust-fmt.h"
#include "rust-token.h"
namespace Rust {
@ -956,33 +958,115 @@ MacroBuiltin::stringify_handler (location_t invoc_locus,
return AST::Fragment ({node}, std::move (token));
}
// Result of parsing the input of a `format_args!()` invocation.
// Field order matters: the struct is brace-initialised positionally.
struct FormatArgsInput
{
// The format string expression; stays null when the first token of the
// invocation is not a string literal.
std::unique_ptr<AST::Expr> format_str;
// The arguments parsed after the format string, in source order.
AST::FormatArguments args;
// bool is_literal?
};
// Error type reported when the input of a `format_args!()` invocation cannot
// be parsed.
struct FormatArgsParseError
{
// Reason for the failure. MissingArguments is the only kind so far.
enum class Kind
{
MissingArguments
} kind;
};
// Parse the token tree of a `format_args!()` invocation into its format
// string and the arguments following it.
//
// The format string is only picked up when the first token is a string
// literal; otherwise `format_str` is left null. Every following argument is
// either `ident = expr` (stored as a named argument) or a plain expression
// (stored as a normal argument). A trailing comma before the closing
// delimiter is accepted.
//
// The error side of the return type is not used yet: an argument that fails
// to parse currently aborts through rust_unreachable ().
static tl::expected<FormatArgsInput, FormatArgsParseError>
format_args_parse_arguments (AST::MacroInvocData &invoc)
{
  MacroInvocLexer lex (invoc.get_delim_tok_tree ().to_token_stream ());
  Parser<MacroInvocLexer> parser (lex);

  // TODO: check if EOF - return that format_args!() requires at least one
  // argument

  auto args = AST::FormatArguments ();
  auto last_token_id = macro_end_token (invoc.get_delim_tok_tree (), parser);
  std::unique_ptr<AST::Expr> format_str = nullptr;

  // TODO: Handle the case where we're not parsing a string literal (macro
  // invocation for e.g.)
  if (parser.peek_current_token ()->get_id () == STRING_LITERAL)
    format_str = parser.parse_literal_expr ();

  // TODO: Allow implicit captures ONLY if the first arg is a string literal
  // and not a macro invocation

  // TODO: Still missing from the loop below:
  //   -> named argument: if that identifier is already present in our map,
  //      error out
  //   -> positional argument: if there have been named arguments before,
  //      error out (positional after named error)
  while (parser.peek_current_token ()->get_id () != last_token_id)
    {
      parser.skip_token (COMMA);

      // The comma we just skipped may have been a trailing one, which is
      // allowed - `format_args!("fmt", arg, arg2,)`. There is no expression
      // left to parse in that case.
      if (parser.peek_current_token ()->get_id () == last_token_id)
        break;

      if (parser.peek_current_token ()->get_id () == IDENTIFIER
          && parser.peek (1)->get_id () == EQUAL)
        {
          // FIXME: This is ugly - just add a parser.parse_identifier()?
          auto ident_tok = parser.peek_current_token ();
          auto ident = Identifier (ident_tok);

          parser.skip_token (IDENTIFIER);
          parser.skip_token (EQUAL);

          auto expr = parser.parse_expr ();

          // TODO: Handle graciously
          if (!expr)
            rust_unreachable ();

          args.push (AST::FormatArgument::named (ident, std::move (expr)));
        }
      else
        {
          auto expr = parser.parse_expr ();

          // TODO: Handle graciously
          if (!expr)
            rust_unreachable ();

          args.push (AST::FormatArgument::normal (std::move (expr)));
        }
    }

  return FormatArgsInput{std::move (format_str), std::move (args)};
}
tl::optional<AST::Fragment>
MacroBuiltin::format_args_handler (location_t invoc_locus,
AST::MacroInvocData &invoc,
AST::FormatArgs::Newline nl)
{
// Remove the delimiters from the macro invocation:
// the invoc data for `format_args!(fmt, arg1, arg2)` is `(fmt, arg1, arg2)`,
// so we pop the front and back to remove the parentheses (or curly brackets,
// or brackets)
auto tokens = invoc.get_delim_tok_tree ().to_token_stream ();
tokens.erase (tokens.begin ());
tokens.pop_back ();
auto input = format_args_parse_arguments (invoc);
auto append_newline = nl == AST::FormatArgs::Newline::Yes ? true : false;
auto fmt_arg
= parse_single_string_literal (append_newline ? BuiltinMacro::FormatArgsNl
: BuiltinMacro::FormatArgs,
invoc.get_delim_tok_tree (), invoc_locus,
invoc.get_expander ());
// auto fmt_arg
// // FIXME: this needs to be split up into a smaller function
// = parse_single_string_literal (append_newline ?
// BuiltinMacro::FormatArgsNl
// : BuiltinMacro::FormatArgs,
// invoc.get_delim_tok_tree (), invoc_locus,
// invoc.get_expander ());
if (!fmt_arg->is_literal ())
{
rust_sorry_at (
invoc_locus,
"cannot yet use eager macro invocations as format strings");
return AST::Fragment::create_empty ();
}
// if (!fmt_arg->is_literal ())
// {
// rust_sorry_at (
// invoc_locus,
// "cannot yet use eager macro invocations as format strings");
// return AST::Fragment::create_empty ();
// }
// FIXME: We need to handle this
// // if it is not a literal, it's an eager macro invocation - return it
@ -993,38 +1077,54 @@ MacroBuiltin::format_args_handler (location_t invoc_locus,
// token_tree.to_token_stream ());
// }
auto fmt_str = static_cast<AST::LiteralExpr &> (*fmt_arg.get ());
// auto fmt_str = static_cast<AST::LiteralExpr &> (*fmt_arg.get ());
// Switch on the format string to know if the string is raw or cooked
switch (fmt_str.get_lit_type ())
{
// case AST::Literal::RAW_STRING:
case AST::Literal::STRING:
break;
case AST::Literal::CHAR:
case AST::Literal::BYTE:
case AST::Literal::BYTE_STRING:
case AST::Literal::INT:
case AST::Literal::FLOAT:
case AST::Literal::BOOL:
case AST::Literal::ERROR:
rust_unreachable ();
}
// switch (fmt_str.get_lit_type ())
// {
// // case AST::Literal::RAW_STRING:
// case AST::Literal::STRING:
// break;
// case AST::Literal::CHAR:
// case AST::Literal::BYTE:
// case AST::Literal::BYTE_STRING:
// case AST::Literal::INT:
// case AST::Literal::FLOAT:
// case AST::Literal::BOOL:
// case AST::Literal::ERROR:
// rust_unreachable ();
// }
// Remove the delimiters from the macro invocation:
// the invoc data for `format_args!(fmt, arg1, arg2)` is `(fmt, arg1, arg2)`,
// so we pop the front and back to remove the parentheses (or curly brackets,
// or brackets)
auto tokens = invoc.get_delim_tok_tree ().to_token_stream ();
tokens.erase (tokens.begin ());
tokens.pop_back ();
std::stringstream stream;
for (const auto &tok : tokens)
stream << tok->as_string () << ' ';
rust_debug ("[ARTHUR]: `%s`", stream.str ().c_str ());
auto pieces = Fmt::Pieces::collect (stream.str ());
auto append_newline = nl == AST::FormatArgs::Newline::Yes ? true : false;
auto pieces = Fmt::Pieces::collect (stream.str (), append_newline);
// TODO:
// do the transformation into an AST::FormatArgs node
// return that
// expand it during lowering
return AST::Fragment::create_empty ();
// TODO: we now need to take care of creating `unfinished_literal`? this is
// for creating the `template`
auto fmt_args_node = new AST::FormatArgs (invoc_locus, std::move (pieces),
std::move (input->args));
auto node = std::unique_ptr<AST::Expr> (fmt_args_node);
auto single_node = AST::SingleASTNode (std::move (node));
return AST::Fragment ({std::move (single_node)},
invoc.get_delim_tok_tree ().to_token_stream ());
}
tl::optional<AST::Fragment>

View File

@ -22,6 +22,7 @@
#include "rust-ast-lower-extern.h"
#include "rust-ast.h"
#include "rust-attribute-values.h"
#include "rust-diagnostics.h"
#include "rust-item.h"
#include "rust-system.h"

View File

@ -2,6 +2,9 @@ use libformat_parser::rust;
fn main() {
dbg!(rust::collect_pieces(
std::env::args().nth(1).unwrap().as_str()
std::env::args().nth(1).unwrap().as_str(),
None,
None,
false
));
}

View File

@ -77,20 +77,15 @@ mod ffi {
/// A piece is a portion of the format string which represents the next part
/// to emit. These are emitted as a stream by the `Parser` class.
#[derive(Clone, Debug, PartialEq)]
#[derive(Debug, Clone, PartialEq)]
#[repr(C)]
pub enum Piece<'a> {
/// A literal string which should directly be emitted
String(&'a str),
/// This describes that formatting should process the next argument (as
/// specified inside) for emission.
NextArgument(*const Argument<'a>),
}
impl<'a> Drop for Piece<'a> {
fn drop(&mut self) {
println!("dropping Piece: {:?}", self)
}
// do we need a pointer here? we're doing big cloning anyway
NextArgument(Argument<'a>),
}
/// Representation of an argument specification.
@ -216,7 +211,7 @@ mod ffi {
let ptr = Box::leak(x);
let dst = Into::<Argument>::into(*ptr);
Piece::NextArgument(&dst as *const Argument)
Piece::NextArgument(dst)
}
}
}
@ -321,8 +316,13 @@ mod ffi {
pub mod rust {
use generic_format_parser::{ParseMode, Parser, Piece};
pub fn collect_pieces(input: &str) -> Vec<Piece<'_>> {
let parser = Parser::new(input, None, None, true, ParseMode::Format);
pub fn collect_pieces(
input: &str,
style: Option<usize>,
snippet: Option<String>,
append_newline: bool,
) -> Vec<Piece<'_>> {
let parser = Parser::new(input, style, snippet, append_newline, ParseMode::Format);
parser.into_iter().collect()
}
@ -337,16 +337,18 @@ pub struct PieceSlice {
}
#[no_mangle]
pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
pub extern "C" fn collect_pieces(input: *const libc::c_char, append_newline: bool) -> PieceSlice {
dbg!(input);
// FIXME: Add comment
let str = unsafe { CStr::from_ptr(input) };
dbg!(str);
// FIXME: No unwrap
let pieces: Vec<ffi::Piece<'_>> = rust::collect_pieces(str.to_str().unwrap())
.into_iter()
.map(Into::into)
.collect();
let pieces: Vec<ffi::Piece<'_>> =
rust::collect_pieces(str.to_str().unwrap(), None, None, append_newline)
.into_iter()
.map(Into::into)
.collect();
println!("[ARTHUR]: debug: {:?}, {:?}", pieces.as_ptr(), pieces.len());
@ -358,6 +360,29 @@ pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
}
#[no_mangle]
pub extern "C" fn destroy_pieces(PieceSlice { base_ptr, len, cap }: PieceSlice) {
let _ = unsafe { Vec::from_raw_parts(base_ptr, len, cap) };
pub unsafe extern "C" fn destroy_pieces(PieceSlice { base_ptr, len, cap }: PieceSlice) {
eprintln!("[ARTHUR] destroying pieces: {base_ptr:?} {len} {cap}");
drop(Vec::from_raw_parts(base_ptr, len, cap));
}
/// Deep-copies the pieces described by `base_ptr` and `len` into a freshly
/// allocated buffer and hands that buffer over to the caller.
///
/// The input buffer is only read: it is neither freed nor modified, and the
/// caller keeps ownership of it. The returned [`PieceSlice`] is an
/// independent allocation which must eventually be released with
/// `destroy_pieces`.
///
/// A null `base_ptr` or a `len` of zero yields an empty slice.
///
/// # Safety
///
/// Although not marked `unsafe` (to keep the exported signature unchanged),
/// this function dereferences `base_ptr`: when it is non-null, it must point
/// to `len` consecutive, initialized `Piece` values that stay valid for the
/// duration of the call.
#[no_mangle]
pub extern "C" fn clone_pieces(
    base_ptr: *mut ffi::Piece<'static>,
    len: usize,
    cap: usize,
) -> PieceSlice {
    // The capacity only matters to whoever frees the original buffer; it is
    // not needed to read from it.
    let _ = cap;

    let cloned_v = if base_ptr.is_null() || len == 0 {
        Vec::new()
    } else {
        // SAFETY: `base_ptr` is non-null and, per this function's contract,
        // points to `len` initialized pieces that outlive this call. We only
        // borrow them, so ownership of the original allocation never moves
        // to this side, even if cloning panics.
        unsafe { std::slice::from_raw_parts(base_ptr, len) }.to_vec()
    };

    // Leak the clone on purpose: its memory now belongs to the caller, which
    // gives it back through `destroy_pieces`.
    PieceSlice {
        len: cloned_v.len(),
        cap: cloned_v.capacity(),
        base_ptr: cloned_v.leak().as_mut_ptr(),
    }
}