From d6332141d714888b53c1973f2d89ee24e422cb62 Mon Sep 17 00:00:00 2001 From: Arthur Cohen Date: Mon, 29 Jan 2024 22:06:39 +0100 Subject: [PATCH] gccrs: libformat_parser: Update header and remove old interface gcc/rust/ChangeLog: * ast/rust-fmt.cc (Pieces::collect): Use new Pieces API. * ast/rust-fmt.h: Update interface with new FFI bindings. libgrust/ChangeLog: * libformat_parser/src/lib.rs: Add IntoFFI trait. * libformat_parser/libformat-parser.h: Removed. --- gcc/rust/ast/rust-fmt.cc | 10 +- gcc/rust/ast/rust-fmt.h | 211 ++++++++++++----- libgrust/libformat_parser/libformat-parser.h | 224 ------------------- libgrust/libformat_parser/src/lib.rs | 56 +++-- 4 files changed, 206 insertions(+), 295 deletions(-) delete mode 100644 libgrust/libformat_parser/libformat-parser.h diff --git a/gcc/rust/ast/rust-fmt.cc b/gcc/rust/ast/rust-fmt.cc index 559b1c8b579..a7c4341c52d 100644 --- a/gcc/rust/ast/rust-fmt.cc +++ b/gcc/rust/ast/rust-fmt.cc @@ -17,6 +17,7 @@ // . #include "rust-fmt.h" +#include "rust-diagnostics.h" namespace Rust { namespace Fmt { @@ -26,13 +27,12 @@ Pieces::collect (const std::string &to_parse) { auto piece_slice = collect_pieces (to_parse.c_str ()); - rust_debug ("[ARTHUR] %p, %lu", (void *) piece_slice.ptr, piece_slice.len); + rust_debug ("[ARTHUR] %p, %lu", (const void *) piece_slice.base_ptr, + piece_slice.len); // this performs multiple copies, can we avoid them maybe? - auto pieces - = std::vector (piece_slice.ptr, piece_slice.ptr + piece_slice.len); - - rust_debug ("[ARTHUR] %p, %lu", (void *) pieces.data (), pieces.size ()); + // auto pieces = std::vector (piece_slice.base_ptr, + // piece_slice.base_ptr + piece_slice.len); return Pieces{}; } diff --git a/gcc/rust/ast/rust-fmt.h b/gcc/rust/ast/rust-fmt.h index 27c1c3625d3..7ec9a2a199d 100644 --- a/gcc/rust/ast/rust-fmt.h +++ b/gcc/rust/ast/rust-fmt.h @@ -1,4 +1,4 @@ -// Copyright (C) 2020-2023 Free Software Foundation, Inc. +// Copyright (C) 2023-2024 Free Software Foundation, Inc. // This file is part of GCC. @@ -19,9 +19,10 @@ #ifndef RUST_FMT_H #define RUST_FMT_H -#include "rust-diagnostics.h" #include "rust-system.h" +// FIXME: How to encode Option? + namespace Rust { namespace Fmt { @@ -30,116 +31,220 @@ struct RustHamster // hehe }; -struct InnerSpan +/// Enum of alignments which are supported. +enum class Alignment { + /// The value will be aligned to the left. + AlignLeft, + /// The value will be aligned to the right. + AlignRight, + /// The value will be aligned in the center. + AlignCenter, + /// The value will take on a default alignment. + AlignUnknown, }; -struct Count +/// Enum for the debug hex flags. +enum class DebugHex { - enum class Kind - { - Is, - IsName, - IsParam, - IsStar, - Implied - } kind; - - union - { - size_t is; - std::pair is_name; - size_t is_param; - size_t is_star; - } data; + /// The `x` flag in `{:x?}`. + Lower, + /// The `X` flag in `{:X?}`. + Upper, }; -struct DebugHex +/// Enum for the sign flags. +enum class Sign { + /// The `+` flag. + Plus, + /// The `-` flag. + Minus, }; -struct Sign -{ -}; - -struct Alignment -{ -}; - -struct RustString -{ - // hehe -}; - +/// Enum describing where an argument for a format can be located. struct Position { + enum class Tag + { + /// The argument is implied to be located at an index + ArgumentImplicitlyIs, + /// The argument is located at a specific index given in the format, + ArgumentIs, + /// The argument has a name. + ArgumentNamed, + }; + + struct ArgumentImplicitlyIs_Body + { + size_t _0; + }; + + struct ArgumentIs_Body + { + size_t _0; + }; + + struct ArgumentNamed_Body + { + RustHamster _0; + }; + + Tag tag; + union + { + ArgumentImplicitlyIs_Body argument_implicitly_is; + ArgumentIs_Body argument_is; + ArgumentNamed_Body argument_named; + }; }; +/// Range inside of a `Span` used for diagnostics when we only have access to +/// relative positions. +struct InnerSpan +{ + size_t start; + size_t end; +}; + +/// A count is used for the precision and width parameters of an integer, and +/// can reference either an argument or a literal integer. +struct Count +{ + enum class Tag + { + /// The count is specified explicitly. + CountIs, + /// The count is specified by the argument with the given name. + CountIsName, + /// The count is specified by the argument at the given index. + CountIsParam, + /// The count is specified by a star (like in `{:.*}`) that refers to the + /// argument at the given index. + CountIsStar, + /// The count is implied and cannot be explicitly specified. + CountImplied, + }; + + struct CountIs_Body + { + size_t _0; + }; + + struct CountIsName_Body + { + RustHamster _0; + InnerSpan _1; + }; + + struct CountIsParam_Body + { + size_t _0; + }; + + struct CountIsStar_Body + { + size_t _0; + }; + + Tag tag; + union + { + CountIs_Body count_is; + CountIsName_Body count_is_name; + CountIsParam_Body count_is_param; + CountIsStar_Body count_is_star; + }; +}; + +/// Specification for the formatting of an argument in the format string. struct FormatSpec { /// Optionally specified character to fill alignment with. - tl::optional fill; + const uint32_t *fill; /// Span of the optionally specified fill character. - tl::optional fill_span; + const InnerSpan *fill_span; /// Optionally specified alignment. Alignment align; /// The `+` or `-` flag. - tl::optional sign; + const Sign *sign; /// The `#` flag. bool alternate; /// The `0` flag. bool zero_pad; /// The `x` or `X` flag. (Only for `Debug`.) - tl::optional debug_hex; + const DebugHex *debug_hex; /// The integer precision to use. Count precision; /// The span of the precision formatting flag (for diagnostics). - tl::optional precision_span; + const InnerSpan *precision_span; /// The string width requested for the resulting format. Count width; /// The span of the width formatting flag (for diagnostics). - tl::optional width_span; + const InnerSpan *width_span; /// The descriptor string representing the name of the format desired for /// this argument, this can be empty or any number of characters, although /// it is required to be one word. RustHamster ty; - // &'a str ty; /// The span of the descriptor string (for diagnostics). - tl::optional ty_span; + const InnerSpan *ty_span; }; +/// Representation of an argument specification. struct Argument { + /// Where to find this argument Position position; - InnerSpan inner_span; + /// The span of the position indicator. Includes any whitespace in implicit + /// positions (`{ }`). + InnerSpan position_span; + /// How to format the argument FormatSpec format; }; +/// A piece is a portion of the format string which represents the next part +/// to emit. These are emitted as a stream by the `Parser` class. struct Piece { - enum class Kind + enum class Tag { + /// A literal string which should directly be emitted String, - NextArgument - } kind; + /// This describes that formatting should process the next argument (as + /// specified inside) for emission. + NextArgument, + }; + struct String_Body + { + RustHamster _0; + }; + + struct NextArgument_Body + { + const Argument *_0; + }; + + Tag tag; union { - RustString string; - Argument *next_argument; - } data; + String_Body string; + NextArgument_Body next_argument; + }; }; struct PieceSlice { - Piece *ptr; + const Piece *base_ptr; size_t len; }; extern "C" { + PieceSlice -collect_pieces (const char *); -} +collect_pieces (const char *input); + +} // extern "C" struct Pieces { @@ -149,4 +254,4 @@ struct Pieces } // namespace Fmt } // namespace Rust -#endif // ! RUST_FMT_H +#endif // !RUST_FMT_H diff --git a/libgrust/libformat_parser/libformat-parser.h b/libgrust/libformat_parser/libformat-parser.h deleted file mode 100644 index a4bc8a75494..00000000000 --- a/libgrust/libformat_parser/libformat-parser.h +++ /dev/null @@ -1,224 +0,0 @@ -#include -#include -#include -#include -#include - -/// Enum of alignments which are supported. -enum class Alignment -{ - /// The value will be aligned to the left. - AlignLeft, - /// The value will be aligned to the right. - AlignRight, - /// The value will be aligned in the center. - AlignCenter, - /// The value will take on a default alignment. - AlignUnknown, -}; - -/// Enum for the debug hex flags. -enum class DebugHex -{ - /// The `x` flag in `{:x?}`. - Lower, - /// The `X` flag in `{:X?}`. - Upper, -}; - -/// Enum for the sign flags. -enum class Sign -{ - /// The `+` flag. - Plus, - /// The `-` flag. - Minus, -}; - -template struct Box; - -template struct Option; - -/// Enum describing where an argument for a format can be located. -struct Position -{ - enum class Tag - { - /// The argument is implied to be located at an index - ArgumentImplicitlyIs, - /// The argument is located at a specific index given in the format, - ArgumentIs, - /// The argument has a name. - ArgumentNamed, - }; - - struct ArgumentImplicitlyIs_Body - { - uintptr_t _0; - }; - - struct ArgumentIs_Body - { - uintptr_t _0; - }; - - struct ArgumentNamed_Body - { - const str *_0; - }; - - Tag tag; - union - { - ArgumentImplicitlyIs_Body argument_implicitly_is; - ArgumentIs_Body argument_is; - ArgumentNamed_Body argument_named; - }; -}; - -/// Range inside of a `Span` used for diagnostics when we only have access to -/// relative positions. -struct InnerSpan -{ - uintptr_t start; - uintptr_t end; -}; - -/// A count is used for the precision and width parameters of an integer, and -/// can reference either an argument or a literal integer. -struct Count -{ - enum class Tag - { - /// The count is specified explicitly. - CountIs, - /// The count is specified by the argument with the given name. - CountIsName, - /// The count is specified by the argument at the given index. - CountIsParam, - /// The count is specified by a star (like in `{:.*}`) that refers to the - /// argument at the given index. - CountIsStar, - /// The count is implied and cannot be explicitly specified. - CountImplied, - }; - - struct CountIs_Body - { - uintptr_t _0; - }; - - struct CountIsName_Body - { - const str *_0; - InnerSpan _1; - }; - - struct CountIsParam_Body - { - uintptr_t _0; - }; - - struct CountIsStar_Body - { - uintptr_t _0; - }; - - Tag tag; - union - { - CountIs_Body count_is; - CountIsName_Body count_is_name; - CountIsParam_Body count_is_param; - CountIsStar_Body count_is_star; - }; -}; - -/// Specification for the formatting of an argument in the format string. -struct FormatSpec -{ - /// Optionally specified character to fill alignment with. - Option fill; - /// Span of the optionally specified fill character. - Option fill_span; - /// Optionally specified alignment. - Alignment align; - /// The `+` or `-` flag. - Option sign; - /// The `#` flag. - bool alternate; - /// The `0` flag. - bool zero_pad; - /// The `x` or `X` flag. (Only for `Debug`.) - Option debug_hex; - /// The integer precision to use. - Count precision; - /// The span of the precision formatting flag (for diagnostics). - Option precision_span; - /// The string width requested for the resulting format. - Count width; - /// The span of the width formatting flag (for diagnostics). - Option width_span; - /// The descriptor string representing the name of the format desired for - /// this argument, this can be empty or any number of characters, although - /// it is required to be one word. - const str *ty; - /// The span of the descriptor string (for diagnostics). - Option ty_span; -}; - -/// Representation of an argument specification. -struct Argument -{ - /// Where to find this argument - Position position; - /// The span of the position indicator. Includes any whitespace in implicit - /// positions (`{ }`). - InnerSpan position_span; - /// How to format the argument - FormatSpec format; -}; - -/// A piece is a portion of the format string which represents the next part -/// to emit. These are emitted as a stream by the `Parser` class. -struct Piece -{ - enum class Tag - { - /// A literal string which should directly be emitted - String, - /// This describes that formatting should process the next argument (as - /// specified inside) for emission. - NextArgument, - }; - - struct String_Body - { - const str *_0; - }; - - struct NextArgument_Body - { - Box _0; - }; - - Tag tag; - union - { - String_Body string; - NextArgument_Body next_argument; - }; -}; - -struct PieceSlice -{ - const Piece *base_ptr; - uintptr_t len; -}; - -extern "C" { - -PieceSlice -collect_pieces (const char *input); - -} // extern "C" diff --git a/libgrust/libformat_parser/src/lib.rs b/libgrust/libformat_parser/src/lib.rs index 49821e7cd2f..4bbc468c755 100644 --- a/libgrust/libformat_parser/src/lib.rs +++ b/libgrust/libformat_parser/src/lib.rs @@ -5,8 +5,31 @@ use std::ffi::CStr; +trait IntoFFI { + type Output; + + fn into_ffi(&self) -> Self::Output; +} + +impl IntoFFI for Option +where + T: Sized, +{ + type Output = *const T; + + fn into_ffi(&self) -> Self::Output { + match self.as_ref() { + None => std::ptr::null(), + Some(r) => r as *const T, + } + } +} + +// FIXME: Make an ffi module in a separate file +// FIXME: Remember to leak the boxed type somehow +// FIXME: How to encode the Option type? As a pointer? Option -> Option<&T> -> *const T could work maybe? mod ffi { - use std::ops::Deref; + use super::IntoFFI; // Note: copied from rustc_span /// Range inside of a `Span` used for diagnostics when we only have access to relative positions. @@ -102,31 +125,31 @@ mod ffi { /// Optionally specified character to fill alignment with. pub fill: Option, /// Span of the optionally specified fill character. - pub fill_span: Option, + pub fill_span: *const InnerSpan, /// Optionally specified alignment. pub align: Alignment, /// The `+` or `-` flag. - pub sign: Option, + pub sign: *const Sign, /// The `#` flag. pub alternate: bool, /// The `0` flag. pub zero_pad: bool, /// The `x` or `X` flag. (Only for `Debug`.) - pub debug_hex: Option, + pub debug_hex: *const DebugHex, /// The integer precision to use. pub precision: Count<'a>, /// The span of the precision formatting flag (for diagnostics). - pub precision_span: Option, + pub precision_span: *const InnerSpan, /// The string width requested for the resulting format. pub width: Count<'a>, /// The span of the width formatting flag (for diagnostics). - pub width_span: Option, + pub width_span: *const InnerSpan, /// The descriptor string representing the name of the format desired for /// this argument, this can be empty or any number of characters, although /// it is required to be one word. pub ty: &'a str, /// The span of the descriptor string (for diagnostics). - pub ty_span: Option, + pub ty_span: *const InnerSpan, } /// Enum describing where an argument for a format can be located. @@ -197,6 +220,11 @@ mod ffi { match old { generic_format_parser::Piece::String(x) => Piece::String(x), generic_format_parser::Piece::NextArgument(x) => { + // FIXME: This is problematic - if we do this, then we probably run into the issue that the Box + // is freed at the end of the call to collect_pieces. if we just .leak() it, then we have + // a memory leak... should we resend the info back to the Rust lib afterwards to free it? + // this is definitely the best way - store that pointer in the FFI piece and rebuild the box + // in a Rust destructor Piece::NextArgument(Box::new(Into::::into(*x))) } } @@ -240,18 +268,18 @@ mod ffi { fn from(old: generic_format_parser::FormatSpec<'a>) -> Self { FormatSpec { fill: old.fill, - fill_span: old.fill_span.map(Into::into), + fill_span: old.fill_span.map(Into::into).into_ffi(), align: old.align.into(), - sign: old.sign.map(Into::into), + sign: old.sign.map(Into::into).into_ffi(), alternate: old.alternate, zero_pad: old.zero_pad, - debug_hex: old.debug_hex.map(Into::into), + debug_hex: old.debug_hex.map(Into::into).into_ffi(), precision: old.precision.into(), - precision_span: old.precision_span.map(Into::into), + precision_span: old.precision_span.map(Into::into).into_ffi(), width: old.width.into(), - width_span: old.width_span.map(Into::into), + width_span: old.width_span.map(Into::into).into_ffi(), ty: old.ty, - ty_span: old.ty_span.map(Into::into), + ty_span: old.ty_span.map(Into::into).into_ffi(), } } } @@ -327,6 +355,8 @@ pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice { .map(Into::into) .collect(); + println!("debug: {:?}, {:?}", pieces.as_ptr(), pieces.len()); + PieceSlice { base_ptr: pieces.as_ptr(), len: pieces.len(),