fix(ext/ffi): trampoline for fast calls (#15139)

This commit is contained in:
Divy Srivastava 2022-07-12 06:33:05 +05:30 committed by GitHub
parent 5db16d1229
commit 77d065e034
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 435 additions and 11 deletions

View File

@ -38,6 +38,7 @@
"cli/tsc/*typescript.js",
"gh-pages",
"target",
"test_ffi/tests/test.js",
"test_util/std",
"test_util/wpt",
"third_party",

3
.gitmodules vendored
View File

@ -9,3 +9,6 @@
[submodule "test_util/wpt"]
path = test_util/wpt
url = https://github.com/web-platform-tests/wpt.git
[submodule "ext/ffi/tinycc"]
path = ext/ffi/tinycc
url = https://github.com/TinyCC/tinycc

View File

@ -1,3 +1,25 @@
# deno_ffi
This crate implements dynamic library ffi.
## Performance
Deno FFI calls have extremely low overhead (~1ns on M1 16GB RAM) and perform on
par with native code. Deno leverages V8 fast api calls and JIT compiled bindings
to achieve these high speeds.
`Deno.dlopen` generates an optimized path and a fallback path. Optimized paths
are triggered when V8 decides to optimize the function and therefore calls it
through the Fast API. Fallback paths handle types like function callbacks and
implement proper error handling for unexpected types, which are not supported in
Fast calls.
Optimized calls enter a JIT-compiled "trampoline" function that translates Fast
API values directly for symbol calls. JIT compilation itself is super fast,
thanks to `tinycc`. Currently, the optimized path is only supported on Linux and
macOS.
To run benchmarks:
```bash
target/release/deno bench --allow-ffi --allow-read --unstable ./test_ffi/tests/bench.js
```

63
ext/ffi/build.rs Normal file
View File

@ -0,0 +1,63 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
use std::env;
/// Builds the vendored `tinycc` checkout into a static `libtcc.a`.
///
/// On non-Windows targets this runs tinycc's `configure` script followed by
/// `make libtcc.a`, both with `OUT_DIR` as the working directory, and then
/// emits the Cargo directives that add `OUT_DIR` to the native link search
/// path and re-run this script when the tinycc sources change.
///
/// The build script exits with status 1 when either command reports a failing
/// exit status, and panics when a command cannot be spawned or `OUT_DIR` is
/// not set. On Windows the whole body is compiled out.
fn build_tcc() {
  // TODO(@littledivy): Windows support for fast call.
  // let tcc_path = root
  //   .parent()
  //   .unwrap()
  //   .to_path_buf()
  //   .parent()
  //   .unwrap()
  //   .to_path_buf()
  //   .join("third_party")
  //   .join("prebuilt")
  //   .join("win");
  // println!("cargo:rustc-link-search=native={}", tcc_path.display());
  #[cfg(not(target_os = "windows"))]
  {
    use std::path::PathBuf;
    use std::process::exit;
    use std::process::Command;

    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let tcc_src = root.join("tinycc");
    let out_dir = PathBuf::from(
      env::var("OUT_DIR").expect("OUT_DIR is set by Cargo for build scripts"),
    );

    // Configure out-of-tree so that all build artifacts land in OUT_DIR.
    let mut configure = Command::new(tcc_src.join("configure"));
    configure.current_dir(&out_dir);
    configure.args(&["--enable-static", "--extra-cflags=-fPIC -O3 -g -static"]);
    let status = configure
      .status()
      .expect("failed to run the tinycc configure script");
    if !status.success() {
      eprintln!("Fail to configure: {:?}", status);
      exit(1);
    }

    // Only the static library is needed; honour Cargo's job count if present.
    let mut make = Command::new("make");
    make.current_dir(&out_dir).arg(format!(
      "-j{}",
      env::var("NUM_JOBS").unwrap_or_else(|_| String::from("1"))
    ));
    make.arg("libtcc.a");
    let status = make.status().expect("failed to run make for tinycc");
    if !status.success() {
      eprintln!("Fail to make: {:?}", status);
      exit(1);
    }

    println!("cargo:rustc-link-search=native={}", out_dir.display());
    println!("cargo:rerun-if-changed={}", tcc_src.display());
  }
}
/// Build-script entry point.
///
/// Does nothing on Windows. On every other target it builds tinycc and asks
/// Cargo to link the resulting static `tcc` library.
fn main() {
  #[cfg(not(target_os = "windows"))]
  {
    build_tcc();
    println!("cargo:rustc-link-lib=static=tcc");
  }
}

153
ext/ffi/jit_trampoline.rs Normal file
View File

@ -0,0 +1,153 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
use crate::NativeType;
use crate::{tcc::Compiler, Symbol};
use std::ffi::c_void;
use std::ffi::CString;
use std::fmt::Write as _;
/// Result of JIT-compiling a trampoline: the entry address plus the values
/// that `gen_trampoline` stores alongside it.
pub(crate) struct Allocation {
/// Address of the compiled `func_trampoline` symbol, as returned by
/// `Compiler::relocate_and_get_symbol`.
pub addr: *mut c_void,
/// Compiler context that produced `addr`. It is dropped together with this
/// struct (presumably so the generated code outlives every use of `addr`).
_ctx: Compiler,
/// The symbol the trampoline forwards to, held for the same duration.
_sym: Box<Symbol>,
}
/// Builds a temporary `CString` from `$st` and borrows it.
///
/// Panics if `$st` contains an interior NUL byte, because the result of
/// `CString::new` is unwrapped. The temporary only lives for the statement in
/// which the macro is expanded.
macro_rules! cstr {
($st:expr) => {
&CString::new($st).unwrap()
};
}
/// Returns the C type used for a *trampoline* parameter of the given native
/// type.
///
/// All 8-, 16- and 32-bit integers collapse to the 32-bit type of matching
/// signedness (presumably because that is how the fast-call side passes them;
/// confirm against the V8 fast API). Floats and `void` map to themselves.
///
/// # Panics
/// Panics with `unimplemented!` for every other native type.
fn native_arg_to_c(ty: &NativeType) -> &'static str {
  match *ty {
    NativeType::Void => "void",
    NativeType::F64 => "double",
    NativeType::F32 => "float",
    NativeType::I32 | NativeType::I16 | NativeType::I8 => "int32_t",
    NativeType::U32 | NativeType::U16 | NativeType::U8 => "uint32_t",
    _ => unimplemented!(),
  }
}
/// Returns the exact C type for the given native type, as used in the
/// `extern` declaration of the target symbol and for the return type.
///
/// Unlike `native_arg_to_c`, no widening happens here: every integer keeps
/// its own width and signedness so that the declaration matches the real
/// signature of the foreign function.
///
/// # Panics
/// Panics with `unimplemented!` for native types that have no mapping here.
fn native_to_c(ty: &NativeType) -> &'static str {
  match ty {
    NativeType::U8 => "uint8_t",
    NativeType::U16 => "uint16_t",
    NativeType::U32 => "uint32_t",
    NativeType::I8 => "int8_t",
    // Must be the signed type: declaring it as `uint16_t` would make the
    // trampoline zero-extend negative values passed to or returned from the
    // symbol.
    NativeType::I16 => "int16_t",
    NativeType::I32 => "int32_t",
    NativeType::Void => "void",
    NativeType::F32 => "float",
    NativeType::F64 => "double",
    _ => unimplemented!(),
  }
}
/// Generates the C source of the trampoline for `sym`.
///
/// The emitted translation unit contains an `extern` declaration of `func`
/// using the symbol's exact types, followed by `func_trampoline`, which takes
/// an extra leading `void* recv` parameter plus the symbol's parameters in
/// their `native_arg_to_c` form and forwards `p0, p1, ...` to `func`.
pub(crate) fn codegen(sym: &crate::Symbol) -> String {
  let params = &sym.parameter_types;
  let ret_ty = native_to_c(&sym.result_type);

  let mut src = String::from("#include <stdint.h>\n");

  // extern <return_type> func(<param_type> p0, <param_type> p1, ...);
  src.push_str("\nextern ");
  src.push_str(ret_ty);
  src.push_str(" func(");
  for (idx, param) in params.iter().enumerate() {
    if idx != 0 {
      src.push_str(", ");
    }
    let _ = write!(src, "{} p{}", native_to_c(param), idx);
  }
  src.push_str(");\n\n");

  // <return_type> func_trampoline(void* recv, <param_type> p0, ...) {
  src.push_str(ret_ty);
  src.push_str(" func_trampoline(void* recv");
  for (idx, param) in params.iter().enumerate() {
    let _ = write!(src, ", {} p{}", native_arg_to_c(param), idx);
  }
  src.push_str(") {\n");

  // return func(p0, p1, ...);
  let forwarded = (0..params.len())
    .map(|idx| format!("p{}", idx))
    .collect::<Vec<_>>()
    .join(", ");
  src.push_str(" return func(");
  src.push_str(&forwarded);
  src.push_str(");\n}\n\n");

  src
}
/// JIT-compiles the trampoline for `sym` and returns its allocation.
///
/// The symbol's code pointer is registered with the compiler under the name
/// `func`, the source produced by `codegen` is compiled, and the address of
/// `func_trampoline` is looked up after relocation. The compiler context and
/// the symbol are moved into the returned `Allocation`.
///
/// # Errors
/// Returns `Err(())` if creating the compiler, compiling the source, or
/// relocating fails.
///
/// # Panics
/// Panics if the generated source contains an interior NUL byte (via `cstr!`).
pub(crate) fn gen_trampoline(
  sym: Box<crate::Symbol>,
) -> Result<Box<Allocation>, ()> {
  let mut compiler = Compiler::new()?;
  compiler.set_options(cstr!("-nostdlib"));

  let target = sym.ptr.0 as *const c_void;
  // SAFETY: symbol satisfies ABI requirement.
  unsafe { compiler.add_symbol(cstr!("func"), target) };

  let source = codegen(&sym);
  compiler.compile_string(cstr!(source))?;

  // Resolve the entry point before the compiler is moved into the allocation.
  let addr = compiler.relocate_and_get_symbol(cstr!("func_trampoline"))?;
  Ok(Box::new(Allocation {
    addr,
    _ctx: compiler,
    _sym: sym,
  }))
}
// Unit tests for the C source emitted by `codegen`. They only compare the
// generated text; nothing is compiled or executed here.
#[cfg(test)]
mod tests {
use super::*;
use libffi::middle::Type;
use std::ptr::null_mut;
// Test helper: wraps the given parameter and return types in a `Symbol` with
// a null code pointer and an empty CIF, then runs the real `codegen` on it.
fn codegen(parameters: Vec<NativeType>, ret: NativeType) -> String {
let sym = Box::new(crate::Symbol {
cif: libffi::middle::Cif::new(vec![], Type::void()),
ptr: libffi::middle::CodePtr(null_mut()),
parameter_types: parameters,
result_type: ret,
can_callback: false,
});
super::codegen(&sym)
}
// Types whose trampoline parameter type equals the declared type.
#[test]
fn test_gen_trampoline() {
assert_eq!(
codegen(vec![], NativeType::Void),
"#include <stdint.h>\n\nextern void func();\n\nvoid func_trampoline(void* recv) {\n return func();\n}\n\n"
);
assert_eq!(
codegen(vec![NativeType::U32, NativeType::U32], NativeType::U32),
"#include <stdint.h>\n\nextern uint32_t func(uint32_t p0, uint32_t p1);\n\nuint32_t func_trampoline(void* recv, uint32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n"
);
assert_eq!(
codegen(vec![NativeType::I32, NativeType::I32], NativeType::I32),
"#include <stdint.h>\n\nextern int32_t func(int32_t p0, int32_t p1);\n\nint32_t func_trampoline(void* recv, int32_t p0, int32_t p1) {\n return func(p0, p1);\n}\n\n"
);
assert_eq!(
codegen(vec![NativeType::F32, NativeType::F32], NativeType::F32),
"#include <stdint.h>\n\nextern float func(float p0, float p1);\n\nfloat func_trampoline(void* recv, float p0, float p1) {\n return func(p0, p1);\n}\n\n"
);
assert_eq!(
codegen(vec![NativeType::F64, NativeType::F64], NativeType::F64),
"#include <stdint.h>\n\nextern double func(double p0, double p1);\n\ndouble func_trampoline(void* recv, double p0, double p1) {\n return func(p0, p1);\n}\n\n"
);
}
// 8-bit parameters: the trampoline takes 32-bit values while the extern
// declaration keeps the narrow types, so the C compiler inserts the cast.
#[test]
fn test_gen_trampoline_implicit_cast() {
assert_eq!(
codegen(vec![NativeType::I8, NativeType::U8], NativeType::I8),
"#include <stdint.h>\n\nextern int8_t func(int8_t p0, uint8_t p1);\n\nint8_t func_trampoline(void* recv, int32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n"
)
}
}

View File

@ -39,6 +39,11 @@ use std::path::PathBuf;
use std::ptr;
use std::rc::Rc;
#[cfg(not(target_os = "windows"))]
mod jit_trampoline;
#[cfg(not(target_os = "windows"))]
mod tcc;
thread_local! {
static LOCAL_ISOLATE_POINTER: RefCell<*const v8::Isolate> = RefCell::new(ptr::null());
}
@ -72,6 +77,8 @@ struct Symbol {
ptr: libffi::middle::CodePtr,
parameter_types: Vec<NativeType>,
result_type: NativeType,
// This is dead code only on Windows
#[allow(dead_code)]
can_callback: bool,
}
@ -678,6 +685,7 @@ impl From<&NativeType> for fast_api::Type {
}
}
#[cfg(not(target_os = "windows"))]
fn is_fast_api(rv: NativeType) -> bool {
!matches!(
rv,
@ -696,25 +704,36 @@ fn make_sync_fn<'s>(
scope: &mut v8::HandleScope<'s>,
sym: Box<Symbol>,
) -> v8::Local<'s, v8::Function> {
let mut fast_ffi_templ = None;
#[cfg(not(target_os = "windows"))]
let mut fast_ffi_templ: Option<FfiFastCallTemplate> = None;
#[cfg(target_os = "windows")]
let fast_ffi_templ: Option<FfiFastCallTemplate> = None;
#[cfg(not(target_os = "windows"))]
let mut fast_allocations: Option<*mut ()> = None;
#[cfg(not(target_os = "windows"))]
if !sym.can_callback
&& !sym.parameter_types.iter().any(|t| !is_fast_api(*t))
&& is_fast_api(sym.result_type)
{
let ret = fast_api::Type::from(&sym.result_type);
let mut args = sym
.parameter_types
.iter()
.map(|t| t.into())
.collect::<Vec<_>>();
if args.is_empty() {
args.push(fast_api::Type::V8Value);
}
// recv
args.insert(0, fast_api::Type::V8Value);
let symbol_trampoline =
jit_trampoline::gen_trampoline(sym.clone()).expect("gen_trampoline");
fast_ffi_templ = Some(FfiFastCallTemplate {
args: args.into_boxed_slice(),
ret: (&fast_api::Type::from(&sym.result_type)).into(),
symbol_ptr: sym.ptr.as_ptr() as *const c_void,
ret: (&ret).into(),
symbol_ptr: symbol_trampoline.addr,
});
fast_allocations = Some(Box::into_raw(symbol_trampoline) as *mut ());
}
let sym = Box::leak(sym);
@ -754,7 +773,13 @@ fn make_sync_fn<'s>(
Box::new(move |_| {
// SAFETY: This is never called twice. pointer obtained
// from Box::into_raw, hence, satisfies memory layout requirements.
unsafe { Box::from_raw(sym) };
unsafe {
Box::from_raw(sym);
#[cfg(not(target_os = "windows"))]
if let Some(fast_allocations) = fast_allocations {
Box::from_raw(fast_allocations as *mut jit_trampoline::Allocation);
}
}
}),
);

143
ext/ffi/tcc.rs Normal file
View File

@ -0,0 +1,143 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
use std::{
ffi::CStr,
marker::PhantomData,
os::raw::{c_char, c_int, c_void},
ptr::null_mut,
};
/// Opaque stand-in for the C `TCCState` type.
///
/// It has no usable fields on the Rust side and is only handled through the
/// `*mut TCCState` pointers passed to and returned from the `tcc_*` functions.
#[repr(C)]
#[derive(Debug)]
pub struct TCCState {
_unused: [u8; 0],
}
/// Output type passed to `tcc_set_output_type` by `Compiler::new`.
// NOTE(review): presumably mirrors `TCC_OUTPUT_MEMORY` from libtcc.h; confirm
// the value against the vendored tinycc revision.
pub const TCC_OUTPUT_MEMORY: i32 = 1;
// Raw bindings to the libtcc functions used by `Compiler`. The signatures
// are hand-written here; they are assumed to match the libtcc.h of the
// vendored tinycc revision (TODO confirm when bumping the submodule).
extern "C" {
// Creates a new state; `Compiler::new` treats a null return as failure.
pub fn tcc_new() -> *mut TCCState;
// Releases a state obtained from `tcc_new`.
pub fn tcc_delete(s: *mut TCCState);
// Applies command-line style options given as a C string.
pub fn tcc_set_options(s: *mut TCCState, str: *const c_char);
// Compiles C source given as a C string; callers treat 0 as success.
pub fn tcc_compile_string(s: *mut TCCState, buf: *const c_char) -> c_int;
// Registers `val` under `name`; callers treat 0 as success.
pub fn tcc_add_symbol(
s: *mut TCCState,
name: *const c_char,
val: *const c_void,
) -> c_int;
// Selects the output type; callers treat 0 as success.
pub fn tcc_set_output_type(s: *mut TCCState, output_type: c_int) -> c_int;
// Called twice by `Compiler`: with a null `ptr` to query the required size,
// then with a buffer of that size to perform the relocation.
pub fn tcc_relocate(s1: *mut TCCState, ptr: *mut c_void) -> c_int;
// Looks up the address of a symbol by name.
pub fn tcc_get_symbol(s: *mut TCCState, name: *const c_char) -> *mut c_void;
}
/// Compilation context.
///
/// Wraps a `TCCState` created with `tcc_new`; the state is released with
/// `tcc_delete` when the value is dropped.
pub struct Compiler {
/// Raw state pointer returned by `tcc_new`; never null once constructed.
inner: *mut TCCState,
/// Marker field naming `TCCState`, which is otherwise only held by pointer.
_phantom: PhantomData<TCCState>,
/// Buffer handed to `tcc_relocate` by `relocate_and_get_symbol`; `None`
/// until that method has succeeded.
pub bin: Option<Vec<u8>>,
}
impl Compiler {
  /// Creates a new TCC state configured to emit its output into memory.
  ///
  /// # Errors
  /// Returns `Err(())` when `tcc_new` returns a null state.
  ///
  /// # Panics
  /// Panics if `tcc_set_output_type` reports a non-zero status.
  pub fn new() -> Result<Self, ()> {
    // SAFETY: There is one context per thread.
    let inner = unsafe { tcc_new() };
    if inner.is_null() {
      return Err(());
    }
    // Wrap the state before doing anything that can panic so that `Drop`
    // releases it during unwinding instead of leaking it.
    let compiler = Self {
      inner,
      _phantom: PhantomData,
      bin: None,
    };
    let ret =
      // SAFETY: `inner` is non-null and owned by `compiler`; set output to memory.
      unsafe { tcc_set_output_type(compiler.inner, TCC_OUTPUT_MEMORY as c_int) };
    assert_eq!(ret, 0);
    Ok(compiler)
  }

  /// Passes command-line style options (for example `-nostdlib`) to the
  /// compiler and returns `self` for chaining.
  pub fn set_options(&mut self, option: &CStr) -> &mut Self {
    // SAFETY: option is a null-terminated C string.
    unsafe {
      tcc_set_options(self.inner, option.as_ptr());
    }
    self
  }

  /// Compiles the C source in `p`.
  ///
  /// # Errors
  /// Returns `Err(())` when the compiler reports a non-zero status.
  pub fn compile_string(&mut self, p: &CStr) -> Result<(), ()> {
    // SAFETY: p is a null-terminated C string.
    let ret = unsafe { tcc_compile_string(self.inner, p.as_ptr()) };
    if ret == 0 {
      Ok(())
    } else {
      Err(())
    }
  }

  /// Registers the address `val` under the name `sym` so that compiled code
  /// can refer to it.
  ///
  /// # Safety
  /// Symbol need satisfy ABI requirement.
  ///
  /// # Panics
  /// Panics if the compiler reports a non-zero status.
  pub unsafe fn add_symbol(&mut self, sym: &CStr, val: *const c_void) {
    // SAFETY: sym is a null-terminated C string.
    let ret = tcc_add_symbol(self.inner, sym.as_ptr(), val);
    assert_eq!(ret, 0);
  }

  /// Relocates the compiled code into a buffer owned by this compiler and
  /// returns the address of `sym` inside it.
  ///
  /// The buffer is stored in `self.bin`, so the returned address must not be
  /// used after this `Compiler` has been dropped.
  ///
  /// # Errors
  /// Returns `Err(())` when the size query or the relocation fails, or when
  /// `sym` cannot be found (the lookup returns a null pointer).
  pub fn relocate_and_get_symbol(
    &mut self,
    sym: &CStr,
  ) -> Result<*mut c_void, ()> {
    // SAFETY: pass null ptr to get required length
    let len = unsafe { tcc_relocate(self.inner, null_mut()) };
    // Any negative value is a failure; casting it to `usize` would request an
    // enormous allocation.
    if len < 0 {
      return Err(());
    }
    let len = len as usize;

    let mut bin: Vec<u8> = Vec::with_capacity(len);
    let ret =
      // SAFETY: bin is allocated up to len.
      unsafe { tcc_relocate(self.inner, bin.as_mut_ptr() as *mut c_void) };
    if ret != 0 {
      return Err(());
    }
    // SAFETY: if ret == 0, bin is initialized.
    unsafe {
      bin.set_len(len);
    }
    self.bin = Some(bin);

    // SAFETY: sym is a null-terminated C string.
    let addr = unsafe { tcc_get_symbol(self.inner, sym.as_ptr()) };
    // An unknown symbol yields a null pointer; surface that as an error
    // rather than handing callers an address they would jump to.
    if addr.is_null() {
      return Err(());
    }
    Ok(addr)
  }
}
impl Drop for Compiler {
  /// Releases the underlying TCC state.
  fn drop(&mut self) {
    let state = self.inner;
    // SAFETY: delete state from tcc_new()
    unsafe {
      tcc_delete(state);
    }
  }
}
// Smoke test for the `Compiler` wrapper.
#[cfg(test)]
mod test {
use super::*;
use std::ffi::CString;
// Compiles a tiny `add` function with `-nostdlib` and checks that relocation
// and symbol lookup succeed. The compiled function is not called.
#[test]
fn test_compiler_jit() {
let p = CString::new(
r#"
#include <stdint.h>
int32_t add(int32_t a, int32_t b) {
return a + b;
}
"#
.as_bytes(),
)
.unwrap();
let sym = CString::new("add".as_bytes()).unwrap();
let mut ctx = Compiler::new().unwrap();
let ops = CString::new("-nostdlib").unwrap();
ctx.set_options(&ops);
assert!(ctx.compile_string(&p).is_ok());
ctx.relocate_and_get_symbol(&sym).unwrap();
}
}

1
ext/ffi/tinycc Submodule

@ -0,0 +1 @@
Subproject commit afc136262e93ae85fb3643005b36dbfc30d99c42

View File

@ -30,6 +30,7 @@ fn basic() {
.arg("--allow-read")
.arg("--unstable")
.arg("--quiet")
.arg(r#"--v8-flags=--allow-natives-syntax"#)
.arg("tests/test.js")
.env("NO_COLOR", "1")
.output()
@ -62,6 +63,7 @@ fn basic() {
true\n\
579\n\
579\n\
579\n\
8589934590n\n\
-8589934590n\n\
8589934590n\n\

View File

@ -1,6 +1,8 @@
// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.
// deno-lint-ignore-file
// Run using cargo test or `--v8-flags=--allow-natives-syntax`
import { assertThrows } from "../../test_util/std/testing/asserts.ts";
const targetDir = Deno.execPath().replace(/[^\/\\]+$/, "");
@ -182,8 +184,9 @@ const dylib = Deno.dlopen(libPath, {
type: "pointer",
},
});
const { symbols } = dylib;
dylib.symbols.printSomething();
symbols.printSomething();
const buffer = new Uint8Array([1, 2, 3, 4, 5, 6, 7, 8]);
const buffer2 = new Uint8Array([9, 10]);
dylib.symbols.print_buffer(buffer, buffer.length);
@ -238,7 +241,15 @@ const before = performance.now();
await sleepNonBlocking.call(100);
console.log(performance.now() - before >= 100);
console.log(dylib.symbols.add_u32(123, 456));
const { add_u32 } = symbols;
function addU32Fast(a, b) {
return add_u32(a, b);
};
%PrepareFunctionForOptimization(addU32Fast);
console.log(addU32Fast(123, 456));
%OptimizeFunctionOnNextCall(addU32Fast);
console.log(addU32Fast(123, 456));
console.log(dylib.symbols.add_i32(123, 456));
console.log(dylib.symbols.add_u64(0xffffffffn, 0xffffffffn));
@ -448,4 +459,4 @@ After: ${postStr}`,
}
console.log("Correct number of resources");
})();
})();

@ -1 +1 @@
Subproject commit 4fd74a381b2a9f357ea7be80c12c24863596841f
Subproject commit 9f314cefb507e3b9de08edc6046353e4012279fc