diff --git a/precompiles/design.md b/precompiles/design.md index 472fb5ab..2064674e 100644 --- a/precompiles/design.md +++ b/precompiles/design.md @@ -57,7 +57,7 @@ We can emit this information by using macros to embed code that expands to the f ```rust #[no_mangle] -#[link_section=.nexus-dynamic-precompiles] +#[link_section=.note.nexus-precompiles] pub static DYNAMIC_PRECOMPILE_{idx}: &str = "{content}"; ``` diff --git a/precompiles/macros/Cargo.toml b/precompiles/macros/Cargo.toml index 7eb9139f..407a6818 100644 --- a/precompiles/macros/Cargo.toml +++ b/precompiles/macros/Cargo.toml @@ -10,4 +10,6 @@ proc-macro = true quote = "1.0" proc-macro2 = "1.0" proc-macro-crate = "3.1.0" -syn = { version = "2.0", features = ["full"] } +serde = "1.0" +serde_json = "1.0" +syn = { version = "2.0", features = ["full", "parsing"] } diff --git a/precompiles/macros/src/generation.rs b/precompiles/macros/src/generation.rs new file mode 100644 index 00000000..10466225 --- /dev/null +++ b/precompiles/macros/src/generation.rs @@ -0,0 +1,85 @@ +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::{spanned::Spanned, Ident}; + +use crate::precompile_path::{PrecompilePath, SerializablePath}; + +/// Generate the custom RISC-V instruction implementations for each precompile. Separated for +/// readability. +pub(crate) fn generate_instruction_impls(paths: &[PrecompilePath]) -> TokenStream { + let num_precompiles = paths.len() as u16; + + (0..num_precompiles) + .zip(paths.iter()) + .map(|(i, path)| { + // Format is index = 0b0000_00[fn7][fn3] + const FN7_MASK: u16 = 0b011_1111_1000; + const FN3_MASK: u16 = 0b0111; + const R_TYPE_PRECOMPILE_OPCODE: u8 = 0b0001011; + + let fn7 = ((FN7_MASK & i) >> 3) as u8; + let fn3 = (FN3_MASK & i) as u8; + + // ".insn ins_type opcode, func3, func7, rd, rs1, rs2" + let insn = format!( + ".insn r 0x{R_TYPE_PRECOMPILE_OPCODE:x}, 0x{fn3:x}, 0x{fn7:x}, {{rd}}, {{rs1}}, {{rs2}}" + ); + let path = &path.as_syn_path(); + quote! 
{ + impl InstructionEmitter for #path { + #[inline(always)] + fn emit_instruction(rs1: u32, rs2: u32, imm: u32) -> u32 { + #[cfg(target_arch = "riscv32")] { + let mut rd: u32; + unsafe { + ::core::arch::asm!( + #insn, + rd = out(reg) rd, + rs1 = in(reg) rs1, + rs2 = in(reg) rs2, + ); + } + return rd; + } + #[cfg(not(target_arch = "riscv32"))] { + return <#path as ::nexus_precompiles::PrecompileInstruction>::native_call(rs1, rs2); + } + } + } + } + }) + .collect() +} + +/// Generate the static variables that hold the precompile metadata. +#[cfg(target_arch = "riscv32")] +pub(crate) fn generate_statics(paths: &Vec) -> Result { + let num_precompiles = paths.len() as u16; + let mut statics = quote! { + #[no_mangle] + #[link_section = ".note.nexus-precompiles"] + pub static PRECOMPILE_COUNT: u16 = #num_precompiles; + }; + + for (i, path) in (0..num_precompiles).zip(paths) { + let symbol_name = Ident::new(&format!("PRECOMPILE_{i}"), Span::call_site()); + let serializable_path = SerializablePath::from((*path).clone()); + let data = serde_json::to_string(&serializable_path); + + if let Err(e) = data { + return Err(syn::Error::new( + path.as_syn_path().span(), + format!("Failed to serialize metadata for {}", e), + )); + } + + let data = data.unwrap(); + statics.extend(quote! 
{ + #[no_mangle] + #[link_section = ".note.nexus-precompiles"] + pub static #symbol_name: &'static str = #data; + }); + } + + Ok(statics) +} diff --git a/precompiles/macros/src/lib.rs b/precompiles/macros/src/lib.rs index ccf03462..217ecb71 100644 --- a/precompiles/macros/src/lib.rs +++ b/precompiles/macros/src/lib.rs @@ -1,8 +1,12 @@ extern crate proc_macro; -mod path_with_rename; +mod generation; +mod precompile_path; -use path_with_rename::PathWithRename; +use generation::generate_instruction_impls; +#[cfg(target_arch = "riscv32")] +use generation::generate_statics; +use precompile_path::PrecompilePath; use proc_macro::TokenStream; use quote::quote; use quote::ToTokens; @@ -35,8 +39,8 @@ pub fn use_precompiles(input: TokenStream) -> TokenStream { // 1. Parse the input into a list of paths to precompile implementations. let paths = - match Punctuated::::parse_terminated.parse2(input.clone()) { - Ok(p) => p.into_iter().collect::>(), + match Punctuated::::parse_terminated.parse2(input.clone()) { + Ok(p) => p.into_iter().collect::>(), Err(e) => { return e.into_compile_error().into(); } @@ -68,7 +72,7 @@ pub fn use_precompiles(input: TokenStream) -> TokenStream { let valid_checks: Vec = paths .iter() .map(|path| { - let path = &path.path; + let path = path.as_syn_path(); quote! { is_valid::<#path>() } }) .collect(); @@ -96,50 +100,7 @@ pub fn use_precompiles(input: TokenStream) -> TokenStream { // 5. Generate code that picks a 10-bit index 0-1023 for each precompile and uses it to generate // a custom RISC-V instruction for each precompile. This is done by encoding the precompile // index into the `func3` and `func7` fields of the custom RISC-V instruction we use. 
- - // Safety: size already checked to be <= MAX_PRECOMPILES, guaranteed to fit in 10 bits - let num_precompiles = paths.len() as u16; - let insn_impls: proc_macro2::TokenStream = (0..num_precompiles) - .zip(paths.iter()) - .map(|(i, path)| { - // Format is index = 0b0000_00[fn7][fn3] - const FN7_MASK: u16 = 0b011_1111_1000; - const FN3_MASK: u16 = 0b0111; - const R_TYPE_PRECOMPILE_OPCODE: u8 = 0b0001011; - - let fn7 = ((FN7_MASK & i) >> 3) as u8; - let fn3 = (FN3_MASK & i) as u8; - - // ".insn ins_type opcode, func3, func7, rd, rs1, rs2" - let insn = format!( - ".insn r 0x{R_TYPE_PRECOMPILE_OPCODE:x}, 0x{fn3:x}, 0x{fn7:x}, {{rd}}, {{rs1}}, {{rs2}}" - ); - let path = &path.path; - quote! { - impl InstructionEmitter for #path { - #[inline(always)] - fn emit_instruction(rs1: u32, rs2: u32, imm: u32) -> u32 { - #[cfg(target_arch = "riscv32")] { - let mut rd: u32; - unsafe { - ::core::arch::asm!( - #insn, - rd = out(reg) rd, - rs1 = in(reg) rs1, - rs2 = in(reg) rs2, - ); - } - return rd; - } - #[cfg(not(target_arch = "riscv32"))] { - return <#path as ::nexus_precompiles::PrecompileInstruction>::native_call(rs1, rs2); - } - } - } - } - }) - .collect(); - output.extend(insn_impls); + output.extend(generate_instruction_impls(&paths)); // 6. Generate a `#[no_mangle]` static variable that expresses the number of precompiles present // in the guest binary. This is not likely super useful but serves as a guard against this macro @@ -147,14 +108,15 @@ pub fn use_precompiles(input: TokenStream) -> TokenStream { // future update, this will be omitted and replaced by embedding the precompile metadata in the // binary itself. #[cfg(target_arch = "riscv32")] - let macro_guard = quote! 
{ - #[no_mangle] - #[link_section = ".nexus_precompile_count"] - pub static PRECOMPILE_COUNT: u16 = #num_precompiles; - }; + { + let statics = generate_statics(&paths); - #[cfg(target_arch = "riscv32")] - output.extend(macro_guard); + if let Err(e) = statics { + return e.into_compile_error().into(); + } + + output.extend(statics.unwrap()); + } // 7. Call each precompile's call-generating macro. This macro is expected to define and // implement a trait which is used for the actual precompile call. This should have the name @@ -165,17 +127,11 @@ pub fn use_precompiles(input: TokenStream) -> TokenStream { let custom_generators = paths .iter() .map(|path| { - let mut prefix = path.path.clone(); - - // Remove the name of the precompile struct to get the path to its module. - let _postfix = match prefix.segments.pop() { - Some(segment) => segment, - None => return Err(spanned_error(path, "Invalid path: no module specified")), - }; + let prefix = path.prefix(); + let path = path.as_syn_path(); - let path = &path.path; Ok(quote! { - #prefix generate_instruction_caller!(#path); + #prefix::generate_instruction_caller!(#path); }) }) .collect::, TokenStream>>(); diff --git a/precompiles/macros/src/path_with_rename.rs b/precompiles/macros/src/path_with_rename.rs deleted file mode 100644 index 94f86cbc..00000000 --- a/precompiles/macros/src/path_with_rename.rs +++ /dev/null @@ -1,38 +0,0 @@ -use quote::{quote, ToTokens}; -use syn::{ - parse::{Parse, ParseStream}, - Ident, Path, Token, -}; - -pub(crate) struct PathWithRename { - pub(crate) path: Path, - pub(crate) rename: Option, -} - -impl Parse for PathWithRename { - fn parse(input: ParseStream) -> syn::Result { - let path = input.parse()?; - - let rename = if input.peek(Token![as]) { - input.parse::()?; - Some(input.parse()?) 
- } else { - None - }; - - Ok(Self { path, rename }) - } -} - -impl ToTokens for PathWithRename { - fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { - let path = &self.path; - let rename = self.rename.as_ref(); - - tokens.extend(quote! { #path }); - - if let Some(rename) = rename { - tokens.extend(quote! { as #rename }); - } - } -} diff --git a/precompiles/macros/src/precompile_path.rs b/precompiles/macros/src/precompile_path.rs new file mode 100644 index 00000000..b795fd0a --- /dev/null +++ b/precompiles/macros/src/precompile_path.rs @@ -0,0 +1,140 @@ +use std::fmt::Display; + +use quote::{quote, ToTokens}; +use serde::{Deserialize, Serialize}; +use syn::{ + parse::{Parse, ParseStream}, + Ident, Path, Token, +}; + +/// Similar to the usual concept of a path but with the restrictions and features needed for our +/// use-case. Our paths are always absolute, and the precompile implementation's parent must be +/// where the precompile module's `generate_instruction_caller!` macro is defined. The user may +/// optionally rename the precompile for use in their client code. +#[derive(Debug, Clone)] +pub(crate) struct PrecompilePath { + pub(crate) path: Vec, + pub(crate) rename: Option, +} + +impl PrecompilePath { + fn as_syn_path_impl(path: &[Ident]) -> Path { + let path = path + .iter() + .map(|ident| ident.to_string()) + .collect::>() + .join("::"); + + syn::parse_str(&format!("::{}", path)).unwrap() + } + + pub(crate) fn as_syn_path(&self) -> Path { + Self::as_syn_path_impl(&self.path) + } + + pub(crate) fn prefix(&self) -> Path { + let mut path = self.path.clone(); + path.pop().unwrap(); // Safety: path is guaranteed to have at least two elements. 
+ + Self::as_syn_path_impl(&path) + } +} + +impl Parse for PrecompilePath { + fn parse(input: ParseStream) -> syn::Result { + let mut path = Vec::new(); + let rename = loop { + if input.is_empty() { + break None; + } + + let _separator = input.parse::()?; + let ident = input.parse::()?; + + path.push(ident); + + if input.parse::().is_ok() { + let ident = input.parse::()?; + if !input.is_empty() { + return Err(syn::Error::new( + input.span(), + "Unexpected tokens after precompile rename.", + )); + } + + break Some(ident); + } + }; + + if path.len() < 2 { + return Err(syn::Error::new( + input.span(), + "Precompile path must have at least two elements \ + (module name and implementing struct).", + )); + } + + Ok(Self { path, rename }) + } +} + +impl ToTokens for PrecompilePath { + fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { + for ident in &self.path { + tokens.extend(quote! { ::#ident }); + } + + if let Some(rename) = self.rename.as_ref() { + tokens.extend(quote! { as #rename }); + } + } +} + +impl Display for PrecompilePath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let path = self + .path + .iter() + .map(|ident| ident.to_string()) + .collect::>() + .join("::"); + let rename = self.rename.as_ref(); + + write!(f, "::{}", path)?; + + if let Some(rename) = rename { + write!(f, " as {}", rename)?; + } + + Ok(()) + } +} + +pub struct SerializablePath(pub(crate) Path); + +impl Serialize for SerializablePath { + fn serialize(&self, serializer: S) -> Result + where + S: serde::ser::Serializer, + { + self.0.to_token_stream().to_string().serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for SerializablePath { + fn deserialize(deserializer: D) -> Result + where + D: serde::de::Deserializer<'de>, + { + let path = String::deserialize(deserializer)?; + let path = syn::parse_str(&path).map_err(serde::de::Error::custom)?; + + Ok(Self(path)) + } +} + +impl From for SerializablePath { + fn from(path: PrecompilePath) -> Self { 
+ Self(path.as_syn_path()) + } +} diff --git a/precompiles/tests/integration_test.rs b/precompiles/tests/integration_test.rs index ce0de65e..ad618988 100644 --- a/precompiles/tests/integration_test.rs +++ b/precompiles/tests/integration_test.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod tests { use nexus_precompile_macros::use_precompiles; - use_precompiles!(dummy_div::DummyDiv as MyDummyDiv); + use_precompiles!(::dummy_div::DummyDiv as MyDummyDiv); #[test] fn test_precompile_macro() { diff --git a/vm/src/elf/error.rs b/vm/src/elf/error.rs index 412a8d16..e4bc6bf8 100644 --- a/vm/src/elf/error.rs +++ b/vm/src/elf/error.rs @@ -74,6 +74,10 @@ pub enum ParserError { /// An error occurred while parsing the ELF headers #[error(transparent)] ELFError(#[from] elf::ParseError), + + /// An error occurred while decoding a byte slice into a word. + #[error(transparent)] + WordDecodingFailed(#[from] std::array::TryFromSliceError), } /// Result type for VM functions that can produce errors diff --git a/vm/src/elf/loader.rs b/vm/src/elf/loader.rs index cbab9749..b78f1afc 100644 --- a/vm/src/elf/loader.rs +++ b/vm/src/elf/loader.rs @@ -67,6 +67,9 @@ pub struct ElfFile { /// Initial read write memory image containing global and initialized data. pub ram_image: BTreeMap, + + /// Nexus-specific metadata embedded in the ELF file. + pub nexus_metadata: Vec, } impl ElfFile { @@ -76,6 +79,7 @@ impl ElfFile { base: u32, rom_image: BTreeMap, ram_image: BTreeMap, + nexus_metadata: Vec, ) -> Self { ElfFile { instructions, @@ -83,6 +87,7 @@ impl ElfFile { base, rom_image, ram_image, + nexus_metadata, } } @@ -109,6 +114,7 @@ impl ElfFile { base: parsed_elf_data.base_address as u32, rom_image: parsed_elf_data.readonly_memory, ram_image: parsed_elf_data.writable_memory, + nexus_metadata: parsed_elf_data.nexus_metadata, }) } diff --git a/vm/src/elf/parser.rs b/vm/src/elf/parser.rs index 9f895d57..e3893a73 100644 --- a/vm/src/elf/parser.rs +++ b/vm/src/elf/parser.rs @@ -9,6 +9,7 @@ //! 
- Parses segment information and extracts executable content //! - Supports Harvard architecture with separate instruction and data memories //! - Handles allowed sections: .text, .data, .sdata, and .rodata +//! - Supports our custom metadata section: .note.nexus-precompiles //! //! Main Components: //! - `validate_elf_header`: Ensures the ELF file meets RISC-V 32-bit executable requirements @@ -37,6 +38,7 @@ use std::fmt; use super::error::{ParserError, Result}; type Instructions = Vec; +type Metadata = Vec; type MemoryImage = BTreeMap; pub struct ParsedElfData { @@ -44,6 +46,7 @@ pub struct ParsedElfData { pub readonly_memory: MemoryImage, pub writable_memory: MemoryImage, pub base_address: u64, + pub nexus_metadata: Metadata, } /// The maximum size of the memory in bytes. @@ -59,8 +62,16 @@ pub const WORD_SIZE: usize = 4; /// When building the section map, only these sections and their variants are considered. /// Section names starting with any of these prefixes are included (e.g., .text1, .data2). /// All other sections are ignored during parsing. 
-const ALLOWED_SECTIONS: [&str; 8] = [ - ".text", ".data", ".sdata", ".rodata", ".init", ".fini", ".bss", ".sbss", +const ALLOWED_SECTIONS: [&str; 9] = [ + ".text", + ".data", + ".sdata", + ".rodata", + ".init", + ".fini", + ".bss", + ".sbss", + ".nexus_precompile_metadata", ]; #[derive(Debug, Clone, Copy)] @@ -68,6 +79,7 @@ enum WordType { Instruction, ReadOnlyData, Data, + Metadata, } impl fmt::Display for WordType { @@ -76,6 +88,7 @@ impl fmt::Display for WordType { WordType::Instruction => write!(f, "Instruction"), WordType::ReadOnlyData => write!(f, "Read-Only Data"), WordType::Data => write!(f, "Data"), + WordType::Metadata => write!(f, "Metadata"), } } } @@ -203,48 +216,56 @@ fn parse_segment_content( instructions: &mut Vec, readonly_memory_image: &mut BTreeMap, memory_image: &mut BTreeMap, + metadata: &mut Vec, ) -> Result<()> { let is_executable_segment = (segment.p_flags & abi::PF_X) != 0; - let (virtual_address, offset, mem_size) = parse_segment_info(segment)?; + let (segment_virtual_address, segment_physical_address, segment_size) = + parse_segment_info(segment)?; - for address in (0..mem_size).step_by(WORD_SIZE as _) { + for offset_in_segment in (0..segment_size).step_by(WORD_SIZE as _) { // Calculate the memory address for this word - let memory_address = virtual_address - .checked_add(address) + let memory_address = segment_virtual_address + .checked_add(offset_in_segment) .ok_or(ParserError::InvalidSegmentAddress)?; if memory_address == MAXIMUM_MEMORY_SIZE { return Err(ParserError::AddressExceedsMemorySize); } // Calculate the offset within the segment for this word - let segment_offset = address + offset; + let absolute_address = offset_in_segment + segment_physical_address; // Read the word from the file data let word = u32::from_le_bytes( - data[segment_offset as usize..(segment_offset + WORD_SIZE as u32) as usize] - .try_into() - .unwrap(), + data[absolute_address as usize..(absolute_address + WORD_SIZE as u32) as usize] + .try_into()?, ); // 
Determine the type of word based on the segment and section information + let word_type = if is_executable_segment && section_map.iter().any(|(prefix, (_, end))| { (prefix.starts_with(".text") || prefix.starts_with(".init") || prefix.starts_with(".fini")) - && segment_offset < *end as u32 + && absolute_address < *end as u32 }) { Some(WordType::Instruction) } else if section_map.iter().any(|(prefix, (start, end))| { prefix.starts_with(".rodata") - && *start as u32 <= segment_offset - && segment_offset < *end as u32 + && *start as u32 <= absolute_address + && absolute_address < *end as u32 }) { Some(WordType::ReadOnlyData) + } else if section_map.iter().any(|(prefix, (start, end))| { + prefix.starts_with(".note.nexus-precompiles") + && *start as u32 <= absolute_address + && absolute_address < *end as u32 + }) { + Some(WordType::Metadata) } else if section_map.iter().any(|(prefix, (start, end))| { (!prefix.starts_with(".text") && !prefix.starts_with(".rodata")) - && *start as u32 <= segment_offset - && segment_offset < *end as u32 + && *start as u32 <= absolute_address + && absolute_address < *end as u32 }) { Some(WordType::Data) } else { @@ -263,6 +284,9 @@ fn parse_segment_content( return Err(ParserError::DuplicateMemoryAddress); } } + Some(WordType::Metadata) => { + metadata.push(word); + } None => (), } } @@ -320,18 +344,18 @@ pub fn parse_segments(elf: &ElfBytes, data: &[u8]) -> Result, data: &[u8]) -> Result, data: &[u8]) -> Result