diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5e8d3107..89aaefc0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,3 +54,4 @@ jobs: run: rustup target add riscv32i-unknown-none-elf - run: cargo test -r --all --all-features + timeout-minutes: 30 diff --git a/common/src/constants/constants.rs b/common/src/constants/constants.rs new file mode 100644 index 00000000..50a49902 --- /dev/null +++ b/common/src/constants/constants.rs @@ -0,0 +1,2 @@ +pub const MEMORY_TOP: u32 = 0x80400000; +pub const WORD_SIZE: usize = 4; diff --git a/common/src/constants/mod.rs b/common/src/constants/mod.rs new file mode 100644 index 00000000..6ec4a388 --- /dev/null +++ b/common/src/constants/mod.rs @@ -0,0 +1,2 @@ +pub mod constants; +pub use constants::*; diff --git a/common/src/lib.rs b/common/src/lib.rs index a874b681..abae99d5 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -1,3 +1,4 @@ +pub mod constants; pub mod cpu; pub mod error; pub mod memory; diff --git a/common/src/memory/alignment.rs b/common/src/memory/alignment.rs new file mode 100644 index 00000000..a45666b6 --- /dev/null +++ b/common/src/memory/alignment.rs @@ -0,0 +1,34 @@ +// use crate::constants::WORD_SIZE; + +#[macro_export] +macro_rules! word_align { + ($len:expr) => { + ($len + $crate::constants::WORD_SIZE - 1) & !($crate::constants::WORD_SIZE - 1) + }; +} + +#[macro_export] +macro_rules! bytes_to_words { + ($bytes:expr) => {{ + // Convert the associated data to word representation. + let mut bytes = $bytes.to_vec(); + let padded_len = $crate::word_align!(bytes.len()); + bytes.resize(padded_len, 0); + // Convert to u32 chunks. + bytes + .chunks($crate::constants::WORD_SIZE) + .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])) + .collect::>() + }}; +} + +#[macro_export] +macro_rules! words_to_bytes { + ($words:expr) => {{ + let mut bytes: Vec = Vec::with_capacity($words.len() * $crate::constants::WORD_SIZE); + for word in $words { + bytes.extend_from_slice(&word.to_le_bytes()); + } + bytes + }}; +} diff --git a/common/src/memory/mod.rs b/common/src/memory/mod.rs index a7c2d354..6fa1110c 100644 --- a/common/src/memory/mod.rs +++ b/common/src/memory/mod.rs @@ -1,2 +1,4 @@ +pub mod alignment; +pub use alignment::*; pub mod traits; pub use traits::*; diff --git a/tests/testing-framework/src/lib.rs b/tests/testing-framework/src/lib.rs index 6d5a8428..9a6676b4 100644 --- a/tests/testing-framework/src/lib.rs +++ b/tests/testing-framework/src/lib.rs @@ -1,17 +1,20 @@ #[cfg(test)] mod test { use nexus_common::cpu::InstructionResult; + use nexus_vm::elf::ElfFile; use nexus_vm::emulator::MemoryTranscript; use nexus_vm::emulator::{Emulator, HarvardEmulator, LinearEmulator, LinearMemoryLayout}; use postcard::{from_bytes, to_allocvec}; - use serde::{de::DeserializeOwned, Deserialize, Serialize}; + use serde::{de::DeserializeOwned, Serialize}; use std::{path::PathBuf, process::Command}; use tempfile::{tempdir, TempDir}; + #[derive(Clone)] enum EmulatorType { Harvard, Linear(u32, u32, u32), // heap size, stack size, program size + TwoPass, } impl EmulatorType { @@ -103,26 +106,28 @@ mod test { std::fs::read(elf_file).expect("Failed to read elf file") } - /// Serialize a value into a vector of u32 words. - fn serialize_into_u32_chunks(value: &T) -> Vec { - // Serialize to bytes. - let mut bytes = to_allocvec(value).expect("Serialization failed"); - // Pad to the next multiple of 4. - bytes.resize((bytes.len() + 3) & !3, 0); - // Convert to u32 chunks. - bytes - .chunks(4) - .map(|chunk| u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]])) - .collect() - } + fn compile_multi(test_name: &str, compile_flags: &[&str]) -> Vec { + let mut elfs = Vec::::new(); + // Set up the temporary directories for intermediate project setup. + let tmp_dir = &create_tmp_dir(); + let tmp_project_path = tmp_dir.path().join("integration"); - /// Deserialize a value from a vector of u32 words. - fn deserialize_from_u32_chunks(u32_chunks: &[u32]) -> T { - let mut bytes = Vec::with_capacity(u32_chunks.len() * 4); - for &word in u32_chunks { - bytes.extend_from_slice(&word.to_le_bytes()); + for flag_set in compile_flags { + // Check that the tests compile and execute correctly. + // Compile the test file. + let test_dir_path = "../integration-tests"; + let test_path = format!("{test_dir_path}/{test_name}.rs"); + let elf_contents = compile_to_elf(tmp_project_path.clone(), &test_path, flag_set); + + // Save the elf file for debugging purposes. + let elf_path = format!("{test_dir_path}/{test_name}.elf"); + std::fs::write(&elf_path, &elf_contents).expect("Failed to write file"); + + // Parse the elf file. + let elf = ElfFile::from_bytes(&elf_contents).expect("Unable to load ELF from bytes"); + elfs.push(elf); } - from_bytes(&bytes).expect("Deserialization failed") + elfs } /// Helper function to run emulator and check that the inputs and outputs are correct. @@ -130,7 +135,7 @@ mod test { T: Serialize, U: Serialize + DeserializeOwned + std::fmt::Debug + PartialEq + Clone, >( - test_name: &str, + elfs: Vec, input: Option, expected_output: Option, expected_result: Result< @@ -138,210 +143,157 @@ mod test { nexus_vm::error::VMError, >, emulator_type: EmulatorType, - compile_flags: &[&str], ) { - // Set up the temporary directories for intermediate project setup. - let tmp_dir = &create_tmp_dir(); - let tmp_project_path = tmp_dir.path().join("integration"); - - for flag_set in compile_flags { - // Check that the tests compile and execute correctly. - // Compile the test file. - let test_dir_path = "../integration-tests"; - let test_path = format!("{test_dir_path}/{test_name}.rs"); - let elf_contents = compile_to_elf(tmp_project_path.clone(), &test_path, flag_set); + // Serialize the input. + let mut input_bytes = Vec::::new(); + if let Some(input) = &input { + input_bytes = to_allocvec(input).expect("Serialization failed"); + } - // Save the elf file for debugging purposes. - let elf_path = format!("{test_dir_path}/{test_name}.elf"); - std::fs::write(&elf_path, &elf_contents).expect("Failed to write file"); + let mut deserialized_output: Option = None; + let ad = vec![0u8; 0xbeef as usize]; // placeholder ad until we have use for it - // Parse the elf file. - let elf = ElfFile::from_path(&elf_path).expect("Unable to load ELF from path"); - let input_bytes: Vec = if let Some(input) = &input { - serialize_into_u32_chunks(input) - } else { - vec![] - }; - let mut deserialized_output: Option = None; + for elf in elfs { match emulator_type { - EmulatorType::Harvard => { - let mut emulator = HarvardEmulator::from_elf(elf, &input_bytes, &[]); + EmulatorType::Harvard | EmulatorType::TwoPass => { + // Use elf file to build the harvard emulator. + let mut emulator = HarvardEmulator::from_elf(elf.clone(), &input_bytes, &[]); + + // Check that the program exits correctly. assert_eq!(emulator.execute(), expected_result); + + // Deserialize the output. if expected_output.is_some() { - let output_vec = emulator.get_output().unwrap(); - deserialized_output = Some(deserialize_from_u32_chunks::(&output_vec)); + let output_bytes = emulator.get_output().unwrap(); + deserialized_output = + Some(from_bytes(&output_bytes).expect("Deserialization failed")); + } + + // Run a second pass with a linear emulator constructed from the harvard emulator. + if matches!(emulator_type, EmulatorType::TwoPass) { + // Check that the intermediate output is correct. + assert_eq!(deserialized_output, expected_output); + + // Use the data obtained from the harvard emulator to construct the linear emulator. + let mut linear_emulator = + LinearEmulator::from_harvard(emulator, elf, &ad, &[]).unwrap(); + + // Check that the program exits correctly. + assert_eq!(linear_emulator.execute(), expected_result); + + // Deserialize the output. + if expected_output.is_some() { + let output_bytes = linear_emulator.get_output().unwrap(); + deserialized_output = + Some(from_bytes(&output_bytes).expect("Deserialization failed")); + } } } EmulatorType::Linear(heap_size, stack_size, program_size) => { - let output_len = if let Some(expected_output) = expected_output.clone() { - serialize_into_u32_chunks(&expected_output).len() - } else { - 0 - }; - + // Calculate the output length. + let mut output_len = 0; + if let Some(expected_output) = expected_output.clone() { + output_len = to_allocvec(&expected_output) + .expect("Serialization failed") + .len(); + } + // Construct the memory layout. let memory_layout = LinearMemoryLayout::new( heap_size, stack_size, - input_bytes.len() as u32 * 4, - output_len as u32 * 4, + input_bytes.len() as u32, + output_len as u32, program_size, - 0xbeef * 4, + ad.len() as u32, ) .expect("Invalid memory layout"); - let mut emulator = LinearEmulator::from_elf( - memory_layout, - &vec![0; 0xbeef as usize], - elf, - &input_bytes, - &[], - ); + // Construct the linear emulator. + let mut emulator = + LinearEmulator::from_elf(memory_layout, &ad, elf, &input_bytes, &[]); + + // Check that the program exits correctly. assert_eq!(emulator.execute(), expected_result); + + // Deserialize the output. if expected_output.is_some() { - let output_vec = emulator.get_output().unwrap(); - deserialized_output = Some(deserialize_from_u32_chunks::(&output_vec)); + let output_bytes = emulator.get_output().unwrap(); + deserialized_output = + Some(from_bytes(&output_bytes).expect("Deserialization failed")); } } }; - - // Check that the program exits correctly. - assert_eq!(deserialized_output, expected_output); } + + // Check that the program exits correctly. + assert_eq!(deserialized_output, expected_output); } #[test] fn test_emulate() { - // Works. - emulate::( - "io_u32", - Some(123u32), - Some(123u32), - Err(nexus_vm::error::VMError::VMExited(0)), + let emulators = vec![ EmulatorType::Harvard, - &["-C opt-level=0", ""], - ); - emulate::( - "io_u64", - Some(1u64 << 32), - Some(1u64 << 32), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::Harvard, - &["-C opt-level=0", ""], - ); - emulate::( - "io_u128", - Some(332306998946228968225970211937533483u128), - Some(332306998946228968225970211937533483u128), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::Harvard, - &["-C opt-level=0", ""], - ); - emulate::( - "io_u32", - Some(123u32), - Some(123u32), - Err(nexus_vm::error::VMError::VMExited(0)), EmulatorType::default_linear(), - &["-C opt-level=0", ""], - ); - emulate::( - "io_u64", - Some(1u64 << 32), - Some(1u64 << 32), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::default_linear(), - &["-C opt-level=0", ""], - ); - emulate::( - "io_u128", - Some(332306998946228968225970211937533483u128), - Some(332306998946228968225970211937533483u128), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::default_linear(), - &["-C opt-level=0", ""], - ); + EmulatorType::TwoPass, + ]; + let io_u32_elfs = compile_multi("io_u32", &["-C opt-level=0", ""]); + let io_u64_elfs = compile_multi("io_u64", &["-C opt-level=0", ""]); + let io_u128_elfs = compile_multi("io_u128", &["-C opt-level=0", ""]); + + for emulator in emulators { + emulate::( + io_u32_elfs.clone(), + Some(123u32), + Some(123u32), + Err(nexus_vm::error::VMError::VMExited(0)), + emulator.clone(), + ); + emulate::( + io_u64_elfs.clone(), + Some(1u64 << 32), + Some(1u64 << 32), + Err(nexus_vm::error::VMError::VMExited(0)), + emulator.clone(), + ); + emulate::( + io_u128_elfs.clone(), + Some(332306998946228968225970211937533483u128), + Some(332306998946228968225970211937533483u128), + Err(nexus_vm::error::VMError::VMExited(0)), + emulator, + ); + } } #[test] fn test_fib() { - emulate::( - "fib", - Some(1u32), - Some(1u32), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::Harvard, - &["-C opt-level=0", ""], - ); - emulate::( - "fib", - Some(10u32), - Some(34u32), - Err(nexus_vm::error::VMError::VMExited(0)), + let inputs = vec![1u32, 10u32, 20u32]; + let outputs = vec![1u32, 34u32, 4181u32]; + let emulators = vec![ EmulatorType::Harvard, - &["-C opt-level=0", ""], - ); - emulate::( - "fib", - Some(20u32), - Some(4181u32), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::Harvard, - &["-C opt-level=0", ""], - ); - emulate::( - "fib", - Some(1u32), - Some(1u32), - Err(nexus_vm::error::VMError::VMExited(0)), EmulatorType::default_linear(), - &["-C opt-level=0", ""], - ); - emulate::( + EmulatorType::TwoPass, + ]; + let elfs = compile_multi( "fib", - Some(10u32), - Some(34u32), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::default_linear(), - &["-C opt-level=0", ""], + &[ + "-C opt-level=0", + "-C opt-level=1", + "-C opt-level=2", + "-C opt-level=3", + ], ); - emulate::( - "fib", - Some(20u32), - Some(4181u32), - Err(nexus_vm::error::VMError::VMExited(0)), - EmulatorType::default_linear(), - &["-C opt-level=0", ""], - ); - } - #[test] - fn test_word_serialization() { - let input_u32 = 1324234u32; - let serialized_u32 = serialize_into_u32_chunks(&input_u32); - let deserialized_u32 = deserialize_from_u32_chunks::(&serialized_u32); - assert_eq!(input_u32, deserialized_u32); - - let input_u64 = 1u64 << 32; - let serialized_u64 = serialize_into_u32_chunks(&input_u64); - let deserialized_u64 = deserialize_from_u32_chunks::(&serialized_u64); - assert_eq!(input_u64, deserialized_u64); - - #[derive(Serialize, Deserialize, Debug, PartialEq)] - struct TestStruct { - a: u32, - b: u32, - c: u128, - d: u128, + for (input, output) in inputs.iter().zip(outputs.iter()) { + for emulator in emulators.clone() { + emulate::( + elfs.clone(), + Some(input.clone()), + Some(output.clone()), + Err(nexus_vm::error::VMError::VMExited(0)), + emulator.clone(), + ); + } } - - let input_struct = TestStruct { - a: 1, - b: 2, - c: 3, - d: 4, - }; - let serialized_struct = serialize_into_u32_chunks(&input_struct); - let deserialized_struct = deserialize_from_u32_chunks::(&serialized_struct); - assert_eq!(input_struct, deserialized_struct); } } diff --git a/vm/src/elf/loader.rs b/vm/src/elf/loader.rs index b78f1afc..320d3102 100644 --- a/vm/src/elf/loader.rs +++ b/vm/src/elf/loader.rs @@ -52,6 +52,7 @@ use std::path::Path; use super::error::ParserError; +#[derive(Clone)] pub struct ElfFile { /// The instructions of the program encoded as 32-bits. pub instructions: Vec, diff --git a/vm/src/elf/mod.rs b/vm/src/elf/mod.rs index 87a73aa8..b37ec52e 100644 --- a/vm/src/elf/mod.rs +++ b/vm/src/elf/mod.rs @@ -3,4 +3,4 @@ mod loader; mod parser; pub use loader::ElfFile; -pub use parser::WORD_SIZE; +pub use nexus_common::constants::WORD_SIZE; diff --git a/vm/src/elf/parser.rs b/vm/src/elf/parser.rs index e5de3740..1ae58203 100644 --- a/vm/src/elf/parser.rs +++ b/vm/src/elf/parser.rs @@ -32,6 +32,7 @@ use elf::{ segment::ProgramHeader, ElfBytes, }; +use nexus_common::constants::WORD_SIZE; use std::collections::{BTreeMap, HashMap}; use std::fmt; @@ -52,9 +53,6 @@ pub struct ParsedElfData { /// The maximum size of the memory in bytes. const MAXIMUM_MEMORY_SIZE: u32 = u32::MAX; -/// The size of a word in bytes. -pub const WORD_SIZE: usize = 4; - /// Defines the allowed sections for Harvard architecture: /// - Instruction memory: .text /// - Data memory: .data, .sdata, .rodata diff --git a/vm/src/emulator/executor.rs b/vm/src/emulator/executor.rs index 1f26432e..f740522b 100644 --- a/vm/src/emulator/executor.rs +++ b/vm/src/emulator/executor.rs @@ -65,10 +65,7 @@ //! ``` //! -use std::cmp::max; -use std::collections::{btree_map, BTreeMap, HashMap, HashSet, VecDeque}; - -use super::{layout::LinearMemoryLayout, registry::InstructionExecutorRegistry}; +use super::{layout::LinearMemoryLayout, memory_stats::*, registry::InstructionExecutorRegistry}; use crate::{ cpu::{instructions::InstructionResult, Cpu}, elf::ElfFile, @@ -77,11 +74,21 @@ use crate::{ FixedMemory, LoadOp, MemAccessSize, MemoryProcessor, MemoryRecords, Modes, StoreOp, UnifiedMemory, VariableMemory, NA, RO, RW, WO, }, - riscv::{decode_until_end_of_a_block, BasicBlock, Instruction, Opcode}, + riscv::{ + decode_instruction, decode_until_end_of_a_block, BasicBlock, BuiltinOpcode, Instruction, + Opcode, Register, + }, system::SyscallInstruction, - WORD_SIZE, }; -use nexus_common::cpu::InstructionExecutor; +use nexus_common::{ + constants::{MEMORY_TOP, WORD_SIZE}, + cpu::{InstructionExecutor, Registers}, + word_align, +}; +use std::{ + cmp::max, + collections::{btree_map, BTreeMap, HashMap, HashSet, VecDeque}, +}; pub type MemoryTranscript = Vec; @@ -240,6 +247,9 @@ pub struct HarvardEmulator { // A combined read-only (in part) and read-write (in part) memory image pub data_memory: UnifiedMemory, + + // Tracker for the memory sizes since they are not known ahead of time + memory_stats: MemoryStats, } impl Default for HarvardEmulator { @@ -251,27 +261,37 @@ impl Default for HarvardEmulator { input_memory: FixedMemory::::new(0, 0x1000), output_memory: VariableMemory::::default(), data_memory: UnifiedMemory::default(), + memory_stats: MemoryStats::default(), } } } impl HarvardEmulator { - pub fn from_elf(elf: ElfFile, public_input: &[u32], private_input: &[u8]) -> Self { + pub fn from_elf(elf: ElfFile, public_input: &[u8], private_input: &[u8]) -> Self { // the stack and heap will also be stored in this variable memory segment - let mut data_memory = UnifiedMemory::from(VariableMemory::::from(elf.ram_image)); + let text_end = (elf.instructions.len() * WORD_SIZE) as u32 + elf.base; + let mut data_end = elf + .ram_image + .last_key_value() + .unwrap_or((&text_end, &0)) + .0 + .clone(); + let mut data_memory = + UnifiedMemory::from(VariableMemory::::from(elf.ram_image.clone())); if !elf.rom_image.is_empty() { let ro_data_base_address: u32 = *elf.rom_image.first_key_value().unwrap().0; - let mut ro_data: Vec = vec![ - 0; - *elf.rom_image.keys().max().unwrap_or(&0) as usize + 1 - - ro_data_base_address as usize - ]; + let ro_data_end = *elf.rom_image.keys().max().unwrap_or(&0); + let mut ro_data: Vec = + vec![0; ro_data_end as usize + 1 - ro_data_base_address as usize]; for (addr, &value) in &elf.rom_image { ro_data[(addr - ro_data_base_address) as usize] = value; } + // Linker places data after rodata, but need to guard against edge case of empty data. + data_end = max(data_end, ro_data_end); + let ro_data_memory = FixedMemory::::from_vec( ro_data_base_address, ro_data.len() * WORD_SIZE, @@ -284,9 +304,13 @@ impl HarvardEmulator { // Zero out the public input and public output start locations since no offset is needed for harvard emulator. data_memory - .add_fixed_ro(&FixedMemory::::from_slice(0x80, 8, &[0, 0])) + .add_fixed_ro(&FixedMemory::::from_words(0x80, 8, &[0, 0])) .unwrap(); + // Add the public input length to the beginning of the public input. + let len_bytes = (word_align!(public_input.len()) / WORD_SIZE) as u32; + let public_input_with_len = [&len_bytes.to_le_bytes()[..], public_input].concat(); + let mut emulator = Self { executor: Executor { private_input_tape: VecDeque::::from(private_input.to_vec()), @@ -300,20 +324,17 @@ impl HarvardEmulator { elf.instructions.len() * WORD_SIZE, elf.instructions, ), - input_memory: FixedMemory::::from_slice( - 0, - (1 + public_input.len()) * WORD_SIZE, - &[&[public_input.len() as u32; 1], public_input].concat(), - ), + input_memory: FixedMemory::::from_bytes(0, &public_input_with_len), output_memory: VariableMemory::::default(), data_memory, + memory_stats: MemoryStats::new(data_end, MEMORY_TOP), }; emulator.executor.cpu.pc.value = emulator.executor.entrypoint; emulator } - pub fn get_output(&self) -> Result, MemoryError> { - self.output_memory.segment(0, None) + pub fn get_output(&self) -> Result, MemoryError> { + self.output_memory.segment_bytes(0, None) } } @@ -328,7 +349,7 @@ impl Emulator for HarvardEmulator { &mut self, bare_instruction: &Instruction, ) -> Result<(InstructionResult, MemoryRecords)> { - let _ = match ( + let ((_, (load_ops, store_ops)), accessed_io_memory) = match ( self.executor .instruction_executor .get_for_read_input(&bare_instruction.opcode), @@ -339,32 +360,51 @@ impl Emulator for HarvardEmulator { .instruction_executor .get(&bare_instruction.opcode), ) { - (_, _, _) if bare_instruction.is_system_instruction() => { + (_, _, _) if bare_instruction.is_system_instruction() => ( ::execute_syscall( &mut self.executor, &mut self.data_memory, None, bare_instruction, - )? - } - (Some(read_input), _, _) => read_input( - &mut self.executor.cpu, - &mut self.input_memory, - bare_instruction, - )?, - (_, Some(write_output), _) => write_output( - &mut self.executor.cpu, - &mut self.output_memory, - bare_instruction, - )?, - (_, _, Ok(executor)) => executor( - &mut self.executor.cpu, - &mut self.data_memory, - bare_instruction, - )?, + )?, + false, + ), + (Some(read_input), _, _) => ( + read_input( + &mut self.executor.cpu, + &mut self.input_memory, + bare_instruction, + )?, + true, + ), + (_, Some(write_output), _) => ( + write_output( + &mut self.executor.cpu, + &mut self.output_memory, + bare_instruction, + )?, + true, + ), + (_, _, Ok(executor)) => ( + executor( + &mut self.executor.cpu, + &mut self.data_memory, + bare_instruction, + )?, + false, + ), (_, _, Err(e)) => return Err(e), }; + // Update the memory size statistics. + if !accessed_io_memory { + self.memory_stats.update( + load_ops, + store_ops, + self.executor.cpu.registers.read(Register::X2), // Stack pointer + )?; + } + if !bare_instruction.is_branch_or_jump_instruction() { self.executor.cpu.pc.step(); } @@ -428,13 +468,69 @@ pub struct LinearEmulator { impl LinearEmulator { pub fn from_harvard( - _memory_layout: LinearMemoryLayout, - _ad: &[u32], - _emul: HarvardEmulator, - ) -> Self { - // Reminder!: the output linear memory segment should be pre-populated with the contents of the output - // harvard memory segment, in order to enable an i/o consistency argument like that of Jolt. - todo!() + emulator_harvard: HarvardEmulator, + mut elf: ElfFile, + ad: &[u8], + private_input: &[u8], + ) -> Result { + // Reminder!: Add feature flag to control pre-populating output memory. + // This allows flexibility in the consistency argument used by the prover. + + let public_input = emulator_harvard + .input_memory + .segment_bytes(WORD_SIZE as u32, None); // exclude the first word which is the length + let output_memory = emulator_harvard.get_output()?; + + // Replace custom instructions `rin` and `wou` with `lw` and `sw`. + elf.instructions = elf + .instructions + .iter() + .map(|instr| { + let mut decoded_ins = decode_instruction(*instr); + + if emulator_harvard + .executor + .instruction_executor + .is_read_input(&decoded_ins.opcode) + { + decoded_ins.opcode = Opcode::from(BuiltinOpcode::LW); + decoded_ins.encode() + } else if emulator_harvard + .executor + .instruction_executor + .is_write_output(&decoded_ins.opcode) + { + decoded_ins.opcode = Opcode::from(BuiltinOpcode::SW); + decoded_ins.encode() + } else { + *instr + } + }) + .collect(); + + // Create an optimized memory layout using memory statistics from the first pass. + let memory_layout = emulator_harvard + .memory_stats + .create_optimized_layout( + ((elf.instructions.len() + + WORD_SIZE + + elf.rom_image.len() + + WORD_SIZE + + elf.ram_image.len()) + * WORD_SIZE) as u32, + ad.len() as u32, + public_input.len() as u32, + output_memory.len() as u32, + ) + .unwrap(); + + Ok(Self::from_elf( + memory_layout, + ad, + elf, + public_input.as_slice(), + private_input, + )) } /// Creates a Linear Emulator from an ELF file. @@ -449,9 +545,9 @@ impl LinearEmulator { /// layout is not compatible with the ELF file. pub fn from_elf( memory_layout: LinearMemoryLayout, - ad: &[u32], + ad: &[u8], elf: ElfFile, - public_input: &[u32], + public_input: &[u8], private_input: &[u8], ) -> Self { let mut memory = UnifiedMemory::default(); @@ -502,14 +598,17 @@ impl LinearEmulator { let _ = memory.add_fixed_rw(&data_memory).unwrap(); } + // Add the public input length to the beginning of the public input. + let len_bytes = (word_align!(public_input.len()) / WORD_SIZE) as u32; + let public_input_with_len = [&len_bytes.to_le_bytes()[..], public_input].concat(); + let input_len = (memory_layout.public_input_end() - memory_layout.public_input_start()) as usize; - assert_eq!(input_len, WORD_SIZE + (public_input.len() * WORD_SIZE)); + assert_eq!(word_align!(public_input_with_len.len()), input_len); if input_len > 0 { - let input_memory = FixedMemory::::from_slice( + let input_memory = FixedMemory::::from_bytes( memory_layout.public_input_start(), - input_len, - &[&[public_input.len() as u32; 1], public_input].concat(), + &public_input_with_len, ); let _ = memory.add_fixed_ro(&input_memory).unwrap(); } @@ -535,15 +634,15 @@ impl LinearEmulator { let _ = memory.add_fixed_rw(&stack_memory).unwrap(); let ad_len = (memory_layout.ad_end() - memory_layout.ad_start()) as usize; - assert_eq!(ad_len, ad.len() * WORD_SIZE); + assert_eq!(ad_len, ad.len()); if ad_len > 0 { - let ad_memory = FixedMemory::::from_slice(memory_layout.ad_start(), ad_len, ad); + let ad_memory = FixedMemory::::from_bytes(memory_layout.ad_start(), ad); let _ = memory.add_fixed_na(&ad_memory).unwrap(); } // Add the public input and public output start locations. memory - .add_fixed_ro(&FixedMemory::::from_slice( + .add_fixed_ro(&FixedMemory::::from_words( 0x80, 8, &[ @@ -571,12 +670,12 @@ impl LinearEmulator { } /// Returns the output memory segment. - pub fn get_output(&self) -> Result<&[u32], MemoryError> { - self.memory.segment( + pub fn get_output(&self) -> Result, MemoryError> { + Ok(self.memory.segment_bytes( (Modes::WO as usize, 0), self.memory_layout.public_output_start(), Some(self.memory_layout.public_output_end()), - ) + )?) } /// Creates a Linear Emulator from a basic block IR, for simple testing purposes. diff --git a/vm/src/emulator/layout.rs b/vm/src/emulator/layout.rs index a93f9f41..66162bb1 100644 --- a/vm/src/emulator/layout.rs +++ b/vm/src/emulator/layout.rs @@ -1,5 +1,6 @@ use crate::error::{Result, VMError}; -use crate::WORD_SIZE; +use nexus_common::constants::WORD_SIZE; +use nexus_common::word_align; use serde::{Deserialize, Serialize}; // see runtime @@ -48,9 +49,9 @@ impl LinearMemoryLayout { // enforce order if self.ad_start() <= self.program_start() - || self.program_start() <= self.public_output_start() + || self.program_start() < self.public_output_start() // Allow empty output || self.public_output_start() <= self.panic() - || self.panic() <= self.public_input_start() + || self.panic() < self.public_input_start() // Allow empty input || self.public_input_start() <= self.stack_top() || self.stack_top() <= self.stack_bottom() || self.stack_bottom() <= self.gap_start() @@ -103,8 +104,8 @@ impl LinearMemoryLayout { let ml = Self::new_unchecked( max_heap_size, max_stack_size, - public_input_size, - public_output_size, + word_align!(public_input_size as usize) as u32, + word_align!(public_output_size as usize) as u32, program_size, ad_size, ); diff --git a/vm/src/emulator/memory_stats.rs b/vm/src/emulator/memory_stats.rs new file mode 100644 index 00000000..0c02400f --- /dev/null +++ b/vm/src/emulator/memory_stats.rs @@ -0,0 +1,157 @@ +use crate::emulator::layout::LinearMemoryLayout; +use crate::{ + error::Result, + memory::{LoadOp, StoreOp}, +}; +use std::cmp::{max, min}; +use std::collections::HashSet; + +#[derive(Debug)] +pub struct MemoryStats { + pub max_heap_access: u32, + pub min_stack_access: u32, + heap_bottom: u32, + stack_top: u32, +} + +impl Default for MemoryStats { + fn default() -> Self { + Self::new(0, 0) + } +} + +impl MemoryStats { + pub fn new(heap_bottom: u32, stack_top: u32) -> Self { + Self { + max_heap_access: 0, + min_stack_access: u32::MAX, + heap_bottom, + stack_top, + } + } + + /// Update the memory stats based on load and store operations. + pub fn update( + &mut self, + load_ops: HashSet, + store_ops: HashSet, + stack_pointer: u32, + ) -> Result<()> { + // Collect all memory accesses. + let memory_accesses: HashSet = load_ops + .iter() + .map(|op| op.get_address()) + .chain(store_ops.iter().map(|op| op.get_address())) + .collect(); + + // Find the highest memory access in the heap. + self.max_heap_access = max( + self.max_heap_access, + *memory_accesses + .iter() + .filter(|&addr| addr < &stack_pointer) + .max() + .unwrap_or(&0), + ); + + // For safety, we just check the stack pointer directly rather than looking for the lowest memory access. + // This ensures we respect the full stack frame that was reserved, even if not all of it is used. + // We could optimize this in the future by tracking actual stack accesses if needed. + self.min_stack_access = min(self.min_stack_access, stack_pointer); + Ok(()) + } + + /// Create an optimized linear memory layout based on the memory stats. + pub fn create_optimized_layout( + &self, + program_size: u32, + ad_size: u32, + input_size: u32, + output_size: u32, + ) -> Result { + LinearMemoryLayout::new( + self.max_heap_access - self.heap_bottom, + self.stack_top - self.min_stack_access, + input_size, + output_size, + program_size, + ad_size, + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::memory::{LoadOp, MemAccessSize, StoreOp}; + + #[test] + fn test_update_data_region() { + let mut sizes = MemoryStats::new(0, 0); + let mut load_ops = HashSet::new(); + let mut store_ops = HashSet::new(); + let stack_pointer = 1000; + + // Heap accesses (below stack pointer). + load_ops.insert(LoadOp::Op(MemAccessSize::Word, 500, 0)); + store_ops.insert(StoreOp::Op(MemAccessSize::Word, 600, 0, 0)); + + // Stack accesses (above stack pointer). + load_ops.insert(LoadOp::Op(MemAccessSize::Word, 1100, 0)); + store_ops.insert(StoreOp::Op(MemAccessSize::Word, 1200, 0, 0)); + + sizes.update(load_ops, store_ops, stack_pointer).unwrap(); + assert_eq!(sizes.max_heap_access, 600); + assert_eq!(sizes.min_stack_access, 1000); + } + + #[test] + fn test_create_optimized_layout() { + let mut stats = MemoryStats::new(0, 1000000); + let stack_pointer = 3000; + + // Create heap accesses (below stack pointer). + let mut load_ops = HashSet::new(); + load_ops.insert(LoadOp::Op(MemAccessSize::Word, 1000, 0)); + load_ops.insert(LoadOp::Op(MemAccessSize::Word, 800, 0)); + + // Create stack accesses (above stack pointer). + let mut store_ops = HashSet::new(); + store_ops.insert(StoreOp::Op(MemAccessSize::Word, 3000, 0, 0)); + store_ops.insert(StoreOp::Op(MemAccessSize::Word, 3500, 0, 0)); + + // Update data region (heap and stack). + stats + .update( + load_ops.iter().cloned().collect(), + store_ops.iter().cloned().collect(), + stack_pointer, + ) + .unwrap(); + + let mut more_load_ops = HashSet::new(); + more_load_ops.insert(LoadOp::Op(MemAccessSize::Word, 500, 0)); + stats + .update(more_load_ops, HashSet::new(), stack_pointer) + .unwrap(); + + let mut more_store_ops = HashSet::new(); + more_store_ops.insert(StoreOp::Op(MemAccessSize::Word, 800, 0, 0)); + stats + .update(HashSet::new(), more_store_ops, stack_pointer) + .unwrap(); + + let program_size = 300; + let ad_size = 100; + + let layout = stats + .create_optimized_layout(program_size, ad_size, 0, 0) + .unwrap(); + + assert_eq!(layout.heap_end(), 5096); + assert_eq!(layout.stack_bottom(), 9192); + assert_eq!(layout.stack_top(), 1006188); + assert_eq!(layout.public_input_end(), 1006196); + assert_eq!(layout.public_output_end(), 1006200); + } +} diff --git a/vm/src/emulator/mod.rs b/vm/src/emulator/mod.rs index c05f1f86..3b8c4395 100644 --- a/vm/src/emulator/mod.rs +++ b/vm/src/emulator/mod.rs @@ -1,5 +1,6 @@ mod executor; mod layout; +mod memory_stats; mod registry; pub use executor::{Emulator, Executor, HarvardEmulator, LinearEmulator, MemoryTranscript}; diff --git a/vm/src/emulator/registry.rs b/vm/src/emulator/registry.rs index 306e6bf4..e6924ec9 100644 --- a/vm/src/emulator/registry.rs +++ b/vm/src/emulator/registry.rs @@ -305,7 +305,7 @@ impl InstructionExecutorRegistry { op: &Opcode, ) -> Option> { // Opcode will be parsed dynamically so the name will be different. - if op.raw() == self.read_input.raw() && op.fn3() == self.read_input.fn3() { + if self.is_read_input(op) { // Interpret `rin` as `lw`. return Some(instructions::LwInstruction::evaluator as InstructionExecutorFn); } @@ -318,11 +318,21 @@ impl InstructionExecutorRegistry { op: &Opcode, ) -> Option> { // Opcode will be parsed dynamically so the name will be different. - if op.raw() == self.write_output.raw() && op.fn3() == self.write_output.fn3() { + if self.is_write_output(op) { // Interpret `wou` as `sw`. return Some(instructions::SwInstruction::evaluator as InstructionExecutorFn); } None } + + #[inline(always)] + pub fn is_read_input(&self, op: &Opcode) -> bool { + op.raw() == self.read_input.raw() && op.fn3() == self.read_input.fn3() + } + + #[inline(always)] + pub fn is_write_output(&self, op: &Opcode) -> bool { + op.raw() == self.write_output.raw() && op.fn3() == self.write_output.fn3() + } } diff --git a/vm/src/memory/fixed.rs b/vm/src/memory/fixed.rs index bd754cc2..a7df0c1b 100644 --- a/vm/src/memory/fixed.rs +++ b/vm/src/memory/fixed.rs @@ -1,7 +1,8 @@ use std::{fmt::Debug, marker::PhantomData}; -use crate::WORD_SIZE; +use nexus_common::constants::WORD_SIZE; use nexus_common::error::MemoryError; +use nexus_common::{bytes_to_words, word_align, words_to_bytes}; use super::{LoadOp, MemAccessSize, MemoryProcessor, Mode, StoreOp, NA, RO, RW, WO}; @@ -64,8 +65,17 @@ impl FixedMemory { } } - pub fn from_slice(base_address: u32, max_len: usize, slice: &[u32]) -> Self { - let mut vec = slice.to_vec(); + pub fn from_bytes(base_address: u32, bytes: &[u8]) -> Self { + FixedMemory:: { + base_address, + max_len: word_align!(bytes.len()), + vec: bytes_to_words!(bytes), + __mode: PhantomData, + } + } + + pub fn from_words(base_address: u32, max_len: usize, words: &[u32]) -> Self { + let mut vec = words.to_vec(); vec.truncate(max_len / WORD_SIZE); FixedMemory:: { @@ -86,6 +96,10 @@ impl FixedMemory { &self.vec[s as usize..] } } + + pub fn segment_bytes(&self, start: u32, end: Option) -> Vec { + words_to_bytes!(self.segment(start, end)) + } } impl FixedMemory { diff --git a/vm/src/memory/unified.rs b/vm/src/memory/unified.rs index 144a7675..4b024742 100644 --- a/vm/src/memory/unified.rs +++ b/vm/src/memory/unified.rs @@ -4,6 +4,8 @@ use num_traits::FromPrimitive; use rangemap::RangeMap; use std::fmt::{Debug, Display, Formatter, Result as FmtResult}; +use nexus_common::words_to_bytes; + use super::{ FixedMemory, LoadOp, MemAccessSize, MemoryProcessor, StoreOp, VariableMemory, NA, RO, RW, WO, }; @@ -205,6 +207,15 @@ impl UnifiedMemory { _ => Err(MemoryError::UndefinedMemoryRegion), } } + + pub fn segment_bytes( + &self, + uidx: (usize, usize), + start: u32, + end: Option, + ) -> Result, MemoryError> { + Ok(words_to_bytes!(self.segment(uidx, start, end)?)) + } } impl MemoryProcessor for UnifiedMemory { diff --git a/vm/src/memory/variable.rs b/vm/src/memory/variable.rs index 9ba18318..7798855f 100644 --- a/vm/src/memory/variable.rs +++ b/vm/src/memory/variable.rs @@ -2,8 +2,9 @@ use std::collections::BTreeMap; use std::fmt::{Debug, Formatter, Result as FmtResult}; use std::marker::PhantomData; -use crate::WORD_SIZE; +use nexus_common::constants::WORD_SIZE; use nexus_common::error::MemoryError; +use nexus_common::words_to_bytes; use super::{LoadOp, MemAccessSize, MemoryProcessor, Mode, StoreOp, RO, RW, WO}; @@ -153,6 +154,10 @@ impl VariableMemory { Ok(values) } } + + pub fn segment_bytes(&self, start: u32, end: Option) -> Result, MemoryError> { + Ok(words_to_bytes!(self.segment(start, end)?)) + } } impl MemoryProcessor for VariableMemory { diff --git a/vm/src/riscv/decoder.rs b/vm/src/riscv/decoder.rs index 4f5e7835..3963069c 100644 --- a/vm/src/riscv/decoder.rs +++ b/vm/src/riscv/decoder.rs @@ -49,47 +49,6 @@ use crate::riscv::instructions::{BasicBlock, BasicBlockProgram, Instruction, Ins use nexus_common::riscv::{instruction::InstructionType, Opcode}; use rrs_lib::process_instruction; -/// Decodes RISC-V instructions from an ELF file into basic blocks -/// -/// # Arguments -/// -/// * `u32_instructions` - A slice of u32 values representing RISC-V instructions -/// -/// # Returns -/// -/// A `BasicBlockProgram` containing the decoded instructions organized into basic blocks -pub fn decode_instructions(u32_instructions: &[u32]) -> BasicBlockProgram { - let mut program = BasicBlockProgram::default(); - let mut current_block = BasicBlock::default(); - let mut decoder = InstructionDecoder; - let mut start_new_block = true; - - for &u32_instruction in u32_instructions.iter() { - // Decode the instruction, if the instruction is unrecognizable, it will be marked as unimplemented. - let decoded_instruction = - process_instruction(&mut decoder, u32_instruction).unwrap_or_else(Instruction::unimpl); - - // Start a new basic block if necessary - if start_new_block && !current_block.0.is_empty() { - program.blocks.push(current_block); - current_block = BasicBlock::default(); - } - - // Check if the next instruction should start a new basic block - start_new_block = decoded_instruction.is_branch_or_jump_instruction(); - - // Add the decoded instruction to the current basic block - current_block.0.push(decoded_instruction); - } - - // Add the last block if it's not empty - if !current_block.0.is_empty() { - program.blocks.push(current_block); - } - - program -} - #[inline(always)] fn extract_opcode(u32_instruction: u32) -> u8 { const OPCODE_MASK: u32 = 0x7F; // 7 least significant bits (6-0) @@ -154,54 +113,96 @@ const DYNAMIC_RTYPE_OPCODE: u8 = 0b0001011; const DYNAMIC_STYPE_OPCODE: u8 = 0b1011011; const DYNAMIC_ITYPE_OPCODE: u8 = 0b0101011; +pub fn decode_instruction(u32_instruction: u32) -> Instruction { + let mut decoder = InstructionDecoder; + // Decode the instruction + process_instruction(&mut decoder, u32_instruction).unwrap_or_else(|| { + // The rrs_lib instruction decoding doesn't have support for custom instructions, + // so we need to handle them more as an error condition. + let opcode = extract_opcode(u32_instruction); + let fn3 = extract_fn3(u32_instruction); + let fn7 = extract_fn7(u32_instruction); + let rs1 = extract_rs1(u32_instruction); + let rs2 = extract_rs2(u32_instruction); + let rd = extract_rd(u32_instruction); + let i_imm = extract_i_imm(u32_instruction); + let s_imm = extract_s_imm(u32_instruction); + + if opcode == DYNAMIC_ITYPE_OPCODE { + Instruction::new( + Opcode::new(opcode, Some(fn3), None, "dynamic"), + rd, + rs1, + i_imm, + InstructionType::IType, + ) + } else if opcode == DYNAMIC_STYPE_OPCODE { + Instruction::new( + Opcode::new(opcode, Some(fn3), None, "dynamic"), + rs1, + rs2, + s_imm, + InstructionType::SType, + ) + } else if opcode == DYNAMIC_RTYPE_OPCODE { + Instruction::new( + Opcode::new(opcode, Some(fn3), Some(fn7), "dynamic"), + rd, + rs1, + rs2.into(), + InstructionType::RType, + ) + } else { + // Only support the single dynamic R-type, S-type, and I-type opcodes. + Instruction::unimpl() + } + }) +} + +/// Decodes RISC-V instructions from an ELF file into basic blocks +/// +/// # Arguments +/// +/// * `u32_instructions` - A slice of u32 values representing RISC-V instructions +/// +/// # Returns +/// +/// A `BasicBlockProgram` containing the decoded instructions organized into basic blocks +pub fn decode_instructions(u32_instructions: &[u32]) -> BasicBlockProgram { + let mut program = BasicBlockProgram::default(); + let mut current_block = BasicBlock::default(); + let mut start_new_block = true; + + for &u32_instruction in u32_instructions.iter() { + // Decode the instruction, if the instruction is unrecognizable, it will be marked as unimplemented. + let decoded_instruction = decode_instruction(u32_instruction); + + // Start a new basic block if necessary + if start_new_block && !current_block.0.is_empty() { + program.blocks.push(current_block); + current_block = BasicBlock::default(); + } + + // Check if the next instruction should start a new basic block + start_new_block = decoded_instruction.is_branch_or_jump_instruction(); + + // Add the decoded instruction to the current basic block + current_block.0.push(decoded_instruction); + } + + // Add the last block if it's not empty + if !current_block.0.is_empty() { + program.blocks.push(current_block); + } + + program +} + pub fn decode_until_end_of_a_block(u32_instructions: &[u32]) -> BasicBlock { let mut block = BasicBlock::default(); - let mut decoder = InstructionDecoder; for &u32_instruction in u32_instructions.iter() { - // Decode the instruction - let decoded_instruction = process_instruction(&mut decoder, u32_instruction) - .unwrap_or_else(|| { - // The rrs_lib instruction decoding doesn't have support for custom instructions, - // so we need to handle them more as an error condition. - let opcode = extract_opcode(u32_instruction); - let fn3 = extract_fn3(u32_instruction); - let fn7 = extract_fn7(u32_instruction); - let rs1 = extract_rs1(u32_instruction); - let rs2 = extract_rs2(u32_instruction); - let rd = extract_rd(u32_instruction); - let i_imm = extract_i_imm(u32_instruction); - let s_imm = extract_s_imm(u32_instruction); - - if opcode == DYNAMIC_ITYPE_OPCODE { - Instruction::new( - Opcode::new(opcode, Some(fn3), None, "dynamic"), - rd, - rs1, - i_imm, - InstructionType::IType, - ) - } else if opcode == DYNAMIC_STYPE_OPCODE { - Instruction::new( - Opcode::new(opcode, Some(fn3), None, "dynamic"), - rs1, - rs2, - s_imm, - InstructionType::SType, - ) - } else if opcode == DYNAMIC_RTYPE_OPCODE { - Instruction::new( - Opcode::new(opcode, Some(fn3), Some(fn7), "dynamic"), - rd, - rs1, - rs2.into(), - InstructionType::RType, - ) - } else { - // Only support the single dynamic R-type, S-type, and I-type opcodes. - Instruction::unimpl() - } - }); + let decoded_instruction = decode_instruction(u32_instruction); let pc_changed = decoded_instruction.is_branch_or_jump_instruction(); @@ -219,7 +220,7 @@ pub fn decode_until_end_of_a_block(u32_instructions: &[u32]) -> BasicBlock { mod tests { use super::*; use crate::elf::ElfFile; - use crate::WORD_SIZE; + use nexus_common::constants::WORD_SIZE; /// Tests the decoding of instructions from an ELF file /// diff --git a/vm/src/riscv/instructions/basic_block.rs b/vm/src/riscv/instructions/basic_block.rs index f426e070..3b42e3c5 100644 --- a/vm/src/riscv/instructions/basic_block.rs +++ b/vm/src/riscv/instructions/basic_block.rs @@ -101,7 +101,7 @@ impl Display for BasicBlockProgram { mod tests { use crate::elf::ElfFile; use crate::riscv::{decode_instructions, decode_until_end_of_a_block}; - use crate::WORD_SIZE; + use nexus_common::constants::WORD_SIZE; #[test] fn test_encode_decode_consistency_in_a_block() { diff --git a/vm/src/riscv/mod.rs b/vm/src/riscv/mod.rs index c2dee449..07fa4e2f 100644 --- a/vm/src/riscv/mod.rs +++ b/vm/src/riscv/mod.rs @@ -1,7 +1,7 @@ pub(crate) mod decoder; pub(crate) mod instructions; -pub use decoder::{decode_instructions, decode_until_end_of_a_block}; +pub use decoder::{decode_instruction, decode_instructions, decode_until_end_of_a_block}; pub use instructions::{ BasicBlock, BasicBlockProgram, BuiltinOpcode, Instruction, InstructionType, Opcode, }; diff --git a/vm/src/trace.rs b/vm/src/trace.rs index 6e3d0675..673e9be9 100644 --- a/vm/src/trace.rs +++ b/vm/src/trace.rs @@ -274,8 +274,8 @@ fn k_step(vm: &mut LinearEmulator, k: usize) -> (Option, Result<()>) { /// These padded instructions are not executed in the VM. pub fn k_trace( elf: ElfFile, - ad_hash: &[u32], - public_input: &[u32], + ad_hash: &[u8], + public_input: &[u8], private_input: &[u8], k: usize, ) -> Result { @@ -388,8 +388,8 @@ fn bb_step(vm: &mut LinearEmulator) -> (Option, Result<()>) { /// Trace a program over basic blocks. pub fn bb_trace( elf: ElfFile, - ad_hash: &[u32], - public_input: &[u32], + ad_hash: &[u8], + public_input: &[u8], private_input: &[u8], ) -> Result { // todo: get memory segment using a first-pass trace