diff --git a/src/Makefile b/src/Makefile index 0581ca1bb..bc89cfb97 100644 --- a/src/Makefile +++ b/src/Makefile @@ -229,11 +229,16 @@ HASH_CFLAGS=-O3 -DNDEBUG -funroll-loops -fno-stack-protector # Optimization flags for the interpreter ifneq (,$(filter yes,$(relwithdebinfo) $(release))) +# Enable strict aliasing otherwise the optimizer may generate indirections to the jump table +# This is fine, because the interpreter is written to comply with strict aliasing rules +INTERPRET_CXXFLAGS+=-fstrict-aliasing +# Ensure frame pointers are omitted to free an extra register (some distributions enable them by default) +INTERPRET_CXXFLAGS+=-fomit-frame-pointer ifneq (,$(findstring gcc,$(CC))) # The following improves computed goto dispatch as stated in GCC manual INTERPRET_CXXFLAGS+=-fno-gcse # The following remove extra jumps in the computed goto dispatch -# INTERPRET_CXXFLAGS+=-fno-crossjumping +INTERPRET_CXXFLAGS+=-fno-crossjumping # The interpreter dispatch loop performs better as a big inlined function INTERPRET_CXXFLAGS+=-finline-limit=1024 # The interpreter hot loop is big and puts pressure on register allocation, this improves register use diff --git a/src/collect-mcycle-hashes-state-access.h b/src/collect-mcycle-hashes-state-access.h index 595db354f..c934969b7 100644 --- a/src/collect-mcycle-hashes-state-access.h +++ b/src/collect-mcycle-hashes-state-access.h @@ -58,8 +58,9 @@ class collect_mcycle_hashes_state_access : private: // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) - context &m_c; ///< context for dirty words - machine &m_m; ///< reference to machine + processor_state &m_s; ///< reference to processor state + context &m_c; ///< context for dirty words + machine &m_m; ///< reference to machine // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) public: @@ -67,7 +68,7 @@ class collect_mcycle_hashes_state_access : /// \param context Context for the recording with the log filename /// \param m reference to machine /// \details The
log file is saved when finish() is called - collect_mcycle_hashes_state_access(context &c, machine &m) : m_c(c), m_m(m) {} + collect_mcycle_hashes_state_access(context &c, machine &m) : m_s(m.get_state()), m_c(c), m_m(m) {} private: using fast_addr_type = host_addr; @@ -87,44 +88,44 @@ class collect_mcycle_hashes_state_access : friend i_state_access; uint64_t do_read_x(int i) const { - return m_m.get_state().shadow.registers.x[i]; + return m_s.shadow.registers.x[i]; } void do_write_x(int i, uint64_t val) const { assert(i != 0); - m_m.get_state().shadow.registers.x[i] = val; + m_s.shadow.registers.x[i] = val; } uint64_t do_read_f(int i) const { - return m_m.get_state().shadow.registers.f[i]; + return m_s.shadow.registers.f[i]; } void do_write_f(int i, uint64_t val) const { - m_m.get_state().shadow.registers.f[i] = val; + m_s.shadow.registers.f[i] = val; } uint64_t do_read_pc() const { - return m_m.get_state().shadow.registers.pc; + return m_s.shadow.registers.pc; } void do_write_pc(uint64_t val) const { - m_m.get_state().shadow.registers.pc = val; + m_s.shadow.registers.pc = val; } uint64_t do_read_fcsr() const { - return m_m.get_state().shadow.registers.fcsr; + return m_s.shadow.registers.fcsr; } void do_write_fcsr(uint64_t val) const { - m_m.get_state().shadow.registers.fcsr = val; + m_s.shadow.registers.fcsr = val; } uint64_t do_read_icycleinstret() const { - return m_m.get_state().shadow.registers.icycleinstret; + return m_s.shadow.registers.icycleinstret; } void do_write_icycleinstret(uint64_t val) const { - m_m.get_state().shadow.registers.icycleinstret = val; + m_s.shadow.registers.icycleinstret = val; } uint64_t do_read_mvendorid() const { // NOLINT(readability-convert-member-functions-to-static) @@ -140,11 +141,11 @@ class collect_mcycle_hashes_state_access : } uint64_t do_read_mcycle() const { - return m_m.get_state().shadow.registers.mcycle; + return m_s.shadow.registers.mcycle; } void do_write_mcycle(uint64_t val) const { - 
m_m.get_state().shadow.registers.mcycle = val; + m_s.shadow.registers.mcycle = val; // Only mcycle writes mark all registers as dirty. // This is done for efficiency, since the interpreter writes mcycle only when exiting its loop. @@ -157,271 +158,271 @@ class collect_mcycle_hashes_state_access : } uint64_t do_read_mstatus() const { - return m_m.get_state().shadow.registers.mstatus; + return m_s.shadow.registers.mstatus; } void do_write_mstatus(uint64_t val) const { - m_m.get_state().shadow.registers.mstatus = val; + m_s.shadow.registers.mstatus = val; } uint64_t do_read_menvcfg() const { - return m_m.get_state().shadow.registers.menvcfg; + return m_s.shadow.registers.menvcfg; } void do_write_menvcfg(uint64_t val) const { - m_m.get_state().shadow.registers.menvcfg = val; + m_s.shadow.registers.menvcfg = val; } uint64_t do_read_mtvec() const { - return m_m.get_state().shadow.registers.mtvec; + return m_s.shadow.registers.mtvec; } void do_write_mtvec(uint64_t val) const { - m_m.get_state().shadow.registers.mtvec = val; + m_s.shadow.registers.mtvec = val; } uint64_t do_read_mscratch() const { - return m_m.get_state().shadow.registers.mscratch; + return m_s.shadow.registers.mscratch; } void do_write_mscratch(uint64_t val) const { - m_m.get_state().shadow.registers.mscratch = val; + m_s.shadow.registers.mscratch = val; } uint64_t do_read_mepc() const { - return m_m.get_state().shadow.registers.mepc; + return m_s.shadow.registers.mepc; } void do_write_mepc(uint64_t val) const { - m_m.get_state().shadow.registers.mepc = val; + m_s.shadow.registers.mepc = val; } uint64_t do_read_mcause() const { - return m_m.get_state().shadow.registers.mcause; + return m_s.shadow.registers.mcause; } void do_write_mcause(uint64_t val) const { - m_m.get_state().shadow.registers.mcause = val; + m_s.shadow.registers.mcause = val; } uint64_t do_read_mtval() const { - return m_m.get_state().shadow.registers.mtval; + return m_s.shadow.registers.mtval; } void do_write_mtval(uint64_t val) const { 
- m_m.get_state().shadow.registers.mtval = val; + m_s.shadow.registers.mtval = val; } uint64_t do_read_misa() const { - return m_m.get_state().shadow.registers.misa; + return m_s.shadow.registers.misa; } void do_write_misa(uint64_t val) const { - m_m.get_state().shadow.registers.misa = val; + m_s.shadow.registers.misa = val; } uint64_t do_read_mie() const { - return m_m.get_state().shadow.registers.mie; + return m_s.shadow.registers.mie; } void do_write_mie(uint64_t val) const { - m_m.get_state().shadow.registers.mie = val; + m_s.shadow.registers.mie = val; } uint64_t do_read_mip() const { - return m_m.get_state().shadow.registers.mip; + return m_s.shadow.registers.mip; } void do_write_mip(uint64_t val) const { - m_m.get_state().shadow.registers.mip = val; + m_s.shadow.registers.mip = val; } uint64_t do_read_medeleg() const { - return m_m.get_state().shadow.registers.medeleg; + return m_s.shadow.registers.medeleg; } void do_write_medeleg(uint64_t val) const { - m_m.get_state().shadow.registers.medeleg = val; + m_s.shadow.registers.medeleg = val; } uint64_t do_read_mideleg() const { - return m_m.get_state().shadow.registers.mideleg; + return m_s.shadow.registers.mideleg; } void do_write_mideleg(uint64_t val) const { - m_m.get_state().shadow.registers.mideleg = val; + m_s.shadow.registers.mideleg = val; } uint64_t do_read_mcounteren() const { - return m_m.get_state().shadow.registers.mcounteren; + return m_s.shadow.registers.mcounteren; } void do_write_mcounteren(uint64_t val) const { - m_m.get_state().shadow.registers.mcounteren = val; + m_s.shadow.registers.mcounteren = val; } uint64_t do_read_senvcfg() const { - return m_m.get_state().shadow.registers.senvcfg; + return m_s.shadow.registers.senvcfg; } void do_write_senvcfg(uint64_t val) const { - m_m.get_state().shadow.registers.senvcfg = val; + m_s.shadow.registers.senvcfg = val; } uint64_t do_read_stvec() const { - return m_m.get_state().shadow.registers.stvec; + return m_s.shadow.registers.stvec; } void 
do_write_stvec(uint64_t val) const { - m_m.get_state().shadow.registers.stvec = val; + m_s.shadow.registers.stvec = val; } uint64_t do_read_sscratch() const { - return m_m.get_state().shadow.registers.sscratch; + return m_s.shadow.registers.sscratch; } void do_write_sscratch(uint64_t val) const { - m_m.get_state().shadow.registers.sscratch = val; + m_s.shadow.registers.sscratch = val; } uint64_t do_read_sepc() const { - return m_m.get_state().shadow.registers.sepc; + return m_s.shadow.registers.sepc; } void do_write_sepc(uint64_t val) const { - m_m.get_state().shadow.registers.sepc = val; + m_s.shadow.registers.sepc = val; } uint64_t do_read_scause() const { - return m_m.get_state().shadow.registers.scause; + return m_s.shadow.registers.scause; } void do_write_scause(uint64_t val) const { - m_m.get_state().shadow.registers.scause = val; + m_s.shadow.registers.scause = val; } uint64_t do_read_stval() const { - return m_m.get_state().shadow.registers.stval; + return m_s.shadow.registers.stval; } void do_write_stval(uint64_t val) const { - m_m.get_state().shadow.registers.stval = val; + m_s.shadow.registers.stval = val; } uint64_t do_read_satp() const { - return m_m.get_state().shadow.registers.satp; + return m_s.shadow.registers.satp; } void do_write_satp(uint64_t val) const { - m_m.get_state().shadow.registers.satp = val; + m_s.shadow.registers.satp = val; } uint64_t do_read_scounteren() const { - return m_m.get_state().shadow.registers.scounteren; + return m_s.shadow.registers.scounteren; } void do_write_scounteren(uint64_t val) const { - m_m.get_state().shadow.registers.scounteren = val; + m_s.shadow.registers.scounteren = val; } uint64_t do_read_ilrsc() const { - return m_m.get_state().shadow.registers.ilrsc; + return m_s.shadow.registers.ilrsc; } void do_write_ilrsc(uint64_t val) const { - m_m.get_state().shadow.registers.ilrsc = val; + m_s.shadow.registers.ilrsc = val; } uint64_t do_read_iprv() const { - return m_m.get_state().shadow.registers.iprv; + return 
m_s.shadow.registers.iprv; } void do_write_iprv(uint64_t val) const { - m_m.get_state().shadow.registers.iprv = val; + m_s.shadow.registers.iprv = val; } uint64_t do_read_iflags_X() const { - return m_m.get_state().shadow.registers.iflags.X; + return m_s.shadow.registers.iflags.X; } void do_write_iflags_X(uint64_t val) const { - m_m.get_state().shadow.registers.iflags.X = val; + m_s.shadow.registers.iflags.X = val; } uint64_t do_read_iflags_Y() const { - return m_m.get_state().shadow.registers.iflags.Y; + return m_s.shadow.registers.iflags.Y; } void do_write_iflags_Y(uint64_t val) const { - m_m.get_state().shadow.registers.iflags.Y = val; + m_s.shadow.registers.iflags.Y = val; } uint64_t do_read_iflags_H() const { - return m_m.get_state().shadow.registers.iflags.H; + return m_s.shadow.registers.iflags.H; } void do_write_iflags_H(uint64_t val) const { - m_m.get_state().shadow.registers.iflags.H = val; + m_s.shadow.registers.iflags.H = val; } uint64_t do_read_iunrep() const { - return m_m.get_state().shadow.registers.iunrep; + return m_s.shadow.registers.iunrep; } void do_write_iunrep(uint64_t val) const { - m_m.get_state().shadow.registers.iunrep = val; + m_s.shadow.registers.iunrep = val; } uint64_t do_read_clint_mtimecmp() const { - return m_m.get_state().shadow.registers.clint.mtimecmp; + return m_s.shadow.registers.clint.mtimecmp; } void do_write_clint_mtimecmp(uint64_t val) const { - m_m.get_state().shadow.registers.clint.mtimecmp = val; + m_s.shadow.registers.clint.mtimecmp = val; } uint64_t do_read_plic_girqpend() const { - return m_m.get_state().shadow.registers.plic.girqpend; + return m_s.shadow.registers.plic.girqpend; } void do_write_plic_girqpend(uint64_t val) const { - m_m.get_state().shadow.registers.plic.girqpend = val; + m_s.shadow.registers.plic.girqpend = val; } uint64_t do_read_plic_girqsrvd() const { - return m_m.get_state().shadow.registers.plic.girqsrvd; + return m_s.shadow.registers.plic.girqsrvd; } void do_write_plic_girqsrvd(uint64_t val) 
const { - m_m.get_state().shadow.registers.plic.girqsrvd = val; + m_s.shadow.registers.plic.girqsrvd = val; } uint64_t do_read_htif_fromhost() const { - return m_m.get_state().shadow.registers.htif.fromhost; + return m_s.shadow.registers.htif.fromhost; } void do_write_htif_fromhost(uint64_t val) const { - m_m.get_state().shadow.registers.htif.fromhost = val; + m_s.shadow.registers.htif.fromhost = val; } uint64_t do_read_htif_tohost() const { - return m_m.get_state().shadow.registers.htif.tohost; + return m_s.shadow.registers.htif.tohost; } void do_write_htif_tohost(uint64_t val) const { - m_m.get_state().shadow.registers.htif.tohost = val; + m_s.shadow.registers.htif.tohost = val; } uint64_t do_read_htif_ihalt() const { - return m_m.get_state().shadow.registers.htif.ihalt; + return m_s.shadow.registers.htif.ihalt; } uint64_t do_read_htif_iconsole() const { - return m_m.get_state().shadow.registers.htif.iconsole; + return m_s.shadow.registers.htif.iconsole; } uint64_t do_read_htif_iyield() const { - return m_m.get_state().shadow.registers.htif.iyield; + return m_s.shadow.registers.htif.iyield; } // NOLINTNEXTLINE(readability-convert-member-functions-to-static) @@ -459,18 +460,18 @@ class collect_mcycle_hashes_state_access : template uint64_t do_read_tlb_vaddr_page(uint64_t slot_index) const { - return m_m.get_state().penumbra.tlb[SET][slot_index].vaddr_page; + return m_s.penumbra.tlb[SET][slot_index].vaddr_page; } template uint64_t do_read_tlb_pma_index(uint64_t slot_index) const { - return m_m.get_state().shadow.tlb[SET][slot_index].pma_index; + return m_s.shadow.tlb[SET][slot_index].pma_index; } //??D This is still a bit too complicated for my taste template host_addr do_read_tlb_vf_offset(uint64_t slot_index) const { - return m_m.get_state().penumbra.tlb[SET][slot_index].vh_offset; + return m_s.penumbra.tlb[SET][slot_index].vh_offset; } //??D This is still a bit too complicated for my taste diff --git a/src/dtb.cpp b/src/dtb.cpp index 349072be4..b5f411610 100644 
--- a/src/dtb.cpp +++ b/src/dtb.cpp @@ -20,7 +20,6 @@ #include "dtb.h" #include -#include #include #include @@ -38,17 +37,6 @@ using namespace std::string_literals; namespace cartesi { -static std::string misa_to_isa_string(uint64_t misa) { - std::ostringstream ss; - ss << "rv64"; - for (int i = 0; i < 26; i++) { - if ((misa & (1 << i)) != 0) { - ss << static_cast('a' + i); - } - } - return ss.str(); -} - void dtb_init(const machine_config &c, unsigned char *dtb_start, uint64_t dtb_length) { using namespace std::string_literals; enum : uint32_t { INTC_PHANDLE = 1, PLIC_PHANDLE }; // NOLINT(cppcoreguidelines-use-enum-class) @@ -98,8 +86,8 @@ void dtb_init(const machine_config &c, unsigned char *dtb_start, uint64_t dtb_le fdt.prop_u32("reg", 0); fdt.prop_string("status", "okay"); fdt.prop_string("compatible", "riscv"); - fdt.prop_string("riscv,isa", misa_to_isa_string(c.processor.registers.misa)); - fdt.prop_string("mmu-type", "riscv,sv39"); + fdt.prop_string("riscv,isa", ISA_string); + fdt.prop_string("mmu-type", "riscv,sv48"); fdt.prop_u32("clock-frequency", RTC_CLOCK_FREQ); { // interrupt-controller fdt.begin_node("interrupt-controller"); diff --git a/src/interpret.cpp b/src/interpret.cpp index 5bbf61e52..4fd33e3a5 100644 --- a/src/interpret.cpp +++ b/src/interpret.cpp @@ -804,6 +804,20 @@ static FORCE_INLINE int32_t insn_get_C_LW_C_SW_imm(uint32_t insn) { return static_cast(((insn >> (10 - 3)) & 0x38) | ((insn >> (6 - 2)) & 0x4) | ((insn << (6 - 5)) & 0x40)); } +/// \brief Obtains the immediate value from C.LBU and C.SB instructions (Zcb extension). +/// \param insn Instruction. +/// \details This function is forced to be inline because GCC may not always inline it. +static FORCE_INLINE uint32_t insn_get_C_LS_B_uimm(uint32_t insn) { + return ((insn >> 6) & 0b1) | ((insn >> 4) & 0b10); +} + +/// \brief Obtains the immediate value from C.LHU, C.LH, and C.SH instructions (Zcb extension). +/// \param insn Instruction. 
+/// \details This function is forced to be inline because GCC may not always inline it. +static FORCE_INLINE uint32_t insn_get_C_LS_H_uimm(uint32_t insn) { + return (insn >> 4) & 0b10; +} + /// \brief Obtains the immediate value from a CIW-type instruction. /// \param insn Instruction. /// \details This function is forced to be inline because GCC may not always inline it. @@ -851,14 +865,12 @@ static FORCE_INLINE int32_t insn_get_C_LWSP_imm(uint32_t insn) { /// \brief Obtains the immediate value from a C.FSDSP and C.SDSP instructions. /// \param insn Instruction. -/// \details This function is forced to be inline because GCC may not always inline it. static FORCE_INLINE int32_t insn_get_C_FSDSP_SDSP_imm(uint32_t insn) { return static_cast(((insn >> (10 - 3)) & 0x38) | ((insn >> (7 - 6)) & 0x1c0)); } /// \brief Obtains the immediate value from a C.SWSP instruction. /// \param insn Instruction. -/// \details This function is forced to be inline because GCC may not always inline it. static FORCE_INLINE int32_t insn_get_C_SWSP_imm(uint32_t insn) { return static_cast(((insn >> (9 - 2)) & 0x3c) | ((insn >> (7 - 6)) & 0xc0)); } @@ -1223,6 +1235,44 @@ static FORCE_INLINE execute_status execute_jump(STATE_ACCESS /*a*/, uint64_t &pc return execute_status::success; } +template +static FORCE_INLINE execute_status execute_unary(const STATE_ACCESS a, uint64_t &pc, uint32_t insn, const F &f) { + const uint32_t rd = insn_get_rd(insn); + const uint64_t rs1 = a.read_x(insn_get_rs1(insn)); + a.write_x(rd, f(rs1)); + return advance_to_next_insn(a, pc); +} + +template +static FORCE_INLINE execute_status execute_arithmetic(const STATE_ACCESS a, uint64_t &pc, uint32_t insn, const F &f) { + const uint32_t rd = insn_get_rd(insn); + // Load rs1 and rs2 separately to ensure evaluation order before calling f() + const uint64_t rs1 = a.read_x(insn_get_rs1(insn)); + const uint64_t rs2 = a.read_x(insn_get_rs2(insn)); + a.write_x(rd, f(rs1, rs2)); + return advance_to_next_insn(a, pc); +} + 
+template +static FORCE_INLINE execute_status execute_arithmetic_immediate(const STATE_ACCESS a, uint64_t &pc, uint32_t insn, + const F &f) { + const uint32_t rd = insn_get_rd(insn); + const uint64_t rs1 = a.read_x(insn_get_rs1(insn)); + const int32_t imm = insn_I_get_imm(insn); + a.write_x(rd, f(rs1, imm)); + return advance_to_next_insn(a, pc); +} + +template +static FORCE_INLINE execute_status execute_arithmetic_uimmediate(const STATE_ACCESS a, uint64_t &pc, uint32_t insn, + const F &f) { + const uint32_t rd = insn_get_rd(insn); + const uint64_t rs1 = a.read_x(insn_get_rs1(insn)); + const uint32_t uimm = insn_I_get_uimm(insn); + a.write_x(rd, f(rs1, uimm)); + return advance_to_next_insn(a, pc); +} + /// \brief Execute the LR instruction. /// \tparam STATE_ACCESS Class of machine state accessor object. /// \param a Machine state accessor object. @@ -1534,6 +1584,18 @@ static FORCE_INLINE execute_status execute_ADDW(const STATE_ACCESS a, uint64_t & }); } +/// \brief Implementation of the ADD.UW instruction from Zba extension. +/// \details Add unsigned word. +template +static FORCE_INLINE execute_status execute_ADD_UW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "add.uw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, + [](uint64_t rs1, uint64_t rs2) -> uint64_t { return static_cast(static_cast(rs1)) + rs2; }); +} + /// \brief Implementation of the SUBW instruction. template static FORCE_INLINE execute_status execute_SUBW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { @@ -1554,16 +1616,12 @@ static FORCE_INLINE execute_status execute_SUBW(const STATE_ACCESS a, uint64_t & /// \brief Implementation of the SLLW instruction. 
template static FORCE_INLINE execute_status execute_SLLW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - if (unlikely((insn & 0b11111110000000000111000001111111) != 0b00000000000000000001000000111011)) { - return raise_illegal_insn_exception(a, pc, insn); - } [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sllw"); if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); } return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { - const auto rs1w = static_cast(static_cast(rs1) << (rs2 & 31)); - return static_cast(rs1w); + return static_cast(static_cast(static_cast(rs1) << (rs2 & 0b11111))); }); } @@ -1575,8 +1633,7 @@ static FORCE_INLINE execute_status execute_SRLW(const STATE_ACCESS a, uint64_t & return advance_to_next_insn(a, pc); } return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { - auto rs1w = static_cast(static_cast(rs1) >> (rs2 & 31)); - return static_cast(rs1w); + return static_cast(static_cast(static_cast(rs1) >> (rs2 & 0b11111))); }); } @@ -1588,8 +1645,7 @@ static FORCE_INLINE execute_status execute_SRAW(const STATE_ACCESS a, uint64_t & return advance_to_next_insn(a, pc); } return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { - const int32_t rs1w = static_cast(rs1) >> (rs2 & 31); - return static_cast(rs1w); + return static_cast(static_cast(rs1) >> (rs2 & 0b11111)); }); } @@ -1612,9 +1668,6 @@ static FORCE_INLINE execute_status execute_MULW(const STATE_ACCESS a, uint64_t & /// \brief Implementation of the DIVW instruction. 
template static FORCE_INLINE execute_status execute_DIVW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - if (unlikely((insn & 0b11111110000000000111000001111111) != 0b00000010000000000100000000111011)) { - return raise_illegal_insn_exception(a, pc, insn); - } [[maybe_unused]] auto note = dump_insn(a, pc, insn, "divw"); if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); @@ -1652,9 +1705,6 @@ static FORCE_INLINE execute_status execute_DIVUW(const STATE_ACCESS a, uint64_t /// \brief Implementation of the REMW instruction. template static FORCE_INLINE execute_status execute_REMW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - if (unlikely((insn & 0b11111110000000000111000001111111) != 0b00000010000000000110000000111011)) { - return raise_illegal_insn_exception(a, pc, insn); - } [[maybe_unused]] auto note = dump_insn(a, pc, insn, "remw"); if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); @@ -1692,6 +1742,441 @@ static FORCE_INLINE execute_status execute_REMUW(const STATE_ACCESS a, uint64_t }); } +/// \brief Implementation of the ANDN instruction from Zbb extension. +/// \details AND with inverted operand +template +static FORCE_INLINE execute_status execute_ANDN(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "andn"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { return rs1 & ~rs2; }); +} + +/// \brief Implementation of the ORN instruction from Zbb extension. 
+/// \details OR with inverted operand +template +static FORCE_INLINE execute_status execute_ORN(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "orn"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { return rs1 | ~rs2; }); +} + +/// \brief Implementation of the XNOR instruction from Zbb extension. +/// \details Exclusive NOR +template +static FORCE_INLINE execute_status execute_XNOR(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "xnor"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { return ~(rs1 ^ rs2); }); +} + +/// \brief Implementation of the CLZ instruction from Zbb extension. +/// \details Count leading zero bits +template +static FORCE_INLINE execute_status execute_CLZ(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "clz"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, [](uint64_t rs1) -> uint64_t { + if (rs1 == 0) [[unlikely]] { + return XLEN; + } + return static_cast(__builtin_clzll(rs1)); + }); +} + +/// \brief Implementation of the CLZW instruction from Zbb extension. 
+/// \details Count leading zero bits in word +template +static FORCE_INLINE execute_status execute_CLZW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "clzw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, [](uint64_t rs1) -> uint64_t { + const auto rs1w = static_cast(rs1); + if (rs1w == 0) [[unlikely]] { + return 32; + } + return static_cast(__builtin_clz(rs1w)); + }); +} + +/// \brief Implementation of the CTZ instruction from Zbb extension. +/// \details Count trailing zero bits +template +static FORCE_INLINE execute_status execute_CTZ(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "ctz"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, [](uint64_t rs1) -> uint64_t { + if (rs1 == 0) [[unlikely]] { + return XLEN; + } + return static_cast(__builtin_ctzll(rs1)); + }); +} + +/// \brief Implementation of the CTZW instruction from Zbb extension. +/// \details Count trailing zero bits in word +template +static FORCE_INLINE execute_status execute_CTZW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "ctzw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, [](uint64_t rs1) -> uint64_t { + const auto rs1w = static_cast(rs1); + if (rs1w == 0) [[unlikely]] { + return 32; + } + return static_cast(__builtin_ctz(rs1w)); + }); +} + +/// \brief Implementation of the CPOP instruction from Zbb extension. 
+/// \details Count set bits +template +static FORCE_INLINE execute_status execute_CPOP(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "cpop"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, + [](uint64_t rs1) -> uint64_t { return static_cast(__builtin_popcountll(rs1)); }); +} + +/// \brief Implementation of the CPOPW instruction from Zbb extension. +/// \details Count set bits in word +template +static FORCE_INLINE execute_status execute_CPOPW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "cpopw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, + [](uint64_t rs1) -> uint64_t { return static_cast(__builtin_popcount(static_cast(rs1))); }); +} + +/// \brief Implementation of the MAX instruction from Zbb extension. +/// \details Signed maximum +template +static FORCE_INLINE execute_status execute_MAX(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "max"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + if (static_cast(rs1) > static_cast(rs2)) { + return rs1; + } + return rs2; + }); +} + +/// \brief Implementation of the MAXU instruction from Zbb extension. 
+/// \details Unsigned maximum +template +static FORCE_INLINE execute_status execute_MAXU(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "maxu"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + if (rs1 > rs2) { + return rs1; + } + return rs2; + }); +} + +/// \brief Implementation of the MIN instruction from Zbb extension. +/// \details Signed minimum +template +static FORCE_INLINE execute_status execute_MIN(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "min"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + if (static_cast(rs1) < static_cast(rs2)) { + return rs1; + } + return rs2; + }); +} + +/// \brief Implementation of the MINU instruction from Zbb extension. +/// \details Unsigned minimum +template +static FORCE_INLINE execute_status execute_MINU(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "minu"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + if (rs1 < rs2) { + return rs1; + } + return rs2; + }); +} + +/// \brief Implementation of the SEXT.B instruction from Zbb extension. 
+/// \details Sign-extend byte +template +static FORCE_INLINE execute_status execute_SEXT_B(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sext.b"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, + [](uint64_t rs1) -> uint64_t { return static_cast(static_cast(rs1)); }); +} + +/// \brief Implementation of the SEXT.H instruction from Zbb extension. +/// \details Sign-extend halfword +template +static FORCE_INLINE execute_status execute_SEXT_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sext.h"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, + [](uint64_t rs1) -> uint64_t { return static_cast(static_cast(rs1)); }); +} + +/// \brief Implementation of the ZEXT.H instruction from Zbb extension. +/// \details Zero-extend halfword +template +static FORCE_INLINE execute_status execute_ZEXT_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "zext.h"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, + [](uint64_t rs1) -> uint64_t { return static_cast(static_cast(rs1)); }); +} + +/// \brief Implementation of the ROL instruction from Zbb extension. 
+/// \details Rotate left (Register) +template +static FORCE_INLINE execute_status execute_ROL(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "rol"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + const auto shamt = rs2 & (XLEN - 1); + if (shamt == 0) [[unlikely]] { + return rs1; + } + return (rs1 << shamt) | (rs1 >> (XLEN - shamt)); + }); +} + +/// \brief Implementation of the ROLW instruction from Zbb extension. +/// \details Rotate Left Word (Register) +template +static FORCE_INLINE execute_status execute_ROLW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "rolw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + const auto rs1w = static_cast(rs1); + const auto shamt = rs2 & 0b11111; + if (shamt == 0) [[unlikely]] { + return static_cast(static_cast(rs1w)); + } + return static_cast(static_cast((rs1w << shamt) | (rs1w >> (32 - shamt)))); + }); +} + +/// \brief Implementation of the ROR instruction from Zbb extension. +/// \details Rotate right (Register) +template +static FORCE_INLINE execute_status execute_ROR(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "ror"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + const auto shamt = rs2 & (XLEN - 1); + if (shamt == 0) [[unlikely]] { + return rs1; + } + return (rs1 >> shamt) | (rs1 << (XLEN - shamt)); + }); +} + +/// \brief Implementation of the RORI instruction from Zbb extension. 
+/// \details Rotate right (Immediate) +template +static FORCE_INLINE execute_status execute_RORI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "rori"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic_uimmediate(a, pc, insn, [](uint64_t rs1, uint32_t uimm) -> uint64_t { + const auto shamt = uimm & (XLEN - 1); + if (shamt == 0) [[unlikely]] { + return rs1; + } + return (rs1 >> shamt) | (rs1 << (XLEN - shamt)); + }); +} + +/// \brief Implementation of the RORIW instruction from Zbb extension. +/// \details Rotate right word (Immediate) +template +static FORCE_INLINE execute_status execute_RORIW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "roriw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic_uimmediate(a, pc, insn, [](uint64_t rs1, uint32_t uimm) -> uint64_t { + const auto rs1w = static_cast(rs1); + const auto shamt = uimm & 0b11111; + if (shamt == 0) [[unlikely]] { + return static_cast(static_cast(rs1w)); + } + return static_cast(static_cast((rs1w >> shamt) | (rs1w << (32 - shamt)))); + }); +} + +/// \brief Implementation of the RORW instruction from Zbb extension. 
+/// \details Rotate right Word (Register) +template +static FORCE_INLINE execute_status execute_RORW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "rorw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + const auto rs1w = static_cast(rs1); + const auto shamt = rs2 & 0b11111; + if (shamt == 0) [[unlikely]] { + return static_cast(static_cast(rs1w)); + } + return static_cast(static_cast((rs1w >> shamt) | (rs1w << (32 - shamt)))); + }); +} + +/// \brief Implementation of the ORC.B instruction from Zbb extension. +/// \details Bitwise OR-Combine, byte granule +template +static FORCE_INLINE execute_status execute_ORC_B(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "orc.b"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, [](uint64_t rs1) -> uint64_t { + // Efficient implementation without branches or loops + // Compress each byte to single bit + uint64_t val = rs1; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + val &= UINT64_C(0x0101010101010101); + // Uncompress each byte + val |= val << 1; + val |= val << 2; + val |= val << 4; + return val; + }); +} + +/// \brief Implementation of the REV8 instruction from Zbb extension. +/// \details Byte-reverse register +template +static FORCE_INLINE execute_status execute_REV8(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "rev8"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_unary(a, pc, insn, [](uint64_t rs1) -> uint64_t { return __builtin_bswap64(rs1); }); +} + +/// \brief Implementation of the SH1ADD instruction from Zba extension. +/// \details Shift left by 1 and add. 
+template +static FORCE_INLINE execute_status execute_SH1ADD(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sh1add"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { return (rs1 << 1) + rs2; }); +} + +/// \brief Implementation of the SH1ADD.UW instruction from Zba extension. +/// \details Shift unsigned word left by 1 and add. +template +static FORCE_INLINE execute_status execute_SH1ADD_UW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sh1add.uw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + return (static_cast(static_cast(rs1)) << 1) + rs2; + }); +} + +/// \brief Implementation of the SH2ADD instruction from Zba extension. +/// \details Shift left by 2 and add. +template +static FORCE_INLINE execute_status execute_SH2ADD(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sh2add"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { return (rs1 << 2) + rs2; }); +} + +/// \brief Implementation of the SH2ADD.UW instruction from Zba extension. +/// \details Shift unsigned word left by 2 and add. 
+template +static FORCE_INLINE execute_status execute_SH2ADD_UW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sh2add.uw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + return (static_cast(static_cast(rs1)) << 2) + rs2; + }); +} + +/// \brief Implementation of the SH3ADD instruction from Zba extension. +/// \details Shift left by 3 and add. +template +static FORCE_INLINE execute_status execute_SH3ADD(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sh3add"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { return (rs1 << 3) + rs2; }); +} + +/// \brief Implementation of the SH3ADD.UW instruction from Zba extension. +/// \details Shift unsigned word left by 3 and add.
+template +static FORCE_INLINE execute_status execute_SH3ADD_UW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "sh3add.uw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + return (static_cast(static_cast(rs1)) << 3) + rs2; + }); +} + static inline uint64_t read_csr_fail(bool *status) { *status = false; return 0; @@ -2851,18 +3336,6 @@ static FORCE_INLINE execute_status execute_FENCE_I(const STATE_ACCESS a, uint64_ return advance_to_next_insn(a, pc); } -template -static FORCE_INLINE execute_status execute_arithmetic(const STATE_ACCESS a, uint64_t &pc, uint32_t insn, const F &f) { - const uint32_t rd = insn_get_rd(insn); - // Ensure rs1 and rs2 are loaded in order: do not nest with call to f() as - // the order of evaluation of arguments in a function call is undefined. - const uint64_t rs1 = a.read_x(insn_get_rs1(insn)); - const uint64_t rs2 = a.read_x(insn_get_rs2(insn)); - // Now we can safely invoke f() - a.write_x(rd, f(rs1, rs2)); - return advance_to_next_insn(a, pc); -} - /// \brief Implementation of the ADD instruction. template static FORCE_INLINE execute_status execute_ADD(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { @@ -2870,11 +3343,7 @@ static FORCE_INLINE execute_status execute_ADD(const STATE_ACCESS a, uint64_t &p if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); } - return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { - uint64_t val = 0; - __builtin_add_overflow(rs1, rs2, &val); - return val; - }); + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { return rs1 + rs2; }); } /// \brief Implementation of the SUB instruction. 
@@ -3102,16 +3571,6 @@ static FORCE_INLINE execute_status execute_REMU(const STATE_ACCESS a, uint64_t & }); } -template -static FORCE_INLINE execute_status execute_arithmetic_immediate(const STATE_ACCESS a, uint64_t &pc, uint32_t insn, - const F &f) { - const uint32_t rd = insn_get_rd(insn); - const uint64_t rs1 = a.read_x(insn_get_rs1(insn)); - const int32_t imm = insn_I_get_imm(insn); - a.write_x(rd, f(rs1, imm)); - return advance_to_next_insn(a, pc); -} - /// \brief Implementation of the SRLI instruction. template static FORCE_INLINE execute_status execute_SRLI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { @@ -3119,8 +3578,8 @@ static FORCE_INLINE execute_status execute_SRLI(const STATE_ACCESS a, uint64_t & if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); } - return execute_arithmetic_immediate(a, pc, insn, - [](uint64_t rs1, int32_t imm) -> uint64_t { return rs1 >> (imm & (XLEN - 1)); }); + return execute_arithmetic_uimmediate(a, pc, insn, + [](uint64_t rs1, uint32_t uimm) -> uint64_t { return rs1 >> (uimm & (XLEN - 1)); }); } /// \brief Implementation of the SRAI instruction. @@ -3130,8 +3589,8 @@ static FORCE_INLINE execute_status execute_SRAI(const STATE_ACCESS a, uint64_t & if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); } - return execute_arithmetic_immediate(a, pc, insn, [](uint64_t rs1, int32_t imm) -> uint64_t { - return static_cast(static_cast(rs1) >> (imm & (XLEN - 1))); + return execute_arithmetic_uimmediate(a, pc, insn, [](uint64_t rs1, uint32_t uimm) -> uint64_t { + return static_cast(static_cast(rs1) >> (uimm & (XLEN - 1))); }); } @@ -3208,18 +3667,12 @@ static FORCE_INLINE execute_status execute_ANDI(const STATE_ACCESS a, uint64_t & /// \brief Implementation of the SLLI instruction. 
template static FORCE_INLINE execute_status execute_SLLI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - if (unlikely((insn & (0b111111 << 26)) != 0)) { - return raise_illegal_insn_exception(a, pc, insn); - } [[maybe_unused]] auto note = dump_insn(a, pc, insn, "slli"); if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); } - return execute_arithmetic_immediate(a, pc, insn, [](uint64_t rs1, int32_t imm) -> uint64_t { - // No need to mask lower 6 bits in imm because of the if condition a above - // We do it anyway here to prevent problems if this code is moved - return rs1 << (imm & 0b111111); - }); + return execute_arithmetic_uimmediate(a, pc, insn, + [](uint64_t rs1, uint32_t uimm) -> uint64_t { return rs1 << (uimm & (XLEN - 1)); }); } /// \brief Implementation of the ADDIW instruction. @@ -3239,18 +3692,24 @@ static FORCE_INLINE execute_status execute_ADDIW(const STATE_ACCESS a, uint64_t /// \brief Implementation of the SLLIW instruction. template static FORCE_INLINE execute_status execute_SLLIW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - if (unlikely(insn_get_funct7(insn) != 0)) { - return raise_illegal_insn_exception(a, pc, insn); - } [[maybe_unused]] auto note = dump_insn(a, pc, insn, "slliw"); if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); } - return execute_arithmetic_immediate(a, pc, insn, [](uint64_t rs1, int32_t imm) -> uint64_t { - // No need to mask lower 5 bits in imm because of the if condition a above - // We do it anyway here to prevent problems if this code is moved - const auto rs1w = static_cast(static_cast(rs1) << (imm & 0b11111)); - return static_cast(rs1w); + return execute_arithmetic_uimmediate(a, pc, insn, [](uint64_t rs1, uint32_t uimm) -> uint64_t { + return static_cast(static_cast(static_cast(rs1) << (uimm & 0b11111))); + }); +} + +/// \brief Implementation of the SLLI.UW instruction. 
+template +static FORCE_INLINE execute_status execute_SLLI_UW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "slli.uw"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic_uimmediate(a, pc, insn, [](uint64_t rs1, uint32_t uimm) -> uint64_t { + return static_cast(static_cast(rs1)) << (uimm & (XLEN - 1)); }); } @@ -3261,11 +3720,8 @@ static FORCE_INLINE execute_status execute_SRLIW(const STATE_ACCESS a, uint64_t if constexpr (rd_kind == rd_kind::x0) { return advance_to_next_insn(a, pc); } - return execute_arithmetic_immediate(a, pc, insn, [](uint64_t rs1, int32_t imm) -> uint64_t { - // No need to mask lower 5 bits in imm because of funct7 test in caller - // We do it anyway here to prevent problems if this code is moved - auto rs1w = static_cast(static_cast(rs1) >> (imm & 0b11111)); - return static_cast(rs1w); + return execute_arithmetic_uimmediate(a, pc, insn, [](uint64_t rs1, uint32_t uimm) -> uint64_t { + return static_cast(static_cast(static_cast(rs1) >> (uimm & 0b11111))); }); } @@ -3283,9 +3739,193 @@ static FORCE_INLINE execute_status execute_SRAIW(const STATE_ACCESS a, uint64_t } return advance_to_next_insn(a, pc); } - return execute_arithmetic_immediate(a, pc, insn, [](uint64_t rs1, int32_t imm) -> uint64_t { - const int32_t rs1w = static_cast(rs1) >> (imm & 0b11111); - return static_cast(rs1w); + return execute_arithmetic_uimmediate(a, pc, insn, [](uint64_t rs1, uint32_t uimm) -> uint64_t { + return static_cast(static_cast(rs1) >> (uimm & 0b11111)); + }); +} + +/// \brief Implementation of the CLMUL instruction from Zbc extension. 
+/// \details Carry-less multiply (low-part) +template +static FORCE_INLINE execute_status execute_CLMUL(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "clmul"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + uint64_t val = 0; + for (uint32_t i = 0; i < XLEN; ++i) { + // Use a mask to make the algorithm branchless + const auto mask = -((rs2 >> i) & uint64_t{1}); + val ^= (rs1 << i) & mask; + } + return val; + }); +} + +/// \brief Implementation of the CLMULH instruction from Zbc extension. +/// \details Carry-less multiply (high-part) +template +static FORCE_INLINE execute_status execute_CLMULH(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "clmulh"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + uint64_t val = 0; + for (uint32_t i = 1; i < XLEN; ++i) { + // Use a mask to make the algorithm branchless + const auto mask = -((rs2 >> i) & uint64_t{1}); + val ^= (rs1 >> (XLEN - i)) & mask; + } + return val; + }); +} + +/// \brief Implementation of the CLMULR instruction from Zbc extension. 
+/// \details Carry-less multiply (reversed) +template +static FORCE_INLINE execute_status execute_CLMULR(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "clmulr"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + uint64_t val = 0; + for (uint32_t i = 0; i < XLEN; ++i) { + // Use a mask to make the algorithm branchless + const auto mask = -((rs2 >> i) & uint64_t{1}); + val ^= (rs1 >> (XLEN - i - 1)) & mask; + } + return val; + }); +} + +/// \brief Implementation of the BCLR instruction from Zbs extension. +/// \details Single-Bit Clear (Register) +template +static FORCE_INLINE execute_status execute_BCLR(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "bclr"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, + [](uint64_t rs1, uint64_t rs2) -> uint64_t { return rs1 & ~(uint64_t{1} << (rs2 & (XLEN - 1))); }); +} + +/// \brief Implementation of the BCLRI instruction from Zbs extension. +/// \details Single-Bit Clear (Immediate) +template +static FORCE_INLINE execute_status execute_BCLRI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "bclri"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic_uimmediate(a, pc, insn, + [](uint64_t rs1, uint32_t uimm) -> uint64_t { return rs1 & ~(uint64_t{1} << (uimm & (XLEN - 1))); }); +} + +/// \brief Implementation of the BEXT instruction from Zbs extension. 
+/// \details Single-Bit Extract (Register) +template +static FORCE_INLINE execute_status execute_BEXT(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "bext"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, + [](uint64_t rs1, uint64_t rs2) -> uint64_t { return (rs1 >> (rs2 & (XLEN - 1))) & uint64_t{1}; }); +} + +/// \brief Implementation of the BEXTI instruction from Zbs extension. +/// \details Single-Bit Extract (Immediate) +template +static FORCE_INLINE execute_status execute_BEXTI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "bexti"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic_uimmediate(a, pc, insn, + [](uint64_t rs1, uint32_t uimm) -> uint64_t { return (rs1 >> (uimm & (XLEN - 1))) & 1; }); +} + +/// \brief Implementation of the BINV instruction from Zbs extension. +/// \details Single-Bit Invert (Register) +template +static FORCE_INLINE execute_status execute_BINV(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "binv"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, + [](uint64_t rs1, uint64_t rs2) -> uint64_t { return rs1 ^ (uint64_t{1} << (rs2 & (XLEN - 1))); }); +} + +/// \brief Implementation of the BINVI instruction from Zbs extension. 
+/// \details Single-Bit Invert (Immediate) +template +static FORCE_INLINE execute_status execute_BINVI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "binvi"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic_uimmediate(a, pc, insn, + [](uint64_t rs1, uint32_t uimm) -> uint64_t { return rs1 ^ (uint64_t{1} << (uimm & (XLEN - 1))); }); +} + +/// \brief Implementation of the BSET instruction from Zbs extension. +/// \details Single-Bit Set (Register) +template +static FORCE_INLINE execute_status execute_BSET(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "bset"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, + [](uint64_t rs1, uint64_t rs2) -> uint64_t { return rs1 | (uint64_t{1} << (rs2 & (XLEN - 1))); }); +} + +/// \brief Implementation of the BSETI instruction from Zbs extension. +/// \details Single-Bit Set (Immediate) +template +static FORCE_INLINE execute_status execute_BSETI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "bseti"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic_uimmediate(a, pc, insn, + [](uint64_t rs1, uint32_t uimm) -> uint64_t { return rs1 | (uint64_t{1} << (uimm & (XLEN - 1))); }); +} + +/// \brief Implementation of the CZERO.EQZ instruction from Zicond extension. +/// \details Moves zero to a register rd, if the condition rs2 is equal to zero, otherwise moves rs1 to rd. 
+template +static FORCE_INLINE execute_status execute_CZERO_EQZ(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "czero.eqz"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + if (rs2 == 0) { + return 0; + } + return rs1; + }); +} + +/// \brief Implementation of the CZERO.NEZ instruction from Zicond extension. +/// \details Moves zero to a register rd, if the condition rs2 is nonzero, otherwise moves rs1 to rd. +template +static FORCE_INLINE execute_status execute_CZERO_NEZ(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "czero.nez"); + if constexpr (rd_kind == rd_kind::x0) { + return advance_to_next_insn(a, pc); + } + return execute_arithmetic(a, pc, insn, [](uint64_t rs1, uint64_t rs2) -> uint64_t { + if (rs2 != 0) { + return 0; + } + return rs1; }); } @@ -3566,28 +4206,83 @@ static FORCE_INLINE execute_status execute_SFENCE_VMA(const STATE_ACCESS a, uint } template -static FORCE_INLINE execute_status execute_SRLI_SRAI(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7_sr1 = static_cast(insn_get_funct7_sr1(insn)); - if (funct7_sr1 == insn_SRLI_SRAI_funct7_sr1::SRLI) { - return execute_SRLI(a, pc, insn); +static FORCE_INLINE execute_status execute_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI(const STATE_ACCESS a, + uint64_t &pc, uint32_t insn) { + const auto funct7_sr1 = + static_cast(insn_get_funct7_sr1(insn)); + switch (funct7_sr1) { + case insn_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_funct7_sr1::SLLI: + return execute_SLLI(a, pc, insn); + case insn_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_funct7_sr1::CLZ_CTZ_CPOP_SEXT_B_SEXT_H: { + const auto funct7_rs2 = 
static_cast(insn_get_funct7_rs2(insn)); + switch (funct7_rs2) { + case insn_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_funct7_rs2::CLZ: + return execute_CLZ(a, pc, insn); + case insn_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_funct7_rs2::CTZ: + return execute_CTZ(a, pc, insn); + case insn_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_funct7_rs2::CPOP: + return execute_CPOP(a, pc, insn); + case insn_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_funct7_rs2::SEXT_B: + return execute_SEXT_B(a, pc, insn); + case insn_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_funct7_rs2::SEXT_H: + return execute_SEXT_H(a, pc, insn); + } + break; + } + case insn_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_funct7_sr1::BCLRI: + return execute_BCLRI(a, pc, insn); + case insn_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_funct7_sr1::BINVI: + return execute_BINVI(a, pc, insn); + case insn_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_funct7_sr1::BSETI: + return execute_BSETI(a, pc, insn); } - if (funct7_sr1 == insn_SRLI_SRAI_funct7_sr1::SRAI) { - return execute_SRAI(a, pc, insn); + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI(const STATE_ACCESS a, uint64_t &pc, + uint32_t insn) { + const auto funct7_sr1 = static_cast(insn_get_funct7_sr1(insn)); + switch (funct7_sr1) { + case insn_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_funct7_sr1::SRLI: + return execute_SRLI(a, pc, insn); + case insn_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_funct7_sr1::SRAI: + return execute_SRAI(a, pc, insn); + case insn_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_funct7_sr1::RORI: + return execute_RORI(a, pc, insn); + case insn_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_funct7_sr1::ORC_B: { + const auto funct7_rs2 = static_cast(insn_get_funct7_rs2(insn)); + if (funct7_rs2 == insn_ORC_B_funct7_rs2::ORC_B) { + return execute_ORC_B(a, pc, insn); + } + break; + } + case insn_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_funct7_sr1::REV8: { + const auto funct7_rs2 = static_cast(insn_get_funct7_rs2(insn)); + if (funct7_rs2 == insn_REV8_funct7_rs2::REV8) { + 
return execute_REV8(a, pc, insn); + } + break; + } + case insn_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_funct7_sr1::BEXTI: + return execute_BEXTI(a, pc, insn); } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_SRLIW_SRAIW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_SRLIW_SRAIW_RORIW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_SRLIW_SRAIW_funct7::SRLIW) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_SRLIW_SRAIW_RORIW_funct7::SRLIW) { return execute_SRLIW(a, pc, insn); } - if (funct7 == insn_SRLIW_SRAIW_funct7::SRAIW) { + if (funct7 == insn_SRLIW_SRAIW_RORIW_funct7::SRAIW) { return execute_SRAIW(a, pc, insn); } + if (funct7 == insn_SRLIW_SRAIW_RORIW_funct7::RORIW) { + return execute_RORIW(a, pc, insn); + } return raise_illegal_insn_exception(a, pc, insn); } @@ -3616,9 +4311,8 @@ static FORCE_INLINE execute_status execute_AMO_W(const STATE_ACCESS a, uint64_t return execute_AMOMINU_W(a, pc, mcycle, insn); case insn_AMO_funct7_sr2::AMOMAXU: return execute_AMOMAXU_W(a, pc, mcycle, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template @@ -3646,149 +4340,266 @@ static FORCE_INLINE execute_status execute_AMO_D(const STATE_ACCESS a, uint64_t return execute_AMOMINU_D(a, pc, mcycle, insn); case insn_AMO_funct7_sr2::AMOMAXU: return execute_AMOMAXU_D(a, pc, mcycle, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_ADD_MUL_SUB(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_ADD_SUB_MUL(const STATE_ACCESS a, uint64_t 
&pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_ADD_MUL_SUB_funct7::ADD) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_ADD_SUB_MUL_funct7::ADD) { return execute_ADD(a, pc, insn); } - if (funct7 == insn_ADD_MUL_SUB_funct7::MUL) { + if (funct7 == insn_ADD_SUB_MUL_funct7::MUL) { return execute_MUL(a, pc, insn); } - if (funct7 == insn_ADD_MUL_SUB_funct7::SUB) { + if (funct7 == insn_ADD_SUB_MUL_funct7::SUB) { return execute_SUB(a, pc, insn); } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_SLL_MULH(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_SLL_MULH_funct7::SLL) { - return execute_SLL(a, pc, insn); - } - if (funct7 == insn_SLL_MULH_funct7::MULH) { - return execute_MULH(a, pc, insn); +static FORCE_INLINE execute_status execute_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET(const STATE_ACCESS a, uint64_t &pc, + uint32_t insn) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + switch (funct7) { + case insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7::SLL: + return execute_SLL(a, pc, insn); + case insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7::MULH: + return execute_MULH(a, pc, insn); + case insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7::ROL: + return execute_ROL(a, pc, insn); + case insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7::CLMUL: + return execute_CLMUL(a, pc, insn); + case insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7::BCLR: + return execute_BCLR(a, pc, insn); + case insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7::BINV: + return execute_BINV(a, pc, insn); + case insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7::BSET: + return execute_BSET(a, pc, insn); } return 
raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_SLT_MULHSU(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_SLT_MULHSU_SH1ADD_CLMULR(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_SLT_MULHSU_funct7::SLT) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_SLT_MULHSU_SH1ADD_CLMULR_funct7::SLT) { return execute_SLT(a, pc, insn); } - if (funct7 == insn_SLT_MULHSU_funct7::MULHSU) { + if (funct7 == insn_SLT_MULHSU_SH1ADD_CLMULR_funct7::MULHSU) { return execute_MULHSU(a, pc, insn); } + if (funct7 == insn_SLT_MULHSU_SH1ADD_CLMULR_funct7::SH1ADD) { + return execute_SH1ADD(a, pc, insn); + } + if (funct7 == insn_SLT_MULHSU_SH1ADD_CLMULR_funct7::CLMULR) { + return execute_CLMULR(a, pc, insn); + } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_SLTU_MULHU(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_SLTU_MULHU_CLMULH(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_SLTU_MULHU_funct7::SLTU) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_SLTU_MULHU_CLMULH_funct7::SLTU) { return execute_SLTU(a, pc, insn); } - if (funct7 == insn_SLTU_MULHU_funct7::MULHU) { + if (funct7 == insn_SLTU_MULHU_CLMULH_funct7::MULHU) { return execute_MULHU(a, pc, insn); } + if (funct7 == insn_SLTU_MULHU_CLMULH_funct7::CLMULH) { + return execute_CLMULH(a, pc, insn); + } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_XOR_DIV(const 
STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_XOR_DIV_funct7::XOR) { - return execute_XOR(a, pc, insn); +static FORCE_INLINE execute_status execute_XOR_DIV_SH2ADD_XNOR_MIN(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + switch (funct7) { + case insn_XOR_DIV_SH2ADD_XNOR_MIN_funct7::XOR: + return execute_XOR(a, pc, insn); + case insn_XOR_DIV_SH2ADD_XNOR_MIN_funct7::DIV: + return execute_DIV(a, pc, insn); + case insn_XOR_DIV_SH2ADD_XNOR_MIN_funct7::SH2ADD: + return execute_SH2ADD(a, pc, insn); + case insn_XOR_DIV_SH2ADD_XNOR_MIN_funct7::XNOR: + return execute_XNOR(a, pc, insn); + case insn_XOR_DIV_SH2ADD_XNOR_MIN_funct7::MIN: + return execute_MIN(a, pc, insn); + } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ(const STATE_ACCESS a, uint64_t &pc, + uint32_t insn) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + switch (funct7) { + case insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7::SRL: + return execute_SRL(a, pc, insn); + case insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7::SRA: + return execute_SRA(a, pc, insn); + case insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7::DIVU: + return execute_DIVU(a, pc, insn); + case insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7::MINU: + return execute_MINU(a, pc, insn); + case insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7::ROR: + return execute_ROR(a, pc, insn); + case insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7::BEXT: + return execute_BEXT(a, pc, insn); + case insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7::CZERO_EQZ: + return execute_CZERO_EQZ(a, pc, insn); } - if (funct7 == insn_XOR_DIV_funct7::DIV) { - return execute_DIV(a, pc, insn); + return 
raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_OR_REM_SH3ADD_ORN_MAX(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + switch (funct7) { + case insn_OR_REM_SH3ADD_ORN_MAX_funct7::OR: + return execute_OR(a, pc, insn); + case insn_OR_REM_SH3ADD_ORN_MAX_funct7::REM: + return execute_REM(a, pc, insn); + case insn_OR_REM_SH3ADD_ORN_MAX_funct7::SH3ADD: + return execute_SH3ADD(a, pc, insn); + case insn_OR_REM_SH3ADD_ORN_MAX_funct7::ORN: + return execute_ORN(a, pc, insn); + case insn_OR_REM_SH3ADD_ORN_MAX_funct7::MAX: + return execute_MAX(a, pc, insn); + } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_AND_REMU_ANDN_MAXU_CZERO_NEZ(const STATE_ACCESS a, uint64_t &pc, + uint32_t insn) { + const auto funct7 = static_cast(insn_get_funct7(insn)); + switch (funct7) { + case insn_AND_REMU_ANDN_MAXU_CZERO_NEZ_funct7::AND: + return execute_AND(a, pc, insn); + case insn_AND_REMU_ANDN_MAXU_CZERO_NEZ_funct7::REMU: + return execute_REMU(a, pc, insn); + case insn_AND_REMU_ANDN_MAXU_CZERO_NEZ_funct7::ANDN: + return execute_ANDN(a, pc, insn); + case insn_AND_REMU_ANDN_MAXU_CZERO_NEZ_funct7::MAXU: + return execute_MAXU(a, pc, insn); + case insn_AND_REMU_ANDN_MAXU_CZERO_NEZ_funct7::CZERO_NEZ: + return execute_CZERO_NEZ(a, pc, insn); } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_SRL_DIVU_SRA(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_ADDW_SUBW_MULW_ADD_UW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_SRL_DIVU_SRA_funct7::SRL) { - return execute_SRL(a, pc, insn); + const auto funct7 = static_cast(insn_get_funct7(insn)); + if 
(funct7 == insn_ADDW_SUBW_MULW_ADD_UW_funct7::ADDW) { + return execute_ADDW(a, pc, insn); + } + if (funct7 == insn_ADDW_SUBW_MULW_ADD_UW_funct7::MULW) { + return execute_MULW(a, pc, insn); } - if (funct7 == insn_SRL_DIVU_SRA_funct7::SRA) { - return execute_SRA(a, pc, insn); + if (funct7 == insn_ADDW_SUBW_MULW_ADD_UW_funct7::SUBW) { + return execute_SUBW(a, pc, insn); } - if (funct7 == insn_SRL_DIVU_SRA_funct7::DIVU) { - return execute_DIVU(a, pc, insn); + if (funct7 == insn_ADDW_SUBW_MULW_ADD_UW_funct7::ADD_UW) { + return execute_ADD_UW(a, pc, insn); } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_OR_REM(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_SLLW_ROLW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_OR_REM_funct7::OR) { - return execute_OR(a, pc, insn); + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_SLLW_ROLW_funct7::SLLW) { + return execute_SLLW(a, pc, insn); } - if (funct7 == insn_OR_REM_funct7::REM) { - return execute_REM(a, pc, insn); + if (funct7 == insn_SLLW_ROLW_funct7::ROLW) { + return execute_ROLW(a, pc, insn); } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_AND_REMU(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_SRLW_SRAW_DIVUW_RORW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_AND_REMU_funct7::AND) { - return execute_AND(a, pc, insn); + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_SRLW_SRAW_DIVUW_RORW_funct7::SRLW) { + 
return execute_SRLW(a, pc, insn); + } + if (funct7 == insn_SRLW_SRAW_DIVUW_RORW_funct7::SRAW) { + return execute_SRAW(a, pc, insn); + } + if (funct7 == insn_SRLW_SRAW_DIVUW_RORW_funct7::DIVUW) { + return execute_DIVUW(a, pc, insn); } - if (funct7 == insn_AND_REMU_funct7::REMU) { - return execute_REMU(a, pc, insn); + if (funct7 == insn_SRLW_SRAW_DIVUW_RORW_funct7::RORW) { + return execute_RORW(a, pc, insn); } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_ADDW_MULW_SUBW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_DIVW_SH2ADD_UW_ZEXT_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_ADDW_MULW_SUBW_funct7::ADDW) { - return execute_ADDW(a, pc, insn); + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_DIVW_SH2ADD_UW_ZEXT_H_funct7::DIVW) { + return execute_DIVW(a, pc, insn); } - if (funct7 == insn_ADDW_MULW_SUBW_funct7::MULW) { - return execute_MULW(a, pc, insn); + if (funct7 == insn_DIVW_SH2ADD_UW_ZEXT_H_funct7::SH2ADD_UW) { + return execute_SH2ADD_UW(a, pc, insn); } - if (funct7 == insn_ADDW_MULW_SUBW_funct7::SUBW) { - return execute_SUBW(a, pc, insn); + if (funct7 == insn_DIVW_SH2ADD_UW_ZEXT_H_funct7::ZEXT_H) { + return execute_ZEXT_H(a, pc, insn); } return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_SRLW_DIVUW_SRAW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_REMW_SH3ADD_UW(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // Use ifs instead of a switch to produce fewer branches for the most frequent instructions - const auto funct7 = static_cast(insn_get_funct7(insn)); - if (funct7 == insn_SRLW_DIVUW_SRAW_funct7::SRLW) { - return execute_SRLW(a, pc, 
insn); + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_REMW_SH3ADD_UW_funct7::REMW) { + return execute_REMW(a, pc, insn); } - if (funct7 == insn_SRLW_DIVUW_SRAW_funct7::DIVUW) { - return execute_DIVUW(a, pc, insn); + if (funct7 == insn_REMW_SH3ADD_UW_funct7::SH3ADD_UW) { + return execute_SH3ADD_UW(a, pc, insn); } - if (funct7 == insn_SRLW_DIVUW_SRAW_funct7::SRAW) { - return execute_SRAW(a, pc, insn); + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_SLLIW_SLLI_UW_CLZW_CTZW_CPOPW(const STATE_ACCESS a, uint64_t &pc, + uint32_t insn) { + // Use ifs instead of a switch to produce fewer branches for the most frequent instructions + const auto funct7 = static_cast(insn_get_funct7(insn)); + if (funct7 == insn_SLLIW_CLZW_CTZW_CPOPW_funct7::SLLIW) { + return execute_SLLIW(a, pc, insn); + } + if (funct7 == insn_SLLIW_CLZW_CTZW_CPOPW_funct7::CLZW_CTZW_CPOPW) { + const auto rs2 = static_cast(insn_get_rs2(insn)); + if (rs2 == insn_CLZW_CTZW_CPOPW_rs2::CLZW) { + return execute_CLZW(a, pc, insn); + } + if (rs2 == insn_CLZW_CTZW_CPOPW_rs2::CTZW) { + return execute_CTZW(a, pc, insn); + } + if (rs2 == insn_CLZW_CTZW_CPOPW_rs2::CPOPW) { + return execute_CPOPW(a, pc, insn); + } + } else { + const auto funct7_sr1 = static_cast(insn_get_funct7_sr1(insn)); + if (funct7_sr1 == insn_SLLI_UW_funct7_sr1::SLLI_UW) { + return execute_SLLI_UW(a, pc, insn); + } } return raise_illegal_insn_exception(a, pc, insn); } @@ -3834,7 +4645,7 @@ static inline uint64_t float_box(T val) { template static inline T float_unbox(uint64_t val) { constexpr uint64_t TLEN = sizeof(T) * 8; - static_assert(TLEN == 32 || TLEN == 64, "unsupported soft float length"); + static_assert(TLEN == 16 || TLEN == 32 || TLEN == 64, "unsupported soft float length"); if constexpr (TLEN < FLEN) { // Floating-point operations on narrower n-bit operations (n < FLEN), // must check if the input operands are correctly NaN-boxed, i.e., all upper 
FLEN−n bits are 1. @@ -3843,7 +4654,9 @@ static inline T float_unbox(uint64_t val) { if ((val >> TLEN) != (UINT64_C(-1) >> TLEN)) { // The canonical NaN has a positive sign and all significant bits clear except the MSB, // a.k.a. the quiet bit. - if constexpr (TLEN == 32) { + if constexpr (TLEN == 16) { + return i_sfloat16::F_QNAN; + } else if constexpr (TLEN == 32) { return i_sfloat32::F_QNAN; } else if constexpr (TLEN == 64) { return i_sfloat64::F_QNAN; @@ -3938,6 +4751,16 @@ static FORCE_INLINE execute_status execute_FS(const STATE_ACCESS a, uint64_t &pc return advance_to_next_insn(a, pc); } +template +static FORCE_INLINE execute_status execute_FSH(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsh"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. + if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } + return execute_FS(a, pc, mcycle, insn); +} + template static FORCE_INLINE execute_status execute_FSW(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsw"); @@ -3974,6 +4797,16 @@ static FORCE_INLINE execute_status execute_FL(const STATE_ACCESS a, uint64_t &pc return advance_to_next_insn(a, pc); } +template +static FORCE_INLINE execute_status execute_FLH(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "flh"); + // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. 
+ if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { + return raise_illegal_insn_exception(a, pc, insn); + } + return execute_FL(a, pc, mcycle, insn); +} + template static FORCE_INLINE execute_status execute_FLW(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "flw"); @@ -3994,6 +4827,15 @@ static FORCE_INLINE execute_status execute_FLD(const STATE_ACCESS a, uint64_t &p return execute_FL(a, pc, mcycle, insn); } +template +static FORCE_INLINE execute_status execute_FMADD_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmadd.h"); + return execute_float_ternary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint16_t s3, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::fma(s1, s2, s3, static_cast(rm), fflags); + }); +} + template static FORCE_INLINE execute_status execute_FMADD_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmadd.s"); @@ -4019,13 +4861,23 @@ static FORCE_INLINE execute_status execute_FMADD(const STATE_ACCESS a, uint64_t return raise_illegal_insn_exception(a, pc, insn); } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { + case insn_FM_funct2_0000000000000000000000000::H: + return execute_FMADD_H(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::S: return execute_FMADD_S(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::D: return execute_FMADD_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_FMSUB_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmsub.h"); + return execute_float_ternary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint16_t s3, uint32_t rm, uint32_t 
*fflags) -> uint16_t { + return i_sfloat16::fma(s1, s2, s3 ^ i_sfloat16::SIGN_MASK, static_cast(rm), fflags); + }); } template @@ -4053,13 +4905,24 @@ static FORCE_INLINE execute_status execute_FMSUB(const STATE_ACCESS a, uint64_t return raise_illegal_insn_exception(a, pc, insn); } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { + case insn_FM_funct2_0000000000000000000000000::H: + return execute_FMSUB_H(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::S: return execute_FMSUB_S(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::D: return execute_FMSUB_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_FNMADD_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fnmadd.h"); + return execute_float_ternary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint16_t s3, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::fma(s1 ^ i_sfloat16::SIGN_MASK, s2, s3 ^ i_sfloat16::SIGN_MASK, + static_cast(rm), fflags); + }); } template @@ -4089,13 +4952,23 @@ static FORCE_INLINE execute_status execute_FNMADD(const STATE_ACCESS a, uint64_t return raise_illegal_insn_exception(a, pc, insn); } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { + case insn_FM_funct2_0000000000000000000000000::H: + return execute_FNMADD_H(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::S: return execute_FNMADD_S(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::D: return execute_FNMADD_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_FNMSUB_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fnmsub.h"); + 
return execute_float_ternary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint16_t s3, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::fma(s1 ^ i_sfloat16::SIGN_MASK, s2, s3, static_cast(rm), fflags); + }); } template @@ -4123,13 +4996,23 @@ static FORCE_INLINE execute_status execute_FNMSUB(const STATE_ACCESS a, uint64_t return raise_illegal_insn_exception(a, pc, insn); } switch (static_cast(insn_get_funct2_0000000000000000000000000(insn))) { + case insn_FM_funct2_0000000000000000000000000::H: + return execute_FNMSUB_H(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::S: return execute_FNMSUB_S(a, pc, insn); case insn_FM_funct2_0000000000000000000000000::D: return execute_FNMSUB_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_FADD_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fadd.h"); + return execute_float_binary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::add(s1, s2, static_cast(rm), fflags); + }); } template @@ -4150,6 +5033,15 @@ static FORCE_INLINE execute_status execute_FADD_D(const STATE_ACCESS a, uint64_t }); } +template +static FORCE_INLINE execute_status execute_FSUB_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsub.h"); + return execute_float_binary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::add(s1, s2 ^ i_sfloat16::SIGN_MASK, static_cast(rm), fflags); + }); +} + template static FORCE_INLINE execute_status execute_FSUB_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsub.s"); @@ -4168,6 +5060,15 @@ static FORCE_INLINE execute_status 
execute_FSUB_D(const STATE_ACCESS a, uint64_t }); } +template +static FORCE_INLINE execute_status execute_FMUL_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmul.h"); + return execute_float_binary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::mul(s1, s2, static_cast(rm), fflags); + }); +} + template static FORCE_INLINE execute_status execute_FMUL_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmul.s"); @@ -4185,6 +5086,14 @@ static FORCE_INLINE execute_status execute_FMUL_D(const STATE_ACCESS a, uint64_t return i_sfloat64::mul(s1, s2, static_cast(rm), fflags); }); } +template +static FORCE_INLINE execute_status execute_FDIV_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fdiv.h"); + return execute_float_binary_op_rm(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::div(s1, s2, static_cast(rm), fflags); + }); +} template static FORCE_INLINE execute_status execute_FDIV_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { @@ -4249,6 +5158,46 @@ static FORCE_INLINE execute_status execute_float_cmp_op(const STATE_ACCESS a, ui return advance_to_next_insn(a, pc); } +template +static FORCE_INLINE execute_status execute_FSGNJ_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsgnj.h"); + return execute_float_binary_op(a, pc, insn, + [](uint16_t s1, uint16_t s2, const uint32_t * /*fflags*/) -> uint16_t { + return (s1 & ~i_sfloat16::SIGN_MASK) | (s2 & i_sfloat16::SIGN_MASK); + }); +} + +template +static FORCE_INLINE execute_status execute_FSGNJN_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsgnjn.h"); + return execute_float_binary_op(a, pc, 
insn, + [](uint16_t s1, uint16_t s2, const uint32_t * /*fflags*/) -> uint16_t { + return (s1 & ~i_sfloat16::SIGN_MASK) | ((s2 & i_sfloat16::SIGN_MASK) ^ i_sfloat16::SIGN_MASK); + }); +} + +template +static FORCE_INLINE execute_status execute_FSGNJX_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsgnjx.h"); + return execute_float_binary_op(a, pc, insn, + [](uint16_t s1, uint16_t s2, const uint32_t * /*fflags*/) -> uint16_t { + return s1 ^ (s2 & i_sfloat16::SIGN_MASK); + }); +} + +template +static FORCE_INLINE execute_status execute_FSGN_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + switch (static_cast(insn_get_funct3_000000000000(insn))) { + case insn_FSGN_funct3_000000000000::J: + return execute_FSGNJ_H(a, pc, insn); + case insn_FSGN_funct3_000000000000::JN: + return execute_FSGNJN_H(a, pc, insn); + case insn_FSGN_funct3_000000000000::JX: + return execute_FSGNJX_H(a, pc, insn); + } + return raise_illegal_insn_exception(a, pc, insn); +} + template static FORCE_INLINE execute_status execute_FSGNJ_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsgnj.s"); @@ -4285,9 +5234,8 @@ static FORCE_INLINE execute_status execute_FSGN_S(const STATE_ACCESS a, uint64_t return execute_FSGNJN_S(a, pc, insn); case insn_FSGN_funct3_000000000000::JX: return execute_FSGNJX_S(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template @@ -4326,9 +5274,33 @@ static FORCE_INLINE execute_status execute_FSGN_D(const STATE_ACCESS a, uint64_t return execute_FSGNJN_D(a, pc, insn); case insn_FSGN_funct3_000000000000::JX: return execute_FSGNJX_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_FMIN_H(const STATE_ACCESS a, uint64_t &pc, uint32_t 
insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmin.h"); + return execute_float_binary_op(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint32_t *fflags) -> uint16_t { return i_sfloat16::min(s1, s2, fflags); }); +} + +template +static FORCE_INLINE execute_status execute_FMAX_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmax.h"); + return execute_float_binary_op(a, pc, insn, + [](uint16_t s1, uint16_t s2, uint32_t *fflags) -> uint16_t { return i_sfloat16::max(s1, s2, fflags); }); +} + +template +static FORCE_INLINE execute_status execute_FMINMAX_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + switch (static_cast(insn_get_funct3_000000000000(insn))) { + case insn_FMIN_FMAX_funct3_000000000000::FMIN: + return execute_FMIN_H(a, pc, insn); + case insn_FMIN_FMAX_funct3_000000000000::FMAX: + return execute_FMAX_H(a, pc, insn); + } + return raise_illegal_insn_exception(a, pc, insn); } template @@ -4348,13 +5320,12 @@ static FORCE_INLINE execute_status execute_FMAX_S(const STATE_ACCESS a, uint64_t template static FORCE_INLINE execute_status execute_FMINMAX_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { switch (static_cast(insn_get_funct3_000000000000(insn))) { - case insn_FMIN_FMAX_funct3_000000000000::MIN: + case insn_FMIN_FMAX_funct3_000000000000::FMIN: return execute_FMIN_S(a, pc, insn); - case insn_FMIN_FMAX_funct3_000000000000::MAX: + case insn_FMIN_FMAX_funct3_000000000000::FMAX: return execute_FMAX_S(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template @@ -4374,13 +5345,12 @@ static FORCE_INLINE execute_status execute_FMAX_D(const STATE_ACCESS a, uint64_t template static FORCE_INLINE execute_status execute_FMINMAX_D(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { switch (static_cast(insn_get_funct3_000000000000(insn))) { - case insn_FMIN_FMAX_funct3_000000000000::MIN: + case 
insn_FMIN_FMAX_funct3_000000000000::FMIN: return execute_FMIN_D(a, pc, insn); - case insn_FMIN_FMAX_funct3_000000000000::MAX: + case insn_FMIN_FMAX_funct3_000000000000::FMAX: return execute_FMAX_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template @@ -4444,12 +5414,48 @@ static FORCE_INLINE execute_status execute_FCVT_F_X(const STATE_ACCESS a, uint64 return advance_to_next_insn(a, pc); } +template +static FORCE_INLINE execute_status execute_FCVT_H_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.h.s"); + return execute_FCVT_F_F(a, pc, insn, + [](uint32_t s1, uint32_t rm, uint32_t *fflags) -> uint16_t { + return sfloat_cvt_f_f(s1, static_cast(rm), fflags); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_S_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.s.h"); + return execute_FCVT_F_F(a, pc, insn, + [](uint16_t s1, uint32_t rm, uint32_t *fflags) -> uint32_t { + return sfloat_cvt_f_f(s1, static_cast(rm), fflags); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_H_D(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.h.d"); + return execute_FCVT_F_F(a, pc, insn, + [](uint64_t s1, uint32_t rm, uint32_t *fflags) -> uint16_t { + return sfloat_cvt_f_f(s1, static_cast(rm), fflags); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_D_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.d.h"); + return execute_FCVT_F_F(a, pc, insn, + [](uint16_t s1, uint32_t rm, uint32_t *fflags) -> uint64_t { + return sfloat_cvt_f_f(s1, static_cast(rm), fflags); + }); +} + template static FORCE_INLINE execute_status execute_FCVT_S_D(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { 
[[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.s.d"); return execute_FCVT_F_F(a, pc, insn, [](uint64_t s1, uint32_t rm, uint32_t *fflags) -> uint32_t { - return sfloat_cvt_f64_f32(s1, static_cast(rm), fflags); + return sfloat_cvt_f_f(s1, static_cast(rm), fflags); }); } @@ -4457,12 +5463,19 @@ template static FORCE_INLINE execute_status execute_FCVT_D_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.d.s"); return execute_FCVT_F_F(a, pc, insn, - [](uint32_t s1, uint32_t /*rm*/, uint32_t *fflags) -> uint64_t { - // FCVT.D.S will never round, since it's a widen operation. - return sfloat_cvt_f32_f64(s1, fflags); + [](uint32_t s1, uint32_t rm, uint32_t *fflags) -> uint64_t { + return sfloat_cvt_f_f(s1, static_cast(rm), fflags); }); } +template +static FORCE_INLINE execute_status execute_FSQRT_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsqrt.h"); + return execute_float_unary_op_rm(a, pc, insn, [](uint16_t s1, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::sqrt(s1, static_cast(rm), fflags); + }); +} + template static FORCE_INLINE execute_status execute_FSQRT_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fsqrt.s"); @@ -4479,6 +5492,43 @@ static FORCE_INLINE execute_status execute_FSQRT_D(const STATE_ACCESS a, uint64_ }); } +template +static FORCE_INLINE execute_status execute_FLE_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fle.h"); + return execute_float_cmp_op(a, pc, insn, [](uint16_t s1, uint16_t s2, uint32_t *fflags) -> uint64_t { + return static_cast(i_sfloat16::le(s1, s2, fflags)); + }); +} + +template +static FORCE_INLINE execute_status execute_FLT_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "flt.h"); + return 
execute_float_cmp_op(a, pc, insn, [](uint16_t s1, uint16_t s2, uint32_t *fflags) -> uint64_t { + return static_cast(i_sfloat16::lt(s1, s2, fflags)); + }); +} + +template +static FORCE_INLINE execute_status execute_FEQ_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "feq.h"); + return execute_float_cmp_op(a, pc, insn, [](uint16_t s1, uint16_t s2, uint32_t *fflags) -> uint64_t { + return static_cast(i_sfloat16::eq(s1, s2, fflags)); + }); +} + +template +static FORCE_INLINE execute_status execute_FCMP_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + switch (static_cast(insn_get_funct3_000000000000(insn))) { + case insn_FCMP_funct3_000000000000::LT: + return execute_FLT_H(a, pc, insn); + case insn_FCMP_funct3_000000000000::LE: + return execute_FLE_H(a, pc, insn); + case insn_FCMP_funct3_000000000000::EQ: + return execute_FEQ_H(a, pc, insn); + } + return raise_illegal_insn_exception(a, pc, insn); +} + template static FORCE_INLINE execute_status execute_FLE_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fle.s"); @@ -4512,9 +5562,8 @@ static FORCE_INLINE execute_status execute_FCMP_S(const STATE_ACCESS a, uint64_t return execute_FLE_S(a, pc, insn); case insn_FCMP_funct3_000000000000::EQ: return execute_FEQ_S(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template @@ -4550,9 +5599,77 @@ static FORCE_INLINE execute_status execute_FCMP_D(const STATE_ACCESS a, uint64_t return execute_FLE_D(a, pc, insn); case insn_FCMP_funct3_000000000000::EQ: return execute_FEQ_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); +} + +template +static FORCE_INLINE execute_status execute_FCVT_H_W(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, 
"fcvt.h.w"); + return execute_FCVT_F_X(a, pc, insn, [](uint64_t s1, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::cvt_i_f(static_cast(s1), static_cast(rm), fflags); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_H_WU(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.h.wu"); + return execute_FCVT_F_X(a, pc, insn, [](uint64_t s1, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::cvt_i_f(static_cast(s1), static_cast(rm), fflags); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_H_L(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.h.l"); + return execute_FCVT_F_X(a, pc, insn, [](uint64_t s1, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::cvt_i_f(static_cast(s1), static_cast(rm), fflags); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_H_LU(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.h.lu"); + return execute_FCVT_F_X(a, pc, insn, [](uint64_t s1, uint32_t rm, uint32_t *fflags) -> uint16_t { + return i_sfloat16::cvt_i_f(s1, static_cast(rm), fflags); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_W_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.w.h"); + return execute_FCVT_X_F(a, pc, insn, [](uint16_t s1, uint32_t rm, uint32_t *fflags) -> uint64_t { + const auto val = i_sfloat16::cvt_f_i(s1, static_cast(rm), fflags); + // For XLEN > 32, FCVT.W.H sign-extends the 32-bit result. 
+ return static_cast(static_cast(val)); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_WU_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.wu.h"); + return execute_FCVT_X_F(a, pc, insn, [](uint16_t s1, uint32_t rm, uint32_t *fflags) -> uint64_t { + const auto val = i_sfloat16::cvt_f_i(s1, static_cast(rm), fflags); + // For XLEN > 32, FCVT.WU.H sign-extends the 32-bit result. + return static_cast(static_cast(static_cast(val))); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_L_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.l.h"); + return execute_FCVT_X_F(a, pc, insn, [](uint16_t s1, uint32_t rm, uint32_t *fflags) -> uint64_t { + const auto val = i_sfloat16::cvt_f_i(s1, static_cast(rm), fflags); + return static_cast(val); + }); +} + +template +static FORCE_INLINE execute_status execute_FCVT_LU_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fcvt.lu.h"); + return execute_FCVT_X_F(a, pc, insn, [](uint16_t s1, uint32_t rm, uint32_t *fflags) -> uint64_t { + return i_sfloat16::cvt_f_i(s1, static_cast(rm), fflags); + }); } template @@ -4706,6 +5823,12 @@ static FORCE_INLINE execute_status execute_FMV_F_X(const STATE_ACCESS a, uint64_ return advance_to_next_insn(a, pc); } +template +static FORCE_INLINE execute_status execute_FMV_H_X(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmv.h.x"); + return execute_FMV_F_X(a, pc, insn); +} + template static FORCE_INLINE execute_status execute_FMV_W_X(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmv.w.x"); @@ -4718,6 +5841,38 @@ static FORCE_INLINE execute_status execute_FMV_D_X(const STATE_ACCESS a, uint64_ return execute_FMV_F_X(a, pc, insn); } +template +static FORCE_INLINE 
execute_status execute_FCLASS_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fclass.h"); + return execute_FCLASS(a, pc, insn, [](uint16_t s1) -> uint64_t { return i_sfloat16::fclass(s1); }); +} + +template +static FORCE_INLINE execute_status execute_FMV_X_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fmv.x.h"); + const uint32_t rd = insn_get_rd(insn); + if (unlikely(rd == 0)) { + return advance_to_next_insn(a, pc); + } + const auto val = static_cast(a.read_f(insn_get_rs1(insn))); + // For RV64, the higher 48 bits of the destination register are + // filled with copies of the floating-point number’s sign bit. + // We can perform this with a sign extension. + a.write_x(rd, static_cast(static_cast(static_cast(val)))); + return advance_to_next_insn(a, pc); +} + +template +static FORCE_INLINE execute_status execute_FMV_FCLASS_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + switch (static_cast(insn_get_funct3_000000000000(insn))) { + case insn_FMV_FCLASS_funct3_000000000000::FMV: + return execute_FMV_X_H(a, pc, insn); + case insn_FMV_FCLASS_funct3_000000000000::FCLASS: + return execute_FCLASS_H(a, pc, insn); + } + return raise_illegal_insn_exception(a, pc, insn); +} + template static FORCE_INLINE execute_status execute_FCLASS_S(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { [[maybe_unused]] auto note = dump_insn(a, pc, insn, "fclass.s"); @@ -4746,9 +5901,8 @@ static FORCE_INLINE execute_status execute_FMV_FCLASS_S(const STATE_ACCESS a, ui return execute_FMV_X_W(a, pc, insn); case insn_FMV_FCLASS_funct3_000000000000::FCLASS: return execute_FCLASS_S(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template @@ -4776,110 +5930,152 @@ static FORCE_INLINE execute_status execute_FMV_FCLASS_D(const STATE_ACCESS a, ui return execute_FMV_X_D(a, pc, insn); 
case insn_FMV_FCLASS_funct3_000000000000::FCLASS: return execute_FCLASS_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template static FORCE_INLINE execute_status execute_FCVT_FMV_FCLASS(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { - switch (static_cast(insn_get_funct7_rs2(insn))) { - case insn_FD_funct7_rs2::FCVT_W_S: + switch (static_cast(insn_get_funct7_rs2(insn))) { + case insn_FDZfh_funct7_rs2::FCVT_H_S: + return execute_FCVT_H_S(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_S_H: + return execute_FCVT_S_H(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_H_D: + return execute_FCVT_H_D(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_D_H: + return execute_FCVT_D_H(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_S_D: + return execute_FCVT_S_D(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_D_S: + return execute_FCVT_D_S(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_W_H: + return execute_FCVT_W_H(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_WU_H: + return execute_FCVT_WU_H(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_L_H: + return execute_FCVT_L_H(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_LU_H: + return execute_FCVT_LU_H(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_W_S: return execute_FCVT_W_S(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_WU_S: + case insn_FDZfh_funct7_rs2::FCVT_WU_S: return execute_FCVT_WU_S(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_L_S: + case insn_FDZfh_funct7_rs2::FCVT_L_S: return execute_FCVT_L_S(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_LU_S: + case insn_FDZfh_funct7_rs2::FCVT_LU_S: return execute_FCVT_LU_S(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_W_D: + case insn_FDZfh_funct7_rs2::FCVT_W_D: return execute_FCVT_W_D(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_WU_D: + case insn_FDZfh_funct7_rs2::FCVT_WU_D: return execute_FCVT_WU_D(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_L_D: + case 
insn_FDZfh_funct7_rs2::FCVT_L_D: return execute_FCVT_L_D(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_LU_D: + case insn_FDZfh_funct7_rs2::FCVT_LU_D: return execute_FCVT_LU_D(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_S_D: - return execute_FCVT_S_D(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_S_W: + case insn_FDZfh_funct7_rs2::FCVT_H_W: + return execute_FCVT_H_W(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_H_WU: + return execute_FCVT_H_WU(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_H_L: + return execute_FCVT_H_L(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_H_LU: + return execute_FCVT_H_LU(a, pc, insn); + case insn_FDZfh_funct7_rs2::FCVT_S_W: return execute_FCVT_S_W(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_S_WU: + case insn_FDZfh_funct7_rs2::FCVT_S_WU: return execute_FCVT_S_WU(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_S_L: + case insn_FDZfh_funct7_rs2::FCVT_S_L: return execute_FCVT_S_L(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_S_LU: + case insn_FDZfh_funct7_rs2::FCVT_S_LU: return execute_FCVT_S_LU(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_D_S: - return execute_FCVT_D_S(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_D_W: + case insn_FDZfh_funct7_rs2::FCVT_D_W: return execute_FCVT_D_W(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_D_WU: + case insn_FDZfh_funct7_rs2::FCVT_D_WU: return execute_FCVT_D_WU(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_D_L: + case insn_FDZfh_funct7_rs2::FCVT_D_L: return execute_FCVT_D_L(a, pc, insn); - case insn_FD_funct7_rs2::FCVT_D_LU: + case insn_FDZfh_funct7_rs2::FCVT_D_LU: return execute_FCVT_D_LU(a, pc, insn); - case insn_FD_funct7_rs2::FMV_W_X: + case insn_FDZfh_funct7_rs2::FMV_H_X: + return execute_FMV_H_X(a, pc, insn); + case insn_FDZfh_funct7_rs2::FMV_W_X: return execute_FMV_W_X(a, pc, insn); - case insn_FD_funct7_rs2::FMV_D_X: + case insn_FDZfh_funct7_rs2::FMV_D_X: return execute_FMV_D_X(a, pc, insn); - case insn_FD_funct7_rs2::FMV_FCLASS_S: + case insn_FDZfh_funct7_rs2::FMV_FCLASS_H: + return 
execute_FMV_FCLASS_H(a, pc, insn); + case insn_FDZfh_funct7_rs2::FMV_FCLASS_S: return execute_FMV_FCLASS_S(a, pc, insn); - case insn_FD_funct7_rs2::FMV_FCLASS_D: + case insn_FDZfh_funct7_rs2::FMV_FCLASS_D: return execute_FMV_FCLASS_D(a, pc, insn); - default: - return raise_illegal_insn_exception(a, pc, insn); } + return raise_illegal_insn_exception(a, pc, insn); } template -static FORCE_INLINE execute_status execute_FD(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { +static FORCE_INLINE execute_status execute_FDZfh(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { // If FS is OFF, attempts to read or write the float state will cause an illegal instruction exception. if (unlikely((a.read_mstatus() & MSTATUS_FS_MASK) == MSTATUS_FS_OFF)) { return raise_illegal_insn_exception(a, pc, insn); } - switch (static_cast(insn_get_funct7(insn))) { - case insn_FD_funct7::FADD_S: + switch (static_cast(insn_get_funct7(insn))) { + case insn_FDZfh_funct7::FADD_H: + return execute_FADD_H(a, pc, insn); + case insn_FDZfh_funct7::FADD_S: return execute_FADD_S(a, pc, insn); - case insn_FD_funct7::FADD_D: + case insn_FDZfh_funct7::FADD_D: return execute_FADD_D(a, pc, insn); - case insn_FD_funct7::FSUB_S: + case insn_FDZfh_funct7::FSUB_H: + return execute_FSUB_H(a, pc, insn); + case insn_FDZfh_funct7::FSUB_S: return execute_FSUB_S(a, pc, insn); - case insn_FD_funct7::FSUB_D: + case insn_FDZfh_funct7::FSUB_D: return execute_FSUB_D(a, pc, insn); - case insn_FD_funct7::FMUL_S: + case insn_FDZfh_funct7::FMUL_H: + return execute_FMUL_H(a, pc, insn); + case insn_FDZfh_funct7::FMUL_S: return execute_FMUL_S(a, pc, insn); - case insn_FD_funct7::FMUL_D: + case insn_FDZfh_funct7::FMUL_D: return execute_FMUL_D(a, pc, insn); - case insn_FD_funct7::FDIV_S: + case insn_FDZfh_funct7::FDIV_H: + return execute_FDIV_H(a, pc, insn); + case insn_FDZfh_funct7::FDIV_S: return execute_FDIV_S(a, pc, insn); - case insn_FD_funct7::FDIV_D: + case insn_FDZfh_funct7::FDIV_D: return execute_FDIV_D(a, pc, insn); 
- case insn_FD_funct7::FSGN_S: + case insn_FDZfh_funct7::FSGN_H: + return execute_FSGN_H(a, pc, insn); + case insn_FDZfh_funct7::FSGN_S: return execute_FSGN_S(a, pc, insn); - case insn_FD_funct7::FSGN_D: + case insn_FDZfh_funct7::FSGN_D: return execute_FSGN_D(a, pc, insn); - case insn_FD_funct7::FMINMAX_S: + case insn_FDZfh_funct7::FMINMAX_H: + return execute_FMINMAX_H(a, pc, insn); + case insn_FDZfh_funct7::FMINMAX_S: return execute_FMINMAX_S(a, pc, insn); - case insn_FD_funct7::FMINMAX_D: + case insn_FDZfh_funct7::FMINMAX_D: return execute_FMINMAX_D(a, pc, insn); - case insn_FD_funct7::FSQRT_S: + case insn_FDZfh_funct7::FSQRT_H: + return execute_FSQRT_H(a, pc, insn); + case insn_FDZfh_funct7::FSQRT_S: return execute_FSQRT_S(a, pc, insn); - case insn_FD_funct7::FSQRT_D: + case insn_FDZfh_funct7::FSQRT_D: return execute_FSQRT_D(a, pc, insn); - case insn_FD_funct7::FCMP_S: + case insn_FDZfh_funct7::FCMP_H: + return execute_FCMP_H(a, pc, insn); + case insn_FDZfh_funct7::FCMP_S: return execute_FCMP_S(a, pc, insn); - case insn_FD_funct7::FCMP_D: + case insn_FDZfh_funct7::FCMP_D: return execute_FCMP_D(a, pc, insn); default: return execute_FCVT_FMV_FCLASS(a, pc, insn); } } -template +template static FORCE_INLINE execute_status execute_C_L(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t rd, - uint32_t rs1, int32_t imm) { + uint32_t rs1, U imm) { const uint64_t vaddr = a.read_x(rs1); T val = 0; if (unlikely(!read_virtual_memory(a, pc, mcycle, vaddr + imm, &val))) { @@ -4894,9 +6090,9 @@ static FORCE_INLINE execute_status execute_C_L(const STATE_ACCESS a, uint64_t &p return advance_to_next_insn<2>(a, pc); } -template +template static FORCE_INLINE execute_status execute_C_S(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t rs2, - uint32_t rs1, int32_t imm) { + uint32_t rs1, U imm) { const uint64_t vaddr = a.read_x(rs1); const uint64_t val = a.read_x(rs2); const execute_status status = write_virtual_memory(a, pc, mcycle, vaddr + imm, val); @@ -5207,6 
+6403,127 @@ static FORCE_INLINE execute_status execute_C_ADDW(const STATE_ACCESS a, uint64_t }); } +/// \brief Implementation of the C.LBU instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_LBU(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.lbu"); + const uint32_t rd = insn_get_CIW_CL_rd_CS_CA_rs2(insn); + const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); + const uint32_t uimm = insn_get_C_LS_B_uimm(insn); + return execute_C_L(a, pc, mcycle, rd, rs1, uimm); +} + +/// \brief Implementation of the C.LHU instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_LHU(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.lhu"); + const uint32_t rd = insn_get_CIW_CL_rd_CS_CA_rs2(insn); + const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); + const uint32_t uimm = insn_get_C_LS_H_uimm(insn); + return execute_C_L(a, pc, mcycle, rd, rs1, uimm); +} + +/// \brief Implementation of the C.LH instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_LH(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.lh"); + const uint32_t rd = insn_get_CIW_CL_rd_CS_CA_rs2(insn); + const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); + const auto imm = static_cast(insn_get_C_LS_H_uimm(insn)); + return execute_C_L(a, pc, mcycle, rd, rs1, imm); +} + +/// \brief Implementation of the C.SB instruction (Zcb extension). 
+template +static FORCE_INLINE execute_status execute_C_SB(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.sb"); + const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); + const uint32_t rs2 = insn_get_CIW_CL_rd_CS_CA_rs2(insn); + const uint32_t uimm = insn_get_C_LS_B_uimm(insn); + return execute_C_S(a, pc, mcycle, rs2, rs1, uimm); +} + +/// \brief Implementation of the C.SH instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_SH(const STATE_ACCESS a, uint64_t &pc, uint64_t mcycle, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.sh"); + const uint32_t rs1 = insn_get_CL_CS_CA_CB_rs1(insn); + const uint32_t rs2 = insn_get_CIW_CL_rd_CS_CA_rs2(insn); + const uint32_t uimm = insn_get_C_LS_H_uimm(insn); + return execute_C_S(a, pc, mcycle, rs2, rs1, uimm); +} + +/// \brief Implementation of the C.ZEXT.B instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_ZEXT_B(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.zext.b"); + const uint32_t rd = insn_get_CL_CS_CA_CB_rs1(insn); + const uint64_t rs1_value = a.read_x(rd); + a.write_x(rd, static_cast(rs1_value)); + return advance_to_next_insn<2>(a, pc); +} + +/// \brief Implementation of the C.SEXT.B instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_SEXT_B(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.sext.b"); + const uint32_t rd = insn_get_CL_CS_CA_CB_rs1(insn); + const uint64_t rs1_value = a.read_x(rd); + a.write_x(rd, static_cast(static_cast(rs1_value))); + return advance_to_next_insn<2>(a, pc); +} + +/// \brief Implementation of the C.ZEXT.H instruction (Zcb extension). 
+template +static FORCE_INLINE execute_status execute_C_ZEXT_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.zext.h"); + const uint32_t rd = insn_get_CL_CS_CA_CB_rs1(insn); + const uint64_t rs1_value = a.read_x(rd); + a.write_x(rd, static_cast(rs1_value)); + return advance_to_next_insn<2>(a, pc); +} + +/// \brief Implementation of the C.SEXT.H instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_SEXT_H(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.sext.h"); + const uint32_t rd = insn_get_CL_CS_CA_CB_rs1(insn); + const uint64_t rs1_value = a.read_x(rd); + a.write_x(rd, static_cast(static_cast(rs1_value))); + return advance_to_next_insn<2>(a, pc); +} + +/// \brief Implementation of the C.ZEXT.W instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_ZEXT_W(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.zext.w"); + const uint32_t rd = insn_get_CL_CS_CA_CB_rs1(insn); + const uint64_t rs1_value = a.read_x(rd); + a.write_x(rd, static_cast(static_cast(rs1_value))); + return advance_to_next_insn<2>(a, pc); +} + +/// \brief Implementation of the C.NOT instruction (Zcb extension). +template +static FORCE_INLINE execute_status execute_C_NOT(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.not"); + const uint32_t rd = insn_get_CL_CS_CA_CB_rs1(insn); + const uint64_t rs1_value = a.read_x(rd); + a.write_x(rd, ~rs1_value); + return advance_to_next_insn<2>(a, pc); +} + +/// \brief Implementation of the C.MUL instruction (Zcb extension). 
+template +static FORCE_INLINE execute_status execute_C_MUL(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { + [[maybe_unused]] auto note = dump_insn(a, pc, static_cast(insn), "c.mul"); + return execute_C_arithmetic(a, pc, insn, [](uint64_t rs1_value, uint64_t rs2_value) -> uint64_t { + int64_t val = 0; + __builtin_mul_overflow(static_cast(rs1_value), static_cast(rs2_value), &val); + return static_cast(val); + }); +} + /// \brief Implementation of the C_J instruction. template static FORCE_INLINE execute_status execute_C_J(const STATE_ACCESS a, uint64_t &pc, uint32_t insn) { @@ -5337,8 +6654,7 @@ static FORCE_INLINE execute_status execute_C_ADD(const STATE_ACCESS a, uint64_t const uint32_t rs2 = insn_get_CR_CSS_rs2(insn); const uint64_t rd_value = a.read_x(rd); const uint64_t rs2_value = a.read_x(rs2); - uint64_t val = 0; - __builtin_add_overflow(rd_value, rs2_value, &val); + const uint64_t val = rd_value + rs2_value; a.write_x(rd, val); return advance_to_next_insn<2>(a, pc); } @@ -5450,11 +6766,27 @@ template static FORCE_INLINE fetch_status fetch_insn(const STATE_ACCESS a, uint64_t &pc, uint32_t &insn, uint64_t &last_vaddr_page, i_state_access_fast_addr_t &last_vf_offset, uint64_t &last_pma_index) { [[maybe_unused]] auto note = a.make_scoped_note("fetch_insn"); + // The following if efficiently checks that the current pc is in the same page as the last pc fetch + // and that it's not crossing a page boundary simultaneously. + // This is the hot path and most fetches will fall through inside this if block. + // This early check is not strictly necessary for correctness, + // but it makes the fetch use just about 5 instructions on a x86_64 hardware. + if (likely((pc ^ last_vaddr_page) < (PAGE_OFFSET_MASK - 1))) { + // Here we are sure that reading 4 bytes won't cross a page boundary. + // However pc may not be 4-byte aligned, at worst it could be only 2-byte aligned, + // therefore we must perform a misaligned 4-byte read on a 2-byte aligned pointer. 
+ // In case pc holds a compressed instruction, insn will store 2 additional bytes, + // but this is fine because later the instruction decoder will discard them. + a.template read_memory_word(pc + last_vf_offset, last_pma_index, &insn); + return fetch_status::success; + } + // Otherwise, it's the slow path, fetch pc is either not the same as last cache or crossing a page boundary. + i_state_access_fast_addr_t faddr{0}; const uint64_t pc_vaddr_page = tlb_addr_page(pc); // If pc is in the same page as the last pc fetch, // we can just reuse last fetch translation, skipping TLB or slow address translation altogether. - if (likely(pc_vaddr_page == last_vaddr_page)) { + if (unlikely(pc_vaddr_page == last_vaddr_page)) { faddr = pc + last_vf_offset; } else { // Not in the same page as last the fetch, we need to perform address translation @@ -5641,63 +6973,66 @@ static NO_INLINE execute_status interpret_loop(const STATE_ACCESS a, uint64_t mc INSN_CASE(ANDI_rdN): status = execute_ANDI(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLLI_rdN): - status = execute_SLLI(a, pc, insn); + INSN_CASE(SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_rdN): + status = execute_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRLI_SRAI_rdN): - status = execute_SRLI_SRAI(a, pc, insn); + INSN_CASE(SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_rdN): + status = execute_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI(a, pc, insn); INSN_BREAK(); - INSN_CASE(ADD_MUL_SUB_rdN): - status = execute_ADD_MUL_SUB(a, pc, insn); + INSN_CASE(ADD_SUB_MUL_rdN): + status = execute_ADD_SUB_MUL(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLL_MULH_rdN): - status = execute_SLL_MULH(a, pc, insn); + INSN_CASE(SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_rdN): + status = execute_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLT_MULHSU_rdN): - status = execute_SLT_MULHSU(a, pc, insn); + INSN_CASE(SLT_MULHSU_SH1ADD_CLMULR_rdN): + status = execute_SLT_MULHSU_SH1ADD_CLMULR(a, pc, insn); 
INSN_BREAK(); - INSN_CASE(SLTU_MULHU_rdN): - status = execute_SLTU_MULHU(a, pc, insn); + INSN_CASE(SLTU_MULHU_CLMULH_rdN): + status = execute_SLTU_MULHU_CLMULH(a, pc, insn); INSN_BREAK(); - INSN_CASE(XOR_DIV_rdN): - status = execute_XOR_DIV(a, pc, insn); + INSN_CASE(XOR_DIV_SH2ADD_XNOR_MIN_rdN): + status = execute_XOR_DIV_SH2ADD_XNOR_MIN(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRL_DIVU_SRA_rdN): - status = execute_SRL_DIVU_SRA(a, pc, insn); + INSN_CASE(SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_rdN): + status = execute_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ(a, pc, insn); INSN_BREAK(); - INSN_CASE(OR_REM_rdN): - status = execute_OR_REM(a, pc, insn); + INSN_CASE(OR_REM_SH3ADD_ORN_MAX_rdN): + status = execute_OR_REM_SH3ADD_ORN_MAX(a, pc, insn); INSN_BREAK(); - INSN_CASE(AND_REMU_rdN): - status = execute_AND_REMU(a, pc, insn); + INSN_CASE(AND_REMU_ANDN_MAXU_CZERO_NEZ_rdN): + status = execute_AND_REMU_ANDN_MAXU_CZERO_NEZ(a, pc, insn); INSN_BREAK(); INSN_CASE(ADDIW_rdN): status = execute_ADDIW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLLIW_rdN): - status = execute_SLLIW(a, pc, insn); + INSN_CASE(SLLIW_SLLI_UW_CLZW_CTZW_CPOPW_rdN): + status = execute_SLLIW_SLLI_UW_CLZW_CTZW_CPOPW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRLIW_SRAIW_rdN): - status = execute_SRLIW_SRAIW(a, pc, insn); + INSN_CASE(SRLIW_SRAIW_RORIW_rdN): + status = execute_SRLIW_SRAIW_RORIW(a, pc, insn); INSN_BREAK(); - INSN_CASE(ADDW_MULW_SUBW_rdN): - status = execute_ADDW_MULW_SUBW(a, pc, insn); + INSN_CASE(ADDW_SUBW_MULW_ADD_UW_rdN): + status = execute_ADDW_SUBW_MULW_ADD_UW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLLW_rdN): - status = execute_SLLW(a, pc, insn); + INSN_CASE(SLLW_ROLW_rdN): + status = execute_SLLW_ROLW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRLW_DIVUW_SRAW_rdN): - status = execute_SRLW_DIVUW_SRAW(a, pc, insn); + INSN_CASE(SRLW_SRAW_DIVUW_RORW_rdN): + status = execute_SRLW_SRAW_DIVUW_RORW(a, pc, insn); INSN_BREAK(); - INSN_CASE(DIVW_rdN): - status = execute_DIVW(a, pc, insn); + 
INSN_CASE(DIVW_SH2ADD_UW_ZEXT_H_rdN): + status = execute_DIVW_SH2ADD_UW_ZEXT_H(a, pc, insn); INSN_BREAK(); - INSN_CASE(REMW_rdN): - status = execute_REMW(a, pc, insn); + INSN_CASE(REMW_SH3ADD_UW_rdN): + status = execute_REMW_SH3ADD_UW(a, pc, insn); INSN_BREAK(); INSN_CASE(REMUW_rdN): status = execute_REMUW(a, pc, insn); INSN_BREAK(); + INSN_CASE(SH1ADD_UW_rdN): + status = execute_SH1ADD_UW(a, pc, insn); + INSN_BREAK(); INSN_CASE(LD_rdN): status = execute_LD(a, pc, mcycle, insn); INSN_BREAK(); @@ -5844,9 +7179,46 @@ static NO_INLINE execute_status interpret_loop(const STATE_ACCESS a, uint64_t mc INSN_CASE(C_EBREAK): status = execute_C_EBREAK(a, pc, insn); INSN_BREAK(); - // FD extensions - INSN_CASE(FD): - status = execute_FD(a, pc, insn); + // Zcb extension + INSN_CASE(C_LBU): + status = execute_C_LBU(a, pc, mcycle, insn); + INSN_BREAK(); + INSN_CASE(C_LHU): + status = execute_C_LHU(a, pc, mcycle, insn); + INSN_BREAK(); + INSN_CASE(C_LH): + status = execute_C_LH(a, pc, mcycle, insn); + INSN_BREAK(); + INSN_CASE(C_SB): + status = execute_C_SB(a, pc, mcycle, insn); + INSN_BREAK(); + INSN_CASE(C_SH): + status = execute_C_SH(a, pc, mcycle, insn); + INSN_BREAK(); + INSN_CASE(C_ZEXT_B): + status = execute_C_ZEXT_B(a, pc, insn); + INSN_BREAK(); + INSN_CASE(C_SEXT_B): + status = execute_C_SEXT_B(a, pc, insn); + INSN_BREAK(); + INSN_CASE(C_ZEXT_H): + status = execute_C_ZEXT_H(a, pc, insn); + INSN_BREAK(); + INSN_CASE(C_SEXT_H): + status = execute_C_SEXT_H(a, pc, insn); + INSN_BREAK(); + INSN_CASE(C_ZEXT_W): + status = execute_C_ZEXT_W(a, pc, insn); + INSN_BREAK(); + INSN_CASE(C_NOT): + status = execute_C_NOT(a, pc, insn); + INSN_BREAK(); + INSN_CASE(C_MUL): + status = execute_C_MUL(a, pc, insn); + INSN_BREAK(); + // F, D, Zfh extensions + INSN_CASE(FDZfh): + status = execute_FDZfh(a, pc, insn); INSN_BREAK(); INSN_CASE(FLD): status = execute_FLD(a, pc, mcycle, insn); @@ -5854,12 +7226,18 @@ static NO_INLINE execute_status interpret_loop(const STATE_ACCESS a, uint64_t mc 
INSN_CASE(FLW): status = execute_FLW(a, pc, mcycle, insn); INSN_BREAK(); + INSN_CASE(FLH): + status = execute_FLH(a, pc, mcycle, insn); + INSN_BREAK(); INSN_CASE(FSD): status = execute_FSD(a, pc, mcycle, insn); INSN_BREAK(); INSN_CASE(FSW): status = execute_FSW(a, pc, mcycle, insn); INSN_BREAK(); + INSN_CASE(FSH): + status = execute_FSH(a, pc, mcycle, insn); + INSN_BREAK(); INSN_CASE(FMADD): status = execute_FMADD(a, pc, insn); INSN_BREAK(); @@ -5933,63 +7311,66 @@ static NO_INLINE execute_status interpret_loop(const STATE_ACCESS a, uint64_t mc INSN_CASE(ANDI_rd0): status = execute_ANDI(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLLI_rd0): - status = execute_SLLI(a, pc, insn); + INSN_CASE(SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_rd0): + status = execute_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRLI_SRAI_rd0): - status = execute_SRLI_SRAI(a, pc, insn); + INSN_CASE(SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_rd0): + status = execute_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI(a, pc, insn); INSN_BREAK(); - INSN_CASE(ADD_MUL_SUB_rd0): - status = execute_ADD_MUL_SUB(a, pc, insn); + INSN_CASE(ADD_SUB_MUL_rd0): + status = execute_ADD_SUB_MUL(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLL_MULH_rd0): - status = execute_SLL_MULH(a, pc, insn); + INSN_CASE(SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_rd0): + status = execute_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLT_MULHSU_rd0): - status = execute_SLT_MULHSU(a, pc, insn); + INSN_CASE(SLT_MULHSU_SH1ADD_CLMULR_rd0): + status = execute_SLT_MULHSU_SH1ADD_CLMULR(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLTU_MULHU_rd0): - status = execute_SLTU_MULHU(a, pc, insn); + INSN_CASE(SLTU_MULHU_CLMULH_rd0): + status = execute_SLTU_MULHU_CLMULH(a, pc, insn); INSN_BREAK(); - INSN_CASE(XOR_DIV_rd0): - status = execute_XOR_DIV(a, pc, insn); + INSN_CASE(XOR_DIV_SH2ADD_XNOR_MIN_rd0): + status = execute_XOR_DIV_SH2ADD_XNOR_MIN(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRL_DIVU_SRA_rd0): - status = 
execute_SRL_DIVU_SRA(a, pc, insn); + INSN_CASE(SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_rd0): + status = execute_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ(a, pc, insn); INSN_BREAK(); - INSN_CASE(OR_REM_rd0): - status = execute_OR_REM(a, pc, insn); + INSN_CASE(OR_REM_SH3ADD_ORN_MAX_rd0): + status = execute_OR_REM_SH3ADD_ORN_MAX(a, pc, insn); INSN_BREAK(); - INSN_CASE(AND_REMU_rd0): - status = execute_AND_REMU(a, pc, insn); + INSN_CASE(AND_REMU_ANDN_MAXU_CZERO_NEZ_rd0): + status = execute_AND_REMU_ANDN_MAXU_CZERO_NEZ(a, pc, insn); INSN_BREAK(); INSN_CASE(ADDIW_rd0): status = execute_ADDIW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLLIW_rd0): - status = execute_SLLIW(a, pc, insn); + INSN_CASE(SLLIW_SLLI_UW_CLZW_CTZW_CPOPW_rd0): + status = execute_SLLIW_SLLI_UW_CLZW_CTZW_CPOPW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRLIW_SRAIW_rd0): - status = execute_SRLIW_SRAIW(a, pc, insn); + INSN_CASE(SRLIW_SRAIW_RORIW_rd0): + status = execute_SRLIW_SRAIW_RORIW(a, pc, insn); INSN_BREAK(); - INSN_CASE(ADDW_MULW_SUBW_rd0): - status = execute_ADDW_MULW_SUBW(a, pc, insn); + INSN_CASE(ADDW_SUBW_MULW_ADD_UW_rd0): + status = execute_ADDW_SUBW_MULW_ADD_UW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SLLW_rd0): - status = execute_SLLW(a, pc, insn); + INSN_CASE(SLLW_ROLW_rd0): + status = execute_SLLW_ROLW(a, pc, insn); INSN_BREAK(); - INSN_CASE(SRLW_DIVUW_SRAW_rd0): - status = execute_SRLW_DIVUW_SRAW(a, pc, insn); + INSN_CASE(SRLW_SRAW_DIVUW_RORW_rd0): + status = execute_SRLW_SRAW_DIVUW_RORW(a, pc, insn); INSN_BREAK(); - INSN_CASE(DIVW_rd0): - status = execute_DIVW(a, pc, insn); + INSN_CASE(DIVW_SH2ADD_UW_ZEXT_H_rd0): + status = execute_DIVW_SH2ADD_UW_ZEXT_H(a, pc, insn); INSN_BREAK(); - INSN_CASE(REMW_rd0): - status = execute_REMW(a, pc, insn); + INSN_CASE(REMW_SH3ADD_UW_rd0): + status = execute_REMW_SH3ADD_UW(a, pc, insn); INSN_BREAK(); INSN_CASE(REMUW_rd0): status = execute_REMUW(a, pc, insn); INSN_BREAK(); + INSN_CASE(SH1ADD_UW_rd0): + status = execute_SH1ADD_UW(a, pc, insn); + INSN_BREAK(); 
INSN_CASE(LD_rd0): status = execute_LD(a, pc, mcycle, insn); INSN_BREAK(); diff --git a/src/machine.h b/src/machine.h index aec6d8d08..7766e6039 100644 --- a/src/machine.h +++ b/src/machine.h @@ -71,8 +71,8 @@ class machine final { machine_console m_console; ///< Console instance mutable machine_address_ranges m_ars; ///< Address ranges mutable hash_tree m_ht; ///< Top level hash tree - processor_state *m_s; ///< Big machine processor state - uarch_processor_state *m_us; ///< Microarchitecture processor state + processor_state *const m_s; ///< Big machine processor state + uarch_processor_state *const m_us; ///< Microarchitecture processor state std::unordered_map m_counters; ///< Counters used for statistics collection diff --git a/src/riscv-constants.h b/src/riscv-constants.h index b540b5d19..689175583 100644 --- a/src/riscv-constants.h +++ b/src/riscv-constants.h @@ -32,12 +32,10 @@ namespace cartesi { #define NO_SATP_MODE_SV57 // NOLINT(cppcoreguidelines-macro-usage) /// \brief Global RISC-V constants -enum RISCV_constants { - XLEN = 64, ///< Maximum XLEN - FLEN = 64, ///< Maximum FLEN - ASIDLEN = 0, ///< Number of implemented ASID bits - ASIDMAX = 16 ///< Maximum number of implemented ASID bits -}; +constexpr uint32_t XLEN = 64; ///< Maximum XLEN +constexpr uint32_t FLEN = 64; ///< Maximum FLEN +constexpr uint32_t ASIDLEN = 0; ///< Number of implemented ASID bits +constexpr uint32_t ASIDMAX = 16; ///< Maximum number of implemented ASID bits /// \brief Register counts enum REG_COUNT { X_REG_COUNT = 32, F_REG_COUNT = 32, UARCH_X_REG_COUNT = 32 }; @@ -128,10 +126,17 @@ enum MISA_shifts { MISA_EXT_F_SHIFT = ('F' - 'A'), MISA_EXT_D_SHIFT = ('D' - 'A'), MISA_EXT_C_SHIFT = ('C' - 'A'), + MISA_EXT_B_SHIFT = ('B' - 'A'), MISA_MXL_SHIFT = (XLEN - 2) }; +/// \brief Supported RISC-V ISA extensions, used by the Device Tree during boot. 
+/// \details See also +/// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/riscv/extensions.yaml +/// https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=gcc/config/riscv/riscv-profiles.def;hb=HEAD +constexpr const char ISA_string[] = "rv64imafdcsu_zicntr_zicsr_zifencei_zihpm_zba_zbb_zbc_zbs_zcb_zfh"; + /// \brief misa masks enum MISA_masks : uint64_t { MISA_EXT_S_MASK = UINT64_C(1) << MISA_EXT_S_SHIFT, ///< Supervisor mode implemented @@ -142,6 +147,7 @@ enum MISA_masks : uint64_t { MISA_EXT_F_MASK = UINT64_C(1) << MISA_EXT_F_SHIFT, ///< Single-precision floating-point extension MISA_EXT_D_MASK = UINT64_C(1) << MISA_EXT_D_SHIFT, ///< Double-precision floating-point extension MISA_EXT_C_MASK = UINT64_C(1) << MISA_EXT_C_SHIFT, ///< Compressed extension + MISA_EXT_B_MASK = UINT64_C(1) << MISA_EXT_B_SHIFT, ///< Bitmanip extension }; /// \brief misa constants @@ -440,8 +446,8 @@ enum CARTESI_init : uint64_t { MCAUSE_INIT = UINT64_C(0), ///< Initial value for mcause MTVAL_INIT = UINT64_C(0), ///< Initial value for mtval MISA_INIT = (MISA_MXL_VALUE << MISA_MXL_SHIFT) | MISA_EXT_S_MASK | MISA_EXT_U_MASK | MISA_EXT_I_MASK | - MISA_EXT_M_MASK | MISA_EXT_A_MASK | MISA_EXT_F_MASK | MISA_EXT_D_MASK | - MISA_EXT_C_MASK, ///< Initial value for misa + MISA_EXT_M_MASK | MISA_EXT_A_MASK | MISA_EXT_F_MASK | MISA_EXT_D_MASK | MISA_EXT_C_MASK | + MISA_EXT_B_MASK, ///< Initial value for misa MIE_INIT = UINT64_C(0), ///< Initial value for mie MIP_INIT = UINT64_C(0), ///< Initial value for mip MEDELEG_INIT = UINT64_C(0), ///< Initial value for medeleg @@ -630,16 +636,53 @@ enum class CSR_address : uint32_t { tdata3 = 0x7a3, }; -/// \brief The result of insn >> 26 (6 most significant bits of funct7) can be -/// used to identify the SRI instructions -enum insn_SRLI_SRAI_funct7_sr1 : uint32_t { SRLI = 0b000000, SRAI = 0b010000 }; +/// \brief funct7_sr1 constants for SRLI, SRAI, RORI, ORC_B, REV8, BEXTI instructions +enum class 
insn_SRLI_SRAI_RORI_ORC_B_REV8_BEXTI_funct7_sr1 : uint32_t { + SRLI = 0b000000, + SRAI = 0b010000, + RORI = 0b011000, + ORC_B = 0b001010, + REV8 = 0b011010, + BEXTI = 0b010010, +}; + +/// \brief funct7_rs2 constants for ORC_B instructions +enum class insn_ORC_B_funct7_rs2 : uint32_t { + ORC_B = 0b001010000111, +}; + +/// \brief funct7_rs2 constants for REV8 instructions +enum class insn_REV8_funct7_rs2 : uint32_t { + REV8 = 0b011010111000, +}; + +/// \brief funct7_sr1 constants for SLLI, CLZ, CTZ, CPOP, SEXT.B, SEXT.H, BCLRI, BINVI, BSETI instructions +enum class insn_SLLI_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_BCLRI_BINVI_BSETI_funct7_sr1 : uint32_t { + SLLI = 0b000000, + CLZ_CTZ_CPOP_SEXT_B_SEXT_H = 0b011000, + BCLRI = 0b010010, + BINVI = 0b011010, + BSETI = 0b001010, +}; + +/// \brief funct7_rs2 constants for CLZ, CTZ, CPOP, SEXT.B, SEXT.H instructions +enum class insn_CLZ_CTZ_CPOP_SEXT_B_SEXT_H_funct7_rs2 : uint32_t { + CLZ = 0b0110000'00000, + CTZ = 0b0110000'00001, + CPOP = 0b0110000'00010, + SEXT_B = 0b0110000'00100, + SEXT_H = 0b0110000'00101, +}; -/// \brief funct7 constants for SRW instructions -enum insn_SRLIW_SRAIW_funct7 : uint32_t { SRLIW = 0b0000000, SRAIW = 0b0100000 }; +/// \brief funct7 constants for SRLIW, SRAIW, RORIW instructions +enum class insn_SRLIW_SRAIW_RORIW_funct7 : uint32_t { + SRLIW = 0b0000000, + SRAIW = 0b0100000, + RORIW = 0b0110000, +}; -/// \brief The result of insn >> 27 (5 most significant bits of funct7) can be -/// used to identify the atomic operation -enum insn_AMO_funct7_sr2 : uint32_t { +/// \brief funct7_sr2 constants for AMO instructions +enum class insn_AMO_funct7_sr2 : uint32_t { AMOADD = 0b00000, AMOSWAP = 0b00001, LR = 0b00010, @@ -653,53 +696,99 @@ enum insn_AMO_funct7_sr2 : uint32_t { AMOMAXU = 0b11100 }; -/// \brief funct7 constants for ADD, MUL, SUB instructions -enum insn_ADD_MUL_SUB_funct7 : uint32_t { ADD = 0b0000000, MUL = 0b0000001, SUB = 0b0100000 }; +/// \brief funct7 constants for ADD, SUB, MUL instructions +enum 
class insn_ADD_SUB_MUL_funct7 : uint32_t { + ADD = 0b0000000, + MUL = 0b0000001, + SUB = 0b0100000, +}; -/// \brief funct7 constants for SLL, MULH instructions -enum insn_SLL_MULH_funct7 : uint32_t { SLL = 0b0000000, MULH = 0b0000001 }; +/// \brief funct7 constants for SLL, MULH, ROL, CLMUL, BCLR, BINV, BSET instructions +enum class insn_SLL_MULH_ROL_CLMUL_BCLR_BINV_BSET_funct7 : uint32_t { + SLL = 0b0000000, + MULH = 0b0000001, + ROL = 0b0110000, + CLMUL = 0b0000101, + BCLR = 0b0100100, + BINV = 0b0110100, + BSET = 0b0010100, +}; -/// \brief funct7 constants for SLT, MULHSU instructions -enum insn_SLT_MULHSU_funct7 : uint32_t { SLT = 0b0000000, MULHSU = 0b0000001 }; +/// \brief funct7 constants for SLT, MULHSU, SH1ADD, CLMULR instructions +enum class insn_SLT_MULHSU_SH1ADD_CLMULR_funct7 : uint32_t { + SLT = 0b0000000, + MULHSU = 0b0000001, + SH1ADD = 0b0010000, + CLMULR = 0b0000101, +}; -/// \brief funct7 constants for SLTU, MULHU instructions -enum insn_SLTU_MULHU_funct7 : uint32_t { SLTU = 0b0000000, MULHU = 0b0000001 }; +/// \brief funct7 constants for SLTU, MULHU, CLMULH instructions +enum class insn_SLTU_MULHU_CLMULH_funct7 : uint32_t { + SLTU = 0b0000000, + MULHU = 0b0000001, + CLMULH = 0b0000101, +}; -/// \brief funct7 constants for XOR, DIV instructions -enum insn_XOR_DIV_funct7 : uint32_t { +/// \brief funct7 constants for XOR, DIV, SH2ADD, XNOR, MIN instructions +enum class insn_XOR_DIV_SH2ADD_XNOR_MIN_funct7 : uint32_t { XOR = 0b0000000, DIV = 0b0000001, + SH2ADD = 0b0010000, + XNOR = 0b0100000, + MIN = 0b0000101, }; -/// \brief funct7 constants for SRL, DIVU, SRA instructions -enum insn_SRL_DIVU_SRA_funct7 : uint32_t { +/// \brief funct7 constants for SRL, SRA, DIVU, MINU, ROR, BEXT, CZERO.EQZ instructions +enum class insn_SRL_SRA_DIVU_MINU_ROR_BEXT_CZERO_EQZ_funct7 : uint32_t { SRL = 0b0000000, - DIVU = 0b0000001, SRA = 0b0100000, + DIVU = 0b0000001, + MINU = 0b0000101, + ROR = 0b0110000, + BEXT = 0b0100100, + CZERO_EQZ = 0b0000111, }; /// \brief 
funct7 constants for floating-point instructions -enum insn_FD_funct7 : uint32_t { +enum class insn_FDZfh_funct7 : uint32_t { + FADD_H = 0b0000010, FADD_S = 0b0000000, FADD_D = 0b0000001, + FSUB_H = 0b0000110, FSUB_S = 0b0000100, FSUB_D = 0b0000101, + FMUL_H = 0b0001010, FMUL_S = 0b0001000, FMUL_D = 0b0001001, + FDIV_H = 0b0001110, FDIV_S = 0b0001100, FDIV_D = 0b0001101, + FSGN_H = 0b0010010, FSGN_S = 0b0010000, FSGN_D = 0b0010001, + FMINMAX_H = 0b0010110, FMINMAX_S = 0b0010100, FMINMAX_D = 0b0010101, + FSQRT_H = 0b0101110, FSQRT_S = 0b0101100, FSQRT_D = 0b0101101, + FCMP_H = 0b1010010, FCMP_S = 0b1010000, - FCMP_D = 0b1010001 + FCMP_D = 0b1010001, }; /// \brief funct7_rs2 constants for floating-point instructions -enum insn_FD_funct7_rs2 : uint32_t { +enum class insn_FDZfh_funct7_rs2 : uint32_t { + FCVT_H_S = 0b010001000000, + FCVT_S_H = 0b010000000010, + FCVT_H_D = 0b010001000001, + FCVT_D_H = 0b010000100010, + FCVT_S_D = 0b010000000001, + FCVT_D_S = 0b010000100000, + FCVT_W_H = 0b110001000000, + FCVT_WU_H = 0b110001000001, + FCVT_L_H = 0b110001000010, + FCVT_LU_H = 0b110001000011, FCVT_W_S = 0b110000000000, FCVT_WU_S = 0b110000000001, FCVT_L_S = 0b110000000010, @@ -708,49 +797,116 @@ enum insn_FD_funct7_rs2 : uint32_t { FCVT_WU_D = 0b110000100001, FCVT_L_D = 0b110000100010, FCVT_LU_D = 0b110000100011, - FCVT_S_D = 0b010000000001, + FCVT_H_W = 0b110101000000, + FCVT_H_WU = 0b110101000001, + FCVT_H_L = 0b110101000010, + FCVT_H_LU = 0b110101000011, FCVT_S_W = 0b110100000000, FCVT_S_WU = 0b110100000001, FCVT_S_L = 0b110100000010, FCVT_S_LU = 0b110100000011, - FCVT_D_S = 0b010000100000, FCVT_D_W = 0b110100100000, FCVT_D_WU = 0b110100100001, FCVT_D_L = 0b110100100010, FCVT_D_LU = 0b110100100011, + FMV_H_X = 0b111101000000, FMV_W_X = 0b111100000000, FMV_D_X = 0b111100100000, + FMV_FCLASS_H = 0b111001000000, FMV_FCLASS_S = 0b111000000000, - FMV_FCLASS_D = 0b111000100000 + FMV_FCLASS_D = 0b111000100000, }; -/// \brief rm constants for FSGNJ floating-point instructions 
-enum insn_FSGN_funct3_000000000000 : uint32_t { J = 0b000000000000000, JN = 0b001000000000000, JX = 0b010000000000000 }; +/// \brief rm constants for FSGN floating-point instructions +enum class insn_FSGN_funct3_000000000000 : uint32_t { + J = 0b000000000000000, + JN = 0b001000000000000, + JX = 0b010000000000000, +}; /// \brief rm constants for FMIN and FMAX floating-point instructions -enum insn_FMIN_FMAX_funct3_000000000000 : uint32_t { - MIN = 0b000000000000000, - MAX = 0b001000000000000, +enum class insn_FMIN_FMAX_funct3_000000000000 : uint32_t { + FMIN = 0b000000000000000, + FMAX = 0b001000000000000, }; /// \brief rm constants for FLE, FLT, and FEQ floating-point instructions -enum insn_FCMP_funct3_000000000000 : uint32_t { +enum class insn_FCMP_funct3_000000000000 : uint32_t { LE = 0b000000000000000, LT = 0b001000000000000, EQ = 0b010000000000000, }; -/// \brief funct7 constants for OR, REM instructions -enum insn_OR_REM_funct7 : uint32_t { OR = 0b0000000, REM = 0b0000001 }; +/// \brief funct7 constants for OR, REM, SH3ADD, ORN, MAX instructions +enum class insn_OR_REM_SH3ADD_ORN_MAX_funct7 : uint32_t { + OR = 0b0000000, + REM = 0b0000001, + SH3ADD = 0b0010000, + ORN = 0b0100000, + MAX = 0b0000101, +}; + +/// \brief funct7 constants for AND, REMU, ANDN, MAXU, CZERO.NEZ instructions +enum class insn_AND_REMU_ANDN_MAXU_CZERO_NEZ_funct7 : uint32_t { + AND = 0b0000000, + REMU = 0b0000001, + ANDN = 0b0100000, + MAXU = 0b0000101, + CZERO_NEZ = 0b0000111, +}; + +/// \brief funct7 constants for ADDW, SUBW, MULW, ADD.UW instructions +enum class insn_ADDW_SUBW_MULW_ADD_UW_funct7 : uint32_t { + ADDW = 0b0000000, + SUBW = 0b0100000, + MULW = 0b0000001, + ADD_UW = 0b0000100, +}; + +/// \brief funct7 constants for SRLW, SRAW, DIVUW, RORW instructions +enum class insn_SRLW_SRAW_DIVUW_RORW_funct7 : uint32_t { + SRLW = 0b0000000, + SRAW = 0b0100000, + DIVUW = 0b0000001, + RORW = 0b0110000, +}; + +/// \brief funct7 constants for DIVW, SH2ADD.UW, ZEXT.H instructions +enum 
class insn_DIVW_SH2ADD_UW_ZEXT_H_funct7 : uint32_t { + DIVW = 0b0000001, + SH2ADD_UW = 0b0010000, + ZEXT_H = 0b0000100, +}; + +/// \brief funct7 constants for REMW, SH3ADD.UW instructions +enum class insn_REMW_SH3ADD_UW_funct7 : uint32_t { + REMW = 0b0000001, + SH3ADD_UW = 0b0010000, +}; + +/// \brief funct7 constants for SLLW, ROLW instructions +enum class insn_SLLW_ROLW_funct7 : uint32_t { + SLLW = 0b0000000, + ROLW = 0b0110000, +}; -/// \brief funct7 constants for AND, REMU instructions -enum insn_AND_REMU_funct7 : uint32_t { AND = 0b0000000, REMU = 0b0000001 }; +/// \brief funct7 constants for SLLIW, CLZW, CTZW, CPOPW instructions +enum class insn_SLLIW_CLZW_CTZW_CPOPW_funct7 : uint32_t { + SLLIW = 0b0000000, + CLZW_CTZW_CPOPW = 0b0110000, +}; -/// \brief funct7 constants for ADDW, MULW, SUBW instructions -enum insn_ADDW_MULW_SUBW_funct7 : uint32_t { ADDW = 0b0000000, MULW = 0b0000001, SUBW = 0b0100000 }; +/// \brief rs2 constants for CLZW, CTZW, CPOPW instructions +enum class insn_CLZW_CTZW_CPOPW_rs2 : uint32_t { + CLZW = 0b00000, + CTZW = 0b00001, + CPOPW = 0b00010, +}; -/// \brief funct7 constants for SRLW, DIVUW, SRAW instructions -enum insn_SRLW_DIVUW_SRAW_funct7 : uint32_t { SRLW = 0b0000000, DIVUW = 0b0000001, SRAW = 0b0100000 }; +/// \brief funct7_sr1 constants for SLLI.UW instruction +enum class insn_SLLI_UW_funct7_sr1 : uint32_t { + SLLI_UW = 0b000010, +}; /// \brief Privileged instructions, except for SFENCE.VMA, have no parameters enum class insn_privileged : uint32_t { @@ -762,13 +918,17 @@ enum class insn_privileged : uint32_t { }; /// \brief funct2 constants for FMADD, FMSUB, FNMADD, FMNSUB instructions -enum insn_FM_funct2_0000000000000000000000000 : uint32_t { +enum class insn_FM_funct2_0000000000000000000000000 : uint32_t { + H = 0b100000000000000000000000000, S = 0b000000000000000000000000000, D = 0b010000000000000000000000000 }; /// \brief rm constants for FMV and FCLASS instructions -enum insn_FMV_FCLASS_funct3_000000000000 : uint32_t { FMV 
= 0b000000000000000, FCLASS = 0b001000000000000 }; +enum class insn_FMV_FCLASS_funct3_000000000000 : uint32_t { + FMV = 0b000000000000000, + FCLASS = 0b001000000000000, +}; } // namespace cartesi diff --git a/src/soft-float.h b/src/soft-float.h index 38b0fd93c..efca02470 100644 --- a/src/soft-float.h +++ b/src/soft-float.h @@ -58,6 +58,11 @@ namespace cartesi { template static inline int clz(UINT x); +template <> +inline int clz(uint16_t x) { + return x == 0 ? 16 : (__builtin_clz(x) - 16); +} + template <> inline int clz(uint32_t x) { return x == 0 ? 32 : __builtin_clz(x); @@ -72,6 +77,11 @@ inline int clz(uint64_t x) { template struct make_long_uint {}; +template <> +struct make_long_uint { + using type = uint32_t; +}; + template <> struct make_long_uint { using type = uint64_t; @@ -152,23 +162,17 @@ template struct i_sfloat { using F_UINT = T; - /// \brief soft float constants - enum SFLOAT_constants : int { - MANT_SIZE = MANT, - EXP_SIZE = EXP, - F_SIZE = sizeof(F_UINT) * 8, - IMANT_SIZE = (F_SIZE - 2), // internal mantissa size - RND_SIZE = (IMANT_SIZE - MANT_SIZE) - }; - - /// \brief soft float masks - enum SFLOAT_masks : F_UINT { - EXP_MASK = ((static_cast(1) << EXP_SIZE) - 1), - MANT_MASK = ((static_cast(1) << MANT_SIZE) - 1), - SIGN_MASK = (static_cast(1) << (F_SIZE - 1)), - QNAN_MASK = (static_cast(1) << (MANT_SIZE - 1)), - F_QNAN = ((EXP_MASK << MANT_SIZE) | (static_cast(1) << (MANT_SIZE - 1))) - }; + /// Soft float constants + static constexpr int MANT_SIZE = MANT; + static constexpr int EXP_SIZE = EXP; + static constexpr int F_SIZE = sizeof(F_UINT) * 8; + static constexpr int IMANT_SIZE = F_SIZE - 2; // internal mantissa size + static constexpr int RND_SIZE = IMANT_SIZE - MANT_SIZE; + static constexpr F_UINT EXP_MASK = (static_cast(1) << EXP_SIZE) - 1; + static constexpr F_UINT MANT_MASK = (static_cast(1) << MANT_SIZE) - 1; + static constexpr F_UINT SIGN_MASK = static_cast(1) << (F_SIZE - 1); + static constexpr F_UINT QNAN_MASK = static_cast(1) << 
(MANT_SIZE - 1); + static constexpr F_UINT F_QNAN = (EXP_MASK << MANT_SIZE) | (static_cast(1) << (MANT_SIZE - 1)); /// \brief Packs a float to its binary representation. static F_UINT pack(uint32_t a_sign, uint32_t a_exp, F_UINT a_mant) { @@ -410,7 +414,7 @@ struct i_sfloat { } const int32_t r_exp = a_exp + b_exp - (1 << (EXP_SIZE - 1)) + 2; F_UINT r_mant_low = 0; - F_UINT r_mant = mul_u(&r_mant_low, a_mant << RND_SIZE, b_mant << (RND_SIZE + 1)); + F_UINT r_mant = mul_u(&r_mant_low, a_mant << RND_SIZE, b_mant << (RND_SIZE + 1)); r_mant |= (r_mant_low != 0); return normalize(r_sign, r_exp, r_mant, rm, pfflags); } @@ -472,7 +476,7 @@ struct i_sfloat { // multiply int32_t r_exp = a_exp + b_exp - (1 << (EXP_SIZE - 1)) + 3; F_UINT r_mant0 = 0; - F_UINT r_mant1 = mul_u(&r_mant0, a_mant << RND_SIZE, b_mant << RND_SIZE); + F_UINT r_mant1 = mul_u(&r_mant0, a_mant << RND_SIZE, b_mant << RND_SIZE); // normalize to F_SIZE - 3 if (r_mant1 < (static_cast(1) << (F_SIZE - 3))) { r_mant1 = (r_mant1 << 1) | (r_mant0 >> (F_SIZE - 1)); @@ -595,7 +599,7 @@ struct i_sfloat { } const int32_t r_exp = a_exp - b_exp + (1 << (EXP_SIZE - 1)) - 1; F_UINT r = 0; - F_UINT r_mant = divrem_u(&r, a_mant, static_cast(0), b_mant << 2); + F_UINT r_mant = divrem_u(&r, a_mant, static_cast(0), b_mant << 2); if (r != 0) { r_mant |= 1; } @@ -763,6 +767,16 @@ struct i_sfloat { if (a_exp == EXP_MASK && a_mant != 0) { a_sign = 0; // NaN is like +infinity } + ICVT_UINT r_max = 0; + if constexpr (IS_UNSIGNED) { + r_max = static_cast(a_sign) - 1; + } else { + r_max = (static_cast(1) << (ICVT_SIZE - 1)) - static_cast(a_sign ^ 1); + } + if (unlikely(a_exp == EXP_MASK)) { + *pfflags |= FFLAGS_NV_MASK; + return r_max; + } if (a_exp == 0) { a_exp = 1; } else { @@ -770,12 +784,6 @@ struct i_sfloat { } a_mant <<= RND_SIZE; a_exp = a_exp - (EXP_MASK / 2) - MANT_SIZE; - ICVT_UINT r_max = 0; - if constexpr (IS_UNSIGNED) { - r_max = static_cast(a_sign) - 1; - } else { - r_max = (static_cast(1) << (ICVT_SIZE - 1)) - 
static_cast(a_sign ^ 1); - } ICVT_UINT r = 0; if (a_exp >= 0) { if (likely(a_exp <= (ICVT_SIZE - 1 - MANT_SIZE))) { @@ -844,7 +852,7 @@ struct i_sfloat { r = -static_cast(a); } } - int32_t a_exp = (EXP_MASK / 2) + F_SIZE - 2; + int32_t a_exp = (EXP_MASK / 2) + IMANT_SIZE; // need to reduce range before generic float normalization const int l = ICVT_SIZE - clz(r) - (F_SIZE - 1); if (l > 0) { @@ -857,65 +865,68 @@ struct i_sfloat { } }; +using i_sfloat16 = i_sfloat; // Interface for half-precision floating-point using i_sfloat32 = i_sfloat; // Interface for single-precision floating-point using i_sfloat64 = i_sfloat; // Interface for double-precision floating-point -/// \brief Conversion from float32 to float64. -static NO_INLINE uint64_t sfloat_cvt_f32_f64(uint32_t a, uint32_t *pfflags) { - uint32_t a_sign = 0; - int32_t a_exp = 0; - i_sfloat64::F_UINT a_mant = i_sfloat32::unpack(&a_sign, &a_exp, a); - if (unlikely(a_exp == 0xff)) { - if (a_mant != 0) { // NaN - if (i_sfloat32::issignan(a)) { - *pfflags |= FFLAGS_NV_MASK; - } - return i_sfloat64::F_QNAN; - } // infinity - return i_sfloat64::pack(a_sign, i_sfloat64::EXP_MASK, 0); - } - if (a_exp == 0) { - if (a_mant == 0) { // zero - return i_sfloat64::pack(a_sign, 0, 0); +/// \brief Conversion between float formats +template +static NO_INLINE SFLOAT_DST::F_UINT sfloat_cvt_f_f(typename SFLOAT_SRC::F_UINT a, FRM_modes rm, uint32_t *pfflags) { + if constexpr (SFLOAT_DST::F_SIZE > SFLOAT_SRC::F_SIZE) { // Widen operation + uint32_t a_sign = 0; + int32_t a_exp = 0; + typename SFLOAT_DST::F_UINT a_mant = SFLOAT_SRC::unpack(&a_sign, &a_exp, a); + if (unlikely(a_exp == SFLOAT_SRC::EXP_MASK)) { + if (a_mant != 0) { // NaN + if (SFLOAT_SRC::issignan(a)) { + *pfflags |= FFLAGS_NV_MASK; + } + return SFLOAT_DST::F_QNAN; + } // infinity + return SFLOAT_DST::pack(a_sign, SFLOAT_DST::EXP_MASK, 0); } - a_mant = i_sfloat32::mant_normalize_subnormal(&a_exp, a_mant); - } - // convert the exponent value - a_exp = a_exp - 0x7f + 
(static_cast(i_sfloat64::EXP_MASK) / 2); - // shift the mantissa - a_mant <<= i_sfloat64::MANT_SIZE - 23; - // we assume the target float is large enough to that no - // normalization is necessary - return i_sfloat64::pack(a_sign, a_exp, a_mant); -} - -/// \brief Conversion from float64 to float32. -static NO_INLINE uint32_t sfloat_cvt_f64_f32(uint64_t a, FRM_modes rm, uint32_t *pfflags) { - uint32_t a_sign = 0; - int32_t a_exp = 0; - i_sfloat64::F_UINT a_mant = i_sfloat64::unpack(&a_sign, &a_exp, a); - if (unlikely(a_exp == i_sfloat64::EXP_MASK)) { - if (a_mant != 0) { // nan - if (i_sfloat64::issignan(a)) { - *pfflags |= FFLAGS_NV_MASK; + if (a_exp == 0) { + if (a_mant == 0) { // zero + return SFLOAT_DST::pack(a_sign, 0, 0); + } + a_mant = SFLOAT_SRC::mant_normalize_subnormal(&a_exp, a_mant); + } + // convert the exponent value + a_exp += static_cast(SFLOAT_DST::EXP_MASK - SFLOAT_SRC::EXP_MASK) / 2; + // shift the mantissa + a_mant <<= SFLOAT_DST::MANT_SIZE - SFLOAT_SRC::MANT_SIZE; + // we assume the target float is large enough so that no + // normalization is necessary + return SFLOAT_DST::pack(a_sign, a_exp, a_mant); + } else if constexpr (SFLOAT_DST::F_SIZE < SFLOAT_SRC::F_SIZE) { // Narrow operation + uint32_t a_sign = 0; + int32_t a_exp = 0; + auto a_mant = SFLOAT_SRC::unpack(&a_sign, &a_exp, a); + if (unlikely(a_exp == SFLOAT_SRC::EXP_MASK)) { + if (a_mant != 0) { // nan + if (SFLOAT_SRC::issignan(a)) { + *pfflags |= FFLAGS_NV_MASK; + } + return SFLOAT_DST::F_QNAN; + } // infinity + return SFLOAT_DST::pack(a_sign, SFLOAT_DST::EXP_MASK, 0); + } + if (a_exp == 0) { + if (a_mant == 0) { // zero + return SFLOAT_DST::pack(a_sign, 0, 0); } - return i_sfloat32::F_QNAN; - } // infinity - return i_sfloat32::pack(a_sign, 0xff, 0); - } - if (a_exp == 0) { - if (a_mant == 0) { // zero - return i_sfloat32::pack(a_sign, 0, 0); + SFLOAT_SRC::mant_normalize_subnormal(&a_exp, a_mant); + } else { + a_mant |= static_cast(1) << SFLOAT_SRC::MANT_SIZE; } - 
i_sfloat64::mant_normalize_subnormal(&a_exp, a_mant); + // convert the exponent value + a_exp += static_cast(SFLOAT_DST::EXP_MASK - SFLOAT_SRC::EXP_MASK) / 2; + // shift the mantissa + a_mant = SFLOAT_SRC::mant_rshift_rnd(a_mant, SFLOAT_SRC::MANT_SIZE - SFLOAT_DST::IMANT_SIZE); + return SFLOAT_DST::normalize(a_sign, a_exp, static_cast(a_mant), rm, pfflags); } else { - a_mant |= static_cast(1) << i_sfloat64::MANT_SIZE; + return a; } - // convert the exponent value - a_exp = a_exp - (static_cast(i_sfloat64::EXP_MASK) / 2) + 0x7f; - // shift the mantissa - a_mant = i_sfloat64::mant_rshift_rnd(a_mant, i_sfloat64::MANT_SIZE - (32 - 2)); - return i_sfloat32::normalize(a_sign, a_exp, a_mant, rm, pfflags); } } // namespace cartesi diff --git a/src/state-access.h b/src/state-access.h index 93a1c11a0..e21a79f43 100644 --- a/src/state-access.h +++ b/src/state-access.h @@ -58,13 +58,19 @@ class state_access : public i_accept_scoped_notes, public i_accept_counters { - // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) - machine &m_m; ///< Associated machine + // NOLINTBEGIN(cppcoreguidelines-avoid-const-or-ref-data-members) + //??(edubart): Storing reference to the processor state removes an extra indirection when accessing registers, + // however other indirections are happening on operations that access the processor state through the machine state + // (eg. TLB write). We should rethink in the future how to make state accessor use a single reference again + // without causing extra indirections. + processor_state &m_s; ///< Associated processor state + machine &m_m; ///< Associated machine + // NOLINTEND(cppcoreguidelines-avoid-const-or-ref-data-members) public: /// \brief Constructor from machine state. /// \param m Pointer to machine state. 
- explicit state_access(machine &m) : m_m(m) { + explicit state_access(machine &m) : m_s(m.get_state()), m_m(m) { ; } @@ -75,44 +81,44 @@ class state_access : friend i_state_access; uint64_t do_read_x(int i) const { - return m_m.get_state().shadow.registers.x[i]; + return m_s.shadow.registers.x[i]; } void do_write_x(int i, uint64_t val) const { assert(i != 0); - m_m.get_state().shadow.registers.x[i] = val; + m_s.shadow.registers.x[i] = val; } uint64_t do_read_f(int i) const { - return m_m.get_state().shadow.registers.f[i]; + return m_s.shadow.registers.f[i]; } void do_write_f(int i, uint64_t val) const { - m_m.get_state().shadow.registers.f[i] = val; + m_s.shadow.registers.f[i] = val; } uint64_t do_read_pc() const { - return m_m.get_state().shadow.registers.pc; + return m_s.shadow.registers.pc; } void do_write_pc(uint64_t val) const { - m_m.get_state().shadow.registers.pc = val; + m_s.shadow.registers.pc = val; } uint64_t do_read_fcsr() const { - return m_m.get_state().shadow.registers.fcsr; + return m_s.shadow.registers.fcsr; } void do_write_fcsr(uint64_t val) const { - m_m.get_state().shadow.registers.fcsr = val; + m_s.shadow.registers.fcsr = val; } uint64_t do_read_icycleinstret() const { - return m_m.get_state().shadow.registers.icycleinstret; + return m_s.shadow.registers.icycleinstret; } void do_write_icycleinstret(uint64_t val) const { - m_m.get_state().shadow.registers.icycleinstret = val; + m_s.shadow.registers.icycleinstret = val; } uint64_t do_read_mvendorid() const { // NOLINT(readability-convert-member-functions-to-static) @@ -128,279 +134,279 @@ class state_access : } uint64_t do_read_mcycle() const { - return m_m.get_state().shadow.registers.mcycle; + return m_s.shadow.registers.mcycle; } void do_write_mcycle(uint64_t val) const { - m_m.get_state().shadow.registers.mcycle = val; + m_s.shadow.registers.mcycle = val; } uint64_t do_read_mstatus() const { - return m_m.get_state().shadow.registers.mstatus; + return m_s.shadow.registers.mstatus; } void 
do_write_mstatus(uint64_t val) const { - m_m.get_state().shadow.registers.mstatus = val; + m_s.shadow.registers.mstatus = val; } uint64_t do_read_menvcfg() const { - return m_m.get_state().shadow.registers.menvcfg; + return m_s.shadow.registers.menvcfg; } void do_write_menvcfg(uint64_t val) const { - m_m.get_state().shadow.registers.menvcfg = val; + m_s.shadow.registers.menvcfg = val; } uint64_t do_read_mtvec() const { - return m_m.get_state().shadow.registers.mtvec; + return m_s.shadow.registers.mtvec; } void do_write_mtvec(uint64_t val) const { - m_m.get_state().shadow.registers.mtvec = val; + m_s.shadow.registers.mtvec = val; } uint64_t do_read_mscratch() const { - return m_m.get_state().shadow.registers.mscratch; + return m_s.shadow.registers.mscratch; } void do_write_mscratch(uint64_t val) const { - m_m.get_state().shadow.registers.mscratch = val; + m_s.shadow.registers.mscratch = val; } uint64_t do_read_mepc() const { - return m_m.get_state().shadow.registers.mepc; + return m_s.shadow.registers.mepc; } void do_write_mepc(uint64_t val) const { - m_m.get_state().shadow.registers.mepc = val; + m_s.shadow.registers.mepc = val; } uint64_t do_read_mcause() const { - return m_m.get_state().shadow.registers.mcause; + return m_s.shadow.registers.mcause; } void do_write_mcause(uint64_t val) const { - m_m.get_state().shadow.registers.mcause = val; + m_s.shadow.registers.mcause = val; } uint64_t do_read_mtval() const { - return m_m.get_state().shadow.registers.mtval; + return m_s.shadow.registers.mtval; } void do_write_mtval(uint64_t val) const { - m_m.get_state().shadow.registers.mtval = val; + m_s.shadow.registers.mtval = val; } uint64_t do_read_misa() const { - return m_m.get_state().shadow.registers.misa; + return m_s.shadow.registers.misa; } void do_write_misa(uint64_t val) const { - m_m.get_state().shadow.registers.misa = val; + m_s.shadow.registers.misa = val; } uint64_t do_read_mie() const { - return m_m.get_state().shadow.registers.mie; + return 
m_s.shadow.registers.mie; } void do_write_mie(uint64_t val) const { - m_m.get_state().shadow.registers.mie = val; + m_s.shadow.registers.mie = val; } uint64_t do_read_mip() const { - return m_m.get_state().shadow.registers.mip; + return m_s.shadow.registers.mip; } void do_write_mip(uint64_t val) const { - m_m.get_state().shadow.registers.mip = val; + m_s.shadow.registers.mip = val; } uint64_t do_read_medeleg() const { - return m_m.get_state().shadow.registers.medeleg; + return m_s.shadow.registers.medeleg; } void do_write_medeleg(uint64_t val) const { - m_m.get_state().shadow.registers.medeleg = val; + m_s.shadow.registers.medeleg = val; } uint64_t do_read_mideleg() const { - return m_m.get_state().shadow.registers.mideleg; + return m_s.shadow.registers.mideleg; } void do_write_mideleg(uint64_t val) const { - m_m.get_state().shadow.registers.mideleg = val; + m_s.shadow.registers.mideleg = val; } uint64_t do_read_mcounteren() const { - return m_m.get_state().shadow.registers.mcounteren; + return m_s.shadow.registers.mcounteren; } void do_write_mcounteren(uint64_t val) const { - m_m.get_state().shadow.registers.mcounteren = val; + m_s.shadow.registers.mcounteren = val; } uint64_t do_read_senvcfg() const { - return m_m.get_state().shadow.registers.senvcfg; + return m_s.shadow.registers.senvcfg; } void do_write_senvcfg(uint64_t val) const { - m_m.get_state().shadow.registers.senvcfg = val; + m_s.shadow.registers.senvcfg = val; } uint64_t do_read_stvec() const { - return m_m.get_state().shadow.registers.stvec; + return m_s.shadow.registers.stvec; } void do_write_stvec(uint64_t val) const { - m_m.get_state().shadow.registers.stvec = val; + m_s.shadow.registers.stvec = val; } uint64_t do_read_sscratch() const { - return m_m.get_state().shadow.registers.sscratch; + return m_s.shadow.registers.sscratch; } void do_write_sscratch(uint64_t val) const { - m_m.get_state().shadow.registers.sscratch = val; + m_s.shadow.registers.sscratch = val; } uint64_t do_read_sepc() const { - 
return m_m.get_state().shadow.registers.sepc; + return m_s.shadow.registers.sepc; } void do_write_sepc(uint64_t val) const { - m_m.get_state().shadow.registers.sepc = val; + m_s.shadow.registers.sepc = val; } uint64_t do_read_scause() const { - return m_m.get_state().shadow.registers.scause; + return m_s.shadow.registers.scause; } void do_write_scause(uint64_t val) const { - m_m.get_state().shadow.registers.scause = val; + m_s.shadow.registers.scause = val; } uint64_t do_read_stval() const { - return m_m.get_state().shadow.registers.stval; + return m_s.shadow.registers.stval; } void do_write_stval(uint64_t val) const { - m_m.get_state().shadow.registers.stval = val; + m_s.shadow.registers.stval = val; } uint64_t do_read_satp() const { - return m_m.get_state().shadow.registers.satp; + return m_s.shadow.registers.satp; } void do_write_satp(uint64_t val) const { - m_m.get_state().shadow.registers.satp = val; + m_s.shadow.registers.satp = val; } uint64_t do_read_scounteren() const { - return m_m.get_state().shadow.registers.scounteren; + return m_s.shadow.registers.scounteren; } void do_write_scounteren(uint64_t val) const { - m_m.get_state().shadow.registers.scounteren = val; + m_s.shadow.registers.scounteren = val; } uint64_t do_read_ilrsc() const { - return m_m.get_state().shadow.registers.ilrsc; + return m_s.shadow.registers.ilrsc; } void do_write_ilrsc(uint64_t val) const { - m_m.get_state().shadow.registers.ilrsc = val; + m_s.shadow.registers.ilrsc = val; } uint64_t do_read_iprv() const { - return m_m.get_state().shadow.registers.iprv; + return m_s.shadow.registers.iprv; } void do_write_iprv(uint64_t val) const { - m_m.get_state().shadow.registers.iprv = val; + m_s.shadow.registers.iprv = val; } uint64_t do_read_iflags_X() const { - return m_m.get_state().shadow.registers.iflags.X; + return m_s.shadow.registers.iflags.X; } void do_write_iflags_X(uint64_t val) const { - m_m.get_state().shadow.registers.iflags.X = val; + m_s.shadow.registers.iflags.X = val; } 
uint64_t do_read_iflags_Y() const { - return m_m.get_state().shadow.registers.iflags.Y; + return m_s.shadow.registers.iflags.Y; } void do_write_iflags_Y(uint64_t val) const { - m_m.get_state().shadow.registers.iflags.Y = val; + m_s.shadow.registers.iflags.Y = val; } uint64_t do_read_iflags_H() const { - return m_m.get_state().shadow.registers.iflags.H; + return m_s.shadow.registers.iflags.H; } void do_write_iflags_H(uint64_t val) const { - m_m.get_state().shadow.registers.iflags.H = val; + m_s.shadow.registers.iflags.H = val; } uint64_t do_read_iunrep() const { - return m_m.get_state().shadow.registers.iunrep; + return m_s.shadow.registers.iunrep; } void do_write_iunrep(uint64_t val) const { - m_m.get_state().shadow.registers.iunrep = val; + m_s.shadow.registers.iunrep = val; } uint64_t do_read_clint_mtimecmp() const { - return m_m.get_state().shadow.registers.clint.mtimecmp; + return m_s.shadow.registers.clint.mtimecmp; } void do_write_clint_mtimecmp(uint64_t val) const { - m_m.get_state().shadow.registers.clint.mtimecmp = val; + m_s.shadow.registers.clint.mtimecmp = val; } uint64_t do_read_plic_girqpend() const { - return m_m.get_state().shadow.registers.plic.girqpend; + return m_s.shadow.registers.plic.girqpend; } void do_write_plic_girqpend(uint64_t val) const { - m_m.get_state().shadow.registers.plic.girqpend = val; + m_s.shadow.registers.plic.girqpend = val; } uint64_t do_read_plic_girqsrvd() const { - return m_m.get_state().shadow.registers.plic.girqsrvd; + return m_s.shadow.registers.plic.girqsrvd; } void do_write_plic_girqsrvd(uint64_t val) const { - m_m.get_state().shadow.registers.plic.girqsrvd = val; + m_s.shadow.registers.plic.girqsrvd = val; } uint64_t do_read_htif_fromhost() const { - return m_m.get_state().shadow.registers.htif.fromhost; + return m_s.shadow.registers.htif.fromhost; } void do_write_htif_fromhost(uint64_t val) const { - m_m.get_state().shadow.registers.htif.fromhost = val; + m_s.shadow.registers.htif.fromhost = val; } uint64_t 
do_read_htif_tohost() const { - return m_m.get_state().shadow.registers.htif.tohost; + return m_s.shadow.registers.htif.tohost; } void do_write_htif_tohost(uint64_t val) const { - m_m.get_state().shadow.registers.htif.tohost = val; + m_s.shadow.registers.htif.tohost = val; } uint64_t do_read_htif_ihalt() const { - return m_m.get_state().shadow.registers.htif.ihalt; + return m_s.shadow.registers.htif.ihalt; } uint64_t do_read_htif_iconsole() const { - return m_m.get_state().shadow.registers.htif.iconsole; + return m_s.shadow.registers.htif.iconsole; } uint64_t do_read_htif_iyield() const { - return m_m.get_state().shadow.registers.htif.iyield; + return m_s.shadow.registers.htif.iyield; } bool do_read_memory(uint64_t paddr, unsigned char *data, uint64_t length) const { @@ -456,17 +462,17 @@ class state_access : template uint64_t do_read_tlb_vaddr_page(uint64_t slot_index) const { - return m_m.get_state().penumbra.tlb[SET][slot_index].vaddr_page; + return m_s.penumbra.tlb[SET][slot_index].vaddr_page; } template host_addr do_read_tlb_vf_offset(uint64_t slot_index) const { - return m_m.get_state().penumbra.tlb[SET][slot_index].vh_offset; + return m_s.penumbra.tlb[SET][slot_index].vh_offset; } template uint64_t do_read_tlb_pma_index(uint64_t slot_index) const { - return m_m.get_state().shadow.tlb[SET][slot_index].pma_index; + return m_s.shadow.tlb[SET][slot_index].pma_index; } template diff --git a/tests/lua/cartesi-machine-tests.lua b/tests/lua/cartesi-machine-tests.lua index 77d11bf1a..d1e9a1bfe 100755 --- a/tests/lua/cartesi-machine-tests.lua +++ b/tests/lua/cartesi-machine-tests.lua @@ -26,273 +26,391 @@ local jsonrpc -- Tests Cases -- format {"ram_image_file", number_of_cycles, halt_payload} local riscv_tests = { - { "rv64mi-p-access.bin", 140 }, - { "rv64mi-p-breakpoint.bin", 111 }, - { "rv64mi-p-csr.bin", 293 }, - { "rv64mi-p-illegal.bin", 357 }, - { "rv64mi-p-ld-misaligned.bin", 365 }, - { "rv64mi-p-lh-misaligned.bin", 117 }, - { "rv64mi-p-lw-misaligned.bin", 177 
}, - { "rv64mi-p-ma_addr.bin", 738 }, - { "rv64mi-p-ma_fetch.bin", 134 }, - { "rv64mi-p-mcsr.bin", 99 }, - { "rv64mi-p-sbreak.bin", 107 }, - { "rv64mi-p-scall.bin", 91 }, - { "rv64mi-p-sd-misaligned.bin", 385 }, - { "rv64mi-p-sh-misaligned.bin", 125 }, - { "rv64mi-p-sw-misaligned.bin", 181 }, - { "rv64si-p-csr.bin", 192 }, - { "rv64si-p-dirty.bin", 173 }, - { "rv64si-p-icache-alias.bin", 223 }, - { "rv64si-p-ma_fetch.bin", 121 }, - { "rv64si-p-sbreak.bin", 101 }, - { "rv64si-p-scall.bin", 108 }, - { "rv64si-p-wfi.bin", 87 }, - { "rv64ua-p-amoadd_d.bin", 104 }, - { "rv64ua-p-amoadd_w.bin", 101 }, - { "rv64ua-p-amoand_d.bin", 101 }, - { "rv64ua-p-amoand_w.bin", 100 }, - { "rv64ua-p-amomax_d.bin", 100 }, - { "rv64ua-p-amomax_w.bin", 100 }, - { "rv64ua-p-amomaxu_d.bin", 100 }, - { "rv64ua-p-amomaxu_w.bin", 100 }, - { "rv64ua-p-amomin_d.bin", 100 }, - { "rv64ua-p-amomin_w.bin", 100 }, - { "rv64ua-p-amominu_d.bin", 100 }, - { "rv64ua-p-amominu_w.bin", 100 }, - { "rv64ua-p-amoor_d.bin", 99 }, - { "rv64ua-p-amoor_w.bin", 99 }, - { "rv64ua-p-amoswap_d.bin", 101 }, - { "rv64ua-p-amoswap_w.bin", 100 }, - { "rv64ua-p-amoxor_d.bin", 102 }, - { "rv64ua-p-amoxor_w.bin", 104 }, - { "rv64ua-p-lrsc.bin", 6276 }, - { "rv64ua-v-amoadd_d.bin", 12868 }, - { "rv64ua-v-amoadd_w.bin", 12865 }, - { "rv64ua-v-amoand_d.bin", 12877 }, - { "rv64ua-v-amoand_w.bin", 12876 }, - { "rv64ua-v-amomax_d.bin", 12858 }, - { "rv64ua-v-amomax_w.bin", 12858 }, - { "rv64ua-v-amomaxu_d.bin", 12858 }, - { "rv64ua-v-amomaxu_w.bin", 12858 }, - { "rv64ua-v-amomin_d.bin", 12858 }, - { "rv64ua-v-amomin_w.bin", 12858 }, - { "rv64ua-v-amominu_d.bin", 12864 }, - { "rv64ua-v-amominu_w.bin", 12864 }, - { "rv64ua-v-amoor_d.bin", 12857 }, - { "rv64ua-v-amoor_w.bin", 12857 }, - { "rv64ua-v-amoswap_d.bin", 12877 }, - { "rv64ua-v-amoswap_w.bin", 12876 }, - { "rv64ua-v-amoxor_d.bin", 12860 }, - { "rv64ua-v-amoxor_w.bin", 12862 }, - { "rv64ua-v-lrsc.bin", 19034 }, - { "rv64ui-p-add.bin", 505 }, - { "rv64ui-p-addi.bin", 280 }, 
- { "rv64ui-p-addiw.bin", 277 }, - { "rv64ui-p-addw.bin", 500 }, - { "rv64ui-p-and.bin", 580 }, - { "rv64ui-p-andi.bin", 251 }, - { "rv64ui-p-auipc.bin", 94 }, - { "rv64ui-p-beq.bin", 326 }, - { "rv64ui-p-bge.bin", 344 }, - { "rv64ui-p-bgeu.bin", 434 }, - { "rv64ui-p-blt.bin", 326 }, - { "rv64ui-p-bltu.bin", 412 }, - { "rv64ui-p-bne.bin", 326 }, - { "rv64ui-p-fence_i.bin", 336 }, - { "rv64ui-p-jal.bin", 90 }, - { "rv64ui-p-jalr.bin", 150 }, - { "rv64ui-p-lb.bin", 288 }, - { "rv64ui-p-lbu.bin", 288 }, - { "rv64ui-p-ld.bin", 470 }, - { "rv64ui-p-lh.bin", 304 }, - { "rv64ui-p-lhu.bin", 313 }, - { "rv64ui-p-lui.bin", 100 }, - { "rv64ui-p-lw.bin", 318 }, - { "rv64ui-p-lwu.bin", 352 }, - { "rv64ui-p-or.bin", 613 }, - { "rv64ui-p-ori.bin", 244 }, - { "rv64ui-p-sb.bin", 489 }, - { "rv64ui-p-sh.bin", 542 }, - { "rv64ui-p-sw.bin", 549 }, - { "rv64ui-p-sd.bin", 661 }, - { "rv64ui-p-simple.bin", 76 }, - { "rv64ui-p-sll.bin", 575 }, - { "rv64ui-p-slli.bin", 305 }, - { "rv64ui-p-slliw.bin", 312 }, - { "rv64ui-p-sllw.bin", 575 }, - { "rv64ui-p-slt.bin", 494 }, - { "rv64ui-p-slti.bin", 272 }, - { "rv64ui-p-sltiu.bin", 272 }, - { "rv64ui-p-sltu.bin", 511 }, - { "rv64ui-p-sra.bin", 547 }, - { "rv64ui-p-srai.bin", 293 }, - { "rv64ui-p-sraiw.bin", 339 }, - { "rv64ui-p-sraw.bin", 587 }, - { "rv64ui-p-srl.bin", 589 }, - { "rv64ui-p-srli.bin", 314 }, - { "rv64ui-p-srliw.bin", 321 }, - { "rv64ui-p-srlw.bin", 581 }, - { "rv64ui-p-sub.bin", 496 }, - { "rv64ui-p-subw.bin", 492 }, - { "rv64ui-p-xor.bin", 608 }, - { "rv64ui-p-xori.bin", 242 }, - { "rv64ui-v-add.bin", 7908 }, - { "rv64ui-v-addi.bin", 7683 }, - { "rv64ui-v-addiw.bin", 7680 }, - { "rv64ui-v-addw.bin", 7903 }, - { "rv64ui-v-and.bin", 7983 }, - { "rv64ui-v-andi.bin", 7654 }, - { "rv64ui-v-auipc.bin", 7496 }, - { "rv64ui-v-beq.bin", 7729 }, - { "rv64ui-v-bge.bin", 7746 }, - { "rv64ui-v-bgeu.bin", 7837 }, - { "rv64ui-v-blt.bin", 7729 }, - { "rv64ui-v-bltu.bin", 7815 }, - { "rv64ui-v-bne.bin", 7729 }, - { "rv64ui-v-fence_i.bin", 13125 
}, - { "rv64ui-v-jal.bin", 7493 }, - { "rv64ui-v-jalr.bin", 7553 }, - { "rv64ui-v-lb.bin", 13535 }, - { "rv64ui-v-lbu.bin", 13535 }, - { "rv64ui-v-ld.bin", 13717 }, - { "rv64ui-v-lh.bin", 13551 }, - { "rv64ui-v-lhu.bin", 13560 }, - { "rv64ui-v-lui.bin", 7503 }, - { "rv64ui-v-lw.bin", 13565 }, - { "rv64ui-v-lwu.bin", 13599 }, - { "rv64ui-v-or.bin", 8016 }, - { "rv64ui-v-ori.bin", 7647 }, - { "rv64ui-v-sb.bin", 13247 }, - { "rv64ui-v-sd.bin", 19263 }, - { "rv64ui-v-sh.bin", 13300 }, - { "rv64ui-v-simple.bin", 7479 }, - { "rv64ui-v-sll.bin", 13822 }, - { "rv64ui-v-slli.bin", 7708 }, - { "rv64ui-v-slliw.bin", 7715 }, - { "rv64ui-v-sllw.bin", 13822 }, - { "rv64ui-v-slt.bin", 7897 }, - { "rv64ui-v-slti.bin", 7675 }, - { "rv64ui-v-sltiu.bin", 7675 }, - { "rv64ui-v-sltu.bin", 7914 }, - { "rv64ui-v-sra.bin", 7950 }, - { "rv64ui-v-srai.bin", 7696 }, - { "rv64ui-v-sraiw.bin", 7742 }, - { "rv64ui-v-sraw.bin", 13834 }, - { "rv64ui-v-srl.bin", 13836 }, - { "rv64ui-v-srli.bin", 7717 }, - { "rv64ui-v-srliw.bin", 7724 }, - { "rv64ui-v-srlw.bin", 13828 }, - { "rv64ui-v-sub.bin", 7899 }, - { "rv64ui-v-subw.bin", 7895 }, - { "rv64ui-v-sw.bin", 13307 }, - { "rv64ui-v-xor.bin", 8011 }, - { "rv64ui-v-xori.bin", 7645 }, - { "rv64um-p-div.bin", 136 }, - { "rv64um-p-divu.bin", 142 }, - { "rv64um-p-divuw.bin", 134 }, - { "rv64um-p-divw.bin", 131 }, - { "rv64um-p-mul.bin", 495 }, - { "rv64um-p-mulh.bin", 503 }, - { "rv64um-p-mulhsu.bin", 503 }, - { "rv64um-p-mulhu.bin", 535 }, - { "rv64um-p-mulw.bin", 434 }, - { "rv64um-p-rem.bin", 135 }, - { "rv64um-p-remu.bin", 136 }, - { "rv64um-p-remuw.bin", 131 }, - { "rv64um-p-remw.bin", 137 }, - { "rv64um-v-div.bin", 7539 }, - { "rv64um-v-divu.bin", 7545 }, - { "rv64um-v-divuw.bin", 7537 }, - { "rv64um-v-divw.bin", 7534 }, - { "rv64um-v-mul.bin", 7898 }, - { "rv64um-v-mulh.bin", 7906 }, - { "rv64um-v-mulhsu.bin", 7906 }, - { "rv64um-v-mulhu.bin", 7938 }, - { "rv64um-v-mulw.bin", 7837 }, - { "rv64um-v-rem.bin", 7538 }, - { "rv64um-v-remu.bin", 7539 }, - 
{ "rv64um-v-remuw.bin", 7534 }, - { "rv64um-v-remw.bin", 7540 }, - -- C extension tests - { "rv64uc-p-rvc.bin", 295 }, - { "rv64uc-v-rvc.bin", 18913 }, - -- float tests - { "rv64uf-p-fadd.bin", 210 }, - { "rv64uf-p-fclass.bin", 147 }, - { "rv64uf-p-fcmp.bin", 260 }, - { "rv64uf-p-fcvt.bin", 152 }, - { "rv64uf-p-fcvt_w.bin", 550 }, - { "rv64uf-p-fdiv.bin", 171 }, - { "rv64uf-p-fmadd.bin", 236 }, - { "rv64uf-p-fmin.bin", 314 }, - { "rv64uf-p-ldst.bin", 106 }, - { "rv64uf-p-move.bin", 255 }, - { "rv64uf-p-recoding.bin", 113 }, - { "rv64uf-v-fadd.bin", 13455 }, - { "rv64uf-v-fclass.bin", 7548 }, - { "rv64uf-v-fcmp.bin", 13505 }, - { "rv64uf-v-fcvt.bin", 13397 }, - { "rv64uf-v-fcvt_w.bin", 19639 }, - { "rv64uf-v-fdiv.bin", 13416 }, - { "rv64uf-v-fmadd.bin", 13481 }, - { "rv64uf-v-fmin.bin", 13559 }, - { "rv64uf-v-ldst.bin", 12896 }, - { "rv64uf-v-move.bin", 7656 }, - { "rv64uf-v-recoding.bin", 13358 }, - { "rv64ud-p-fadd.bin", 210 }, - { "rv64ud-p-fclass.bin", 153 }, - { "rv64ud-p-fcmp.bin", 260 }, - { "rv64ud-p-fcvt.bin", 192 }, - { "rv64ud-p-fcvt_w.bin", 610 }, - { "rv64ud-p-fdiv.bin", 184 }, - { "rv64ud-p-fmadd.bin", 236 }, - { "rv64ud-p-fmin.bin", 314 }, - { "rv64ud-p-ldst.bin", 125 }, - { "rv64ud-p-move.bin", 1030 }, - { "rv64ud-p-recoding.bin", 138 }, - { "rv64ud-p-structural.bin", 203 }, - { "rv64ud-v-fadd.bin", 13455 }, - { "rv64ud-v-fclass.bin", 7554 }, - { "rv64ud-v-fcmp.bin", 13505 }, - { "rv64ud-v-fcvt.bin", 13437 }, - { "rv64ud-v-fcvt_w.bin", 19699 }, - { "rv64ud-v-fdiv.bin", 13429 }, - { "rv64ud-v-fmadd.bin", 13481 }, - { "rv64ud-v-fmin.bin", 13559 }, - { "rv64ud-v-ldst.bin", 12891 }, - { "rv64ud-v-move.bin", 14275 }, - { "rv64ud-v-recoding.bin", 12934 }, - { "rv64ud-v-structural.bin", 7604 }, - { "fclass.bin", 453 }, - { "fcvt.bin", 17610 }, - { "fcmp.bin", 46783 }, - { "funary.bin", 2830 }, - { "fbinary_s.bin", 204280 }, - { "fbinary_d.bin", 204280 }, - { "fternary_s.bin", 216780 }, - { "fternary_d.bin", 216780 }, + { "rv64mi-p-breakpoint.bin", 122 }, + 
{ "rv64mi-p-csr.bin", 281 }, + { "rv64mi-p-illegal.bin", 361 }, + { "rv64mi-p-instret_overflow.bin", 98 }, + { "rv64mi-p-ld-misaligned.bin", 369 }, + { "rv64mi-p-lh-misaligned.bin", 121 }, + { "rv64mi-p-lw-misaligned.bin", 181 }, + { "rv64mi-p-ma_addr.bin", 744 }, + { "rv64mi-p-ma_fetch.bin", 127 }, + { "rv64mi-p-mcsr.bin", 103 }, + { "rv64mi-p-sbreak.bin", 111 }, + { "rv64mi-p-scall.bin", 95 }, + { "rv64mi-p-sd-misaligned.bin", 389 }, + { "rv64mi-p-sh-misaligned.bin", 129 }, + { "rv64mi-p-sw-misaligned.bin", 185 }, + { "rv64mi-p-zicntr.bin", 120 }, + { "rv64si-p-csr.bin", 196 }, + { "rv64si-p-dirty.bin", 177 }, + { "rv64si-p-icache-alias.bin", 227 }, + { "rv64si-p-ma_fetch.bin", 125 }, + { "rv64si-p-sbreak.bin", 105 }, + { "rv64si-p-scall.bin", 112 }, + { "rv64si-p-wfi.bin", 91 }, + { "rv64ua-p-amoadd_d.bin", 108 }, + { "rv64ua-p-amoadd_w.bin", 105 }, + { "rv64ua-p-amoand_d.bin", 105 }, + { "rv64ua-p-amoand_w.bin", 104 }, + { "rv64ua-p-amomax_d.bin", 104 }, + { "rv64ua-p-amomax_w.bin", 118 }, + { "rv64ua-p-amomaxu_d.bin", 104 }, + { "rv64ua-p-amomaxu_w.bin", 118 }, + { "rv64ua-p-amomin_d.bin", 104 }, + { "rv64ua-p-amomin_w.bin", 118 }, + { "rv64ua-p-amominu_d.bin", 104 }, + { "rv64ua-p-amominu_w.bin", 118 }, + { "rv64ua-p-amoor_d.bin", 103 }, + { "rv64ua-p-amoor_w.bin", 103 }, + { "rv64ua-p-amoswap_d.bin", 105 }, + { "rv64ua-p-amoswap_w.bin", 104 }, + { "rv64ua-p-amoxor_d.bin", 106 }, + { "rv64ua-p-amoxor_w.bin", 108 }, + { "rv64ua-p-lrsc.bin", 6282 }, + { "rv64ua-v-amoadd_d.bin", 13282 }, + { "rv64ua-v-amoadd_w.bin", 13279 }, + { "rv64ua-v-amoand_d.bin", 13291 }, + { "rv64ua-v-amoand_w.bin", 13290 }, + { "rv64ua-v-amomax_d.bin", 13272 }, + { "rv64ua-v-amomax_w.bin", 13286 }, + { "rv64ua-v-amomaxu_d.bin", 13272 }, + { "rv64ua-v-amomaxu_w.bin", 13286 }, + { "rv64ua-v-amomin_d.bin", 13272 }, + { "rv64ua-v-amomin_w.bin", 13304 }, + { "rv64ua-v-amominu_d.bin", 13278 }, + { "rv64ua-v-amominu_w.bin", 13292 }, + { "rv64ua-v-amoor_d.bin", 13271 }, + { 
"rv64ua-v-amoor_w.bin", 13271 }, + { "rv64ua-v-amoswap_d.bin", 13291 }, + { "rv64ua-v-amoswap_w.bin", 13290 }, + { "rv64ua-v-amoxor_d.bin", 13274 }, + { "rv64ua-v-amoxor_w.bin", 13276 }, + { "rv64ua-v-lrsc.bin", 19450 }, + { "rv64uc-p-rvc.bin", 299 }, + { "rv64uc-v-rvc.bin", 19360 }, + { "rv64ud-p-fadd.bin", 214 }, + { "rv64ud-p-fclass.bin", 157 }, + { "rv64ud-p-fcmp.bin", 264 }, + { "rv64ud-p-fcvt.bin", 196 }, + { "rv64ud-p-fcvt_w.bin", 614 }, + { "rv64ud-p-fdiv.bin", 188 }, + { "rv64ud-p-fmadd.bin", 240 }, + { "rv64ud-p-fmin.bin", 318 }, + { "rv64ud-p-ldst.bin", 129 }, + { "rv64ud-p-move.bin", 1034 }, + { "rv64ud-p-recoding.bin", 142 }, + { "rv64ud-p-structural.bin", 207 }, + { "rv64ud-v-fadd.bin", 13869 }, + { "rv64ud-v-fclass.bin", 7947 }, + { "rv64ud-v-fcmp.bin", 13919 }, + { "rv64ud-v-fcvt.bin", 13851 }, + { "rv64ud-v-fcvt_w.bin", 20134 }, + { "rv64ud-v-fdiv.bin", 13843 }, + { "rv64ud-v-fmadd.bin", 13895 }, + { "rv64ud-v-fmin.bin", 13973 }, + { "rv64ud-v-ldst.bin", 13305 }, + { "rv64ud-v-move.bin", 14689 }, + { "rv64ud-v-recoding.bin", 13348 }, + { "rv64ud-v-structural.bin", 7997 }, + { "rv64uf-p-fadd.bin", 214 }, + { "rv64uf-p-fclass.bin", 151 }, + { "rv64uf-p-fcmp.bin", 264 }, + { "rv64uf-p-fcvt.bin", 156 }, + { "rv64uf-p-fcvt_w.bin", 554 }, + { "rv64uf-p-fdiv.bin", 175 }, + { "rv64uf-p-fmadd.bin", 240 }, + { "rv64uf-p-fmin.bin", 318 }, + { "rv64uf-p-ldst.bin", 110 }, + { "rv64uf-p-move.bin", 259 }, + { "rv64uf-p-recoding.bin", 117 }, + { "rv64uf-v-fadd.bin", 13869 }, + { "rv64uf-v-fclass.bin", 7941 }, + { "rv64uf-v-fcmp.bin", 13919 }, + { "rv64uf-v-fcvt.bin", 13811 }, + { "rv64uf-v-fcvt_w.bin", 20074 }, + { "rv64uf-v-fdiv.bin", 13830 }, + { "rv64uf-v-fmadd.bin", 13895 }, + { "rv64uf-v-fmin.bin", 13973 }, + { "rv64uf-v-ldst.bin", 13310 }, + { "rv64uf-v-move.bin", 8049 }, + { "rv64uf-v-recoding.bin", 13772 }, + { "rv64ui-p-add.bin", 509 }, + { "rv64ui-p-addi.bin", 284 }, + { "rv64ui-p-addiw.bin", 281 }, + { "rv64ui-p-addw.bin", 504 }, + { "rv64ui-p-and.bin", 
584 }, + { "rv64ui-p-andi.bin", 255 }, + { "rv64ui-p-auipc.bin", 98 }, + { "rv64ui-p-beq.bin", 330 }, + { "rv64ui-p-bge.bin", 348 }, + { "rv64ui-p-bgeu.bin", 438 }, + { "rv64ui-p-blt.bin", 330 }, + { "rv64ui-p-bltu.bin", 416 }, + { "rv64ui-p-bne.bin", 330 }, + { "rv64ui-p-fence_i.bin", 338 }, + { "rv64ui-p-jal.bin", 94 }, + { "rv64ui-p-jalr.bin", 154 }, + { "rv64ui-p-lb.bin", 292 }, + { "rv64ui-p-lbu.bin", 292 }, + { "rv64ui-p-ld.bin", 474 }, + { "rv64ui-p-ld_st.bin", 1454 }, + { "rv64ui-p-lh.bin", 308 }, + { "rv64ui-p-lhu.bin", 317 }, + { "rv64ui-p-lui.bin", 104 }, + { "rv64ui-p-lw.bin", 322 }, + { "rv64ui-p-lwu.bin", 356 }, + { "rv64ui-p-or.bin", 617 }, + { "rv64ui-p-ori.bin", 248 }, + { "rv64ui-p-sb.bin", 493 }, + { "rv64ui-p-sd.bin", 665 }, + { "rv64ui-p-sh.bin", 546 }, + { "rv64ui-p-simple.bin", 80 }, + { "rv64ui-p-sll.bin", 579 }, + { "rv64ui-p-slli.bin", 309 }, + { "rv64ui-p-slliw.bin", 316 }, + { "rv64ui-p-sllw.bin", 579 }, + { "rv64ui-p-slt.bin", 498 }, + { "rv64ui-p-slti.bin", 276 }, + { "rv64ui-p-sltiu.bin", 276 }, + { "rv64ui-p-sltu.bin", 515 }, + { "rv64ui-p-sra.bin", 551 }, + { "rv64ui-p-srai.bin", 297 }, + { "rv64ui-p-sraiw.bin", 343 }, + { "rv64ui-p-sraw.bin", 591 }, + { "rv64ui-p-srl.bin", 593 }, + { "rv64ui-p-srli.bin", 318 }, + { "rv64ui-p-srliw.bin", 325 }, + { "rv64ui-p-srlw.bin", 585 }, + { "rv64ui-p-st_ld.bin", 764 }, + { "rv64ui-p-sub.bin", 500 }, + { "rv64ui-p-subw.bin", 496 }, + { "rv64ui-p-sw.bin", 553 }, + { "rv64ui-p-xor.bin", 612 }, + { "rv64ui-p-xori.bin", 246 }, + { "rv64ui-v-add.bin", 8301 }, + { "rv64ui-v-addi.bin", 8076 }, + { "rv64ui-v-addiw.bin", 8073 }, + { "rv64ui-v-addw.bin", 8296 }, + { "rv64ui-v-and.bin", 14241 }, + { "rv64ui-v-andi.bin", 8047 }, + { "rv64ui-v-auipc.bin", 7889 }, + { "rv64ui-v-beq.bin", 8122 }, + { "rv64ui-v-bge.bin", 8139 }, + { "rv64ui-v-bgeu.bin", 8230 }, + { "rv64ui-v-blt.bin", 8122 }, + { "rv64ui-v-bltu.bin", 8208 }, + { "rv64ui-v-bne.bin", 8122 }, + { "rv64ui-v-fence_i.bin", 13535 }, + { 
"rv64ui-v-jal.bin", 7886 }, + { "rv64ui-v-jalr.bin", 7946 }, + { "rv64ui-v-lb.bin", 13949 }, + { "rv64ui-v-lbu.bin", 13949 }, + { "rv64ui-v-ld.bin", 14131 }, + { "rv64ui-v-ld_st.bin", 26352 }, + { "rv64ui-v-lh.bin", 13965 }, + { "rv64ui-v-lhu.bin", 13974 }, + { "rv64ui-v-lui.bin", 7896 }, + { "rv64ui-v-lw.bin", 13979 }, + { "rv64ui-v-lwu.bin", 14013 }, + { "rv64ui-v-or.bin", 14274 }, + { "rv64ui-v-ori.bin", 8040 }, + { "rv64ui-v-sb.bin", 13661 }, + { "rv64ui-v-sd.bin", 19698 }, + { "rv64ui-v-sh.bin", 13714 }, + { "rv64ui-v-simple.bin", 7872 }, + { "rv64ui-v-sll.bin", 14236 }, + { "rv64ui-v-slli.bin", 8101 }, + { "rv64ui-v-slliw.bin", 8108 }, + { "rv64ui-v-sllw.bin", 14236 }, + { "rv64ui-v-slt.bin", 8290 }, + { "rv64ui-v-slti.bin", 8068 }, + { "rv64ui-v-sltiu.bin", 8068 }, + { "rv64ui-v-sltu.bin", 8307 }, + { "rv64ui-v-sra.bin", 14208 }, + { "rv64ui-v-srai.bin", 8089 }, + { "rv64ui-v-sraiw.bin", 8135 }, + { "rv64ui-v-sraw.bin", 14248 }, + { "rv64ui-v-srl.bin", 14250 }, + { "rv64ui-v-srli.bin", 8110 }, + { "rv64ui-v-srliw.bin", 8117 }, + { "rv64ui-v-srlw.bin", 14242 }, + { "rv64ui-v-st_ld.bin", 19797 }, + { "rv64ui-v-sub.bin", 8292 }, + { "rv64ui-v-subw.bin", 8288 }, + { "rv64ui-v-sw.bin", 13721 }, + { "rv64ui-v-xor.bin", 14269 }, + { "rv64ui-v-xori.bin", 8038 }, + { "rv64um-p-div.bin", 148 }, + { "rv64um-p-divu.bin", 146 }, + { "rv64um-p-divuw.bin", 138 }, + { "rv64um-p-divw.bin", 141 }, + { "rv64um-p-mul.bin", 499 }, + { "rv64um-p-mulh.bin", 507 }, + { "rv64um-p-mulhsu.bin", 507 }, + { "rv64um-p-mulhu.bin", 539 }, + { "rv64um-p-mulw.bin", 438 }, + { "rv64um-p-rem.bin", 139 }, + { "rv64um-p-remu.bin", 140 }, + { "rv64um-p-remuw.bin", 135 }, + { "rv64um-p-remw.bin", 141 }, + { "rv64um-v-div.bin", 7940 }, + { "rv64um-v-divu.bin", 7938 }, + { "rv64um-v-divuw.bin", 7930 }, + { "rv64um-v-divw.bin", 7933 }, + { "rv64um-v-mul.bin", 8291 }, + { "rv64um-v-mulh.bin", 8299 }, + { "rv64um-v-mulhsu.bin", 8299 }, + { "rv64um-v-mulhu.bin", 8331 }, + { "rv64um-v-mulw.bin", 8230 }, 
+ { "rv64um-v-rem.bin", 7931 }, + { "rv64um-v-remu.bin", 7932 }, + { "rv64um-v-remuw.bin", 7927 }, + { "rv64um-v-remw.bin", 7933 }, + { "rv64uzba-p-add_uw.bin", 513 }, + { "rv64uzba-p-sh1add.bin", 516 }, + { "rv64uzba-p-sh1add_uw.bin", 520 }, + { "rv64uzba-p-sh2add.bin", 516 }, + { "rv64uzba-p-sh2add_uw.bin", 520 }, + { "rv64uzba-p-sh3add.bin", 516 }, + { "rv64uzba-p-sh3add_uw.bin", 520 }, + { "rv64uzba-p-slli_uw.bin", 321 }, + { "rv64uzba-v-add_uw.bin", 8243 }, + { "rv64uzba-v-sh1add.bin", 8246 }, + { "rv64uzba-v-sh1add_uw.bin", 8250 }, + { "rv64uzba-v-sh2add.bin", 8246 }, + { "rv64uzba-v-sh2add_uw.bin", 8250 }, + { "rv64uzba-v-sh3add.bin", 8246 }, + { "rv64uzba-v-sh3add_uw.bin", 8250 }, + { "rv64uzba-v-slli_uw.bin", 8051 }, + { "rv64uzbb-p-andn.bin", 593 }, + { "rv64uzbb-p-clz.bin", 270 }, + { "rv64uzbb-p-clzw.bin", 257 }, + { "rv64uzbb-p-cpop.bin", 270 }, + { "rv64uzbb-p-cpopw.bin", 257 }, + { "rv64uzbb-p-ctz.bin", 270 }, + { "rv64uzbb-p-ctzw.bin", 258 }, + { "rv64uzbb-p-max.bin", 503 }, + { "rv64uzbb-p-maxu.bin", 532 }, + { "rv64uzbb-p-min.bin", 499 }, + { "rv64uzbb-p-minu.bin", 521 }, + { "rv64uzbb-p-orc_b.bin", 294 }, + { "rv64uzbb-p-orn.bin", 602 }, + { "rv64uzbb-p-rev8.bin", 312 }, + { "rv64uzbb-p-rol.bin", 584 }, + { "rv64uzbb-p-rolw.bin", 583 }, + { "rv64uzbb-p-ror.bin", 611 }, + { "rv64uzbb-p-rori.bin", 324 }, + { "rv64uzbb-p-roriw.bin", 282 }, + { "rv64uzbb-p-rorw.bin", 543 }, + { "rv64uzbb-p-sext_b.bin", 270 }, + { "rv64uzbb-p-sext_h.bin", 273 }, + { "rv64uzbb-p-xnor.bin", 601 }, + { "rv64uzbb-p-zext_h.bin", 277 }, + { "rv64uzbb-v-andn.bin", 14251 }, + { "rv64uzbb-v-clz.bin", 8063 }, + { "rv64uzbb-v-clzw.bin", 8050 }, + { "rv64uzbb-v-cpop.bin", 8063 }, + { "rv64uzbb-v-cpopw.bin", 8050 }, + { "rv64uzbb-v-ctz.bin", 8063 }, + { "rv64uzbb-v-ctzw.bin", 8051 }, + { "rv64uzbb-v-max.bin", 8296 }, + { "rv64uzbb-v-maxu.bin", 14190 }, + { "rv64uzbb-v-min.bin", 8292 }, + { "rv64uzbb-v-minu.bin", 8314 }, + { "rv64uzbb-v-orc_b.bin", 8087 }, + { "rv64uzbb-v-orn.bin", 
14260 }, + { "rv64uzbb-v-rev8.bin", 8105 }, + { "rv64uzbb-v-rol.bin", 14242 }, + { "rv64uzbb-v-rolw.bin", 14241 }, + { "rv64uzbb-v-ror.bin", 14269 }, + { "rv64uzbb-v-rori.bin", 8117 }, + { "rv64uzbb-v-roriw.bin", 8075 }, + { "rv64uzbb-v-rorw.bin", 14201 }, + { "rv64uzbb-v-sext_b.bin", 8063 }, + { "rv64uzbb-v-sext_h.bin", 8066 }, + { "rv64uzbb-v-xnor.bin", 14259 }, + { "rv64uzbb-v-zext_h.bin", 8070 }, + { "rv64uzbc-p-clmul.bin", 500 }, + { "rv64uzbc-p-clmulh.bin", 505 }, + { "rv64uzbc-p-clmulr.bin", 503 }, + { "rv64uzbc-v-clmul.bin", 8292 }, + { "rv64uzbc-v-clmulh.bin", 8297 }, + { "rv64uzbc-v-clmulr.bin", 8295 }, + { "rv64uzbs-p-bclr.bin", 699 }, + { "rv64uzbs-p-bclri.bin", 354 }, + { "rv64uzbs-p-bext.bin", 634 }, + { "rv64uzbs-p-bexti.bin", 324 }, + { "rv64uzbs-p-binv.bin", 598 }, + { "rv64uzbs-p-binvi.bin", 319 }, + { "rv64uzbs-p-bset.bin", 701 }, + { "rv64uzbs-p-bseti.bin", 362 }, + { "rv64uzbs-v-bclr.bin", 14292 }, + { "rv64uzbs-v-bclri.bin", 8083 }, + { "rv64uzbs-v-bext.bin", 14227 }, + { "rv64uzbs-v-bexti.bin", 8053 }, + { "rv64uzbs-v-binv.bin", 14191 }, + { "rv64uzbs-v-binvi.bin", 8048 }, + { "rv64uzbs-v-bset.bin", 14294 }, + { "rv64uzbs-v-bseti.bin", 8091 }, + { "rv64uzfh-p-fadd.bin", 214 }, + { "rv64uzfh-p-fclass.bin", 151 }, + { "rv64uzfh-p-fcmp.bin", 156 }, + { "rv64uzfh-p-fcvt.bin", 184 }, + { "rv64uzfh-p-fcvt_w.bin", 554 }, + { "rv64uzfh-p-fdiv.bin", 175 }, + { "rv64uzfh-p-fmadd.bin", 240 }, + { "rv64uzfh-p-fmin.bin", 318 }, + { "rv64uzfh-p-ldst.bin", 114 }, + { "rv64uzfh-p-move.bin", 247 }, + { "rv64uzfh-p-recoding.bin", 117 }, + { "rv64uzfh-v-fadd.bin", 13869 }, + { "rv64uzfh-v-fclass.bin", 7941 }, + { "rv64uzfh-v-fcmp.bin", 13811 }, + { "rv64uzfh-v-fcvt.bin", 13839 }, + { "rv64uzfh-v-fcvt_w.bin", 20074 }, + { "rv64uzfh-v-fdiv.bin", 13830 }, + { "rv64uzfh-v-fmadd.bin", 13895 }, + { "rv64uzfh-v-fmin.bin", 13973 }, + { "rv64uzfh-v-ldst.bin", 13314 }, + { "rv64uzfh-v-move.bin", 8037 }, + { "rv64uzfh-v-recoding.bin", 13772 }, + + -- extensions that are 
built, but unsupported yet + -- { "rv64mi-p-pmpaddr.bin", 10000 }, + -- { "rv64mzicbo-p-zero.bin", 10000 }, + -- { "rv64ui-p-ma_data.bin", 10000 }, + -- { "rv64ui-v-ma_data.bin", 10000 }, + -- cartesi tests - { "ebreak.bin", 17 }, - { "pte_reserved_exception.bin", 30 }, - { "sd_pma_overflow.bin", 12 }, - { "xpie_exceptions.bin", 47 }, - { "dont_write_x0.bin", 64 }, - { "mcycle_write.bin", 14 }, - { "lrsc_semantics.bin", 31 }, + { "access.bin", 97 }, + { "amo.bin", 166 }, + { "clint_ops.bin", 133 }, + { "compressed.bin", 374 }, { "csr_counters.bin", 737 }, { "csr_semantics.bin", 378 }, - { "amo.bin", 162 }, - { "access.bin", 97 }, + { "dont_write_x0.bin", 64 }, + { "ebreak.bin", 17 }, + { "fbinary_d.bin", 204284 }, + { "fbinary_s.bin", 204284 }, + { "fclass.bin", 457 }, + { "fcmp.bin", 46787 }, + { "fcvt.bin", 17614 }, + { "fternary_d.bin", 216784 }, + { "fternary_s.bin", 216784 }, + { "funary.bin", 2834 }, + { "htif_invalid_ops.bin", 109 }, + { "illegal_insn.bin", 972 }, { "interrupts.bin", 8209 }, + { "lrsc_semantics.bin", 31 }, + { "mcycle_write.bin", 14 }, { "mtime_interrupt.bin", 16404 }, - { "illegal_insn.bin", 972 }, - { "version_check.bin", 26 }, - { "translate_vaddr.bin", 343 }, - { "htif_invalid_ops.bin", 109 }, - { "clint_ops.bin", 133 }, + { "pte_reserved_exception.bin", 30 }, + { "sd_pma_overflow.bin", 12 }, { "shadow_ops.bin", 78 }, - { "compressed.bin", 410 }, + { "translate_vaddr.bin", 343 }, + { "version_check.bin", 26 }, + { "xpie_exceptions.bin", 47 }, } local log_annotations = false @@ -660,13 +778,25 @@ local function fatal(fmt, ...) error(string.format(fmt, ...)) end local function check_and_print_result(machine, ctx) + local cycles = machine:read_reg("mcycle") + if machine:read_reg("iflags_H") ~= 1 then + fatal("%s: failed. 
terminated at mcycle = %d without halt\n", ctx.ram_image, cycles) + end + local halt_payload = machine:read_reg("htif_tohost_data") >> 1 local expected_halt_payload = ctx.expected_halt_payload or 0 if halt_payload ~= expected_halt_payload then - fatal("%s: failed. returned halt payload %d, expected %d\n", ctx.ram_image, halt_payload, expected_halt_payload) + fatal( + "%s: failed. halted at mcycle = %d with payload %d, expected %d\n", + ctx.ram_image, + cycles, + halt_payload, + expected_halt_payload + ) end - local cycles = machine:read_reg("mcycle") + -- print(string.format([[{ "%s", %d },]], ctx.ram_image, cycles)) + local expected_cycles = ctx.expected_cycles or 0 if cycles ~= expected_cycles then fatal("%s: failed. terminated with mcycle = %d, expected %d\n", ctx.ram_image, cycles, expected_cycles) diff --git a/tests/lua/spec-collect-hashes.lua b/tests/lua/spec-collect-hashes.lua index 586faa36c..6fcf3b4fb 100644 --- a/tests/lua/spec-collect-hashes.lua +++ b/tests/lua/spec-collect-hashes.lua @@ -204,7 +204,7 @@ describe("collect hashes", function() local create_machine = desc.create_machine describe(desc.name, function() it("should fail when collecting with invalid arguments", function() - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:run(1) expect.fail(function() machine:collect_mcycle_root_hashes(32, 32, 32) @@ -221,7 +221,7 @@ describe("collect hashes", function() end) it("should fail when collecting with incompatible back trees", function() - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) expect.fail(function() machine:collect_mcycle_root_hashes(32, 32, 0, 0, { log2_max_leaves = 1, @@ -244,8 +244,8 @@ describe("collect hashes", function() it("should fail when collecting with unsupported machines", function() local unrep_machine = - create_machine({ ram = { length = 4096 }, processor = { 
registers = { iunrep = 1 } } }) - local soft_machine = create_machine({ ram = { length = 4096 } }, { soft_yield = true }) + create_machine({ ram = { length = 0x10000 }, processor = { registers = { iunrep = 1 } } }) + local soft_machine = create_machine({ ram = { length = 0x10000 } }, { soft_yield = true }) expect.fail(function() unrep_machine:collect_mcycle_root_hashes(32, 32) end, "cannot collect hashes from unreproducible machines") @@ -265,7 +265,7 @@ describe("collect hashes", function() local mcycle_end = 1 local mcycle_period = 32 local mcycle_phase = 1 - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:run(mcycle_start) local log2_bundle_mcycle_count = 0 expect.equal( @@ -312,7 +312,7 @@ describe("collect hashes", function() local mcycle_end = 4 local mcycle_period = 4 local mcycle_phase = 1 - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:run(mcycle_start) local collected = machine:collect_mcycle_root_hashes(mcycle_end, mcycle_period, mcycle_phase) expect.equal(machine:read_reg("mcycle"), mcycle_end) @@ -322,7 +322,7 @@ describe("collect hashes", function() mcycle_phase = 0, }) - local machine_uarch = create_machine({ ram = { length = 4096 } }) + local machine_uarch = create_machine({ ram = { length = 0x10000 } }) machine_uarch:run(mcycle_start) local collected_uarch = machine_uarch:collect_uarch_cycle_root_hashes(mcycle_end) expect.equal(machine_uarch:read_reg("mcycle"), mcycle_end) @@ -340,13 +340,13 @@ describe("collect hashes", function() local mcycle_period = 4 local mcycle_start = mcycle_period local mcycle_phase = 1 - local compare_machine = cartesi.machine({ ram = { length = 4096 } }) + local compare_machine = cartesi.machine({ ram = { length = 0x10000 } }) compare_machine:run(mcycle_start) local expected_root_hash_period = compare_machine:get_root_hash() 
compare_machine:run(mcycle_end) local expected_root_hash_final = compare_machine:get_root_hash() - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:run(1) local collected = machine:collect_mcycle_root_hashes(mcycle_end, mcycle_period, mcycle_phase) expect.equal(machine:read_reg("mcycle"), mcycle_end) @@ -363,7 +363,7 @@ describe("collect hashes", function() local mcycle_end = 32 local mcycle_period = 32 local mcycle_phase = 1 - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:write_reg("iflags_H", 1) local expected_root_hash = machine:get_root_hash() @@ -388,7 +388,7 @@ describe("collect hashes", function() local mcycle_end = 32 local mcycle_period = 32 local mcycle_phase = 1 - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:write_reg("iflags_Y", 1) local expected_root_hash = machine:get_root_hash() @@ -412,7 +412,7 @@ describe("collect hashes", function() local mcycle_end = 0 local mcycle_period = 32 local mcycle_phase = 1 - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:write_reg("iflags_H", 1) local expected_root_hash = machine:get_root_hash() @@ -435,7 +435,7 @@ describe("collect hashes", function() local mcycle_end = 0 local mcycle_period = 32 local mcycle_phase = 1 - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:write_reg("iflags_Y", 1) local expected_root_hash = machine:get_root_hash() @@ -456,7 +456,7 @@ describe("collect hashes", function() it("should collect mcycles during mcycle overflow", function() local mcycle_period = 32 - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 
0x10000 } }) machine:write_reg("mcycle", cartesi.MCYCLE_MAX - 1) local collected = machine:collect_mcycle_root_hashes( cartesi.MCYCLE_MAX, @@ -486,7 +486,7 @@ describe("collect hashes", function() end) it("should collect uarch cycles during mcycle overflows", function() - local machine = create_machine({ ram = { length = 4096 } }) + local machine = create_machine({ ram = { length = 0x10000 } }) machine:write_reg("mcycle", cartesi.MCYCLE_MAX - 1) local collected_uarch = machine:collect_uarch_cycle_root_hashes(cartesi.MCYCLE_MAX) expect.equal(collected_uarch.break_reason, cartesi.BREAK_REASON_REACHED_TARGET_MCYCLE) @@ -505,7 +505,7 @@ describe("collect hashes", function() local add_machine_config = { ram = { - length = 4096, -- non power of 2 on purpose to exercise address range boundaries + length = 0x10000, backing_store = { data_filename = tests_util.tests_path .. "rv64ui-p-add.bin", }, @@ -513,7 +513,7 @@ describe("collect hashes", function() } local yield_machine_config = { ram = { - length = 8191 * 4096, -- non power of 2 on purpose to exercise address range boundaries + length = 0x10000, backing_store = { data_filename = tests_util.tests_path .. 
"htif_yield.bin", }, @@ -814,7 +814,7 @@ describe("collect hashes", function() }, } local empty_machine_config = { - ram = { length = 4096 }, + ram = { length = 0x10000 }, hash_tree = { hash_function = hash_function, }, diff --git a/tests/machine/src/compressed.S b/tests/machine/src/compressed.S index e164c92d9..30b4a1f36 100644 --- a/tests/machine/src/compressed.S +++ b/tests/machine/src/compressed.S @@ -140,11 +140,6 @@ test_illegal_insns: // Invalid instruction (no bit set) TEST_RAWC_ILLEGAL_INSN(0x0) - // Reserved ranges - TEST_RAWC_ILLEGAL_INSN((0b100 << SHIFT_C_FUNCT3) | (0 << SHIFT_C_OPCODE)) - TEST_RAWC_ILLEGAL_INSN((0b100 << SHIFT_C_FUNCT3) | (0b111 << 10) | (0b10 << 5) | (1 << SHIFT_C_OPCODE)) - TEST_RAWC_ILLEGAL_INSN((0b100 << SHIFT_C_FUNCT3) | (0b111 << 10) | (0b11 << 5) | (1 << SHIFT_C_OPCODE)) - // Quadrant 0 TEST_RAWC_ILLEGAL_INSN((0 << SHIFT_C_FUNCT3) | (1 << SHIFT_C_Q0_RD) | (0 << SHIFT_C_OPCODE)) // C.ADDI4SPN with imm=0 diff --git a/tests/machine/src/illegal_insn.S b/tests/machine/src/illegal_insn.S index 4e6d36761..c84943527 100644 --- a/tests/machine/src/illegal_insn.S +++ b/tests/machine/src/illegal_insn.S @@ -72,7 +72,7 @@ _start: TEST_ILLEGAL_INSN((0b0110011 << SHIFT_OPCODE) | (0b111 << SHIFT_FUNCT3) | (0b0000010 << SHIFT_FUNCT7)) // SLLIW/SRLIW/SRAIW with invalid funct7 - TEST_ILLEGAL_INSN((0b0011011 << SHIFT_OPCODE) | (0b001 << SHIFT_FUNCT3) | (0b0000010 << SHIFT_FUNCT7)) + TEST_ILLEGAL_INSN((0b0011011 << SHIFT_OPCODE) | (0b001 << SHIFT_FUNCT3) | (0b0000110 << SHIFT_FUNCT7)) TEST_ILLEGAL_INSN((0b0011011 << SHIFT_OPCODE) | (0b101 << SHIFT_FUNCT3) | (0b0000010 << SHIFT_FUNCT7)) // ADDW/SUBW/SLLW/SRLW/SRAW/MULW/DIVW/DIVUW/REMW/REMUW with invalid funct7 diff --git a/third-party/riscv-tests b/third-party/riscv-tests index a64ad67b8..3d6b6aed5 160000 --- a/third-party/riscv-tests +++ b/third-party/riscv-tests @@ -1 +1 @@ -Subproject commit a64ad67b8235c681cd244b087ced36c4d5df3cb9 +Subproject commit 3d6b6aed5175e924ccf8ef6af734ac38c016c95b diff 
--git a/tools/gen-interpret-jump-table.lua b/tools/gen-interpret-jump-table.lua index eb88fd34c..93e5fee54 100755 --- a/tools/gen-interpret-jump-table.lua +++ b/tools/gen-interpret-jump-table.lua @@ -88,6 +88,61 @@ local insns = { { bits = "0000001__________110_____0111011", name = "REMW", rd0_special = true }, { bits = "0000001__________111_____0111011", name = "REMUW", rd0_special = true }, + -- Zba extension + { bits = "0000100__________000_____0111011", name = "ADD.UW", rd0_special = true }, + { bits = "0010000__________010_____0110011", name = "SH1ADD", rd0_special = true }, + { bits = "0010000__________010_____0111011", name = "SH1ADD.UW", rd0_special = true }, + { bits = "0010000__________100_____0110011", name = "SH2ADD", rd0_special = true }, + { bits = "0010000__________100_____0111011", name = "SH2ADD.UW", rd0_special = true }, + { bits = "0010000__________110_____0110011", name = "SH3ADD", rd0_special = true }, + { bits = "0010000__________110_____0111011", name = "SH3ADD.UW", rd0_special = true }, + { bits = "000010___________001_____0011011", name = "SLLI.UW", rd0_special = true }, + + -- Zbb extension + { bits = "0100000__________111_____0110011", name = "ANDN", rd0_special = true }, + { bits = "0100000__________110_____0110011", name = "ORN", rd0_special = true }, + { bits = "0100000__________100_____0110011", name = "XNOR", rd0_special = true }, + { bits = "011000000000_____001_____0010011", name = "CLZ", rd0_special = true }, + { bits = "011000000000_____001_____0011011", name = "CLZW", rd0_special = true }, + { bits = "011000000001_____001_____0010011", name = "CTZ", rd0_special = true }, + { bits = "011000000001_____001_____0011011", name = "CTZW", rd0_special = true }, + { bits = "011000000010_____001_____0010011", name = "CPOP", rd0_special = true }, + { bits = "011000000010_____001_____0011011", name = "CPOPW", rd0_special = true }, + { bits = "0000101__________110_____0110011", name = "MAX", rd0_special = true }, + { bits = 
"0000101__________111_____0110011", name = "MAXU", rd0_special = true }, + { bits = "0000101__________100_____0110011", name = "MIN", rd0_special = true }, + { bits = "0000101__________101_____0110011", name = "MINU", rd0_special = true }, + { bits = "011000000100_____001_____0010011", name = "SEXT.B", rd0_special = true }, + { bits = "011000000101_____001_____0010011", name = "SEXT.H", rd0_special = true }, + { bits = "000010000000_____100_____0111011", name = "ZEXT.H", rd0_special = true }, + { bits = "0110000__________001_____0110011", name = "ROL", rd0_special = true }, + { bits = "0110000__________001_____0111011", name = "ROLW", rd0_special = true }, + { bits = "0110000__________101_____0110011", name = "ROR", rd0_special = true }, + { bits = "011000___________101_____0010011", name = "RORI", rd0_special = true }, + { bits = "0110000__________101_____0011011", name = "RORIW", rd0_special = true }, + { bits = "0110000__________101_____0111011", name = "RORW", rd0_special = true }, + { bits = "001010000111_____101_____0010011", name = "ORC.B", rd0_special = true }, + { bits = "011010111000_____101_____0010011", name = "REV8", rd0_special = true }, + + -- Zbc extension + { bits = "0000101__________001_____0110011", name = "CLMUL", rd0_special = true }, + { bits = "0000101__________011_____0110011", name = "CLMULH", rd0_special = true }, + { bits = "0000101__________010_____0110011", name = "CLMULR", rd0_special = true }, + + -- Zbs extension + { bits = "0100100__________001_____0110011", name = "BCLR", rd0_special = true }, + { bits = "010010___________001_____0010011", name = "BCLRI", rd0_special = true }, + { bits = "0100100__________101_____0110011", name = "BEXT", rd0_special = true }, + { bits = "010010___________101_____0010011", name = "BEXTI", rd0_special = true }, + { bits = "0110100__________001_____0110011", name = "BINV", rd0_special = true }, + { bits = "011010___________001_____0010011", name = "BINVI", rd0_special = true }, + { bits = 
"0010100__________001_____0110011", name = "BSET", rd0_special = true }, + { bits = "001010___________001_____0010011", name = "BSETI", rd0_special = true }, + + -- Zicond + { bits = "0000111__________101_____0110011", name = "CZERO.EQZ", rd0_special = true }, + { bits = "0000111__________111_____0110011", name = "CZERO.NEZ", rd0_special = true }, + -- RV32A { bits = "00010__00000_____010_____0101111", name = "LR.W" }, { bits = "00011____________010_____0101111", name = "SC.W" }, @@ -183,6 +238,42 @@ local insns = { { bits = "110100100011_____________1010011", name = "FCVT.D.LU", rm = true }, { bits = "111100100000_____000_____1010011", name = "FMV.D.X" }, + -- Zfh + { bits = "_________________001_____0000111", name = "FLH" }, + { bits = "_________________001_____0100111", name = "FSH" }, + { bits = "_____10__________________1000011", name = "FMADD.H", rm = true }, + { bits = "_____10__________________1000111", name = "FMSUB.H", rm = true }, + { bits = "_____10__________________1001011", name = "FNMSUB.H", rm = true }, + { bits = "_____10__________________1001111", name = "FNMADD.H", rm = true }, + { bits = "0000010__________________1010011", name = "FADD.H", rm = true }, + { bits = "0000110__________________1010011", name = "FSUB.H", rm = true }, + { bits = "0001010__________________1010011", name = "FMUL.H", rm = true }, + { bits = "0001110__________________1010011", name = "FDIV.H", rm = true }, + { bits = "010111000000_____________1010011", name = "FSQRT.H", rm = true }, + { bits = "0010010__________000_____1010011", name = "FSGNJ.H" }, + { bits = "0010010__________001_____1010011", name = "FSGNJN.H" }, + { bits = "0010010__________010_____1010011", name = "FSGNJX.H" }, + { bits = "0010110__________000_____1010011", name = "FMIN.H" }, + { bits = "0010110__________001_____1010011", name = "FMAX.H" }, + { bits = "010000000010_____________1010011", name = "FCVT.S.H", rm = true }, + { bits = "010001000000_____________1010011", name = "FCVT.H.S", rm = true }, + { 
bits = "010000100010_____________1010011", name = "FCVT.D.H", rm = true }, + { bits = "010001000001_____________1010011", name = "FCVT.H.D", rm = true }, + { bits = "1010010__________010_____1010011", name = "FEQ.H" }, + { bits = "1010010__________001_____1010011", name = "FLT.H" }, + { bits = "1010010__________000_____1010011", name = "FLE.H" }, + { bits = "111001000000_____001_____1010011", name = "FCLASS.H" }, + { bits = "110001000000_____________1010011", name = "FCVT.W.H", rm = true }, + { bits = "110001000001_____________1010011", name = "FCVT.WU.H", rm = true }, + { bits = "111001000000_____000_____1010011", name = "FMV.X.H" }, + { bits = "110101000000_____________1010011", name = "FCVT.H.W", rm = true }, + { bits = "110101000001_____________1010011", name = "FCVT.H.WU", rm = true }, + { bits = "111101000000_____000_____1010011", name = "FMV.H.X" }, + { bits = "110001000010_____________1010011", name = "FCVT.L.H", rm = true }, + { bits = "110001000011_____________1010011", name = "FCVT.LU.H", rm = true }, + { bits = "110101000010_____________1010011", name = "FCVT.H.L", rm = true }, + { bits = "110101000011_____________1010011", name = "FCVT.H.LU", rm = true }, + -- Zifencei extension { bits = "_________________001_____0001111", name = "FENCE.I" }, @@ -244,6 +335,42 @@ do add_c_insn({ bits = "110" .. tobase2(mid, 11) .. "00", name = "C.SW" }) add_c_insn({ bits = "111" .. tobase2(mid, 11) .. "00", name = "C.SD" }) end + + -- Zcb quadrant 0 + for rs1 = 0, (1 << 3) - 1 do + for rs2 = 0, (1 << 3) - 1 do + for uimm = 0, (1 << 2) - 1 do + add_c_insn({ + bits = "100010" .. tobase2(rs1, 3) .. tobase2(uimm, 2) .. tobase2(rs2, 3) .. "00", + name = "C.SB", + }) + end + for uimm = 0, 1 do + add_c_insn({ + bits = "100011" .. tobase2(rs1, 3) .. "0" .. tobase2(uimm, 1) .. tobase2(rs2, 3) .. "00", + name = "C.SH", + }) + end + end + for rd = 0, (1 << 3) - 1 do + for uimm = 0, (1 << 2) - 1 do + add_c_insn({ + bits = "100000" .. tobase2(rs1, 3) .. tobase2(uimm, 2) .. 
tobase2(rd, 3) .. "00", + name = "C.LBU", + }) + end + for uimm = 0, 1 do + add_c_insn({ + bits = "100001" .. tobase2(rs1, 3) .. "0" .. tobase2(uimm, 1) .. tobase2(rd, 3) .. "00", + name = "C.LHU", + }) + add_c_insn({ + bits = "100001" .. tobase2(rs1, 3) .. "1" .. tobase2(uimm, 1) .. tobase2(rd, 3) .. "00", + name = "C.LH", + }) + end + end + end end do -- quadrant 1 @@ -318,6 +445,19 @@ do add_c_insn({ bits = "110" .. tobase2(mid, 11) .. "01", name = "C.BEQZ" }) add_c_insn({ bits = "111" .. tobase2(mid, 11) .. "01", name = "C.BNEZ" }) end + + -- Zcb quadrant 1 + for rs1 = 0, (1 << 3) - 1 do + add_c_insn({ bits = "100111" .. tobase2(rs1, 3) .. "11" .. "000" .. "01", name = "C.ZEXT.B" }) + add_c_insn({ bits = "100111" .. tobase2(rs1, 3) .. "11" .. "001" .. "01", name = "C.SEXT.B" }) + add_c_insn({ bits = "100111" .. tobase2(rs1, 3) .. "11" .. "010" .. "01", name = "C.ZEXT.H" }) + add_c_insn({ bits = "100111" .. tobase2(rs1, 3) .. "11" .. "011" .. "01", name = "C.SEXT.H" }) + add_c_insn({ bits = "100111" .. tobase2(rs1, 3) .. "11" .. "100" .. "01", name = "C.ZEXT.W" }) + add_c_insn({ bits = "100111" .. tobase2(rs1, 3) .. "11" .. "101" .. "01", name = "C.NOT" }) + for rs2 = 0, (1 << 3) - 1 do + add_c_insn({ bits = "100111" .. tobase2(rs1, 3) .. "10" .. tobase2(rs2, 3) .. "01", name = "C.MUL" }) + end + end end do -- quadrant 2 @@ -399,23 +539,18 @@ end -- Table use to rename a group of instructions to a single name. 
local group_names = { - -- I - ["ADD|SUB|MUL"] = "ADD_MUL_SUB", - ["ADDW|SUBW|MULW"] = "ADDW_MULW_SUBW", - ["SRL|SRA|DIVU"] = "SRL_DIVU_SRA", - ["SRLW|SRAW|DIVUW"] = "SRLW_DIVUW_SRAW", -- A ["LR.W|SC.W|AMOSWAP.W|AMOADD.W|AMOXOR.W|AMOAND.W|AMOOR.W|AMOMIN.W|AMOMAX.W|AMOMINU.W|AMOMAXU.W"] = "AMO_W", ["LR.D|SC.D|AMOSWAP.D|AMOADD.D|AMOXOR.D|AMOAND.D|AMOOR.D|AMOMIN.D|AMOMAX.D|AMOMINU.D|AMOMAXU.D"] = "AMO_D", -- FD - ["FMADD.S|FMADD.D"] = "FMADD", - ["FMSUB.S|FMSUB.D"] = "FMSUB", - ["FNMADD.S|FNMADD.D"] = "FNMADD", - ["FNMSUB.S|FNMSUB.D"] = "FNMSUB", - ["FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FSGNJ.S|FMIN.S|FCVT.W.S|FCVT.WU.S|FMV.X.W|FLE.S|FCVT.S.W|FCVT.S.WU|FMV.W.X|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FSGNJ.D|FMIN.D|FCVT.S.D|FCVT.D.S|FLE.D|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FMV.X.D|FCVT.D.L|FCVT.D.LU|FMV.D.X"] = "FD", - ["FSGNJN.S|FMAX.S|FLT.S|FCLASS.S|FSGNJN.D|FMAX.D|FLT.D|FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FCVT.W.S|FCVT.WU.S|FCVT.S.W|FCVT.S.WU|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FCVT.S.D|FCVT.D.S|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FCVT.D.L|FCVT.D.LU"] = "FD", - ["FSGNJX.S|FEQ.S|FSGNJX.D|FEQ.D|FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FCVT.W.S|FCVT.WU.S|FCVT.S.W|FCVT.S.WU|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FCVT.S.D|FCVT.D.S|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FCVT.D.L|FCVT.D.LU"] = "FD", - ["FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FCVT.W.S|FCVT.WU.S|FCVT.S.W|FCVT.S.WU|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FCVT.S.D|FCVT.D.S|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FCVT.D.L|FCVT.D.LU"] = "FD", + ["FMADD.S|FMADD.D|FMADD.H"] = "FMADD", + ["FMSUB.S|FMSUB.D|FMSUB.H"] = "FMSUB", + ["FNMADD.S|FNMADD.D|FNMADD.H"] = "FNMADD", + ["FNMSUB.S|FNMSUB.D|FNMSUB.H"] = "FNMSUB", + 
["FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FSGNJ.S|FMIN.S|FCVT.W.S|FCVT.WU.S|FMV.X.W|FLE.S|FCVT.S.W|FCVT.S.WU|FMV.W.X|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FSGNJ.D|FMIN.D|FCVT.S.D|FCVT.D.S|FLE.D|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FMV.X.D|FCVT.D.L|FCVT.D.LU|FMV.D.X|FADD.H|FSUB.H|FMUL.H|FDIV.H|FSQRT.H|FSGNJ.H|FMIN.H|FCVT.S.H|FCVT.H.S|FCVT.D.H|FCVT.H.D|FLE.H|FCVT.W.H|FCVT.WU.H|FMV.X.H|FCVT.H.W|FCVT.H.WU|FMV.H.X|FCVT.L.H|FCVT.LU.H|FCVT.H.L|FCVT.H.LU"] = "FDZfh", + ["FSGNJN.S|FMAX.S|FLT.S|FCLASS.S|FSGNJN.D|FMAX.D|FLT.D|FSGNJN.H|FMAX.H|FLT.H|FCLASS.H|FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FCVT.W.S|FCVT.WU.S|FCVT.S.W|FCVT.S.WU|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FCVT.S.D|FCVT.D.S|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FCVT.D.L|FCVT.D.LU|FADD.H|FSUB.H|FMUL.H|FDIV.H|FSQRT.H|FCVT.S.H|FCVT.H.S|FCVT.D.H|FCVT.H.D|FCVT.W.H|FCVT.WU.H|FCVT.H.W|FCVT.H.WU|FCVT.L.H|FCVT.LU.H|FCVT.H.L|FCVT.H.LU"] = "FDZfh", + ["FSGNJX.S|FEQ.S|FSGNJX.D|FEQ.D|FSGNJX.H|FEQ.H|FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FCVT.W.S|FCVT.WU.S|FCVT.S.W|FCVT.S.WU|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FCVT.S.D|FCVT.D.S|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FCVT.D.L|FCVT.D.LU|FADD.H|FSUB.H|FMUL.H|FDIV.H|FSQRT.H|FCVT.S.H|FCVT.H.S|FCVT.D.H|FCVT.H.D|FCVT.W.H|FCVT.WU.H|FCVT.H.W|FCVT.H.WU|FCVT.L.H|FCVT.LU.H|FCVT.H.L|FCVT.H.LU"] = "FDZfh", + ["FADD.S|FSUB.S|FMUL.S|FDIV.S|FSQRT.S|FCVT.W.S|FCVT.WU.S|FCVT.S.W|FCVT.S.WU|FCVT.L.S|FCVT.LU.S|FCVT.S.L|FCVT.S.LU|FADD.D|FSUB.D|FMUL.D|FDIV.D|FSQRT.D|FCVT.S.D|FCVT.D.S|FCLASS.D|FCVT.W.D|FCVT.WU.D|FCVT.D.W|FCVT.D.WU|FCVT.L.D|FCVT.LU.D|FCVT.D.L|FCVT.D.LU|FADD.H|FSUB.H|FMUL.H|FDIV.H|FSQRT.H|FCVT.S.H|FCVT.H.S|FCVT.D.H|FCVT.H.D|FCVT.W.H|FCVT.WU.H|FCVT.H.W|FCVT.H.WU|FCVT.L.H|FCVT.LU.H|FCVT.H.L|FCVT.H.LU"] = "FDZfh", -- privileged ["ECALL|EBREAK|SRET|MRET|MNRET|WFI|SFENCE.VMA"] = "PRIVILEGED", ["SFENCE.VMA"] = 
"PRIVILEGED", @@ -480,7 +615,7 @@ for i = 0, ((1 << mask_bits) - 1) do table.insert(labels, { name = name, i = firstindex * 10 }) end end - assert(#name < 18, namekey) + assert(#name < 64, namekey) for rd = 0, 31 do local ename = name if rd0_special then