disassembler.rs 18 KB


  1. // SPDX-License-Identifier: (Apache-2.0 OR MIT)
  2. // Copyright 2017 6WIND S.A. <quentin.monnet@6wind.com>
  3. //! Functions in this module are used to handle eBPF programs with a higher level representation,
  4. //! for example to disassemble the code into a human-readable format.
  5. use ebpf;
  6. #[inline]
  7. fn alu_imm_str(name: &str, insn: &ebpf::Insn) -> String {
  8. format!("{name} r{}, {:#x}", insn.dst, insn.imm)
  9. }
  10. #[inline]
  11. fn alu_reg_str(name: &str, insn: &ebpf::Insn) -> String {
  12. format!("{name} r{}, r{}", insn.dst, insn.src)
  13. }
  14. #[inline]
  15. fn byteswap_str(name: &str, insn: &ebpf::Insn) -> String {
  16. match insn.imm {
  17. 16 | 32 | 64 => {},
  18. _ => println!("[Disassembler] Warning: Invalid offset value for {name} insn")
  19. }
  20. format!("{name}{} r{}", insn.imm, insn.dst)
  21. }
  22. #[inline]
  23. fn ld_st_imm_str(name: &str, insn: &ebpf::Insn) -> String {
  24. if insn.off >= 0 {
  25. format!("{name} [r{}+{:#x}], {:#x}", insn.dst, insn.off, insn.imm)
  26. } else {
  27. format!("{name} [r{}-{:#x}], {:#x}", insn.dst, -insn.off, insn.imm)
  28. }
  29. }
  30. #[inline]
  31. fn ld_reg_str(name: &str, insn: &ebpf::Insn) -> String {
  32. if insn.off >= 0 {
  33. format!("{name} r{}, [r{}+{:#x}]", insn.dst, insn.src, insn.off)
  34. } else {
  35. format!("{name} r{}, [r{}-{:#x}]", insn.dst, insn.src, -insn.off)
  36. }
  37. }
  38. #[inline]
  39. fn st_reg_str(name: &str, insn: &ebpf::Insn) -> String {
  40. if insn.off >= 0 {
  41. format!("{name} [r{}+{:#x}], r{}", insn.dst, insn.off, insn.src)
  42. } else {
  43. format!("{name} [r{}-{:#x}], r{}", insn.dst, -insn.off, insn.src)
  44. }
  45. }
  46. #[inline]
  47. fn ldabs_str(name: &str, insn: &ebpf::Insn) -> String {
  48. format!("{name} {:#x}", insn.imm)
  49. }
  50. #[inline]
  51. fn ldind_str(name: &str, insn: &ebpf::Insn) -> String {
  52. format!("{name} r{}, {:#x}", insn.src, insn.imm)
  53. }
  54. #[inline]
  55. fn jmp_imm_str(name: &str, insn: &ebpf::Insn) -> String {
  56. if insn.off >= 0 {
  57. format!("{name} r{}, {:#x}, +{:#x}", insn.dst, insn.imm, insn.off)
  58. } else {
  59. format!("{name} r{}, {:#x}, -{:#x}", insn.dst, insn.imm, -insn.off)
  60. }
  61. }
  62. #[inline]
  63. fn jmp_reg_str(name: &str, insn: &ebpf::Insn) -> String {
  64. if insn.off >= 0 {
  65. format!("{name} r{}, r{}, +{:#x}", insn.dst, insn.src, insn.off)
  66. } else {
  67. format!("{name} r{}, r{}, -{:#x}", insn.dst, insn.src, -insn.off)
  68. }
  69. }
  /// High-level representation of an eBPF instruction.
  ///
  /// In addition to the standard operation code and the various operands, this struct has the
  /// following properties:
  ///
  /// * It stores a name, corresponding to a mnemonic for the operation code.
  /// * It also stores a description, which is a mnemonic for the full instruction, using the
  ///   actual values of the relevant operands, and that can be used for disassembling the eBPF
  ///   program for example.
  /// * Immediate values are stored in an `i64` instead of a traditional `i32`, in order to merge
  ///   the two parts of (otherwise double-length) `LD_DW_IMM` instructions.
  ///
  /// The struct is plain data and derives `Clone`, so instructions can be duplicated freely.
  ///
  /// See <https://www.kernel.org/doc/Documentation/networking/filter.txt> for the Linux kernel
  /// documentation about eBPF, or <https://github.com/iovisor/bpf-docs/blob/master/eBPF.md> for a
  /// more concise version.
  #[derive(Debug, Clone, PartialEq, Eq)]
  pub struct HLInsn {
      /// Operation code.
      pub opc: u8,
      /// Name (mnemonic). This name is not canon.
      pub name: String,
      /// Description of the instruction. This is not canon.
      pub desc: String,
      /// Destination register operand.
      pub dst: u8,
      /// Source register operand.
      pub src: u8,
      /// Offset operand.
      pub off: i16,
      /// Immediate value operand. For `LD_DW_IMM` instructions, contains the whole value merged
      /// from the immediates of the two consecutive instruction slots making up the instruction.
      pub imm: i64,
  }
  103. /// Return a vector of `struct HLInsn` built from an eBPF program.
  104. ///
  105. /// This is made public to provide a way to manipulate a program as a vector of instructions, in a
  106. /// high-level format, for example for dumping the program instruction after instruction with a
  107. /// custom format.
  108. ///
  109. /// Note that the two parts of `LD_DW_IMM` instructions (that have the size of two standard
  110. /// instructions) are considered as making a single immediate value. As a consequence, the number
  111. /// of instructions stored in the vector may not be equal to the size in bytes of the program
  112. /// divided by the length of an instructions.
  113. ///
  114. /// To do so, the immediate value operand is stored as an `i64` instead as an i32, so be careful
  115. /// when you use it (see example `examples/to_json.rs`).
  116. ///
  117. /// This is to oppose to `ebpf::to_insn_vec()` function, that treats instructions on a low-level
  118. /// ground and do not merge the parts of `LD_DW_IMM`. Also, the version in `ebpf` module does not
  119. /// use names or descriptions when storing the instructions.
  120. ///
  121. /// # Examples
  122. ///
  123. /// ```
  124. /// use rbpf::disassembler;
  125. ///
  126. /// let prog = &[
  127. /// 0x18, 0x00, 0x00, 0x00, 0x88, 0x77, 0x66, 0x55,
  128. /// 0x00, 0x00, 0x00, 0x00, 0x44, 0x33, 0x22, 0x11,
  129. /// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  130. /// ];
  131. ///
  132. /// let v = disassembler::to_insn_vec(prog);
  133. /// assert_eq!(v, vec![
  134. /// disassembler::HLInsn {
  135. /// opc: 0x18,
  136. /// name: "lddw".to_string(),
  137. /// desc: "lddw r0, 0x1122334455667788".to_string(),
  138. /// dst: 0,
  139. /// src: 0,
  140. /// off: 0,
  141. /// imm: 0x1122334455667788
  142. /// },
  143. /// disassembler::HLInsn {
  144. /// opc: 0x95,
  145. /// name: "exit".to_string(),
  146. /// desc: "exit".to_string(),
  147. /// dst: 0,
  148. /// src: 0,
  149. /// off: 0,
  150. /// imm: 0
  151. /// },
  152. /// ]);
  153. /// ```
  154. pub fn to_insn_vec(prog: &[u8]) -> Vec<HLInsn> {
  155. if prog.len() % ebpf::INSN_SIZE != 0 {
  156. panic!("[Disassembler] Error: eBPF program length must be a multiple of {:?} octets",
  157. ebpf::INSN_SIZE);
  158. }
  159. if prog.is_empty() {
  160. return vec![];
  161. }
  162. let mut res = vec![];
  163. let mut insn_ptr:usize = 0;
  164. while insn_ptr * ebpf::INSN_SIZE < prog.len() {
  165. let insn = ebpf::get_insn(prog, insn_ptr);
  166. let name;
  167. let desc;
  168. let mut imm = insn.imm as i64;
  169. match insn.opc {
  170. // BPF_LD class
  171. ebpf::LD_ABS_B => { name = "ldabsb"; desc = ldabs_str(name, &insn); },
  172. ebpf::LD_ABS_H => { name = "ldabsh"; desc = ldabs_str(name, &insn); },
  173. ebpf::LD_ABS_W => { name = "ldabsw"; desc = ldabs_str(name, &insn); },
  174. ebpf::LD_ABS_DW => { name = "ldabsdw"; desc = ldabs_str(name, &insn); },
  175. ebpf::LD_IND_B => { name = "ldindb"; desc = ldind_str(name, &insn); },
  176. ebpf::LD_IND_H => { name = "ldindh"; desc = ldind_str(name, &insn); },
  177. ebpf::LD_IND_W => { name = "ldindw"; desc = ldind_str(name, &insn); },
  178. ebpf::LD_IND_DW => { name = "ldinddw"; desc = ldind_str(name, &insn); },
  179. ebpf::LD_DW_IMM => {
  180. insn_ptr += 1;
  181. let next_insn = ebpf::get_insn(prog, insn_ptr);
  182. imm = ((insn.imm as u32) as u64 + ((next_insn.imm as u64) << 32)) as i64;
  183. name = "lddw"; desc = format!("{name} r{:}, {imm:#x}", insn.dst);
  184. },
  185. // BPF_LDX class
  186. ebpf::LD_B_REG => { name = "ldxb"; desc = ld_reg_str(name, &insn); },
  187. ebpf::LD_H_REG => { name = "ldxh"; desc = ld_reg_str(name, &insn); },
  188. ebpf::LD_W_REG => { name = "ldxw"; desc = ld_reg_str(name, &insn); },
  189. ebpf::LD_DW_REG => { name = "ldxdw"; desc = ld_reg_str(name, &insn); },
  190. // BPF_ST class
  191. ebpf::ST_B_IMM => { name = "stb"; desc = ld_st_imm_str(name, &insn); },
  192. ebpf::ST_H_IMM => { name = "sth"; desc = ld_st_imm_str(name, &insn); },
  193. ebpf::ST_W_IMM => { name = "stw"; desc = ld_st_imm_str(name, &insn); },
  194. ebpf::ST_DW_IMM => { name = "stdw"; desc = ld_st_imm_str(name, &insn); },
  195. // BPF_STX class
  196. ebpf::ST_B_REG => { name = "stxb"; desc = st_reg_str(name, &insn); },
  197. ebpf::ST_H_REG => { name = "stxh"; desc = st_reg_str(name, &insn); },
  198. ebpf::ST_W_REG => { name = "stxw"; desc = st_reg_str(name, &insn); },
  199. ebpf::ST_DW_REG => { name = "stxdw"; desc = st_reg_str(name, &insn); },
  200. ebpf::ST_W_XADD => { name = "stxxaddw"; desc = st_reg_str(name, &insn); },
  201. ebpf::ST_DW_XADD => { name = "stxxadddw"; desc = st_reg_str(name, &insn); },
  202. // BPF_ALU class
  203. ebpf::ADD32_IMM => { name = "add32"; desc = alu_imm_str(name, &insn); },
  204. ebpf::ADD32_REG => { name = "add32"; desc = alu_reg_str(name, &insn); },
  205. ebpf::SUB32_IMM => { name = "sub32"; desc = alu_imm_str(name, &insn); },
  206. ebpf::SUB32_REG => { name = "sub32"; desc = alu_reg_str(name, &insn); },
  207. ebpf::MUL32_IMM => { name = "mul32"; desc = alu_imm_str(name, &insn); },
  208. ebpf::MUL32_REG => { name = "mul32"; desc = alu_reg_str(name, &insn); },
  209. ebpf::DIV32_IMM => { name = "div32"; desc = alu_imm_str(name, &insn); },
  210. ebpf::DIV32_REG => { name = "div32"; desc = alu_reg_str(name, &insn); },
  211. ebpf::OR32_IMM => { name = "or32"; desc = alu_imm_str(name, &insn); },
  212. ebpf::OR32_REG => { name = "or32"; desc = alu_reg_str(name, &insn); },
  213. ebpf::AND32_IMM => { name = "and32"; desc = alu_imm_str(name, &insn); },
  214. ebpf::AND32_REG => { name = "and32"; desc = alu_reg_str(name, &insn); },
  215. ebpf::LSH32_IMM => { name = "lsh32"; desc = alu_imm_str(name, &insn); },
  216. ebpf::LSH32_REG => { name = "lsh32"; desc = alu_reg_str(name, &insn); },
  217. ebpf::RSH32_IMM => { name = "rsh32"; desc = alu_imm_str(name, &insn); },
  218. ebpf::RSH32_REG => { name = "rsh32"; desc = alu_reg_str(name, &insn); },
  219. ebpf::NEG32 => { name = "neg32"; desc = format!("{name} r{:}", insn.dst); },
  220. ebpf::MOD32_IMM => { name = "mod32"; desc = alu_imm_str(name, &insn); },
  221. ebpf::MOD32_REG => { name = "mod32"; desc = alu_reg_str(name, &insn); },
  222. ebpf::XOR32_IMM => { name = "xor32"; desc = alu_imm_str(name, &insn); },
  223. ebpf::XOR32_REG => { name = "xor32"; desc = alu_reg_str(name, &insn); },
  224. ebpf::MOV32_IMM => { name = "mov32"; desc = alu_imm_str(name, &insn); },
  225. ebpf::MOV32_REG => { name = "mov32"; desc = alu_reg_str(name, &insn); },
  226. ebpf::ARSH32_IMM => { name = "arsh32"; desc = alu_imm_str(name, &insn); },
  227. ebpf::ARSH32_REG => { name = "arsh32"; desc = alu_reg_str(name, &insn); },
  228. ebpf::LE => { name = "le"; desc = byteswap_str(name, &insn); },
  229. ebpf::BE => { name = "be"; desc = byteswap_str(name, &insn); },
  230. // BPF_ALU64 class
  231. ebpf::ADD64_IMM => { name = "add64"; desc = alu_imm_str(name, &insn); },
  232. ebpf::ADD64_REG => { name = "add64"; desc = alu_reg_str(name, &insn); },
  233. ebpf::SUB64_IMM => { name = "sub64"; desc = alu_imm_str(name, &insn); },
  234. ebpf::SUB64_REG => { name = "sub64"; desc = alu_reg_str(name, &insn); },
  235. ebpf::MUL64_IMM => { name = "mul64"; desc = alu_imm_str(name, &insn); },
  236. ebpf::MUL64_REG => { name = "mul64"; desc = alu_reg_str(name, &insn); },
  237. ebpf::DIV64_IMM => { name = "div64"; desc = alu_imm_str(name, &insn); },
  238. ebpf::DIV64_REG => { name = "div64"; desc = alu_reg_str(name, &insn); },
  239. ebpf::OR64_IMM => { name = "or64"; desc = alu_imm_str(name, &insn); },
  240. ebpf::OR64_REG => { name = "or64"; desc = alu_reg_str(name, &insn); },
  241. ebpf::AND64_IMM => { name = "and64"; desc = alu_imm_str(name, &insn); },
  242. ebpf::AND64_REG => { name = "and64"; desc = alu_reg_str(name, &insn); },
  243. ebpf::LSH64_IMM => { name = "lsh64"; desc = alu_imm_str(name, &insn); },
  244. ebpf::LSH64_REG => { name = "lsh64"; desc = alu_reg_str(name, &insn); },
  245. ebpf::RSH64_IMM => { name = "rsh64"; desc = alu_imm_str(name, &insn); },
  246. ebpf::RSH64_REG => { name = "rsh64"; desc = alu_reg_str(name, &insn); },
  247. ebpf::NEG64 => { name = "neg64"; desc = format!("{name} r{:}", insn.dst); },
  248. ebpf::MOD64_IMM => { name = "mod64"; desc = alu_imm_str(name, &insn); },
  249. ebpf::MOD64_REG => { name = "mod64"; desc = alu_reg_str(name, &insn); },
  250. ebpf::XOR64_IMM => { name = "xor64"; desc = alu_imm_str(name, &insn); },
  251. ebpf::XOR64_REG => { name = "xor64"; desc = alu_reg_str(name, &insn); },
  252. ebpf::MOV64_IMM => { name = "mov64"; desc = alu_imm_str(name, &insn); },
  253. ebpf::MOV64_REG => { name = "mov64"; desc = alu_reg_str(name, &insn); },
  254. ebpf::ARSH64_IMM => { name = "arsh64"; desc = alu_imm_str(name, &insn); },
  255. ebpf::ARSH64_REG => { name = "arsh64"; desc = alu_reg_str(name, &insn); },
  256. // BPF_JMP class
  257. ebpf::JA => { name = "ja"; desc = if insn.off >= 0 { format!("{name} +{:#x}", insn.off) } else { format!("{name} -{:#x}", -insn.off) } },
  258. ebpf::JEQ_IMM => { name = "jeq"; desc = jmp_imm_str(name, &insn); },
  259. ebpf::JEQ_REG => { name = "jeq"; desc = jmp_reg_str(name, &insn); },
  260. ebpf::JGT_IMM => { name = "jgt"; desc = jmp_imm_str(name, &insn); },
  261. ebpf::JGT_REG => { name = "jgt"; desc = jmp_reg_str(name, &insn); },
  262. ebpf::JGE_IMM => { name = "jge"; desc = jmp_imm_str(name, &insn); },
  263. ebpf::JGE_REG => { name = "jge"; desc = jmp_reg_str(name, &insn); },
  264. ebpf::JLT_IMM => { name = "jlt"; desc = jmp_imm_str(name, &insn); },
  265. ebpf::JLT_REG => { name = "jlt"; desc = jmp_reg_str(name, &insn); },
  266. ebpf::JLE_IMM => { name = "jle"; desc = jmp_imm_str(name, &insn); },
  267. ebpf::JLE_REG => { name = "jle"; desc = jmp_reg_str(name, &insn); },
  268. ebpf::JSET_IMM => { name = "jset"; desc = jmp_imm_str(name, &insn); },
  269. ebpf::JSET_REG => { name = "jset"; desc = jmp_reg_str(name, &insn); },
  270. ebpf::JNE_IMM => { name = "jne"; desc = jmp_imm_str(name, &insn); },
  271. ebpf::JNE_REG => { name = "jne"; desc = jmp_reg_str(name, &insn); },
  272. ebpf::JSGT_IMM => { name = "jsgt"; desc = jmp_imm_str(name, &insn); },
  273. ebpf::JSGT_REG => { name = "jsgt"; desc = jmp_reg_str(name, &insn); },
  274. ebpf::JSGE_IMM => { name = "jsge"; desc = jmp_imm_str(name, &insn); },
  275. ebpf::JSGE_REG => { name = "jsge"; desc = jmp_reg_str(name, &insn); },
  276. ebpf::JSLT_IMM => { name = "jslt"; desc = jmp_imm_str(name, &insn); },
  277. ebpf::JSLT_REG => { name = "jslt"; desc = jmp_reg_str(name, &insn); },
  278. ebpf::JSLE_IMM => { name = "jsle"; desc = jmp_imm_str(name, &insn); },
  279. ebpf::JSLE_REG => { name = "jsle"; desc = jmp_reg_str(name, &insn); },
  280. ebpf::CALL => { name = "call"; desc = format!("{name} {:#x}", insn.imm); },
  281. ebpf::TAIL_CALL => { name = "tail_call"; desc = name.to_string(); },
  282. ebpf::EXIT => { name = "exit"; desc = name.to_string(); },
  283. // BPF_JMP32 class
  284. ebpf::JEQ_IMM32 => { name = "jeq32"; desc = jmp_imm_str(name, &insn); },
  285. ebpf::JEQ_REG32 => { name = "jeq32"; desc = jmp_reg_str(name, &insn); },
  286. ebpf::JGT_IMM32 => { name = "jgt32"; desc = jmp_imm_str(name, &insn); },
  287. ebpf::JGT_REG32 => { name = "jgt32"; desc = jmp_reg_str(name, &insn); },
  288. ebpf::JGE_IMM32 => { name = "jge32"; desc = jmp_imm_str(name, &insn); },
  289. ebpf::JGE_REG32 => { name = "jge32"; desc = jmp_reg_str(name, &insn); },
  290. ebpf::JLT_IMM32 => { name = "jlt32"; desc = jmp_imm_str(name, &insn); },
  291. ebpf::JLT_REG32 => { name = "jlt32"; desc = jmp_reg_str(name, &insn); },
  292. ebpf::JLE_IMM32 => { name = "jle32"; desc = jmp_imm_str(name, &insn); },
  293. ebpf::JLE_REG32 => { name = "jle32"; desc = jmp_reg_str(name, &insn); },
  294. ebpf::JSET_IMM32 => { name = "jset32"; desc = jmp_imm_str(name, &insn); },
  295. ebpf::JSET_REG32 => { name = "jset32"; desc = jmp_reg_str(name, &insn); },
  296. ebpf::JNE_IMM32 => { name = "jne32"; desc = jmp_imm_str(name, &insn); },
  297. ebpf::JNE_REG32 => { name = "jne32"; desc = jmp_reg_str(name, &insn); },
  298. ebpf::JSGT_IMM32 => { name = "jsgt32"; desc = jmp_imm_str(name, &insn); },
  299. ebpf::JSGT_REG32 => { name = "jsgt32"; desc = jmp_reg_str(name, &insn); },
  300. ebpf::JSGE_IMM32 => { name = "jsge32"; desc = jmp_imm_str(name, &insn); },
  301. ebpf::JSGE_REG32 => { name = "jsge32"; desc = jmp_reg_str(name, &insn); },
  302. ebpf::JSLT_IMM32 => { name = "jslt32"; desc = jmp_imm_str(name, &insn); },
  303. ebpf::JSLT_REG32 => { name = "jslt32"; desc = jmp_reg_str(name, &insn); },
  304. ebpf::JSLE_IMM32 => { name = "jsle32"; desc = jmp_imm_str(name, &insn); },
  305. ebpf::JSLE_REG32 => { name = "jsle32"; desc = jmp_reg_str(name, &insn); },
  306. _ => {
  307. panic!("[Disassembler] Error: unknown eBPF opcode {:#2x} (insn #{:?})",
  308. insn.opc, insn_ptr);
  309. },
  310. };
  311. let hl_insn = HLInsn {
  312. opc: insn.opc,
  313. name: name.to_string(),
  314. desc,
  315. dst: insn.dst,
  316. src: insn.src,
  317. off: insn.off,
  318. imm,
  319. };
  320. res.push(hl_insn);
  321. insn_ptr += 1;
  322. };
  323. res
  324. }
  325. /// Disassemble an eBPF program into human-readable instructions and prints it to standard output.
  326. ///
  327. /// The program is not checked for errors or inconsistencies.
  328. ///
  329. /// # Examples
  330. ///
  331. /// ```
  332. /// use rbpf::disassembler;
  333. /// let prog = &[
  334. /// 0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
  335. /// 0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
  336. /// 0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  337. /// 0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
  338. /// 0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  339. /// 0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  340. /// ];
  341. /// disassembler::disassemble(prog);
  342. /// # // "\nadd64 r1, 0x605\nmov64 r2, 0x32\nmov64 r1, r0\nbe16 r0\nneg64 r2\nexit"
  343. /// ```
  344. ///
  345. /// This will produce the following output:
  346. ///
  347. /// ```test
  348. /// add64 r1, 0x605
  349. /// mov64 r2, 0x32
  350. /// mov64 r1, r0
  351. /// be16 r0
  352. /// neg64 r2
  353. /// exit
  354. /// ```
  355. pub fn disassemble(prog: &[u8]) {
  356. for insn in to_insn_vec(prog) {
  357. println!("{}", insn.desc);
  358. }
  359. }