From 01f2f3f6ef4d076c0c10a8a7b42624416d56b523 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Sat, 19 Jun 2010 17:05:36 +0000 Subject: net: optimize Berkeley Packet Filter (BPF) processing Gcc is currenlty not in the ability to optimize the switch statement in sk_run_filter() because of dense case labels. This patch replace the OR'd labels with ordered sequenced case labels. The sk_chk_filter() function is modified to patch/replace the original OPCODES in a ordered but equivalent form. gcc is now in the ability to transform the switch statement in sk_run_filter into a jump table of complexity O(1). Until this patch gcc generates a sequence of conditional branches (O(n) of 567 byte .text segment size (arch x86_64): 7ff: 8b 06 mov (%rsi),%eax 801: 66 83 f8 35 cmp $0x35,%ax 805: 0f 84 d0 02 00 00 je adb 80b: 0f 87 07 01 00 00 ja 918 811: 66 83 f8 15 cmp $0x15,%ax 815: 0f 84 c5 02 00 00 je ae0 81b: 77 73 ja 890 81d: 66 83 f8 04 cmp $0x4,%ax 821: 0f 84 17 02 00 00 je a3e 827: 77 29 ja 852 829: 66 83 f8 01 cmp $0x1,%ax [...] With the modification the compiler translate the switch statement into the following jump table fragment: 7ff: 66 83 3e 2c cmpw $0x2c,(%rsi) 803: 0f 87 1f 02 00 00 ja a28 809: 0f b7 06 movzwl (%rsi),%eax 80c: ff 24 c5 00 00 00 00 jmpq *0x0(,%rax,8) 813: 44 89 e3 mov %r12d,%ebx 816: e9 43 03 00 00 jmpq b5e 81b: 41 89 dc mov %ebx,%r12d 81e: e9 3b 03 00 00 jmpq b5e Furthermore, I reordered the instructions to reduce cache line misses by order the most common instruction to the start. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- include/linux/filter.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include/linux/filter.h') diff --git a/include/linux/filter.h b/include/linux/filter.h index 151f5d703b7e..69b43dbea6c6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -91,6 +91,54 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define BPF_TAX 0x00 #define BPF_TXA 0x80 +enum { + BPF_S_RET_K = 0, + BPF_S_RET_A, + BPF_S_ALU_ADD_K, + BPF_S_ALU_ADD_X, + BPF_S_ALU_SUB_K, + BPF_S_ALU_SUB_X, + BPF_S_ALU_MUL_K, + BPF_S_ALU_MUL_X, + BPF_S_ALU_DIV_X, + BPF_S_ALU_AND_K, + BPF_S_ALU_AND_X, + BPF_S_ALU_OR_K, + BPF_S_ALU_OR_X, + BPF_S_ALU_LSH_K, + BPF_S_ALU_LSH_X, + BPF_S_ALU_RSH_K, + BPF_S_ALU_RSH_X, + BPF_S_ALU_NEG, + BPF_S_LD_W_ABS, + BPF_S_LD_H_ABS, + BPF_S_LD_B_ABS, + BPF_S_LD_W_LEN, + BPF_S_LD_W_IND, + BPF_S_LD_H_IND, + BPF_S_LD_B_IND, + BPF_S_LD_IMM, + BPF_S_LDX_W_LEN, + BPF_S_LDX_B_MSH, + BPF_S_LDX_IMM, + BPF_S_MISC_TAX, + BPF_S_MISC_TXA, + BPF_S_ALU_DIV_K, + BPF_S_LD_MEM, + BPF_S_LDX_MEM, + BPF_S_ST, + BPF_S_STX, + BPF_S_JMP_JA, + BPF_S_JMP_JEQ_K, + BPF_S_JMP_JEQ_X, + BPF_S_JMP_JGE_K, + BPF_S_JMP_JGE_X, + BPF_S_JMP_JGT_K, + BPF_S_JMP_JGT_X, + BPF_S_JMP_JSET_K, + BPF_S_JMP_JSET_X, +}; + #ifndef BPF_MAXINSNS #define BPF_MAXINSNS 4096 #endif -- cgit v1.2.3