// Amnesia Security AES Implementation
// Based on code (c) Jari Ruusu 2004 and (c) Dr. Brian Gladman 2001
// Modifications (c) Patrick Simmons 2010

//This is designed to work with loop-aes
//
// Source format: GNU assembler, AT&T operand order (source, destination),
// passed through the C preprocessor (the file relies on #define / #ifdef).
//
// Exported entry points (C prototypes):

// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])

// Optional leading underscore on the exported names.
#if defined(USE_UNDERLINE)
# define aes_set_key _aes_set_key
# define aes_encrypt _aes_encrypt
# define aes_decrypt _aes_decrypt
#endif

// Alignment used for the code and for every lookup table in this file.
#if !defined(ALIGN64BYTES)
# define ALIGN64BYTES 64
#endif

    .file "aes-amnesia.S"
    .globl aes_set_key
    .globl aes_encrypt
    .globl aes_decrypt

    .section .rodata
// Licence text of the code this file is based on, stored in the object file as
// a sequence of NUL-terminated strings.
copyright:
    .ascii " \000"
    .ascii "Copyright (c) 2001, Dr Brian Gladman , Worcester, UK.\000"
    .ascii "All rights reserved.\000"
    .ascii " \000"
    .ascii "TERMS\000"
    .ascii " \000"
    .ascii " Redistribution and use in source and binary forms, with or without\000"
    .ascii " modification, are permitted subject to the following conditions:\000"
    .ascii " \000"
    .ascii " 1. Redistributions of source code must retain the above copyright\000"
    .ascii " notice, this list of conditions and the following disclaimer.\000"
    .ascii " \000"
    .ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
    .ascii " notice, this list of conditions and the following disclaimer in the\000"
    .ascii " documentation and/or other materials provided with the distribution.\000"
    .ascii " \000"
    .ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
    .ascii " any products derived from this software without his specific prior\000"
    .ascii " written permission.\000"
    .ascii " \000"
    .ascii " This software is provided 'as is' with no express or implied warranties\000"
    .ascii " of correctness or fitness for purpose.\000"
    .ascii " \000"

#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)

// offsets in context structure
// (in this file only the first 16 bytes at ekey and at dkey are ever accessed)
#define nkey 0 // key length, size 4
#define nrnd 4 // number of rounds, size 4
#define ekey 8 // encryption key schedule base address, size 256
#define dkey 264 // decryption key schedule base address, size 256

// Define DEBUG_HARNESS to build the test build: CLI is skipped and RDMSR/WRMSR
// are replaced by calls to the two harness routines below.
//#define DEBUG_HARNESS

#ifdef DEBUG_HARNESS
    .extern dummy_wrmsr
    .extern dummy_rdmsr

// harness_wrmsr: stand-in for WRMSR (MSR number in ECX, value in EDX:EAX).
// The routine currently starts with RET, so it returns immediately without
// touching any register; everything after that first RET is unreachable.
// The unreachable body saves the caller-saved registers, calls
// dummy_wrmsr with ECX, EDX, EAX as its first three integer arguments and
// restores the registers again.
harness_wrmsr:
    RET                     // stub: the code below is never executed
    PUSHQ %RAX
    PUSHQ %RCX
    PUSHQ %RDX
    PUSHQ %RSI
    PUSHQ %RDI
    PUSHQ %R8
    PUSHQ %R9
    PUSHQ %R10
    PUSHQ %R11
    MOVL %ECX, %EDI         // arg 1 = MSR number
    MOVL %EDX, %ESI         // arg 2 = high half of the value
    MOVL %EAX, %EDX         // arg 3 = low half of the value
    CALL dummy_wrmsr
    POPQ %R11
    POPQ %R10
    POPQ %R9
    POPQ %R8
    POPQ %RDI
    POPQ %RSI
    POPQ %RDX
    POPQ %RCX
    POPQ %RAX
    RET

// harness_rdmsr: stand-in for RDMSR (MSR number in ECX, result in EDX:EAX).
// Like harness_wrmsr it currently starts with RET, so EAX/EDX come back
// unchanged and the body below is unreachable.
// The unreachable body calls dummy_rdmsr with ECX as its first argument and
// splits the 64-bit value returned in RAX into EDX (high) and EAX (low).
harness_rdmsr:
    RET                     // stub: the code below is never executed
    PUSHQ %RCX
    PUSHQ %RSI
    PUSHQ %RDI
    PUSHQ %R8
    PUSHQ %R9
    PUSHQ %R10
    PUSHQ %R11
    MOVL %ECX, %EDI         // arg 1 = MSR number
    CALL dummy_rdmsr
    MOVQ %RAX, %RDX
    SHRQ $32, %RDX          // EDX = high 32 bits of the result
    ADDL $0, %EAX           // 32-bit write: clears the upper half of RAX
    POPQ %R11
    POPQ %R10
    POPQ %R9
    POPQ %R8
    POPQ %RDI
    POPQ %RSI
    POPQ %RCX
    RET
#endif

// This macro performs a forward encryption cycle. It is entered with
// the first previous round column values in I1E, I2E, I3E and I4E and
// exits with the final values OU1, OU2, OU3 and OU4 registers.
//Directly uses RDI, RSI, R8, and R13
//
// Parameters (all register operands are passed WITH their % prefix):
//   p1            base label of four consecutive lookup tables, tlen bytes each
//   I1E I1B I1H   input column 1: 32-bit name, low-byte name, second-byte name
//   I2E I2B I2H   input column 2: same three views
//   I3E I3B I3R   input column 3: 32-bit name, low-byte name, 64-bit name
//   I4E I4B I4R   input column 4: same three views
//   OU1..OU4      32-bit accumulators; on entry they hold the round key words
//
// Effect, writing Tk for the table at p1+k*tlen and X.bN for byte N of X:
//   OU1 ^= T0[I1.b0] ^ T1[I2.b1] ^ T2[I3.b2] ^ T3[I4.b3]
//   OU2 ^= T0[I2.b0] ^ T1[I3.b1] ^ T2[I4.b2] ^ T3[I1.b3]
//   OU3 ^= T0[I3.b0] ^ T1[I4.b1] ^ T2[I1.b2] ^ T3[I2.b3]
//   OU4 ^= T0[I4.b0] ^ T1[I1.b1] ^ T2[I2.b2] ^ T3[I3.b3]
// The four input registers are shifted right while their bytes are consumed,
// so their contents are destroyed. Columns 1 and 2 must live in registers that
// have a second-byte name (the callers pass %ah/%bh/%ch/%dh); columns 3 and 4
// are walked with shrl $8 instead and end up holding only their top byte,
// which is then used directly as a 64-bit index through I3R/I4R.
// Comments inside the macro body must be /* */ because the continued lines
// are spliced into one logical line before // comments are stripped.
#define fwd_rnd(p1,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \
    /* table 0: byte 0 of every column */ \
    movzbl I1B,%edi ;\
    movzbl I2B,%esi ;\
    movzbl I3B,%r8d ;\
    movzbl I4B,%r13d ;\
    shrl $8,I3E ;\
    shrl $8,I4E ;\
    xorl p1(,%rdi,4),OU1 ;\
    xorl p1(,%rsi,4),OU2 ;\
    xorl p1(,%r8,4),OU3 ;\
    xorl p1(,%r13,4),OU4 ;\
    /* table 1: byte 1 of every column */ \
    movzbl I2H,%esi ;\
    movzbl I3B,%r8d ;\
    movzbl I4B,%r13d ;\
    movzbl I1H,%edi ;\
    shrl $8,I3E ;\
    shrl $8,I4E ;\
    xorl p1+tlen(,%rsi,4),OU1 ;\
    xorl p1+tlen(,%r8,4),OU2 ;\
    xorl p1+tlen(,%r13,4),OU3 ;\
    xorl p1+tlen(,%rdi,4),OU4 ;\
    /* table 2: byte 2 of every column */ \
    shrl $16,I1E ;\
    shrl $16,I2E ;\
    movzbl I3B,%r8d ;\
    movzbl I4B,%r13d ;\
    movzbl I1B,%edi ;\
    movzbl I2B,%esi ;\
    xorl p1+2*tlen(,%r8,4),OU1 ;\
    xorl p1+2*tlen(,%r13,4),OU2 ;\
    xorl p1+2*tlen(,%rdi,4),OU3 ;\
    xorl p1+2*tlen(,%rsi,4),OU4 ;\
    /* table 3: byte 3 of every column */ \
    shrl $8,I4E ;\
    movzbl I1H,%edi ;\
    movzbl I2H,%esi ;\
    shrl $8,I3E ;\
    xorl p1+3*tlen(,I4R,4),OU1 ;\
    xorl p1+3*tlen(,%rdi,4),OU2 ;\
    xorl p1+3*tlen(,%rsi,4),OU3 ;\
    xorl p1+3*tlen(,I3R,4),OU4

// This macro performs an inverse encryption cycle. It is entered with
// the first previous round column values in I1E, I2E, I3E and I4E and
// exits with the final values OU1, OU2, OU3 and OU4 registers.
//Directly uses RDI, RSI, R8, and R13
//
// Parameters (register operands are passed WITH their % prefix):
//   p1            base label of four consecutive lookup tables, tlen bytes each
//   I1E I1B I1R   input column 1: 32-bit name, low-byte name, 64-bit name
//   I2E I2B I2R   input column 2: same three views
//   I3E I3B I3H   input column 3: 32-bit name, low-byte name, second-byte name
//   I4E I4B I4H   input column 4: same three views
//   OU1..OU4      32-bit accumulators; on entry they hold the round key words
//
// Effect, writing Tk for the table at p1+k*tlen and X.bN for byte N of X:
//   OU4 ^= T0[I4.b0] ^ T1[I3.b1] ^ T2[I2.b2] ^ T3[I1.b3]
//   OU3 ^= T0[I3.b0] ^ T1[I2.b1] ^ T2[I1.b2] ^ T3[I4.b3]
//   OU2 ^= T0[I2.b0] ^ T1[I1.b1] ^ T2[I4.b2] ^ T3[I3.b3]
//   OU1 ^= T0[I1.b0] ^ T1[I4.b1] ^ T2[I3.b2] ^ T3[I2.b3]
// This is fwd_rnd with the column order mirrored (1<->4, 2<->3); here columns
// 3 and 4 need second-byte register names and columns 1 and 2 are shifted.
// All four input registers are destroyed.
#define inv_rnd(p1,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \
    /* table 0: byte 0 of every column */ \
    movzbl I4B,%edi ;\
    movzbl I3B,%esi ;\
    movzbl I2B,%r8d ;\
    movzbl I1B,%r13d ;\
    shrl $8,I2E ;\
    shrl $8,I1E ;\
    xorl p1(,%rdi,4),OU4 ;\
    xorl p1(,%rsi,4),OU3 ;\
    xorl p1(,%r8,4),OU2 ;\
    xorl p1(,%r13,4),OU1 ;\
    /* table 1: byte 1 of every column */ \
    movzbl I3H,%esi ;\
    movzbl I2B,%r8d ;\
    movzbl I1B,%r13d ;\
    movzbl I4H,%edi ;\
    shrl $8,I2E ;\
    shrl $8,I1E ;\
    xorl p1+tlen(,%rsi,4),OU4 ;\
    xorl p1+tlen(,%r8,4),OU3 ;\
    xorl p1+tlen(,%r13,4),OU2 ;\
    xorl p1+tlen(,%rdi,4),OU1 ;\
    /* table 2: byte 2 of every column */ \
    shrl $16,I4E ;\
    shrl $16,I3E ;\
    movzbl I2B,%r8d ;\
    movzbl I1B,%r13d ;\
    movzbl I4B,%edi ;\
    movzbl I3B,%esi ;\
    xorl p1+2*tlen(,%r8,4),OU4 ;\
    xorl p1+2*tlen(,%r13,4),OU3 ;\
    xorl p1+2*tlen(,%rdi,4),OU2 ;\
    xorl p1+2*tlen(,%rsi,4),OU1 ;\
    /* table 3: byte 3 of every column */ \
    shrl $8,I1E ;\
    movzbl I4H,%edi ;\
    movzbl I3H,%esi ;\
    shrl $8,I2E ;\
    xorl p1+3*tlen(,I1R,4),OU4 ;\
    xorl p1+3*tlen(,%rdi,4),OU3 ;\
    xorl p1+3*tlen(,%rsi,4),OU2 ;\
    xorl p1+3*tlen(,I2R,4),OU1

//---------------

    .bss
// CPU vendor selector written by aes_set_key and read by aes_cipher:
// 0 = no master key generated yet, 1 = Intel MSR set, 2 = AMD MSR set.
    .lcomm intel_or_amd, 4

    .text
    .align ALIGN64BYTES

// mix_col: four-table lookup over the bytes of one 32-bit word.
// Parameters are register names WITHOUT the % prefix:
//   p1            base label of four consecutive tables, tlen bytes each
//   r4d r4l r4h   source word: 32-bit name, low-byte name, second-byte name
//   t1d           32-bit result register
//   t2d t2r       32-bit / 64-bit names of one scratch register (table index)
// Result: t1d = T0[r4.b0] ^ T1[r4.b1] ^ T2[r4.b2] ^ T3[r4.b3].
// Side effect: r4d is left rotated by 16 bits; t2 is clobbered.
#define mix_col(p1, r4d, r4l, r4h, t1d, t2d, t2r) \
    movzbl %r4l,%t2d ;\
    movl p1(,%t2r,4),%t1d ;\
    movzbl %r4h,%t2d ;\
    ror $16,%r4d /* bring bytes 2 and 3 into the low/second byte positions */ ;\
    xorl p1+tlen(,%t2r,4),%t1d ;\
    movzbl %r4l,%t2d ;\
    xorl p1+2*tlen(,%t2r,4),%t1d ;\
    movzbl %r4h,%t2d ;\
    xorl p1+3*tlen(,%t2r,4),%t1d

// Key Schedule Macros
//
// ksc4: advance a four-word round key by one round, in place.
// Parameters are register names WITHOUT the % prefix:
//   p1              index into aes_rcon_tab (callers pass 0..9)
//   r1d r2d r3d     first three words of the round key
//   r4d r4l r4h     fourth word; must have low-byte and second-byte names
//   t1d, t2d/t2r    scratch registers (clobbered)
// Steps: rotate r4 right by 8, run it through mix_col with aes_fl_tab, undo
// the rotation (rol 24, the ror 16 inside mix_col and ror 8 cancel out), XOR
// in aes_rcon_tab[p1], then chain r1 ^= t1, r2 ^= r1, r3 ^= r2, r4 ^= r3.
#define ksc4(p1, r1d, r2d, r3d, r4d, r4l, r4h, t1d, t2d, t2r) \
    rol $24,%r4d ;\
    mix_col(aes_fl_tab, r4d, r4l, r4h, t1d, t2d, t2r) ;\
    ror $8,%r4d ;\
    xorl 4*p1+aes_rcon_tab,%t1d ;\
    xorl %t1d,%r1d ;\
    xorl %r1d,%r2d ;\
    xorl %r2d,%r3d ;\
    xorl %r3d,%r4d

// This macro generates the previous forward round key given the current one
// and the round number of the PREVIOUS round.
// To use this for decryption, you still need to do the
// inverse mix column operation.
// T3D and T3B must be at least tier 2 registers.
//
// Parameters are register names WITHOUT the % prefix:
//   P1                index into aes_rcon_tab of the round being recovered
//   R1D R1B           word 1 of the current round key and its low-byte name
//   R2D R2B           word 2 (R2B is not referenced by the macro body)
//   R3D R3L R3H       word 3: 32-bit, low-byte and second-byte names
//   R4D R4L R4H       word 4: 32-bit, low-byte and second-byte names
//   T1D T1B           scratch; accumulates the recovered word 1
//   T2D T2B T2R       scratch; table index
//   T3D T3B           scratch
// Word 1 is rebuilt one byte at a time: each step forms one byte of
// (word 4 XOR word 3), looks it up in the first aes_fl_tab table, XORs the
// matching byte of word 1 into it and merges it into T1D. T1D is rotated left
// by 8 after every merge, so its low byte is zero when the next byte is added
// and ADDL acts as a plain merge. R1D, R3D and R4D are rotated along the way
// and are back in their original orientation before the final XOR chain.
// The empty "; \" lines only separate the four byte steps.
#define inv4(P1, R1D, R1B, R2D, R2B, R3D, R3L, R3H, R4D, R4L, R4H, T1D, T1B, T2D, T2B, T2R, T3D, T3B) \
    MOVZBL %R4H, %T2D /*B1*/; \
    MOVZBL %R3H, %T3D ; \
    XORB %T3B, %T2B ; \
    MOVL aes_fl_tab(,%T2R,4), %T1D ; \
    XORL 4*P1+aes_rcon_tab, %T1D ; \
    XORB %R1B, %T1B ; \
    ROLL $8, %T1D ; \
    ; \
    ROLL $8, %R1D /*B4*/ ; \
    MOVB %R4L, %T2B ; \
    XORB %R3L, %T2B ; \
    MOVL aes_fl_tab(,%T2R,4), %T2D ; \
    XORB %R1B, %T2B ; \
    ADDL %T2D, %T1D ; \
    ROLL $8, %T1D ; \
    ; \
    ROLL $8, %R1D /*B3*/ ; \
    ROLL $16, %R3D ; \
    ROLL $16, %R4D ; \
    MOVZBL %R4H, %T2D ; \
    MOVZBL %R3H, %T3D ; \
    XORB %T3B, %T2B ; \
    MOVL aes_fl_tab(,%T2R,4), %T2D ; \
    XORB %R1B, %T2B ; \
    ADDL %T2D, %T1D ; \
    ROLL $8, %T1D ; \
    ; \
    ROLL $8, %R1D /*B2*/ ; \
    MOVB %R4L, %T2B ; \
    XORB %R3L, %T2B ; \
    MOVL aes_fl_tab(,%T2R,4), %T2D ; \
    XORB %R1B, %T2B ; \
    ADDL %T2D, %T1D ; \
    ROLL $8, %T1D ; \
    ; \
    ROLL $8, %R1D ; \
    ROLL $16, %R3D ; \
    ROLL $16, %R4D ; \
    /*Take care of the trailing three bytes*/ \
    /*Must be in this order due to dependencies.*/ \
    XORL %R3D, %R4D ; \
    XORL %R2D, %R3D ; \
    XORL %R1D, %R2D ; \
    /*Overwrite high input word with high output word, and we're done.*/ \
    MOVL %T1D, %R1D

// backup_key: pack four 32-bit key words into two 64-bit registers.
// All parameters are 64-bit register names WITHOUT the % prefix.
//   r5 = (r1 << 32) + r2        r6 = (r3 << 32) + r4
// The upper halves of r2 and r4 must already be zero for the words to stay
// separate; at every call site in this file they were last written by 32-bit
// instructions, which clear the upper half.
#define backup_key(r1, r2, r3, r4, r5, r6) \
    MOVQ %r1, %r5 ;\
    MOVQ %r3, %r6 ;\
    SALQ $32, %r5 ;\
    SALQ $32, %r6 ;\
    ADDQ %r2, %r5 ;\
    ADDQ %r4, %r6

// restore_key: unpack what backup_key stored.
//   r1d/r1, r2d/r2   32-bit and 64-bit names of the two packed registers
//   r3d r4d r5d r6d  destinations for key words 1, 2, 3 and 4
// The packed registers are shifted right by 32 and therefore destroyed.
#define restore_key(r1d, r1, r2d, r2, r3d, r4d, r5d, r6d) \
    MOVL %r1d, %r4d ;\
    MOVL %r2d, %r6d ;\
    SHRQ $32, %r1 ;\
    SHRQ $32, %r2 ;\
    MOVL %r1d, %r3d ;\
    MOVL %r2d, %r5d

#ifdef DEBUG_HARNESS
    .globl test_ksc4
    .globl test_inv4

// Test entry points for the two key schedule macros.
// The four key words arrive in EDI, ESI, EDX, ECX and R8 points at a 16-byte
// output buffer that receives the four transformed words.
//RDI,RSI,RDX,RCX,R8
test_ksc4:
    ksc4(0,EDI,ESI,EDX,ECX,CL,CH,R9D,EAX,RAX)
    MOVL %EDI, (%R8)
    MOVL %ESI, 4(%R8)
    MOVL %EDX, 8(%R8)
    MOVL %ECX, 12(%R8)
    RET

test_inv4:
    PUSHQ %RBX              // EBX/BL serve as the T3 scratch register below
    inv4(0,EDI,DIL,ESI,SIL,EDX,DL,DH,ECX,CL,CH,R9D,R9B,EAX,AL,RAX,EBX,BL)
    POPQ %RBX
    MOVL %EDI, (%R8)
    MOVL %ESI, 4(%R8)
    MOVL %EDX, 8(%R8)
    MOVL %ECX, 12(%R8)
    RET
#endif

//Called with key in EBX EDX R14D R15D
// plaintext in EAX ECX R10D R11D
//Consumes plaintext and outputs ciphertext to EAX ECX R10D R11D
//Uses temporary registers RDI,
//   RSI, R8, R13, R9, R12
//
// aes_encrypt_internal runs ten rounds over a four-word key entirely in
// registers: each round key is derived with ksc4 immediately before it is
// used, so no expanded key schedule is ever stored. The only memory reads are
// the lookup tables (aes_ft_tab, aes_fl_tab, aes_rcon_tab).
//
// Register rotation per round:
//   * ksc4 needs key word 4 in a register with low/second-byte names, hence
//     the XCHG around it (even rounds) or after it (odd rounds).
//   * fwd_rnd XORs the table values INTO the round key registers, so the key
//     is first packed into R9/R12 (backup_key) and unpacked afterwards into
//     the registers the consumed state just vacated (restore_key).
//   * State and key therefore swap between the register sets
//     EAX/ECX/R10D/R11D and EBX/EDX/R14D/R15D on every round.
aes_encrypt_internal:
    //Simultaneous key expansion and encryption
    //Input plaintext: EAX ECX R10D R11D
    //Input key: EBX EDX R14D R15D (input key is OU1 ... OU4)
    //Registers used by fwd_rnd: RDI, RSI, R8, and R13
    //Registers used by ksc4: (four registers of previous round key)
    // (two temporary registers)

    //XOR in first round key
    XORL %EBX, %EAX
    XORL %EDX, %ECX
    XORL %R14D, %R10D
    XORL %R15D, %R11D

    // round key 1
    XCHG %EDX, %R15D
    ksc4(0, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    XCHG %EDX, %R15D
    backup_key(RBX, RDX, R14, R15, R9, R12)
    fwd_rnd(aes_ft_tab,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)

    // round key 2
    restore_key(R9D, R9, R12D, R12, EAX, R11D, R10D, ECX)
    ksc4(1, EAX, R11D, R10D, ECX, CL, CH, EDI, ESI, RSI)
    XCHG %ECX, %R11D
    backup_key(RAX, RCX, R10, R11, R9, R12)
    fwd_rnd(aes_ft_tab, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)

    // round key 3
    restore_key(R9D, R9, R12D, R12, EBX, R15D, R14D, EDX)
    ksc4(2, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    XCHG %EDX, %R15D
    backup_key(RBX, RDX, R14, R15, R9, R12)
    fwd_rnd(aes_ft_tab,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)

    // round key 4
    restore_key(R9D, R9, R12D, R12, EAX, R11D, R10D, ECX)
    ksc4(3, EAX, R11D, R10D, ECX, CL, CH, EDI, ESI, RSI)
    XCHG %ECX, %R11D
    backup_key(RAX, RCX, R10, R11, R9, R12)
    fwd_rnd(aes_ft_tab, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)

    // round key 5
    restore_key(R9D, R9, R12D, R12, EBX, R15D, R14D, EDX)
    ksc4(4, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    XCHG %EDX, %R15D
    backup_key(RBX, RDX, R14, R15, R9, R12)
    fwd_rnd(aes_ft_tab,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)

    // round key 6
    restore_key(R9D, R9, R12D, R12, EAX, R11D, R10D, ECX)
    ksc4(5, EAX, R11D, R10D, ECX, CL, CH, EDI, ESI, RSI)
    XCHG %ECX, %R11D
    backup_key(RAX, RCX, R10, R11, R9, R12)
    fwd_rnd(aes_ft_tab, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)

    // round key 7
    restore_key(R9D, R9, R12D, R12, EBX, R15D, R14D, EDX)
    ksc4(6, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    XCHG %EDX, %R15D
    backup_key(RBX, RDX, R14, R15, R9, R12)
    fwd_rnd(aes_ft_tab,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)

    // round key 8
    restore_key(R9D, R9, R12D, R12, EAX, R11D, R10D, ECX)
    ksc4(7, EAX, R11D, R10D, ECX, CL, CH, EDI, ESI, RSI)
    XCHG %ECX, %R11D
    backup_key(RAX, RCX, R10, R11, R9, R12)
    fwd_rnd(aes_ft_tab, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)

    // round key 9
    restore_key(R9D, R9, R12D, R12, EBX, R15D, R14D, EDX)
    ksc4(8, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    XCHG %EDX, %R15D
    backup_key(RBX, RDX, R14, R15, R9, R12)
    fwd_rnd(aes_ft_tab,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)

    // round key 10: last round uses aes_fl_tab and needs no key backup
    restore_key(R9D, R9, R12D, R12, EAX, R11D, R10D, ECX)
    ksc4(9, EAX, R11D, R10D, ECX, CL, CH, EDI, ESI, RSI)
    XCHG %ECX, %R11D
    fwd_rnd(aes_fl_tab, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
    RET

//Called with key in EBX EDX R14D R15D
// ciphertext in EAX ECX R10D R11D
//Consumes ciphertext and outputs plaintext to EAX ECX R10D R11D
//Uses temporary registers RDI, RSI, R8, R13, R9, R12
//
// The key passed in is the LAST round key of the forward schedule; inv4 walks
// the schedule backwards one round at a time. Before each of the first nine
// inverse rounds the recovered round key is run word by word through mix_col
// with aes_im_tab (the inverse mix column step the inv4 comment asks for);
// the final round uses the recovered round-0 key unchanged with aes_il_tab.
// mix_col needs registers with second-byte names, so two key words are
// swapped into EAX/ECX (or EBX/EDX) around the four mix_col calls while the
// state words that lived there are parked in the freed key registers.
aes_decrypt_internal:
    //Simultaneous key expansion and encryption
    //Input ciphertext: EAX ECX R10D R11D
    //Input key: EBX EDX R14D R15D (input key is OU1 ... OU4)
    //Registers used by inv_rnd: RDI, RSI, R8, and R13
    //Registers used by inv4: (four registers of previous round key)
    // (two temporary registers)

    //Swap input registers
    // afterwards: state words 1..4 in R11D R10D ECX EAX,
    //             key words   1..4 in R15D R14D EDX EBX
    XCHG %R11D, %EAX
    XCHG %R10D, %ECX
    XCHG %EBX, %R15D
    XCHG %EDX, %R14D

    //XOR in first round key
    XORL %R15D, %R11D
    XORL %R14D, %R10D
    XORL %EDX, %ECX
    XORL %EBX, %EAX

    // recover round key 9
    inv4(9,R15D,R15B,R14D,R14B,EDX,DL,DH,EBX,BL,BH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R15, R14, RDX, RBX, R9, R12)
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    inv_rnd(aes_it_tab,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)

    // recover round key 8
    restore_key(R9D,R9,R12D,R12,R11D,R10D,ECX,EAX)
    inv4(8,R11D,R11B,R10D,R10B,ECX,CL,CH,EAX,AL,AH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R11,R10,RCX,RAX,R9,R12)
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    inv_rnd(aes_it_tab,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)

    // recover round key 7
    restore_key(R9D,R9,R12D,R12,R15D,R14D,EDX,EBX)
    inv4(7,R15D,R15B,R14D,R14B,EDX,DL,DH,EBX,BL,BH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R15, R14, RDX, RBX, R9, R12)
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    inv_rnd(aes_it_tab,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)

    // recover round key 6
    restore_key(R9D,R9,R12D,R12,R11D,R10D,ECX,EAX)
    inv4(6,R11D,R11B,R10D,R10B,ECX,CL,CH,EAX,AL,AH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R11,R10,RCX,RAX,R9,R12)
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    inv_rnd(aes_it_tab,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)

    // recover round key 5
    restore_key(R9D,R9,R12D,R12,R15D,R14D,EDX,EBX)
    inv4(5,R15D,R15B,R14D,R14B,EDX,DL,DH,EBX,BL,BH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R15, R14, RDX, RBX, R9, R12)
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    inv_rnd(aes_it_tab,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)

    // recover round key 4
    restore_key(R9D,R9,R12D,R12,R11D,R10D,ECX,EAX)
    inv4(4,R11D,R11B,R10D,R10B,ECX,CL,CH,EAX,AL,AH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R11,R10,RCX,RAX,R9,R12)
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    inv_rnd(aes_it_tab,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)

    // recover round key 3
    restore_key(R9D,R9,R12D,R12,R15D,R14D,EDX,EBX)
    inv4(3,R15D,R15B,R14D,R14B,EDX,DL,DH,EBX,BL,BH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R15, R14, RDX, RBX, R9, R12)
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    inv_rnd(aes_it_tab,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)

    // recover round key 2
    restore_key(R9D,R9,R12D,R12,R11D,R10D,ECX,EAX)
    inv4(2,R11D,R11B,R10D,R10B,ECX,CL,CH,EAX,AL,AH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R11,R10,RCX,RAX,R9,R12)
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R11D, %EBX
    XCHG %R10D, %EDX
    inv_rnd(aes_it_tab,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)

    // recover round key 1
    restore_key(R9D,R9,R12D,R12,R15D,R14D,EDX,EBX)
    inv4(1,R15D,R15B,R14D,R14B,EDX,DL,DH,EBX,BL,BH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    backup_key(R15, R14, RDX, RBX, R9, R12)
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    mix_col(aes_im_tab,EAX,AL,AH,EDI,ESI,RSI)
    MOVL %EDI, %EAX
    mix_col(aes_im_tab,EBX,BL,BH,EDI,ESI,RSI)
    MOVL %EDI, %EBX
    mix_col(aes_im_tab,ECX,CL,CH,EDI,ESI,RSI)
    MOVL %EDI, %ECX
    mix_col(aes_im_tab,EDX,DL,DH,EDI,ESI,RSI)
    MOVL %EDI, %EDX
    XCHG %R15D, %EAX
    XCHG %R14D, %ECX
    inv_rnd(aes_it_tab,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)

    // recover round key 0: used as-is (no mix_col, no backup) with aes_il_tab
    restore_key(R9D,R9,R12D,R12,R11D,R10D,ECX,EAX)
    inv4(0,R11D,R11B,R10D,R10B,ECX,CL,CH,EAX,AL,AH,R8D,R8B,ESI,SIL,RSI,EDI,DIL)
    inv_rnd(aes_il_tab,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)

    //Swap order of ciphertext
    // (result words 1..4 move from R11D R10D ECX EAX to EAX ECX R10D R11D)
    XCHG %R11D, %EAX
    XCHG %R10D, %ECX
    RET

// Public wrapper: selects the forward cipher and the 16 bytes stored at
// cx+ekey, then hands over to aes_cipher. RBP carries the routine address.
aes_encrypt:
    PUSHQ %RBP
    LEAQ (aes_encrypt_internal), %RBP
    LEAQ ekey(%RDI), %RDI
    CALL aes_cipher
    POPQ %RBP
    RET

// Public wrapper: selects the inverse cipher and the 16 bytes stored at
// cx+dkey, then hands over to aes_cipher. RBP carries the routine address.
aes_decrypt:
    PUSHQ %RBP
    LEAQ (aes_decrypt_internal), %RBP
    LEAQ dkey(%RDI), %RDI
    CALL aes_cipher
    POPQ %RBP
    RET

//void aes_encrypt/aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
//
//   cx arrives in rdi, in_blk in rsi and out_blk in rdx
// in_blk and out_blk are both 16-byte buffers

// MSR access indirection. With DEBUG_HARNESS defined the privileged
// instructions are replaced by calls to the harness routines of this file.
#ifdef DEBUG_HARNESS
# define MSR_READ  CALL harness_rdmsr
# define MSR_WRITE CALL harness_wrmsr
#else
# define MSR_READ  RDMSR
# define MSR_WRITE WRMSR
#endif

// Zero every caller-saved general purpose register. The 32-bit forms clear
// the full 64-bit registers. Must run before interrupts are re-enabled so no
// key material is left behind in registers the caller is allowed to ignore.
#define scrub_volatile_regs \
    XORL %EAX, %EAX ;\
    XORL %ECX, %ECX ;\
    XORL %EDX, %EDX ;\
    XORL %R8D, %R8D ;\
    XORL %R9D, %R9D ;\
    XORL %R10D, %R10D ;\
    XORL %R11D, %R11D ;\
    XORL %EDI, %EDI ;\
    XORL %ESI, %ESI

//-----------------------------------------------------------------------
// aes_cipher - common back end of aes_encrypt / aes_decrypt
// In:    rdi = address of the 16 wrapped key bytes (cx+ekey or cx+dkey,
//              computed by the wrappers)
//        rsi = in_blk, rdx = out_blk
//        rbp = address of aes_encrypt_internal or aes_decrypt_internal
// Steps: read the master key from four MSRs, unwrap the stored context key
//        with aes_encrypt_internal, run the routine in RBP over the input
//        block, store the result, scrub registers, restore flags.
// Interrupts are disabled (CLI) for the whole routine unless DEBUG_HARNESS.
//-----------------------------------------------------------------------
aes_cipher:
    //Push rFLAGS on stack
    PUSHFQ

    //Push all callee-saved registers except RBP (which was pushed by our caller and contains an argument we care about). Optimize this later.
    PUSHQ %RBX
    PUSHQ %R12
    PUSHQ %R13
    PUSHQ %R14
    PUSHQ %R15

    //Also push RSI and RDX since they contain arguments we care about
    PUSHQ %RDX
    PUSHQ %RSI

    //Disable interrupts.
#ifndef DEBUG_HARNESS
    CLI
#endif

    //Copy the secret XOR quantity into a register.
    //We need to switch on whether we are an Intel or AMD processor.
    MOVL (intel_or_amd), %R9D
    SUBL $1, %R9D
    JE use_intel

    //We are AMD: C0010004 C0010005 C0010006 C0010007
    MOVL $0xC0010004, %ECX
    MSR_READ
    MOVL %EAX, %EBX
    MOVL $0xC0010005, %ECX
    MSR_READ
    MOVL %EAX, %R12D        // parked in R12D: the MSR read overwrites EDX
    MOVL $0xC0010006, %ECX
    MSR_READ
    MOVL %EAX, %R14D
    MOVL $0xC0010007, %ECX
    MSR_READ
    MOVL %EAX, %R15D
    JMP master_copied

use_intel:
    //We are Intel: C1 C2 309 30A
    MOVL $0xC1, %ECX
    MSR_READ
    MOVL %EAX, %EBX
    MOVL $0xC2, %ECX
    MSR_READ
    MOVL %EAX, %R12D        // parked in R12D: the MSR read overwrites EDX
    MOVL $0x309, %ECX
    MSR_READ
    MOVL %EAX, %R14D
    MOVL $0x30A, %ECX
    MSR_READ
    MOVL %EAX, %R15D

master_copied:
    //Shuffle master key into proper registers
    MOVL %R12D, %EDX

    //Load encrypted key value
    MOVL (%RDI), %EAX
    MOVL 4(%RDI), %ECX
    MOVL 8(%RDI), %R10D
    MOVL 12(%RDI), %R11D

    //Decrypt key (use encryption function because it's faster)
    // aes_set_key wrapped the key with aes_decrypt_internal, so the forward
    // routine is the one that undoes it.
    CALL aes_encrypt_internal

    //Get our input plaintext pointer back off the stack
    POPQ %RSI

    //Shuffle decrypted key into appropriate registers
    MOVL %EAX, %EBX
    MOVL %ECX, %EDX
    MOVL %R10D, %R14D
    MOVL %R11D, %R15D

    //Read in plaintext
    MOVL (%RSI), %EAX
    MOVL 4(%RSI), %ECX
    MOVL 8(%RSI), %R10D
    MOVL 12(%RSI), %R11D

    //Do actual encryption or decryption
    // AT&T syntax marks an indirect call target with '*'.
    CALL *%RBP

    //Get our output ciphertext pointer back off the stack
    POPQ %RDX

    //Output ciphertext
    MOVL %EAX, (%RDX)
    MOVL %ECX, 4(%RDX)
    MOVL %R10D, 8(%RDX)
    MOVL %R11D, 12(%RDX)

    //Pop callee-save registers
    POPQ %R15
    POPQ %R14
    POPQ %R13
    POPQ %R12
    POPQ %RBX

    //Zero the caller-saved registers!!! We MUST do this before enabling interrupts,
    //or we risk leaking information!
    //Optimize this later.
    scrub_volatile_regs

    //Pop the flags, which will (probably) reenable interrupts
    POPFQ

    //Return to caller
    RET

    .extern get_random_bytes

//-----------------------------------------------------------------------
// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
// rdi = pointer to AES context
// rsi = pointer to key bytes
// rdx = key length, bytes or bits
// rcx = ed_flag, 1=encrypt only, 0=both encrypt and decrypt
//
// On the first call (intel_or_amd == 0) a random 16-byte master key is
// generated and written to four vendor-specific MSRs; later calls read it
// back from those MSRs. The caller's key is then stored in the context in
// wrapped form only:
//   cx+ekey = round-0 key     processed by aes_decrypt_internal
//   cx+dkey = 10th round key  processed by aes_decrypt_internal
// both under the 10th round key of the master key schedule.
//
// NOTE(review): rdx and rcx are never read; exactly 16 bytes are always
// taken from key[] and the nkey/nrnd context fields are never written.
// NOTE(review): MM0/MM1 are used as scratch storage without saving FPU/MMX
// state and without EMMS - confirm the calling context tolerates that.
//-----------------------------------------------------------------------
    .align ALIGN64BYTES
aes_set_key:
    //Push flags, disable interrupts
    PUSHFQ
#ifndef DEBUG_HARNESS
    CLI
#endif

    //Key-setting routine here
    //Return value of a function call stored in %RAX

    //Initial register pushes
    XORL %EAX, %EAX
    XORL %EDX, %EDX
    PUSHQ %RBX
    PUSHQ %R12
    PUSHQ %R13
    PUSHQ %R14
    PUSHQ %R15
    PUSHQ %RDI              // cx, popped again once the wrapped ekey is ready
    PUSHQ %RSI              // key pointer, popped before the first wrap

    //See if we are the very first keyset method called
    MOVL (intel_or_amd), %EAX
    TESTL %EAX, %EAX
    JE create_master_key

    //Determine whether we are Intel or AMD
    //Key in registers: EBX EDX R14D R15D
    SUBL $1, %EAX
    JE load_intel

    //We are AMD: C0010004 C0010005 C0010006 C0010007
    MOVL $0xC0010004, %ECX
    MSR_READ
    MOVL %EAX, %EBX
    MOVL $0xC0010005, %ECX
    MSR_READ
    MOVL %EAX, %R8D         // parked in R8D: the MSR read overwrites EDX
    MOVL $0xC0010006, %ECX
    MSR_READ
    MOVL %EAX, %R14D
    MOVL $0xC0010007, %ECX
    MSR_READ
    MOVL %EAX, %R15D
    MOVL %R8D, %EDX
    JMP set_key_vendor_independent

load_intel:
    //We are Intel: C1 C2 309 30A
    MOVL $0xC1, %ECX
    MSR_READ
    MOVL %EAX, %EBX
    MOVL $0xC2, %ECX
    MSR_READ
    MOVL %EAX, %R8D         // parked in R8D: the MSR read overwrites EDX
    MOVL $0x309, %ECX
    MSR_READ
    MOVL %EAX, %R14D
    MOVL $0x30A, %ECX
    MSR_READ
    MOVL %EAX, %R15D
    MOVL %R8D, %EDX
    JMP set_key_vendor_independent

create_master_key:
    //Use the cryptographically secure get_random_bytes to get 16 bytes of true random data
    SUBQ $16, %RSP
    MOVQ %RSP, %RDI
    MOVQ $16, %RSI
    CALL get_random_bytes
    // The buffer stays allocated until all four words have been loaded and
    // wiped: memory below RSP may be overwritten at any time, for instance
    // by the return address of the harness calls in the DEBUG_HARNESS build.
    // Each vendor branch releases it after its last MSR write.
    MOVQ %RSP, %RDI

    //We need to get the CPU ID string
    XORL %EAX, %EAX
    CPUID
    RORL $8, %EBX
    SHRL $24, %EBX          // EBX = first character of the vendor string
    CMPL $0x47, %EBX        // 0x47 = 'G'
    JE set_intel

    //We are AMD: C0010004 C0010005 C0010006 C0010007
    MOVL $2, intel_or_amd
    XORL %EDX, %EDX
    MOVL (%RDI), %EAX
    MOVL %EAX, %EBX
    MOVL $0, (%RDI)         // wipe each random word as soon as it is loaded
    MOVL $0xC0010004, %ECX
    MSR_WRITE
    XORL %EDX, %EDX
    MOVL 4(%RDI), %EAX
    MOVL %EAX, %R8D
    MOVL $0, 4(%RDI)
    MOVL $0xC0010005, %ECX
    MSR_WRITE
    XORL %EDX, %EDX
    MOVL 8(%RDI), %EAX
    MOVL %EAX, %R14D
    MOVL $0, 8(%RDI)
    MOVL $0xC0010006, %ECX
    MSR_WRITE
    XORL %EDX, %EDX
    MOVL 12(%RDI), %EAX
    MOVL %EAX, %R15D
    MOVL $0, 12(%RDI)
    MOVL $0xC0010007, %ECX
    MSR_WRITE
    ADDQ $16, %RSP          // release the (already wiped) random buffer
    MOVL %R8D, %EDX
    JMP set_key_vendor_independent

set_intel:
    //We are Intel: C1 C2 309 30A
    MOVL $1, intel_or_amd
    XORL %EDX, %EDX
    MOVL (%RDI), %EAX
    MOVL %EAX, %EBX
    MOVL $0, (%RDI)         // wipe each random word as soon as it is loaded
    MOVL $0xC1, %ECX
    MSR_WRITE
    XORL %EDX, %EDX
    MOVL 4(%RDI), %EAX
    MOVL %EAX, %R8D
    MOVL $0, 4(%RDI)
    MOVL $0xC2, %ECX
    MSR_WRITE
    XORL %EDX, %EDX
    MOVL 8(%RDI), %EAX
    MOVL %EAX, %R14D
    MOVL $0, 8(%RDI)
    MOVL $0x309, %ECX
    MSR_WRITE
    XORL %EDX, %EDX
    MOVL 12(%RDI), %EAX
    MOVL %EAX, %R15D
    MOVL $0, 12(%RDI)
    MOVL $0x30A, %ECX
    MSR_WRITE
    ADDQ $16, %RSP          // release the (already wiped) random buffer
    MOVL %R8D, %EDX

set_key_vendor_independent:
    //We now need to "decrypt" the context keys.
    //Run the master key through ksc4
    // (master key words 1..4 are in EBX EDX R14D R15D here)
    XCHG %EDX, %R15D
    ksc4(0, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(1, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(2, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(3, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(4, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(5, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(6, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(7, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(8, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    ksc4(9, EBX, R15D, R14D, EDX, DL, DH, EDI, ESI, RSI)
    XCHG %EDX, %R15D

    //Back the 10th round master key up
    // aes_decrypt_internal uses R9 and R12 itself, so the packed copy is
    // kept in MM0/MM1 across the first call.
    backup_key(RBX, RDX, R14, R15, R9, R12)
    MOVQ %R9, %MM0
    MOVQ %R12, %MM1

    //Read zero-round context key into appropriate registers
    POPQ %RSI
    MOVL (%RSI), %EAX
    MOVL 4(%RSI), %ECX
    MOVL 8(%RSI), %R10D
    MOVL 12(%RSI), %R11D

    //Call decryption routine
    PUSHQ %RSI
    CALL aes_decrypt_internal
    POPQ %RSI

    //Re-read zero-round context key into available registers
    MOVL (%RSI), %EBX
    MOVL 4(%RSI), %R15D
    MOVL 8(%RSI), %R14D
    MOVL 12(%RSI), %EDX

    //Write encrypted zero-round context key
    POPQ %RDI
    LEAQ ekey(%RDI), %RSI
    MOVL %EAX, (%RSI)
    MOVL %ECX, 4(%RSI)
    MOVL %R10D, 8(%RSI)
    MOVL %R11D, 12(%RSI)

    //Calculate pointer to 10th-round context key, push to stack
    LEAQ dkey(%RDI), %RSI
    PUSHQ %RSI

    //Calculate 10th-round context key
    ksc4(0,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(1,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(2,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(3,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(4,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(5,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(6,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(7,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(8,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)
    ksc4(9,EBX,R15D,R14D,EDX,DL,DH,EAX,ECX,RCX)

    //Move 10th-round context key into appropriate registers
    MOVL %EBX, %EAX
    MOVL %R15D, %ECX
    MOVL %R14D, %R10D
    MOVL %EDX, %R11D

    //Restore 10th round master key
    MOVQ %MM0, %R9
    MOVQ %MM1, %R12
    XORQ %RBX, %RBX
    MOVQ %RBX, %MM0 //kill the master key from MMX regs
    MOVQ %RBX, %MM1 //kill the master key from MMx regs
    restore_key(R9D,R9,R12D,R12,EBX,EDX,R14D,R15D)

    //Wrap the 10th-round context key (same routine as for the zero-round key)
    CALL aes_decrypt_internal

    //Restore dkey pointer
    POPQ %RSI
    MOVL %EAX, (%RSI)
    MOVL %ECX, 4(%RSI)
    MOVL %R10D, 8(%RSI)
    MOVL %R11D, 12(%RSI)

    //Restore callee-save registers
    POPQ %R15
    POPQ %R14
    POPQ %R13
    POPQ %R12
    POPQ %RBX

    //Zero the caller-saved registers before the flags (and with them the
    //interrupt enable bit) come back: R9 and friends still hold master key
    //schedule words left behind by aes_decrypt_internal.
    scrub_volatile_regs

    POPFQ
    RET

// finite field multiplies by {02}, {04} and {08}
#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))

// finite field multiplies required in table generation
#define f3(x) (f2(x) ^ x)
#define f9(x) (f8(x) ^ x)
#define fb(x) (f8(x) ^ f2(x) ^ x)
#define fd(x) (f8(x) ^ f4(x) ^ x)
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))

// These defines generate the forward table entries
#define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
#define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
#define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
#define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)

// These defines generate the inverse table entries
#define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
#define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
#define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
#define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))

// These defines generate entries for the last round tables
#define w0(x) (x)
#define w1(x) (x << 8)
#define w2(x) (x << 16)
#define w3(x) (x << 24)

// macro to generate inverse mix column tables (needed for the key schedule)
#define im_data0(p1) \
    .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
    .long \
p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
    .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
    .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
// im_data0 .. im_data7 each emit 32 .long entries: the generator macro p1
// applied to the byte values 0x00..0xff in ascending order, 32 per macro.
#define im_data1(p1) \
    .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
    .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
    .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
    .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
#define im_data2(p1) \
    .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
    .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
    .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
    .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
#define im_data3(p1) \
    .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
    .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
    .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
    .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
#define im_data4(p1) \
    .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
    .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
    .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
    .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
#define im_data5(p1) \
    .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
    .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
    .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
    .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
#define im_data6(p1) \
    .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
    .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
    .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
    .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
#define im_data7(p1) \
    .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
    .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
    .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
    .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)

// S-box data - 256 entries
// sb_data0 .. sb_data7 each emit 32 .long entries: the generator macro p1
// applied to 32 consecutive S-box values.
#define sb_data0(p1) \
    .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
    .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
    .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
    .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
#define sb_data1(p1) \
    .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
    .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
    .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
    .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
#define sb_data2(p1) \
    .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
    .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
    .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
    .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
#define sb_data3(p1) \
    .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
    .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
    .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
    .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
#define sb_data4(p1) \
    .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
    .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
    .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
    .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
#define sb_data5(p1) \
    .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
    .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
    .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
    .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
#define sb_data6(p1) \
    .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
    .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
    .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
    .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
#define sb_data7(p1) \
    .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
    .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
    .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
    .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)

// Inverse S-box data - 256 entries
// ib_data0 .. ib_data7: same layout as sb_data*, over the inverse S-box.
#define ib_data0(p1) \
    .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
    .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
    .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
    .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
#define ib_data1(p1) \
    .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
    .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
    .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
    .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
#define ib_data2(p1) \
    .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
    .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
    .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
    .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
#define ib_data3(p1) \
    .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
    .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
    .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
    .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
#define ib_data4(p1) \
    .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
    .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
    .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
    .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
#define ib_data5(p1) \
    .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
    .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
    .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
    .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
#define ib_data6(p1) \
    .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
    .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
    .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
    .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
#define ib_data7(p1) \
    .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
    .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
    .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
    .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)

// The rcon_table (needed for the key schedule)
//
// Here is original Dr Brian Gladman's source code:
// _rcon_tab:
// %assign x 1
// %rep 29
// dd x
// %assign x f2(x)
// %endrep
//
// Here is precomputed output (it's more portable this way):
//
// 29 entries of 4 bytes each; the key schedule macros index it as
// 4*round+aes_rcon_tab.
    .section .rodata
    .align ALIGN64BYTES
aes_rcon_tab:
    .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
    .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
    .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
    .long 0xb3,0x7d,0xfa,0xef,0xc5

// The forward xor tables
// Four consecutive 256-entry tables (generators u0..u3 over the S-box), each
// tlen bytes long; the round macros address table k as aes_ft_tab+k*tlen.
// Every invocation must stay on its own line: the expansion has no trailing
// statement separator.
    .align ALIGN64BYTES
aes_ft_tab:
    sb_data0(u0)
    sb_data1(u0)
    sb_data2(u0)
    sb_data3(u0)
    sb_data4(u0)
    sb_data5(u0)
    sb_data6(u0)
    sb_data7(u0)

    sb_data0(u1)
    sb_data1(u1)
    sb_data2(u1)
    sb_data3(u1)
    sb_data4(u1)
    sb_data5(u1)
    sb_data6(u1)
    sb_data7(u1)

    sb_data0(u2)
    sb_data1(u2)
    sb_data2(u2)
    sb_data3(u2)
    sb_data4(u2)
    sb_data5(u2)
    sb_data6(u2)
    sb_data7(u2)

    sb_data0(u3)
    sb_data1(u3)
    sb_data2(u3)
    sb_data3(u3)
    sb_data4(u3)
    sb_data5(u3)
    sb_data6(u3)
    sb_data7(u3)

// Last-round forward tables: the plain S-box value placed in byte 0, 1, 2 or
// 3 of the word (generators w0..w3). Also used by ksc4 and inv4.
    .align ALIGN64BYTES
aes_fl_tab:
    sb_data0(w0)
    sb_data1(w0)
    sb_data2(w0)
    sb_data3(w0)
    sb_data4(w0)
    sb_data5(w0)
    sb_data6(w0)
    sb_data7(w0)

    sb_data0(w1)
    sb_data1(w1)
    sb_data2(w1)
    sb_data3(w1)
    sb_data4(w1)
    sb_data5(w1)
    sb_data6(w1)
    sb_data7(w1)

    sb_data0(w2)
    sb_data1(w2)
    sb_data2(w2)
    sb_data3(w2)
    sb_data4(w2)
    sb_data5(w2)
    sb_data6(w2)
    sb_data7(w2)

    sb_data0(w3)
    sb_data1(w3)
    sb_data2(w3)
    sb_data3(w3)
    sb_data4(w3)
    sb_data5(w3)
    sb_data6(w3)
    sb_data7(w3)

// The inverse xor tables
    .align ALIGN64BYTES
aes_it_tab:
    ib_data0(v0)
    ib_data1(v0)
    ib_data2(v0)
    ib_data3(v0)
    ib_data4(v0)
    ib_data5(v0)
    ib_data6(v0)
    ib_data7(v0)

    ib_data0(v1)
    ib_data1(v1)
    ib_data2(v1)
    ib_data3(v1)
    ib_data4(v1)
    ib_data5(v1)
    ib_data6(v1)
    ib_data7(v1)

    ib_data0(v2)
    ib_data1(v2)
ib_data2(v2) ib_data3(v2) ib_data4(v2) ib_data5(v2) ib_data6(v2) ib_data7(v2) ib_data0(v3) ib_data1(v3) ib_data2(v3) ib_data3(v3) ib_data4(v3) ib_data5(v3) ib_data6(v3) ib_data7(v3) .align ALIGN64BYTES aes_il_tab: ib_data0(w0) ib_data1(w0) ib_data2(w0) ib_data3(w0) ib_data4(w0) ib_data5(w0) ib_data6(w0) ib_data7(w0) ib_data0(w1) ib_data1(w1) ib_data2(w1) ib_data3(w1) ib_data4(w1) ib_data5(w1) ib_data6(w1) ib_data7(w1) ib_data0(w2) ib_data1(w2) ib_data2(w2) ib_data3(w2) ib_data4(w2) ib_data5(w2) ib_data6(w2) ib_data7(w2) ib_data0(w3) ib_data1(w3) ib_data2(w3) ib_data3(w3) ib_data4(w3) ib_data5(w3) ib_data6(w3) ib_data7(w3) // The inverse mix column tables .align ALIGN64BYTES aes_im_tab: im_data0(v0) im_data1(v0) im_data2(v0) im_data3(v0) im_data4(v0) im_data5(v0) im_data6(v0) im_data7(v0) im_data0(v1) im_data1(v1) im_data2(v1) im_data3(v1) im_data4(v1) im_data5(v1) im_data6(v1) im_data7(v1) im_data0(v2) im_data1(v2) im_data2(v2) im_data3(v2) im_data4(v2) im_data5(v2) im_data6(v2) im_data7(v2) im_data0(v3) im_data1(v3) im_data2(v3) im_data3(v3) im_data4(v3) im_data5(v3) im_data6(v3) im_data7(v3) #if defined(__ELF__) && defined(SECTION_NOTE_GNU_STACK) .section .note.GNU-stack,"",@progbits #endif