+++ /dev/null
-#! /usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# This code is taken from CRYPTOGAMS[1] and is included here using the option
-# in the license to distribute the code under the GPL. Therefore this program
-# is free software; you can redistribute it and/or modify it under the terms of
-# the GNU General Public License version 2 as published by the Free Software
-# Foundation.
-#
-# [1] https://www.openssl.org/~appro/cryptogams/
-
-# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-#       * Redistributions of source code must retain copyright notices,
-#         this list of conditions and the following disclaimer.
-#
-#       * Redistributions in binary form must reproduce the above
-#         copyright notice, this list of conditions and the following
-#         disclaimer in the documentation and/or other materials
-#         provided with the distribution.
-#
-#       * Neither the name of the CRYPTOGAMS nor the names of its
-#         copyright holder and contributors may be used to endorse or
-#         promote products derived from this software without specific
-#         prior written permission.
-#
-# ALTERNATIVELY, provided that this notice is retained in full, this
-# product may be distributed under the terms of the GNU General Public
-# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
-# those given above.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see https://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# This module implements support for the AES instructions introduced in
-# PowerISA specification version 2.07, first implemented by the POWER8
-# processor. The module is endian-agnostic in the sense that it supports
-# both big- and little-endian cases. Data alignment in parallelizable
-# modes is handled with VSX loads and stores, which implies the MSR.VSX
-# flag being set. It should also be noted that the ISA specification
-# doesn't prohibit alignment exceptions for these instructions on page
-# boundaries. Initially alignment was handled in the pure AltiVec/VMX
-# way [data is aligned programmatically, which in turn guarantees
-# exception-free execution], but that turned out to hamper performance
-# when vcipher instructions are interleaved. It's reckoned that the
-# eventual misalignment penalties at page boundaries are on average
-# lower than the additional overhead of the pure AltiVec approach.
-#
-# May 2016
-#
-# Added an XTS subroutine; a 9x improvement on little- and a 12x
-# improvement on big-endian systems was measured.
-#
-######################################################################
-# Current large-block performance in cycles per byte processed with
-# 128-bit key (less is better).
-#
-#              CBC en-/decrypt CTR     XTS
-# POWER8[le]   3.96/0.72       0.74    1.1
-# POWER8[be]   3.75/0.65       0.66    1.0
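-#
-# For scale, at 0.74 cycles per byte a single 4 GHz core moves roughly
-# 5.4 GB/s in CTR mode. The CBC encrypt column stands apart because
-# CBC encryption is inherently serial - each block's input depends on
-# the previous ciphertext - so it cannot use the 8x parallel path.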
-
-$flavour = shift;
-
-if ($flavour =~ /64/) {
-       $SIZE_T =8;
-       $LRSAVE =2*$SIZE_T;
-       $STU    ="stdu";
-       $POP    ="ld";
-       $PUSH   ="std";
-       $UCMP   ="cmpld";
-       $SHL    ="sldi";
-} elsif ($flavour =~ /32/) {
-       $SIZE_T =4;
-       $LRSAVE =$SIZE_T;
-       $STU    ="stwu";
-       $POP    ="lwz";
-       $PUSH   ="stw";
-       $UCMP   ="cmplw";
-       $SHL    ="slwi";
-} else { die "nonsense $flavour"; }
-
-$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
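-
-# Assembly is written to STDOUT and piped through ppc-xlate.pl, which
-# translates the perlasm into real assembler syntax for the requested
-# flavour; the second command-line argument is handed to the translator
-# as the output file. A typical invocation (names illustrative) is:
-#
-#      perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.S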
-
-$FRAME=8*$SIZE_T;
-$prefix="aes_p8";
-
-$sp="r1";
-$vrsave="r12";
-
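-# Notation used throughout: instructions prefixed with "le?" are
-# emitted only for little-endian flavours and "be?" ones only for
-# big-endian, while a bare "?" marks endian-sensitive instructions
-# that ppc-xlate.pl rewrites on little-endian targets (for instance,
-# ?lvsl is assembled as lvsr and ?vperm has its source operands
-# swapped); the assembler itself never sees these prefixes.
-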
-#########################################################################
-{{{    # Key setup procedures                                          #
-my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
-my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
-my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
-
-$code.=<<___;
-.machine       "any"
-
-.text
-
-.align 7
-rcon:
-.long  0x01000000, 0x01000000, 0x01000000, 0x01000000  ?rev
-.long  0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000  ?rev
-.long  0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c  ?rev
-.long  0,0,0,0                                         ?asis
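-# Lconsts returns the address of the rcon table above in $ptr:
-# "bcl 20,31,\$+4" is the branch-and-link-to-next-instruction idiom
-# for reading the program counter, and rcon lies 0x48 bytes (four
-# 16-byte rows of constants plus the mflr/bcl pair) before the
-# address that lands in LR.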
-Lconsts:
-       mflr    r0
-       bcl     20,31,\$+4
-       mflr    $ptr
-       addi    $ptr,$ptr,-0x48         # -0x48 = distance between . and rcon
-       mtlr    r0
-       blr
-       .long   0
-       .byte   0,12,0x14,0,0,0,0,0
-.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-
-.globl .${prefix}_set_encrypt_key
-Lset_encrypt_key:
-       mflr            r11
-       $PUSH           r11,$LRSAVE($sp)
-
-       li              $ptr,-1
-       ${UCMP}i        $inp,0
-       beq-            Lenc_key_abort          # if ($inp==0) return -1;
-       ${UCMP}i        $out,0
-       beq-            Lenc_key_abort          # if ($out==0) return -1;
-       li              $ptr,-2
-       cmpwi           $bits,128
-       blt-            Lenc_key_abort
-       cmpwi           $bits,256
-       bgt-            Lenc_key_abort
-       andi.           r0,$bits,0x3f
-       bne-            Lenc_key_abort
-
-       lis             r0,0xfff0
-       mfspr           $vrsave,256
-       mtspr           256,r0
-
-       bl              Lconsts
-       mtlr            r11
-
-       neg             r9,$inp
-       lvx             $in0,0,$inp
-       addi            $inp,$inp,15            # 15 is not typo
-       lvsr            $key,0,r9               # borrow $key
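-                                               # (lvx ignores the low four
-                                               # address bits, so the loads at
-                                               # $inp and $inp+15 fetch the two
-                                               # quadwords spanning an unaligned
-                                               # block; lvsr on -$inp yields the
-                                               # permute that stitches them
-                                               # back together)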
-       li              r8,0x20
-       cmpwi           $bits,192
-       lvx             $in1,0,$inp
-       le?vspltisb     $mask,0x0f              # borrow $mask
-       lvx             $rcon,0,$ptr
-       le?vxor         $key,$key,$mask         # adjust for byte swap
-       lvx             $mask,r8,$ptr
-       addi            $ptr,$ptr,0x10
-       vperm           $in0,$in0,$in1,$key     # align [and byte swap in LE]
-       li              $cnt,8
-       vxor            $zero,$zero,$zero
-       mtctr           $cnt
-
-       ?lvsr           $outperm,0,$out
-       vspltisb        $outmask,-1
-       lvx             $outhead,0,$out
-       ?vperm          $outmask,$zero,$outmask,$outperm
-
-       blt             Loop128
-       addi            $inp,$inp,8
-       beq             L192
-       addi            $inp,$inp,8
-       b               L256
-
-.align 4
-Loop128:
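-       # $mask splats the rotated last word of $in0 into all four words;
-       # with every column equal, ShiftRows is a no-op, so vcipherlast
-       # with $rcon computes SubWord(RotWord(w)) ^ rcon in one instruction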
-       vperm           $key,$in0,$in0,$mask    # rotate-n-splat
-       vsldoi          $tmp,$zero,$in0,12      # >>32
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-       vcipherlast     $key,$key,$rcon
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-        vadduwm        $rcon,$rcon,$rcon
-       vxor            $in0,$in0,$key
-       bdnz            Loop128
-
-       lvx             $rcon,0,$ptr            # last two round keys
-
-       vperm           $key,$in0,$in0,$mask    # rotate-n-splat
-       vsldoi          $tmp,$zero,$in0,12      # >>32
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-       vcipherlast     $key,$key,$rcon
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-        vadduwm        $rcon,$rcon,$rcon
-       vxor            $in0,$in0,$key
-
-       vperm           $key,$in0,$in0,$mask    # rotate-n-splat
-       vsldoi          $tmp,$zero,$in0,12      # >>32
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-       vcipherlast     $key,$key,$rcon
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-       vxor            $in0,$in0,$key
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-        stvx           $stage,0,$out
-
-       addi            $inp,$out,15            # 15 is not typo
-       addi            $out,$out,0x50
-
-       li              $rounds,10
-       b               Ldone
-
-.align 4
-L192:
-       lvx             $tmp,0,$inp
-       li              $cnt,4
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-       vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
-       vspltisb        $key,8                  # borrow $key
-       mtctr           $cnt
-       vsububm         $mask,$mask,$key        # adjust the mask
-
-Loop192:
-       vperm           $key,$in1,$in1,$mask    # rotate-n-splat
-       vsldoi          $tmp,$zero,$in0,12      # >>32
-       vcipherlast     $key,$key,$rcon
-
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-
-        vsldoi         $stage,$zero,$in1,8
-       vspltw          $tmp,$in0,3
-       vxor            $tmp,$tmp,$in1
-       vsldoi          $in1,$zero,$in1,12      # >>32
-        vadduwm        $rcon,$rcon,$rcon
-       vxor            $in1,$in1,$tmp
-       vxor            $in0,$in0,$key
-       vxor            $in1,$in1,$key
-        vsldoi         $stage,$stage,$in0,8
-
-       vperm           $key,$in1,$in1,$mask    # rotate-n-splat
-       vsldoi          $tmp,$zero,$in0,12      # >>32
-        vperm          $outtail,$stage,$stage,$outperm # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-       vcipherlast     $key,$key,$rcon
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-
-        vsldoi         $stage,$in0,$in1,8
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-        vperm          $outtail,$stage,$stage,$outperm # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-
-       vspltw          $tmp,$in0,3
-       vxor            $tmp,$tmp,$in1
-       vsldoi          $in1,$zero,$in1,12      # >>32
-        vadduwm        $rcon,$rcon,$rcon
-       vxor            $in1,$in1,$tmp
-       vxor            $in0,$in0,$key
-       vxor            $in1,$in1,$key
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-        stvx           $stage,0,$out
-        addi           $inp,$out,15            # 15 is not typo
-        addi           $out,$out,16
-       bdnz            Loop192
-
-       li              $rounds,12
-       addi            $out,$out,0x20
-       b               Ldone
-
-.align 4
-L256:
-       lvx             $tmp,0,$inp
-       li              $cnt,7
-       li              $rounds,14
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-       vperm           $in1,$in1,$tmp,$key     # align [and byte swap in LE]
-       mtctr           $cnt
-
-Loop256:
-       vperm           $key,$in1,$in1,$mask    # rotate-n-splat
-       vsldoi          $tmp,$zero,$in0,12      # >>32
-        vperm          $outtail,$in1,$in1,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-       vcipherlast     $key,$key,$rcon
-        stvx           $stage,0,$out
-        addi           $out,$out,16
-
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in0,$in0,$tmp
-        vadduwm        $rcon,$rcon,$rcon
-       vxor            $in0,$in0,$key
-        vperm          $outtail,$in0,$in0,$outperm     # rotate
-        vsel           $stage,$outhead,$outtail,$outmask
-        vmr            $outhead,$outtail
-        stvx           $stage,0,$out
-        addi           $inp,$out,15            # 15 is not typo
-        addi           $out,$out,16
-       bdz             Ldone
-
-       vspltw          $key,$in0,3             # just splat
-       vsldoi          $tmp,$zero,$in1,12      # >>32
-       vsbox           $key,$key
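-                                               # (vsbox applies SubBytes alone:
-                                               # the 256-bit schedule uses the
-                                               # S-box without RotWord on this
-                                               # word)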
-
-       vxor            $in1,$in1,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in1,$in1,$tmp
-       vsldoi          $tmp,$zero,$tmp,12      # >>32
-       vxor            $in1,$in1,$tmp
-
-       vxor            $in1,$in1,$key
-       b               Loop256
-
-.align 4
-Ldone:
-       lvx             $in1,0,$inp             # redundant in aligned case
-       vsel            $in1,$outhead,$in1,$outmask
-       stvx            $in1,0,$inp
-       li              $ptr,0
-       mtspr           256,$vrsave
-       stw             $rounds,0($out)
-
-Lenc_key_abort:
-       mr              r3,$ptr
-       blr
-       .long           0
-       .byte           0,12,0x14,1,0,0,3,0
-       .long           0
-.size  .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
-
-.globl .${prefix}_set_decrypt_key
-       $STU            $sp,-$FRAME($sp)
-       mflr            r10
-       $PUSH           r10,$FRAME+$LRSAVE($sp)
-       bl              Lset_encrypt_key
-       mtlr            r10
-
-       cmpwi           r3,0
-       bne-            Ldec_key_abort
-
-       slwi            $cnt,$rounds,4
-       subi            $inp,$out,240           # first round key
-       srwi            $rounds,$rounds,1
-       add             $out,$inp,$cnt          # last round key
-       mtctr           $rounds
-
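-       # The decryption schedule is the encryption schedule with its
-       # 16-byte round keys reversed end-for-end; the loop below swaps
-       # one pair per iteration, from both ends toward the middle.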
-Ldeckey:
-       lwz             r0, 0($inp)
-       lwz             r6, 4($inp)
-       lwz             r7, 8($inp)
-       lwz             r8, 12($inp)
-       addi            $inp,$inp,16
-       lwz             r9, 0($out)
-       lwz             r10,4($out)
-       lwz             r11,8($out)
-       lwz             r12,12($out)
-       stw             r0, 0($out)
-       stw             r6, 4($out)
-       stw             r7, 8($out)
-       stw             r8, 12($out)
-       subi            $out,$out,16
-       stw             r9, -16($inp)
-       stw             r10,-12($inp)
-       stw             r11,-8($inp)
-       stw             r12,-4($inp)
-       bdnz            Ldeckey
-
-       xor             r3,r3,r3                # return value
-Ldec_key_abort:
-       addi            $sp,$sp,$FRAME
-       blr
-       .long           0
-       .byte           0,12,4,1,0x80,0,3,0
-       .long           0
-.size  .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
-___
-}}}
-#########################################################################
-{{{    # Single block en- and decrypt procedures                       #
-sub gen_block () {
-my $dir = shift;
-my $n   = $dir eq "de" ? "n" : "";     # "n" selects vncipher/vncipherlast
-my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
-
-$code.=<<___;
-.globl .${prefix}_${dir}crypt
-       lwz             $rounds,240($key)
-       lis             r0,0xfc00
-       mfspr           $vrsave,256
-       li              $idx,15                 # 15 is not typo
-       mtspr           256,r0
-
-       lvx             v0,0,$inp
-       neg             r11,$out
-       lvx             v1,$idx,$inp
-       lvsl            v2,0,$inp               # inpperm
-       le?vspltisb     v4,0x0f
-       ?lvsl           v3,0,r11                # outperm
-       le?vxor         v2,v2,v4
-       li              $idx,16
-       vperm           v0,v0,v1,v2             # align [and byte swap in LE]
-       lvx             v1,0,$key
-       ?lvsl           v5,0,$key               # keyperm
-       srwi            $rounds,$rounds,1
-       lvx             v2,$idx,$key
-       addi            $idx,$idx,16
-       subi            $rounds,$rounds,1
-       ?vperm          v1,v1,v2,v5             # align round key
-
-       vxor            v0,v0,v1
-       lvx             v1,$idx,$key
-       addi            $idx,$idx,16
-       mtctr           $rounds
-
-Loop_${dir}c:
-       ?vperm          v2,v2,v1,v5
-       v${n}cipher     v0,v0,v2
-       lvx             v2,$idx,$key
-       addi            $idx,$idx,16
-       ?vperm          v1,v1,v2,v5
-       v${n}cipher     v0,v0,v1
-       lvx             v1,$idx,$key
-       addi            $idx,$idx,16
-       bdnz            Loop_${dir}c
-
-       ?vperm          v2,v2,v1,v5
-       v${n}cipher     v0,v0,v2
-       lvx             v2,$idx,$key
-       ?vperm          v1,v1,v2,v5
-       v${n}cipherlast v0,v0,v1
-
-       vspltisb        v2,-1
-       vxor            v1,v1,v1
-       li              $idx,15                 # 15 is not typo
-       ?vperm          v2,v1,v2,v3             # outmask
-       le?vxor         v3,v3,v4
-       lvx             v1,0,$out               # outhead
-       vperm           v0,v0,v0,v3             # rotate [and byte swap in LE]
-       vsel            v1,v1,v0,v2
-       lvx             v4,$idx,$out
-       stvx            v1,0,$out
-       vsel            v0,v0,v4,v2
-       stvx            v0,$idx,$out
-
-       mtspr           256,$vrsave
-       blr
-       .long           0
-       .byte           0,12,0x14,0,0,0,3,0
-       .long           0
-.size  .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
-___
-}
-&gen_block("en");
-&gen_block("de");
-}}}
-#########################################################################
-{{{    # CBC en- and decrypt procedures                                #
-my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)=            map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
-                                               map("v$_",(4..10));
-$code.=<<___;
-.globl .${prefix}_cbc_encrypt
-       ${UCMP}i        $len,16
-       bltlr-
-
-       cmpwi           $enc,0                  # test direction
-       lis             r0,0xffe0
-       mfspr           $vrsave,256
-       mtspr           256,r0
-
-       li              $idx,15
-       vxor            $rndkey0,$rndkey0,$rndkey0
-       le?vspltisb     $tmp,0x0f
-
-       lvx             $ivec,0,$ivp            # load [unaligned] iv
-       lvsl            $inpperm,0,$ivp
-       lvx             $inptail,$idx,$ivp
-       le?vxor         $inpperm,$inpperm,$tmp
-       vperm           $ivec,$ivec,$inptail,$inpperm
-
-       neg             r11,$inp
-       ?lvsl           $keyperm,0,$key         # prepare for unaligned key
-       lwz             $rounds,240($key)
-
-       lvsr            $inpperm,0,r11          # prepare for unaligned load
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,15            # 15 is not typo
-       le?vxor         $inpperm,$inpperm,$tmp
-
-       ?lvsr           $outperm,0,$out         # prepare for unaligned store
-       vspltisb        $outmask,-1
-       lvx             $outhead,0,$out
-       ?vperm          $outmask,$rndkey0,$outmask,$outperm
-       le?vxor         $outperm,$outperm,$tmp
-
-       srwi            $rounds,$rounds,1
-       li              $idx,16
-       subi            $rounds,$rounds,1
-       beq             Lcbc_dec
-
-Lcbc_enc:
-       vmr             $inout,$inptail
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,16
-       mtctr           $rounds
-       subi            $len,$len,16            # len-=16
-
-       lvx             $rndkey0,0,$key
-        vperm          $inout,$inout,$inptail,$inpperm
-       lvx             $rndkey1,$idx,$key
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key
-       addi            $idx,$idx,16
-       vxor            $inout,$inout,$ivec
-
-Loop_cbc_enc:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipher         $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key
-       addi            $idx,$idx,16
-       bdnz            Loop_cbc_enc
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key
-       li              $idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipherlast     $ivec,$inout,$rndkey0
-       ${UCMP}i        $len,16
-
-       vperm           $tmp,$ivec,$ivec,$outperm
-       vsel            $inout,$outhead,$tmp,$outmask
-       vmr             $outhead,$tmp
-       stvx            $inout,0,$out
-       addi            $out,$out,16
-       bge             Lcbc_enc
-
-       b               Lcbc_done
-
-.align 4
-Lcbc_dec:
-       ${UCMP}i        $len,128
-       bge             _aesp8_cbc_decrypt8x
-       vmr             $tmp,$inptail
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,16
-       mtctr           $rounds
-       subi            $len,$len,16            # len-=16
-
-       lvx             $rndkey0,0,$key
-        vperm          $tmp,$tmp,$inptail,$inpperm
-       lvx             $rndkey1,$idx,$key
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $inout,$tmp,$rndkey0
-       lvx             $rndkey0,$idx,$key
-       addi            $idx,$idx,16
-
-Loop_cbc_dec:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vncipher        $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vncipher        $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key
-       addi            $idx,$idx,16
-       bdnz            Loop_cbc_dec
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vncipher        $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key
-       li              $idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vncipherlast    $inout,$inout,$rndkey0
-       ${UCMP}i        $len,16
-
-       vxor            $inout,$inout,$ivec
-       vmr             $ivec,$tmp
-       vperm           $tmp,$inout,$inout,$outperm
-       vsel            $inout,$outhead,$tmp,$outmask
-       vmr             $outhead,$tmp
-       stvx            $inout,0,$out
-       addi            $out,$out,16
-       bge             Lcbc_dec
-
-Lcbc_done:
-       addi            $out,$out,-1
-       lvx             $inout,0,$out           # redundant in aligned case
-       vsel            $inout,$outhead,$inout,$outmask
-       stvx            $inout,0,$out
-
-       neg             $enc,$ivp               # write [unaligned] iv
-       li              $idx,15                 # 15 is not typo
-       vxor            $rndkey0,$rndkey0,$rndkey0
-       vspltisb        $outmask,-1
-       le?vspltisb     $tmp,0x0f
-       ?lvsl           $outperm,0,$enc
-       ?vperm          $outmask,$rndkey0,$outmask,$outperm
-       le?vxor         $outperm,$outperm,$tmp
-       lvx             $outhead,0,$ivp
-       vperm           $ivec,$ivec,$ivec,$outperm
-       vsel            $inout,$outhead,$ivec,$outmask
-       lvx             $inptail,$idx,$ivp
-       stvx            $inout,0,$ivp
-       vsel            $inout,$ivec,$inptail,$outmask
-       stvx            $inout,$idx,$ivp
-
-       mtspr           256,$vrsave
-       blr
-       .long           0
-       .byte           0,12,0x14,0,0,0,6,0
-       .long           0
-___
-#########################################################################
-{{     # Optimized CBC decrypt procedure                               #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
-my $rndkey0="v23";     # v24-v25 rotating buffer for first found keys
-                       # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4);        # aliases with "caller", redundant assignment
-
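-# Frame layout of the 8x path: the 8*16-byte area at $sp+$FRAME
-# (16-byte aligned via the +15 bias and lvx/stvx address truncation)
-# holds the off-loaded middle round keys; the next 12*16 bytes save
-# v20-v31 as the ABI requires; vrsave is stored at $FRAME+21*16-4 and
-# r26-r31 are pushed above that.
-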
-$code.=<<___;
-.align 5
-_aesp8_cbc_decrypt8x:
-       $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
-       li              r10,`$FRAME+8*16+15`
-       li              r11,`$FRAME+8*16+31`
-       stvx            v20,r10,$sp             # ABI says so
-       addi            r10,r10,32
-       stvx            v21,r11,$sp
-       addi            r11,r11,32
-       stvx            v22,r10,$sp
-       addi            r10,r10,32
-       stvx            v23,r11,$sp
-       addi            r11,r11,32
-       stvx            v24,r10,$sp
-       addi            r10,r10,32
-       stvx            v25,r11,$sp
-       addi            r11,r11,32
-       stvx            v26,r10,$sp
-       addi            r10,r10,32
-       stvx            v27,r11,$sp
-       addi            r11,r11,32
-       stvx            v28,r10,$sp
-       addi            r10,r10,32
-       stvx            v29,r11,$sp
-       addi            r11,r11,32
-       stvx            v30,r10,$sp
-       stvx            v31,r11,$sp
-       li              r0,-1
-       stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
-       li              $x10,0x10
-       $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       li              $x20,0x20
-       $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       li              $x30,0x30
-       $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       li              $x40,0x40
-       $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       li              $x50,0x50
-       $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       li              $x60,0x60
-       $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       li              $x70,0x70
-       mtspr           256,r0
-
-       subi            $rounds,$rounds,3       # -4 in total
-       subi            $len,$len,128           # bias
-
-       lvx             $rndkey0,$x00,$key      # load key schedule
-       lvx             v30,$x10,$key
-       addi            $key,$key,0x20
-       lvx             v31,$x00,$key
-       ?vperm          $rndkey0,$rndkey0,v30,$keyperm
-       addi            $key_,$sp,$FRAME+15
-       mtctr           $rounds
-
-Load_cbc_dec_key:
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v30,$x10,$key
-       addi            $key,$key,0x20
-       stvx            v24,$x00,$key_          # off-load round[1]
-       ?vperm          v25,v31,v30,$keyperm
-       lvx             v31,$x00,$key
-       stvx            v25,$x10,$key_          # off-load round[2]
-       addi            $key_,$key_,0x20
-       bdnz            Load_cbc_dec_key
-
-       lvx             v26,$x10,$key
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v27,$x20,$key
-       stvx            v24,$x00,$key_          # off-load round[3]
-       ?vperm          v25,v31,v26,$keyperm
-       lvx             v28,$x30,$key
-       stvx            v25,$x10,$key_          # off-load round[4]
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       ?vperm          v26,v26,v27,$keyperm
-       lvx             v29,$x40,$key
-       ?vperm          v27,v27,v28,$keyperm
-       lvx             v30,$x50,$key
-       ?vperm          v28,v28,v29,$keyperm
-       lvx             v31,$x60,$key
-       ?vperm          v29,v29,v30,$keyperm
-       lvx             $out0,$x70,$key         # borrow $out0
-       ?vperm          v30,v30,v31,$keyperm
-       lvx             v24,$x00,$key_          # pre-load round[1]
-       ?vperm          v31,v31,$out0,$keyperm
-       lvx             v25,$x10,$key_          # pre-load round[2]
-
-       #lvx            $inptail,0,$inp         # "caller" already did this
-       #addi           $inp,$inp,15            # 15 is not typo
-       subi            $inp,$inp,15            # undo "caller"
-
-        le?li          $idx,8
-       lvx_u           $in0,$x00,$inp          # load first 8 "words"
-        le?lvsl        $inpperm,0,$idx
-        le?vspltisb    $tmp,0x0f
-       lvx_u           $in1,$x10,$inp
-        le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
-       lvx_u           $in2,$x20,$inp
-        le?vperm       $in0,$in0,$in0,$inpperm
-       lvx_u           $in3,$x30,$inp
-        le?vperm       $in1,$in1,$in1,$inpperm
-       lvx_u           $in4,$x40,$inp
-        le?vperm       $in2,$in2,$in2,$inpperm
-       vxor            $out0,$in0,$rndkey0
-       lvx_u           $in5,$x50,$inp
-        le?vperm       $in3,$in3,$in3,$inpperm
-       vxor            $out1,$in1,$rndkey0
-       lvx_u           $in6,$x60,$inp
-        le?vperm       $in4,$in4,$in4,$inpperm
-       vxor            $out2,$in2,$rndkey0
-       lvx_u           $in7,$x70,$inp
-       addi            $inp,$inp,0x80
-        le?vperm       $in5,$in5,$in5,$inpperm
-       vxor            $out3,$in3,$rndkey0
-        le?vperm       $in6,$in6,$in6,$inpperm
-       vxor            $out4,$in4,$rndkey0
-        le?vperm       $in7,$in7,$in7,$inpperm
-       vxor            $out5,$in5,$rndkey0
-       vxor            $out6,$in6,$rndkey0
-       vxor            $out7,$in7,$rndkey0
-
-       mtctr           $rounds
-       b               Loop_cbc_dec8x
-.align 5
-Loop_cbc_dec8x:
-       vncipher        $out0,$out0,v24
-       vncipher        $out1,$out1,v24
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-       vncipher        $out4,$out4,v24
-       vncipher        $out5,$out5,v24
-       vncipher        $out6,$out6,v24
-       vncipher        $out7,$out7,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vncipher        $out0,$out0,v25
-       vncipher        $out1,$out1,v25
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-       vncipher        $out4,$out4,v25
-       vncipher        $out5,$out5,v25
-       vncipher        $out6,$out6,v25
-       vncipher        $out7,$out7,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Loop_cbc_dec8x
-
-       subic           $len,$len,128           # $len-=128
-       vncipher        $out0,$out0,v24
-       vncipher        $out1,$out1,v24
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-       vncipher        $out4,$out4,v24
-       vncipher        $out5,$out5,v24
-       vncipher        $out6,$out6,v24
-       vncipher        $out7,$out7,v24
-
-       subfe.          r0,r0,r0                # borrow?-1:0
-       vncipher        $out0,$out0,v25
-       vncipher        $out1,$out1,v25
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-       vncipher        $out4,$out4,v25
-       vncipher        $out5,$out5,v25
-       vncipher        $out6,$out6,v25
-       vncipher        $out7,$out7,v25
-
-       and             r0,r0,$len
-       vncipher        $out0,$out0,v26
-       vncipher        $out1,$out1,v26
-       vncipher        $out2,$out2,v26
-       vncipher        $out3,$out3,v26
-       vncipher        $out4,$out4,v26
-       vncipher        $out5,$out5,v26
-       vncipher        $out6,$out6,v26
-       vncipher        $out7,$out7,v26
-
-       add             $inp,$inp,r0            # $inp is adjusted in such a
-                                               # way that at exit from the
-                                               # loop inX-in7 are loaded
-                                               # with the last "words"
-       vncipher        $out0,$out0,v27
-       vncipher        $out1,$out1,v27
-       vncipher        $out2,$out2,v27
-       vncipher        $out3,$out3,v27
-       vncipher        $out4,$out4,v27
-       vncipher        $out5,$out5,v27
-       vncipher        $out6,$out6,v27
-       vncipher        $out7,$out7,v27
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       vncipher        $out0,$out0,v28
-       vncipher        $out1,$out1,v28
-       vncipher        $out2,$out2,v28
-       vncipher        $out3,$out3,v28
-       vncipher        $out4,$out4,v28
-       vncipher        $out5,$out5,v28
-       vncipher        $out6,$out6,v28
-       vncipher        $out7,$out7,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-
-       vncipher        $out0,$out0,v29
-       vncipher        $out1,$out1,v29
-       vncipher        $out2,$out2,v29
-       vncipher        $out3,$out3,v29
-       vncipher        $out4,$out4,v29
-       vncipher        $out5,$out5,v29
-       vncipher        $out6,$out6,v29
-       vncipher        $out7,$out7,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-
-       vncipher        $out0,$out0,v30
-        vxor           $ivec,$ivec,v31         # xor with last round key
-       vncipher        $out1,$out1,v30
-        vxor           $in0,$in0,v31
-       vncipher        $out2,$out2,v30
-        vxor           $in1,$in1,v31
-       vncipher        $out3,$out3,v30
-        vxor           $in2,$in2,v31
-       vncipher        $out4,$out4,v30
-        vxor           $in3,$in3,v31
-       vncipher        $out5,$out5,v30
-        vxor           $in4,$in4,v31
-       vncipher        $out6,$out6,v30
-        vxor           $in5,$in5,v31
-       vncipher        $out7,$out7,v30
-        vxor           $in6,$in6,v31
-
-       vncipherlast    $out0,$out0,$ivec
-       vncipherlast    $out1,$out1,$in0
-        lvx_u          $in0,$x00,$inp          # load next input block
-       vncipherlast    $out2,$out2,$in1
-        lvx_u          $in1,$x10,$inp
-       vncipherlast    $out3,$out3,$in2
-        le?vperm       $in0,$in0,$in0,$inpperm
-        lvx_u          $in2,$x20,$inp
-       vncipherlast    $out4,$out4,$in3
-        le?vperm       $in1,$in1,$in1,$inpperm
-        lvx_u          $in3,$x30,$inp
-       vncipherlast    $out5,$out5,$in4
-        le?vperm       $in2,$in2,$in2,$inpperm
-        lvx_u          $in4,$x40,$inp
-       vncipherlast    $out6,$out6,$in5
-        le?vperm       $in3,$in3,$in3,$inpperm
-        lvx_u          $in5,$x50,$inp
-       vncipherlast    $out7,$out7,$in6
-        le?vperm       $in4,$in4,$in4,$inpperm
-        lvx_u          $in6,$x60,$inp
-       vmr             $ivec,$in7
-        le?vperm       $in5,$in5,$in5,$inpperm
-        lvx_u          $in7,$x70,$inp
-        addi           $inp,$inp,0x80
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-        le?vperm       $in6,$in6,$in6,$inpperm
-        vxor           $out0,$in0,$rndkey0
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x10,$out
-        le?vperm       $in7,$in7,$in7,$inpperm
-        vxor           $out1,$in1,$rndkey0
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x20,$out
-        vxor           $out2,$in2,$rndkey0
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x30,$out
-        vxor           $out3,$in3,$rndkey0
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x40,$out
-        vxor           $out4,$in4,$rndkey0
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x50,$out
-        vxor           $out5,$in5,$rndkey0
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x60,$out
-        vxor           $out6,$in6,$rndkey0
-       stvx_u          $out7,$x70,$out
-       addi            $out,$out,0x80
-        vxor           $out7,$in7,$rndkey0
-
-       mtctr           $rounds
-       beq             Loop_cbc_dec8x          # did $len-=128 borrow?
-
-       addic.          $len,$len,128
-       beq             Lcbc_dec8x_done
-       nop
-       nop
-
-Loop_cbc_dec8x_tail:                           # up to 7 "words" tail...
-       vncipher        $out1,$out1,v24
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-       vncipher        $out4,$out4,v24
-       vncipher        $out5,$out5,v24
-       vncipher        $out6,$out6,v24
-       vncipher        $out7,$out7,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vncipher        $out1,$out1,v25
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-       vncipher        $out4,$out4,v25
-       vncipher        $out5,$out5,v25
-       vncipher        $out6,$out6,v25
-       vncipher        $out7,$out7,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Loop_cbc_dec8x_tail
-
-       vncipher        $out1,$out1,v24
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-       vncipher        $out4,$out4,v24
-       vncipher        $out5,$out5,v24
-       vncipher        $out6,$out6,v24
-       vncipher        $out7,$out7,v24
-
-       vncipher        $out1,$out1,v25
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-       vncipher        $out4,$out4,v25
-       vncipher        $out5,$out5,v25
-       vncipher        $out6,$out6,v25
-       vncipher        $out7,$out7,v25
-
-       vncipher        $out1,$out1,v26
-       vncipher        $out2,$out2,v26
-       vncipher        $out3,$out3,v26
-       vncipher        $out4,$out4,v26
-       vncipher        $out5,$out5,v26
-       vncipher        $out6,$out6,v26
-       vncipher        $out7,$out7,v26
-
-       vncipher        $out1,$out1,v27
-       vncipher        $out2,$out2,v27
-       vncipher        $out3,$out3,v27
-       vncipher        $out4,$out4,v27
-       vncipher        $out5,$out5,v27
-       vncipher        $out6,$out6,v27
-       vncipher        $out7,$out7,v27
-
-       vncipher        $out1,$out1,v28
-       vncipher        $out2,$out2,v28
-       vncipher        $out3,$out3,v28
-       vncipher        $out4,$out4,v28
-       vncipher        $out5,$out5,v28
-       vncipher        $out6,$out6,v28
-       vncipher        $out7,$out7,v28
-
-       vncipher        $out1,$out1,v29
-       vncipher        $out2,$out2,v29
-       vncipher        $out3,$out3,v29
-       vncipher        $out4,$out4,v29
-       vncipher        $out5,$out5,v29
-       vncipher        $out6,$out6,v29
-       vncipher        $out7,$out7,v29
-
-       vncipher        $out1,$out1,v30
-        vxor           $ivec,$ivec,v31         # last round key
-       vncipher        $out2,$out2,v30
-        vxor           $in1,$in1,v31
-       vncipher        $out3,$out3,v30
-        vxor           $in2,$in2,v31
-       vncipher        $out4,$out4,v30
-        vxor           $in3,$in3,v31
-       vncipher        $out5,$out5,v30
-        vxor           $in4,$in4,v31
-       vncipher        $out6,$out6,v30
-        vxor           $in5,$in5,v31
-       vncipher        $out7,$out7,v30
-        vxor           $in6,$in6,v31
-
-       cmplwi          $len,32                 # switch($len)
-       blt             Lcbc_dec8x_one
-       nop
-       beq             Lcbc_dec8x_two
-       cmplwi          $len,64
-       blt             Lcbc_dec8x_three
-       nop
-       beq             Lcbc_dec8x_four
-       cmplwi          $len,96
-       blt             Lcbc_dec8x_five
-       nop
-       beq             Lcbc_dec8x_six
-
-Lcbc_dec8x_seven:
-       vncipherlast    $out1,$out1,$ivec
-       vncipherlast    $out2,$out2,$in1
-       vncipherlast    $out3,$out3,$in2
-       vncipherlast    $out4,$out4,$in3
-       vncipherlast    $out5,$out5,$in4
-       vncipherlast    $out6,$out6,$in5
-       vncipherlast    $out7,$out7,$in6
-       vmr             $ivec,$in7
-
-       le?vperm        $out1,$out1,$out1,$inpperm
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x00,$out
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x10,$out
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x20,$out
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x30,$out
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x40,$out
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x50,$out
-       stvx_u          $out7,$x60,$out
-       addi            $out,$out,0x70
-       b               Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_six:
-       vncipherlast    $out2,$out2,$ivec
-       vncipherlast    $out3,$out3,$in2
-       vncipherlast    $out4,$out4,$in3
-       vncipherlast    $out5,$out5,$in4
-       vncipherlast    $out6,$out6,$in5
-       vncipherlast    $out7,$out7,$in6
-       vmr             $ivec,$in7
-
-       le?vperm        $out2,$out2,$out2,$inpperm
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x00,$out
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x10,$out
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x20,$out
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x30,$out
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x40,$out
-       stvx_u          $out7,$x50,$out
-       addi            $out,$out,0x60
-       b               Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_five:
-       vncipherlast    $out3,$out3,$ivec
-       vncipherlast    $out4,$out4,$in3
-       vncipherlast    $out5,$out5,$in4
-       vncipherlast    $out6,$out6,$in5
-       vncipherlast    $out7,$out7,$in6
-       vmr             $ivec,$in7
-
-       le?vperm        $out3,$out3,$out3,$inpperm
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x00,$out
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x10,$out
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x20,$out
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x30,$out
-       stvx_u          $out7,$x40,$out
-       addi            $out,$out,0x50
-       b               Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_four:
-       vncipherlast    $out4,$out4,$ivec
-       vncipherlast    $out5,$out5,$in4
-       vncipherlast    $out6,$out6,$in5
-       vncipherlast    $out7,$out7,$in6
-       vmr             $ivec,$in7
-
-       le?vperm        $out4,$out4,$out4,$inpperm
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x00,$out
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x10,$out
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x20,$out
-       stvx_u          $out7,$x30,$out
-       addi            $out,$out,0x40
-       b               Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_three:
-       vncipherlast    $out5,$out5,$ivec
-       vncipherlast    $out6,$out6,$in5
-       vncipherlast    $out7,$out7,$in6
-       vmr             $ivec,$in7
-
-       le?vperm        $out5,$out5,$out5,$inpperm
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x00,$out
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x10,$out
-       stvx_u          $out7,$x20,$out
-       addi            $out,$out,0x30
-       b               Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_two:
-       vncipherlast    $out6,$out6,$ivec
-       vncipherlast    $out7,$out7,$in6
-       vmr             $ivec,$in7
-
-       le?vperm        $out6,$out6,$out6,$inpperm
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x00,$out
-       stvx_u          $out7,$x10,$out
-       addi            $out,$out,0x20
-       b               Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_one:
-       vncipherlast    $out7,$out7,$ivec
-       vmr             $ivec,$in7
-
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out7,0,$out
-       addi            $out,$out,0x10
-
-Lcbc_dec8x_done:
-       le?vperm        $ivec,$ivec,$ivec,$inpperm
-       stvx_u          $ivec,0,$ivp            # write [unaligned] iv
-
-       li              r10,`$FRAME+15`
-       li              r11,`$FRAME+31`
-       stvx            $inpperm,r10,$sp        # wipe copies of round keys
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-       stvx            $inpperm,r10,$sp
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-       stvx            $inpperm,r10,$sp
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-       stvx            $inpperm,r10,$sp
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-
-       mtspr           256,$vrsave
-       lvx             v20,r10,$sp             # ABI says so
-       addi            r10,r10,32
-       lvx             v21,r11,$sp
-       addi            r11,r11,32
-       lvx             v22,r10,$sp
-       addi            r10,r10,32
-       lvx             v23,r11,$sp
-       addi            r11,r11,32
-       lvx             v24,r10,$sp
-       addi            r10,r10,32
-       lvx             v25,r11,$sp
-       addi            r11,r11,32
-       lvx             v26,r10,$sp
-       addi            r10,r10,32
-       lvx             v27,r11,$sp
-       addi            r11,r11,32
-       lvx             v28,r10,$sp
-       addi            r10,r10,32
-       lvx             v29,r11,$sp
-       addi            r11,r11,32
-       lvx             v30,r10,$sp
-       lvx             v31,r11,$sp
-       $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
-       blr
-       .long           0
-       .byte           0,12,0x14,0,0x80,6,6,0
-       .long           0
-.size  .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
-___
-}}     }}}
-
-#########################################################################
-{{{    # CTR procedure[s]                                              #
-
-####################### WARNING: Here be dragons! #######################
-#
-# This code is written as 'ctr32', based on a 32-bit counter used
-# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
-# a 128-bit counter.
-#
-# This leads to subtle changes from the upstream code: the counter
-# is incremented with vadduqm (a single 128-bit add) rather than
-# vadduwm (independent 32-bit word adds). This occurs in both the bulk
-# (8 blocks at a time) path and in the individual block path. Be aware
-# of this when doing updates.
-#
-# See:
-# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
-# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
-# https://github.com/openssl/openssl/pull/8942
-#
-#########################################################################
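-#
-# As an illustrative sketch (not part of the generated code), the
-# semantic difference between the two increments, with Math::BigInt
-# standing in for a 128-bit vector register holding an all-ones IV:
-#
-#      use Math::BigInt;
-#      my $iv = Math::BigInt->from_hex("f" x 32);      # 2^128 - 1
-#      # vadduqm: one 128-bit add, the carry ripples, IV wraps to 0
-#      my $qm = ($iv + 1) % Math::BigInt->new(2)->bpow(128);
-#      # vadduwm: independent 32-bit lanes, only the low word wraps,
-#      # leaving 0xff..ff00000000 - a different (wrong for us) counter
-#      my $wm = $iv - ($iv % 2**32) + (($iv + 1) % 2**32);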
-my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)=            map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
-                                               map("v$_",(4..11));
-my $dat=$tmp;
-
-$code.=<<___;
-.globl .${prefix}_ctr32_encrypt_blocks
-       ${UCMP}i        $len,1
-       bltlr-
-
-       lis             r0,0xfff0
-       mfspr           $vrsave,256
-       mtspr           256,r0
-
-       li              $idx,15
-       vxor            $rndkey0,$rndkey0,$rndkey0
-       le?vspltisb     $tmp,0x0f
-
-       lvx             $ivec,0,$ivp            # load [unaligned] iv
-       lvsl            $inpperm,0,$ivp
-       lvx             $inptail,$idx,$ivp
-        vspltisb       $one,1
-       le?vxor         $inpperm,$inpperm,$tmp
-       vperm           $ivec,$ivec,$inptail,$inpperm
-        vsldoi         $one,$rndkey0,$one,1
-
-       neg             r11,$inp
-       ?lvsl           $keyperm,0,$key         # prepare for unaligned key
-       lwz             $rounds,240($key)
-
-       lvsr            $inpperm,0,r11          # prepare for unaligned load
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,15            # 15 is not typo
-       le?vxor         $inpperm,$inpperm,$tmp
-
-       srwi            $rounds,$rounds,1
-       li              $idx,16
-       subi            $rounds,$rounds,1
-
-       ${UCMP}i        $len,8
-       bge             _aesp8_ctr32_encrypt8x
-
-       ?lvsr           $outperm,0,$out         # prepare for unaligned store
-       vspltisb        $outmask,-1
-       lvx             $outhead,0,$out
-       ?vperm          $outmask,$rndkey0,$outmask,$outperm
-       le?vxor         $outperm,$outperm,$tmp
-
-       lvx             $rndkey0,0,$key
-       mtctr           $rounds
-       lvx             $rndkey1,$idx,$key
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $inout,$ivec,$rndkey0
-       lvx             $rndkey0,$idx,$key
-       addi            $idx,$idx,16
-       b               Loop_ctr32_enc
-
-.align 5
-Loop_ctr32_enc:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipher         $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key
-       addi            $idx,$idx,16
-       bdnz            Loop_ctr32_enc
-
-       vadduqm         $ivec,$ivec,$one        # Kernel change for 128-bit
-        vmr            $dat,$inptail
-        lvx            $inptail,0,$inp
-        addi           $inp,$inp,16
-        subic.         $len,$len,1             # blocks--
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key
-        vperm          $dat,$dat,$inptail,$inpperm
-        li             $idx,16
-       ?vperm          $rndkey1,$rndkey0,$rndkey1,$keyperm
-        lvx            $rndkey0,0,$key
-       vxor            $dat,$dat,$rndkey1      # last round key
-       vcipherlast     $inout,$inout,$dat
-
-        lvx            $rndkey1,$idx,$key
-        addi           $idx,$idx,16
-       vperm           $inout,$inout,$inout,$outperm
-       vsel            $dat,$outhead,$inout,$outmask
-        mtctr          $rounds
-        ?vperm         $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vmr             $outhead,$inout
-        vxor           $inout,$ivec,$rndkey0
-        lvx            $rndkey0,$idx,$key
-        addi           $idx,$idx,16
-       stvx            $dat,0,$out
-       addi            $out,$out,16
-       bne             Loop_ctr32_enc
-
-       addi            $out,$out,-1
-       lvx             $inout,0,$out           # redundant in aligned case
-       vsel            $inout,$outhead,$inout,$outmask
-       stvx            $inout,0,$out
-
-       mtspr           256,$vrsave
-       blr
-       .long           0
-       .byte           0,12,0x14,0,0,0,6,0
-       .long           0
-___
-#########################################################################
-{{     # Optimized CTR procedure                                       #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
-my $rndkey0="v23";     # v24-v25 rotating buffer for first found keys
-                       # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4);        # aliases with "caller", redundant assignment
-my ($two,$three,$four)=($outhead,$outperm,$outmask);
-
-$code.=<<___;
-.align 5
-_aesp8_ctr32_encrypt8x:
-       $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
-       li              r10,`$FRAME+8*16+15`
-       li              r11,`$FRAME+8*16+31`
-       stvx            v20,r10,$sp             # ABI says so
-       addi            r10,r10,32
-       stvx            v21,r11,$sp
-       addi            r11,r11,32
-       stvx            v22,r10,$sp
-       addi            r10,r10,32
-       stvx            v23,r11,$sp
-       addi            r11,r11,32
-       stvx            v24,r10,$sp
-       addi            r10,r10,32
-       stvx            v25,r11,$sp
-       addi            r11,r11,32
-       stvx            v26,r10,$sp
-       addi            r10,r10,32
-       stvx            v27,r11,$sp
-       addi            r11,r11,32
-       stvx            v28,r10,$sp
-       addi            r10,r10,32
-       stvx            v29,r11,$sp
-       addi            r11,r11,32
-       stvx            v30,r10,$sp
-       stvx            v31,r11,$sp
-       li              r0,-1
-       stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
-       li              $x10,0x10
-       $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       li              $x20,0x20
-       $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       li              $x30,0x30
-       $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       li              $x40,0x40
-       $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       li              $x50,0x50
-       $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       li              $x60,0x60
-       $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       li              $x70,0x70
-       mtspr           256,r0
-
-       subi            $rounds,$rounds,3       # -4 in total
-
-       lvx             $rndkey0,$x00,$key      # load key schedule
-       lvx             v30,$x10,$key
-       addi            $key,$key,0x20
-       lvx             v31,$x00,$key
-       ?vperm          $rndkey0,$rndkey0,v30,$keyperm
-       addi            $key_,$sp,$FRAME+15
-       mtctr           $rounds
-
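-       # each iteration aligns two round keys with vperm and parks them
-       # in the stack frame ("off-load"), so the main loop can reload
-       # them with plain aligned lvx regardless of $key alignment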
-Load_ctr32_enc_key:
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v30,$x10,$key
-       addi            $key,$key,0x20
-       stvx            v24,$x00,$key_          # off-load round[1]
-       ?vperm          v25,v31,v30,$keyperm
-       lvx             v31,$x00,$key
-       stvx            v25,$x10,$key_          # off-load round[2]
-       addi            $key_,$key_,0x20
-       bdnz            Load_ctr32_enc_key
-
-       lvx             v26,$x10,$key
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v27,$x20,$key
-       stvx            v24,$x00,$key_          # off-load round[3]
-       ?vperm          v25,v31,v26,$keyperm
-       lvx             v28,$x30,$key
-       stvx            v25,$x10,$key_          # off-load round[4]
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       ?vperm          v26,v26,v27,$keyperm
-       lvx             v29,$x40,$key
-       ?vperm          v27,v27,v28,$keyperm
-       lvx             v30,$x50,$key
-       ?vperm          v28,v28,v29,$keyperm
-       lvx             v31,$x60,$key
-       ?vperm          v29,v29,v30,$keyperm
-       lvx             $out0,$x70,$key         # borrow $out0
-       ?vperm          v30,v30,v31,$keyperm
-       lvx             v24,$x00,$key_          # pre-load round[1]
-       ?vperm          v31,v31,$out0,$keyperm
-       lvx             v25,$x10,$key_          # pre-load round[2]
-
-       vadduqm         $two,$one,$one
-       subi            $inp,$inp,15            # undo "caller"
-       $SHL            $len,$len,4
-
-       vadduqm         $out1,$ivec,$one        # counter values ...
-       vadduqm         $out2,$ivec,$two        # (do all ctr adds as 128-bit)
-       vxor            $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
-        le?li          $idx,8
-       vadduqm         $out3,$out1,$two
-       vxor            $out1,$out1,$rndkey0
-        le?lvsl        $inpperm,0,$idx
-       vadduqm         $out4,$out2,$two
-       vxor            $out2,$out2,$rndkey0
-        le?vspltisb    $tmp,0x0f
-       vadduqm         $out5,$out3,$two
-       vxor            $out3,$out3,$rndkey0
-        le?vxor        $inpperm,$inpperm,$tmp  # transform for lvx_u/stvx_u
-       vadduqm         $out6,$out4,$two
-       vxor            $out4,$out4,$rndkey0
-       vadduqm         $out7,$out5,$two
-       vxor            $out5,$out5,$rndkey0
-       vadduqm         $ivec,$out6,$two        # next counter value
-       vxor            $out6,$out6,$rndkey0
-       vxor            $out7,$out7,$rndkey0
-
-       mtctr           $rounds
-       b               Loop_ctr32_enc8x
-.align 5
-Loop_ctr32_enc8x:
-       vcipher         $out0,$out0,v24
-       vcipher         $out1,$out1,v24
-       vcipher         $out2,$out2,v24
-       vcipher         $out3,$out3,v24
-       vcipher         $out4,$out4,v24
-       vcipher         $out5,$out5,v24
-       vcipher         $out6,$out6,v24
-       vcipher         $out7,$out7,v24
-Loop_ctr32_enc8x_middle:
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vcipher         $out0,$out0,v25
-       vcipher         $out1,$out1,v25
-       vcipher         $out2,$out2,v25
-       vcipher         $out3,$out3,v25
-       vcipher         $out4,$out4,v25
-       vcipher         $out5,$out5,v25
-       vcipher         $out6,$out6,v25
-       vcipher         $out7,$out7,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Loop_ctr32_enc8x
-
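-       # branchless tail setup: subic sets CA only when $len>=256; subfe
-       # turns the borrow into an all-ones mask, so after the and below
-       # r0 = ($len<256) ? $len-256 : 0. Adding r0 to $inp slides the
-       # next 128-byte load window so it ends at the input's last byte.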
-       subic           r11,$len,256            # $len-256, borrow $key_
-       vcipher         $out0,$out0,v24
-       vcipher         $out1,$out1,v24
-       vcipher         $out2,$out2,v24
-       vcipher         $out3,$out3,v24
-       vcipher         $out4,$out4,v24
-       vcipher         $out5,$out5,v24
-       vcipher         $out6,$out6,v24
-       vcipher         $out7,$out7,v24
-
-       subfe           r0,r0,r0                # borrow?-1:0
-       vcipher         $out0,$out0,v25
-       vcipher         $out1,$out1,v25
-       vcipher         $out2,$out2,v25
-       vcipher         $out3,$out3,v25
-       vcipher         $out4,$out4,v25
-       vcipher         $out5,$out5,v25
-       vcipher         $out6,$out6,v25
-       vcipher         $out7,$out7,v25
-
-       and             r0,r0,r11
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       vcipher         $out0,$out0,v26
-       vcipher         $out1,$out1,v26
-       vcipher         $out2,$out2,v26
-       vcipher         $out3,$out3,v26
-       vcipher         $out4,$out4,v26
-       vcipher         $out5,$out5,v26
-       vcipher         $out6,$out6,v26
-       vcipher         $out7,$out7,v26
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-
-       subic           $len,$len,129           # $len-=129
-       vcipher         $out0,$out0,v27
-       addi            $len,$len,1             # $len-=128 really
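-       # (129 rather than 128 so that CA records whether this was the
-       #  last batch of eight blocks; the addi restores the net
-       #  $len-=128, and the borrow is consumed by subfe./bne below)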
-       vcipher         $out1,$out1,v27
-       vcipher         $out2,$out2,v27
-       vcipher         $out3,$out3,v27
-       vcipher         $out4,$out4,v27
-       vcipher         $out5,$out5,v27
-       vcipher         $out6,$out6,v27
-       vcipher         $out7,$out7,v27
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-
-       vcipher         $out0,$out0,v28
-        lvx_u          $in0,$x00,$inp          # load input
-       vcipher         $out1,$out1,v28
-        lvx_u          $in1,$x10,$inp
-       vcipher         $out2,$out2,v28
-        lvx_u          $in2,$x20,$inp
-       vcipher         $out3,$out3,v28
-        lvx_u          $in3,$x30,$inp
-       vcipher         $out4,$out4,v28
-        lvx_u          $in4,$x40,$inp
-       vcipher         $out5,$out5,v28
-        lvx_u          $in5,$x50,$inp
-       vcipher         $out6,$out6,v28
-        lvx_u          $in6,$x60,$inp
-       vcipher         $out7,$out7,v28
-        lvx_u          $in7,$x70,$inp
-        addi           $inp,$inp,0x80
-
-       vcipher         $out0,$out0,v29
-        le?vperm       $in0,$in0,$in0,$inpperm
-       vcipher         $out1,$out1,v29
-        le?vperm       $in1,$in1,$in1,$inpperm
-       vcipher         $out2,$out2,v29
-        le?vperm       $in2,$in2,$in2,$inpperm
-       vcipher         $out3,$out3,v29
-        le?vperm       $in3,$in3,$in3,$inpperm
-       vcipher         $out4,$out4,v29
-        le?vperm       $in4,$in4,$in4,$inpperm
-       vcipher         $out5,$out5,v29
-        le?vperm       $in5,$in5,$in5,$inpperm
-       vcipher         $out6,$out6,v29
-        le?vperm       $in6,$in6,$in6,$inpperm
-       vcipher         $out7,$out7,v29
-        le?vperm       $in7,$in7,$in7,$inpperm
-
-       add             $inp,$inp,r0            # $inp is adjusted in such a
-                                               # way that at exit from the
-                                               # loop inX-in7 are loaded
-                                               # with the last "words"
-       subfe.          r0,r0,r0                # borrow?-1:0
-       vcipher         $out0,$out0,v30
-        vxor           $in0,$in0,v31           # xor with last round key
-       vcipher         $out1,$out1,v30
-        vxor           $in1,$in1,v31
-       vcipher         $out2,$out2,v30
-        vxor           $in2,$in2,v31
-       vcipher         $out3,$out3,v30
-        vxor           $in3,$in3,v31
-       vcipher         $out4,$out4,v30
-        vxor           $in4,$in4,v31
-       vcipher         $out5,$out5,v30
-        vxor           $in5,$in5,v31
-       vcipher         $out6,$out6,v30
-        vxor           $in6,$in6,v31
-       vcipher         $out7,$out7,v30
-        vxor           $in7,$in7,v31
-
-       bne             Lctr32_enc8x_break      # did $len-129 borrow?
-
-       vcipherlast     $in0,$out0,$in0
-       vcipherlast     $in1,$out1,$in1
-        vadduqm        $out1,$ivec,$one        # counter values ...
-       vcipherlast     $in2,$out2,$in2
-        vadduqm        $out2,$ivec,$two
-        vxor           $out0,$ivec,$rndkey0    # ... xored with rndkey[0]
-       vcipherlast     $in3,$out3,$in3
-        vadduqm        $out3,$out1,$two
-        vxor           $out1,$out1,$rndkey0
-       vcipherlast     $in4,$out4,$in4
-        vadduqm        $out4,$out2,$two
-        vxor           $out2,$out2,$rndkey0
-       vcipherlast     $in5,$out5,$in5
-        vadduqm        $out5,$out3,$two
-        vxor           $out3,$out3,$rndkey0
-       vcipherlast     $in6,$out6,$in6
-        vadduqm        $out6,$out4,$two
-        vxor           $out4,$out4,$rndkey0
-       vcipherlast     $in7,$out7,$in7
-        vadduqm        $out7,$out5,$two
-        vxor           $out5,$out5,$rndkey0
-       le?vperm        $in0,$in0,$in0,$inpperm
-        vadduqm        $ivec,$out6,$two        # next counter value
-        vxor           $out6,$out6,$rndkey0
-       le?vperm        $in1,$in1,$in1,$inpperm
-        vxor           $out7,$out7,$rndkey0
-       mtctr           $rounds
-
-        vcipher        $out0,$out0,v24
-       stvx_u          $in0,$x00,$out
-       le?vperm        $in2,$in2,$in2,$inpperm
-        vcipher        $out1,$out1,v24
-       stvx_u          $in1,$x10,$out
-       le?vperm        $in3,$in3,$in3,$inpperm
-        vcipher        $out2,$out2,v24
-       stvx_u          $in2,$x20,$out
-       le?vperm        $in4,$in4,$in4,$inpperm
-        vcipher        $out3,$out3,v24
-       stvx_u          $in3,$x30,$out
-       le?vperm        $in5,$in5,$in5,$inpperm
-        vcipher        $out4,$out4,v24
-       stvx_u          $in4,$x40,$out
-       le?vperm        $in6,$in6,$in6,$inpperm
-        vcipher        $out5,$out5,v24
-       stvx_u          $in5,$x50,$out
-       le?vperm        $in7,$in7,$in7,$inpperm
-        vcipher        $out6,$out6,v24
-       stvx_u          $in6,$x60,$out
-        vcipher        $out7,$out7,v24
-       stvx_u          $in7,$x70,$out
-       addi            $out,$out,0x80
-
-       b               Loop_ctr32_enc8x_middle
-
-.align 5
-Lctr32_enc8x_break:
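-       # dispatch on the remaining block count: $len holds bytes-128
-       # here, so -0x70 => 1 block, -0x60 => 2, ..., 0x00 => all 8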
-       cmpwi           $len,-0x60
-       blt             Lctr32_enc8x_one
-       nop
-       beq             Lctr32_enc8x_two
-       cmpwi           $len,-0x40
-       blt             Lctr32_enc8x_three
-       nop
-       beq             Lctr32_enc8x_four
-       cmpwi           $len,-0x20
-       blt             Lctr32_enc8x_five
-       nop
-       beq             Lctr32_enc8x_six
-       cmpwi           $len,0x00
-       blt             Lctr32_enc8x_seven
-
-Lctr32_enc8x_eight:
-       vcipherlast     $out0,$out0,$in0
-       vcipherlast     $out1,$out1,$in1
-       vcipherlast     $out2,$out2,$in2
-       vcipherlast     $out3,$out3,$in3
-       vcipherlast     $out4,$out4,$in4
-       vcipherlast     $out5,$out5,$in5
-       vcipherlast     $out6,$out6,$in6
-       vcipherlast     $out7,$out7,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x20,$out
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x30,$out
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x40,$out
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x50,$out
-       le?vperm        $out7,$out7,$out7,$inpperm
-       stvx_u          $out6,$x60,$out
-       stvx_u          $out7,$x70,$out
-       addi            $out,$out,0x80
-       b               Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_seven:
-       vcipherlast     $out0,$out0,$in1
-       vcipherlast     $out1,$out1,$in2
-       vcipherlast     $out2,$out2,$in3
-       vcipherlast     $out3,$out3,$in4
-       vcipherlast     $out4,$out4,$in5
-       vcipherlast     $out5,$out5,$in6
-       vcipherlast     $out6,$out6,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x20,$out
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x30,$out
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x40,$out
-       le?vperm        $out6,$out6,$out6,$inpperm
-       stvx_u          $out5,$x50,$out
-       stvx_u          $out6,$x60,$out
-       addi            $out,$out,0x70
-       b               Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_six:
-       vcipherlast     $out0,$out0,$in2
-       vcipherlast     $out1,$out1,$in3
-       vcipherlast     $out2,$out2,$in4
-       vcipherlast     $out3,$out3,$in5
-       vcipherlast     $out4,$out4,$in6
-       vcipherlast     $out5,$out5,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x20,$out
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x30,$out
-       le?vperm        $out5,$out5,$out5,$inpperm
-       stvx_u          $out4,$x40,$out
-       stvx_u          $out5,$x50,$out
-       addi            $out,$out,0x60
-       b               Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_five:
-       vcipherlast     $out0,$out0,$in3
-       vcipherlast     $out1,$out1,$in4
-       vcipherlast     $out2,$out2,$in5
-       vcipherlast     $out3,$out3,$in6
-       vcipherlast     $out4,$out4,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x20,$out
-       le?vperm        $out4,$out4,$out4,$inpperm
-       stvx_u          $out3,$x30,$out
-       stvx_u          $out4,$x40,$out
-       addi            $out,$out,0x50
-       b               Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_four:
-       vcipherlast     $out0,$out0,$in4
-       vcipherlast     $out1,$out1,$in5
-       vcipherlast     $out2,$out2,$in6
-       vcipherlast     $out3,$out3,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$inpperm
-       stvx_u          $out2,$x20,$out
-       stvx_u          $out3,$x30,$out
-       addi            $out,$out,0x40
-       b               Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_three:
-       vcipherlast     $out0,$out0,$in5
-       vcipherlast     $out1,$out1,$in6
-       vcipherlast     $out2,$out2,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-       le?vperm        $out2,$out2,$out2,$inpperm
-       stvx_u          $out1,$x10,$out
-       stvx_u          $out2,$x20,$out
-       addi            $out,$out,0x30
-       b               Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_two:
-       vcipherlast     $out0,$out0,$in6
-       vcipherlast     $out1,$out1,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       le?vperm        $out1,$out1,$out1,$inpperm
-       stvx_u          $out0,$x00,$out
-       stvx_u          $out1,$x10,$out
-       addi            $out,$out,0x20
-       b               Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_one:
-       vcipherlast     $out0,$out0,$in7
-
-       le?vperm        $out0,$out0,$out0,$inpperm
-       stvx_u          $out0,0,$out
-       addi            $out,$out,0x10
-
-Lctr32_enc8x_done:
-       li              r10,`$FRAME+15`
-       li              r11,`$FRAME+31`
-       stvx            $inpperm,r10,$sp        # wipe copies of round keys
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-       stvx            $inpperm,r10,$sp
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-       stvx            $inpperm,r10,$sp
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-       stvx            $inpperm,r10,$sp
-       addi            r10,r10,32
-       stvx            $inpperm,r11,$sp
-       addi            r11,r11,32
-
-       mtspr           256,$vrsave
-       lvx             v20,r10,$sp             # ABI says so
-       addi            r10,r10,32
-       lvx             v21,r11,$sp
-       addi            r11,r11,32
-       lvx             v22,r10,$sp
-       addi            r10,r10,32
-       lvx             v23,r11,$sp
-       addi            r11,r11,32
-       lvx             v24,r10,$sp
-       addi            r10,r10,32
-       lvx             v25,r11,$sp
-       addi            r11,r11,32
-       lvx             v26,r10,$sp
-       addi            r10,r10,32
-       lvx             v27,r11,$sp
-       addi            r11,r11,32
-       lvx             v28,r10,$sp
-       addi            r10,r10,32
-       lvx             v29,r11,$sp
-       addi            r11,r11,32
-       lvx             v30,r10,$sp
-       lvx             v31,r11,$sp
-       $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
-       blr
-       .long           0
-       .byte           0,12,0x14,0,0x80,6,6,0
-       .long           0
-.size  .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
-___
-}}     }}}
-
-#########################################################################
-{{{    # XTS procedures                                                #
-# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,  #
-#                             const AES_KEY *key1, const AES_KEY *key2,        #
-#                             [const] unsigned char iv[16]);           #
-# If $key2 is NULL, a "tweak chaining" mode is engaged, in which the   #
-# input tweak value is assumed to be encrypted already, and the last   #
-# tweak value, suitable for a consecutive call on the same chunk of    #
-# data, is written back to the original buffer. In addition, in        #
-# "tweak chaining" mode only complete input blocks are processed.      #
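-#
-# A minimal caller sketch (hypothetical, for orientation only; the
-# companion set_encrypt_key routine and 256-bit keys are assumed):
-#
-#      AES_KEY k1, k2;
-#      aes_p8_set_encrypt_key(key1_bytes, 256, &k1);
-#      aes_p8_set_encrypt_key(key2_bytes, 256, &k2);
-#      aes_p8_xts_encrypt(in, out, len, &k1, &k2, iv);  /* iv = plain tweak */
-#      aes_p8_xts_encrypt(in, out, len, &k1, NULL, iv); /* tweak chaining */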
-
-my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =    map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout) =                                map("v$_",(0..2));
-my ($output,$inptail,$inpperm,$leperm,$keyperm) =      map("v$_",(3..7));
-my ($tweak,$seven,$eighty7,$tmp,$tweak1) =             map("v$_",(8..12));
-my $taillen = $key2;
-
-   ($inp,$idx) = ($idx,$inp);                          # reassign
-
-$code.=<<___;
-.globl .${prefix}_xts_encrypt
-       mr              $inp,r3                         # reassign
-       li              r3,-1
-       ${UCMP}i        $len,16
-       bltlr-
-
-       lis             r0,0xfff0
-       mfspr           r12,256                         # save vrsave
-       li              r11,0
-       mtspr           256,r0
-
-       vspltisb        $seven,0x07                     # 0x070707..07
-       le?lvsl         $leperm,r11,r11
-       le?vspltisb     $tmp,0x0f
-       le?vxor         $leperm,$leperm,$seven
-
-       li              $idx,15
-       lvx             $tweak,0,$ivp                   # load [unaligned] iv
-       lvsl            $inpperm,0,$ivp
-       lvx             $inptail,$idx,$ivp
-       le?vxor         $inpperm,$inpperm,$tmp
-       vperm           $tweak,$tweak,$inptail,$inpperm
-
-       neg             r11,$inp
-       lvsr            $inpperm,0,r11                  # prepare for unaligned load
-       lvx             $inout,0,$inp
-       addi            $inp,$inp,15                    # 15 is not a typo
-       le?vxor         $inpperm,$inpperm,$tmp
-
-       ${UCMP}i        $key2,0                         # key2==NULL?
-       beq             Lxts_enc_no_key2
-
-       ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
-       lwz             $rounds,240($key2)
-       srwi            $rounds,$rounds,1
-       subi            $rounds,$rounds,1
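-       # AES_KEY keeps the round count at byte offset 240; the tweak
-       # loop below does two vcipher rounds per iteration, hence
-       # CTR = rounds/2 - 1, with one more vcipher and the final
-       # vcipherlast issued after the loop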
-       li              $idx,16
-
-       lvx             $rndkey0,0,$key2
-       lvx             $rndkey1,$idx,$key2
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $tweak,$tweak,$rndkey0
-       lvx             $rndkey0,$idx,$key2
-       addi            $idx,$idx,16
-       mtctr           $rounds
-
-Ltweak_xts_enc:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $tweak,$tweak,$rndkey1
-       lvx             $rndkey1,$idx,$key2
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipher         $tweak,$tweak,$rndkey0
-       lvx             $rndkey0,$idx,$key2
-       addi            $idx,$idx,16
-       bdnz            Ltweak_xts_enc
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $tweak,$tweak,$rndkey1
-       lvx             $rndkey1,$idx,$key2
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipherlast     $tweak,$tweak,$rndkey0
-
-       li              $ivp,0                          # don't chain the tweak
-       b               Lxts_enc
-
-Lxts_enc_no_key2:
-       li              $idx,-16
-       and             $len,$len,$idx                  # in "tweak chaining"
-                                                       # mode only complete
-                                                       # blocks are processed
-Lxts_enc:
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,16
-
-       ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
-       lwz             $rounds,240($key1)
-       srwi            $rounds,$rounds,1
-       subi            $rounds,$rounds,1
-       li              $idx,16
-
-       vslb            $eighty7,$seven,$seven          # 0x808080..80
-       vor             $eighty7,$eighty7,$seven        # 0x878787..87
-       vspltisb        $tmp,1                          # 0x010101..01
-       vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
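-       # $eighty7 supports doubling the tweak in GF(2^128) (IEEE P1619,
-       # polynomial x^128+x^7+x^2+x+1): vaddubm doubles each byte but
-       # drops per-byte carries; vsrab/vsldoi build a mask of those
-       # carries and vand/vxor re-inject them as 0x01, the top one
-       # folding back in as the 0x87 reduction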
-
-       ${UCMP}i        $len,96
-       bge             _aesp8_xts_encrypt6x
-
-       andi.           $taillen,$len,15
-       subic           r0,$len,32
-       subi            $taillen,$taillen,16
-       subfe           r0,r0,r0
-       and             r0,r0,$taillen
-       add             $inp,$inp,r0
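-       # i.e. r0 = ($len<32) ? ($len&15)-16 : 0, pulling $inp back so
-       # the final 16-byte load ends at the input's last byte when a
-       # stolen tail is present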
-
-       lvx             $rndkey0,0,$key1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-       vperm           $inout,$inout,$inptail,$inpperm
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $inout,$inout,$tweak
-       vxor            $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-       mtctr           $rounds
-       b               Loop_xts_enc
-
-.align 5
-Loop_xts_enc:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipher         $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-       bdnz            Loop_xts_enc
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key1
-       li              $idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $rndkey0,$rndkey0,$tweak
-       vcipherlast     $output,$inout,$rndkey0
-
-       le?vperm        $tmp,$output,$output,$leperm
-       be?nop
-       le?stvx_u       $tmp,0,$out
-       be?stvx_u       $output,0,$out
-       addi            $out,$out,16
-
-       subic.          $len,$len,16
-       beq             Lxts_enc_done
-
-       vmr             $inout,$inptail
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,16
-       lvx             $rndkey0,0,$key1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-
-       subic           r0,$len,32
-       subfe           r0,r0,r0
-       and             r0,r0,$taillen
-       add             $inp,$inp,r0
-
-       vsrab           $tmp,$tweak,$seven              # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-       vand            $tmp,$tmp,$eighty7
-       vxor            $tweak,$tweak,$tmp
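-       # (i.e. tweak *= alpha in GF(2^128); see the $eighty7 note above)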
-
-       vperm           $inout,$inout,$inptail,$inpperm
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $inout,$inout,$tweak
-       vxor            $output,$output,$rndkey0        # just in case $len<16
-       vxor            $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-
-       mtctr           $rounds
-       ${UCMP}i        $len,16
-       bge             Loop_xts_enc
-
-       vxor            $output,$output,$tweak
-       lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
-       vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
-       vspltisb        $tmp,-1
-       vperm           $inptail,$inptail,$tmp,$inpperm
-       vsel            $inout,$inout,$output,$inptail
-
-       subi            r11,$out,17
-       subi            $out,$out,16
-       mtctr           $len
-       li              $len,16
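-       # ciphertext stealing: copy the leading $len bytes of the last
-       # full ciphertext block forward to form the short tail, then go
-       # around Loop_xts_enc once more to re-encrypt the merged block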
-Loop_xts_enc_steal:
-       lbzu            r0,1(r11)
-       stb             r0,16(r11)
-       bdnz            Loop_xts_enc_steal
-
-       mtctr           $rounds
-       b               Loop_xts_enc                    # one more time...
-
-Lxts_enc_done:
-       ${UCMP}i        $ivp,0
-       beq             Lxts_enc_ret
-
-       vsrab           $tmp,$tweak,$seven              # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-       vand            $tmp,$tmp,$eighty7
-       vxor            $tweak,$tweak,$tmp
-
-       le?vperm        $tweak,$tweak,$tweak,$leperm
-       stvx_u          $tweak,0,$ivp
-
-Lxts_enc_ret:
-       mtspr           256,r12                         # restore vrsave
-       li              r3,0
-       blr
-       .long           0
-       .byte           0,12,0x04,0,0x80,6,6,0
-       .long           0
-.size  .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
-
-.globl .${prefix}_xts_decrypt
-       mr              $inp,r3                         # reassign
-       li              r3,-1
-       ${UCMP}i        $len,16
-       bltlr-
-
-       lis             r0,0xfff8
-       mfspr           r12,256                         # save vrsave
-       li              r11,0
-       mtspr           256,r0
-
-       andi.           r0,$len,15
-       neg             r0,r0
-       andi.           r0,r0,16
-       sub             $len,$len,r0
-
-       vspltisb        $seven,0x07                     # 0x070707..07
-       le?lvsl         $leperm,r11,r11
-       le?vspltisb     $tmp,0x0f
-       le?vxor         $leperm,$leperm,$seven
-
-       li              $idx,15
-       lvx             $tweak,0,$ivp                   # load [unaligned] iv
-       lvsl            $inpperm,0,$ivp
-       lvx             $inptail,$idx,$ivp
-       le?vxor         $inpperm,$inpperm,$tmp
-       vperm           $tweak,$tweak,$inptail,$inpperm
-
-       neg             r11,$inp
-       lvsr            $inpperm,0,r11                  # prepare for unaligned load
-       lvx             $inout,0,$inp
-       addi            $inp,$inp,15                    # 15 is not a typo
-       le?vxor         $inpperm,$inpperm,$tmp
-
-       ${UCMP}i        $key2,0                         # key2==NULL?
-       beq             Lxts_dec_no_key2
-
-       ?lvsl           $keyperm,0,$key2                # prepare for unaligned key
-       lwz             $rounds,240($key2)
-       srwi            $rounds,$rounds,1
-       subi            $rounds,$rounds,1
-       li              $idx,16
-
-       lvx             $rndkey0,0,$key2
-       lvx             $rndkey1,$idx,$key2
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $tweak,$tweak,$rndkey0
-       lvx             $rndkey0,$idx,$key2
-       addi            $idx,$idx,16
-       mtctr           $rounds
-
-Ltweak_xts_dec:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $tweak,$tweak,$rndkey1
-       lvx             $rndkey1,$idx,$key2
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipher         $tweak,$tweak,$rndkey0
-       lvx             $rndkey0,$idx,$key2
-       addi            $idx,$idx,16
-       bdnz            Ltweak_xts_dec
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vcipher         $tweak,$tweak,$rndkey1
-       lvx             $rndkey1,$idx,$key2
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vcipherlast     $tweak,$tweak,$rndkey0
-
-       li              $ivp,0                          # don't chain the tweak
-       b               Lxts_dec
-
-Lxts_dec_no_key2:
-       neg             $idx,$len
-       andi.           $idx,$idx,15
-       add             $len,$len,$idx                  # in "tweak chaining"
-                                                       # mode only complete
-                                                       # blocks are processed
-Lxts_dec:
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,16
-
-       ?lvsl           $keyperm,0,$key1                # prepare for unaligned key
-       lwz             $rounds,240($key1)
-       srwi            $rounds,$rounds,1
-       subi            $rounds,$rounds,1
-       li              $idx,16
-
-       vslb            $eighty7,$seven,$seven          # 0x808080..80
-       vor             $eighty7,$eighty7,$seven        # 0x878787..87
-       vspltisb        $tmp,1                          # 0x010101..01
-       vsldoi          $eighty7,$eighty7,$tmp,15       # 0x870101..01
-
-       ${UCMP}i        $len,96
-       bge             _aesp8_xts_decrypt6x
-
-       lvx             $rndkey0,0,$key1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-       vperm           $inout,$inout,$inptail,$inpperm
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $inout,$inout,$tweak
-       vxor            $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-       mtctr           $rounds
-
-       ${UCMP}i        $len,16
-       blt             Ltail_xts_dec
-       be?b            Loop_xts_dec
-
-.align 5
-Loop_xts_dec:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vncipher        $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vncipher        $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-       bdnz            Loop_xts_dec
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vncipher        $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key1
-       li              $idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $rndkey0,$rndkey0,$tweak
-       vncipherlast    $output,$inout,$rndkey0
-
-       le?vperm        $tmp,$output,$output,$leperm
-       be?nop
-       le?stvx_u       $tmp,0,$out
-       be?stvx_u       $output,0,$out
-       addi            $out,$out,16
-
-       subic.          $len,$len,16
-       beq             Lxts_dec_done
-
-       vmr             $inout,$inptail
-       lvx             $inptail,0,$inp
-       addi            $inp,$inp,16
-       lvx             $rndkey0,0,$key1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-
-       vsrab           $tmp,$tweak,$seven              # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-       vand            $tmp,$tmp,$eighty7
-       vxor            $tweak,$tweak,$tmp
-
-       vperm           $inout,$inout,$inptail,$inpperm
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $inout,$inout,$tweak
-       vxor            $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-
-       mtctr           $rounds
-       ${UCMP}i        $len,16
-       bge             Loop_xts_dec
-
-Ltail_xts_dec:
-       vsrab           $tmp,$tweak,$seven              # next tweak value
-       vaddubm         $tweak1,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-       vand            $tmp,$tmp,$eighty7
-       vxor            $tweak1,$tweak1,$tmp
-
-       subi            $inp,$inp,16
-       add             $inp,$inp,$len
-
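-       # decrypt-side stealing swaps tweak order: undo the $tweak XOR
-       # applied at setup and apply $tweak1 (the next tweak) to the last
-       # full block instead; the partial tail then uses $tweak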
-       vxor            $inout,$inout,$tweak            # :-(
-       vxor            $inout,$inout,$tweak1           # :-)
-
-Loop_xts_dec_short:
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vncipher        $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vncipher        $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-       bdnz            Loop_xts_dec_short
-
-       ?vperm          $rndkey1,$rndkey1,$rndkey0,$keyperm
-       vncipher        $inout,$inout,$rndkey1
-       lvx             $rndkey1,$idx,$key1
-       li              $idx,16
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-       vxor            $rndkey0,$rndkey0,$tweak1
-       vncipherlast    $output,$inout,$rndkey0
-
-       le?vperm        $tmp,$output,$output,$leperm
-       be?nop
-       le?stvx_u       $tmp,0,$out
-       be?stvx_u       $output,0,$out
-
-       vmr             $inout,$inptail
-       lvx             $inptail,0,$inp
-       #addi           $inp,$inp,16
-       lvx             $rndkey0,0,$key1
-       lvx             $rndkey1,$idx,$key1
-       addi            $idx,$idx,16
-       vperm           $inout,$inout,$inptail,$inpperm
-       ?vperm          $rndkey0,$rndkey0,$rndkey1,$keyperm
-
-       lvsr            $inpperm,0,$len                 # $inpperm is no longer needed
-       vxor            $inptail,$inptail,$inptail      # $inptail is no longer needed
-       vspltisb        $tmp,-1
-       vperm           $inptail,$inptail,$tmp,$inpperm
-       vsel            $inout,$inout,$output,$inptail
-
-       vxor            $rndkey0,$rndkey0,$tweak
-       vxor            $inout,$inout,$rndkey0
-       lvx             $rndkey0,$idx,$key1
-       addi            $idx,$idx,16
-
-       subi            r11,$out,1
-       mtctr           $len
-       li              $len,16
-Loop_xts_dec_steal:
-       lbzu            r0,1(r11)
-       stb             r0,16(r11)
-       bdnz            Loop_xts_dec_steal
-
-       mtctr           $rounds
-       b               Loop_xts_dec                    # one more time...
-
-Lxts_dec_done:
-       ${UCMP}i        $ivp,0
-       beq             Lxts_dec_ret
-
-       vsrab           $tmp,$tweak,$seven              # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-       vand            $tmp,$tmp,$eighty7
-       vxor            $tweak,$tweak,$tmp
-
-       le?vperm        $tweak,$tweak,$tweak,$leperm
-       stvx_u          $tweak,0,$ivp
-
-Lxts_dec_ret:
-       mtspr           256,r12                         # restore vrsave
-       li              r3,0
-       blr
-       .long           0
-       .byte           0,12,0x04,0,0x80,6,6,0
-       .long           0
-.size  .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
-___
-#########################################################################
-{{     # Optimized XTS procedures                                      #
-my $key_=$key2;
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
-    $x00=0 if ($flavour =~ /osx/);
-my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 )=map("v$_",(0..5));
-my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
-my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
-my $rndkey0="v23";     # v24-v25 rotating buffer for first round keys
-                       # v26-v31 last 6 round keys
-my ($keyperm)=($out0); # aliases with "caller", redundant assignment
-my $taillen=$x70;
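-
-# Same structure as the 8x CTR path, but six-way: each block carries a
-# precomputed tweak ($twk0-$twk5), and the next tweak values are derived
-# with the vsrab/vaddubm/vsldoi/vand/vxor sequence in the gaps between
-# vcipher instructions.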
-
-$code.=<<___;
-.align 5
-_aesp8_xts_encrypt6x:
-       $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
-       mflr            r11
-       li              r7,`$FRAME+8*16+15`
-       li              r3,`$FRAME+8*16+31`
-       $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
-       stvx            v20,r7,$sp              # ABI says so
-       addi            r7,r7,32
-       stvx            v21,r3,$sp
-       addi            r3,r3,32
-       stvx            v22,r7,$sp
-       addi            r7,r7,32
-       stvx            v23,r3,$sp
-       addi            r3,r3,32
-       stvx            v24,r7,$sp
-       addi            r7,r7,32
-       stvx            v25,r3,$sp
-       addi            r3,r3,32
-       stvx            v26,r7,$sp
-       addi            r7,r7,32
-       stvx            v27,r3,$sp
-       addi            r3,r3,32
-       stvx            v28,r7,$sp
-       addi            r7,r7,32
-       stvx            v29,r3,$sp
-       addi            r3,r3,32
-       stvx            v30,r7,$sp
-       stvx            v31,r3,$sp
-       li              r0,-1
-       stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
-       li              $x10,0x10
-       $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       li              $x20,0x20
-       $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       li              $x30,0x30
-       $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       li              $x40,0x40
-       $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       li              $x50,0x50
-       $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       li              $x60,0x60
-       $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       li              $x70,0x70
-       mtspr           256,r0
-
-       subi            $rounds,$rounds,3       # -4 in total
-
-       lvx             $rndkey0,$x00,$key1     # load key schedule
-       lvx             v30,$x10,$key1
-       addi            $key1,$key1,0x20
-       lvx             v31,$x00,$key1
-       ?vperm          $rndkey0,$rndkey0,v30,$keyperm
-       addi            $key_,$sp,$FRAME+15
-       mtctr           $rounds
-
-Load_xts_enc_key:
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v30,$x10,$key1
-       addi            $key1,$key1,0x20
-       stvx            v24,$x00,$key_          # off-load round[1]
-       ?vperm          v25,v31,v30,$keyperm
-       lvx             v31,$x00,$key1
-       stvx            v25,$x10,$key_          # off-load round[2]
-       addi            $key_,$key_,0x20
-       bdnz            Load_xts_enc_key
-
-       lvx             v26,$x10,$key1
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v27,$x20,$key1
-       stvx            v24,$x00,$key_          # off-load round[3]
-       ?vperm          v25,v31,v26,$keyperm
-       lvx             v28,$x30,$key1
-       stvx            v25,$x10,$key_          # off-load round[4]
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       ?vperm          v26,v26,v27,$keyperm
-       lvx             v29,$x40,$key1
-       ?vperm          v27,v27,v28,$keyperm
-       lvx             v30,$x50,$key1
-       ?vperm          v28,v28,v29,$keyperm
-       lvx             v31,$x60,$key1
-       ?vperm          v29,v29,v30,$keyperm
-       lvx             $twk5,$x70,$key1        # borrow $twk5
-       ?vperm          v30,v30,v31,$keyperm
-       lvx             v24,$x00,$key_          # pre-load round[1]
-       ?vperm          v31,v31,$twk5,$keyperm
-       lvx             v25,$x10,$key_          # pre-load round[2]
-
-        vperm          $in0,$inout,$inptail,$inpperm
-        subi           $inp,$inp,31            # undo "caller"
-       vxor            $twk0,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out0,$in0,$twk0
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in1,$x10,$inp
-       vxor            $twk1,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in1,$in1,$in1,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out1,$in1,$twk1
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in2,$x20,$inp
-        andi.          $taillen,$len,15
-       vxor            $twk2,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in2,$in2,$in2,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out2,$in2,$twk2
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in3,$x30,$inp
-        sub            $len,$len,$taillen
-       vxor            $twk3,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in3,$in3,$in3,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out3,$in3,$twk3
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in4,$x40,$inp
-        subi           $len,$len,0x60
-       vxor            $twk4,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in4,$in4,$in4,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out4,$in4,$twk4
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in5,$x50,$inp
-        addi           $inp,$inp,0x60
-       vxor            $twk5,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in5,$in5,$in5,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out5,$in5,$twk5
-       vxor            $tweak,$tweak,$tmp
-
-       vxor            v31,v31,$rndkey0
-       mtctr           $rounds
-       b               Loop_xts_enc6x
-
-.align 5
-Loop_xts_enc6x:
-       vcipher         $out0,$out0,v24
-       vcipher         $out1,$out1,v24
-       vcipher         $out2,$out2,v24
-       vcipher         $out3,$out3,v24
-       vcipher         $out4,$out4,v24
-       vcipher         $out5,$out5,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vcipher         $out0,$out0,v25
-       vcipher         $out1,$out1,v25
-       vcipher         $out2,$out2,v25
-       vcipher         $out3,$out3,v25
-       vcipher         $out4,$out4,v25
-       vcipher         $out5,$out5,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Loop_xts_enc6x
-
-       subic           $len,$len,96            # $len-=96
-        vxor           $in0,$twk0,v31          # xor with last round key
-       vcipher         $out0,$out0,v24
-       vcipher         $out1,$out1,v24
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk0,$tweak,$rndkey0
-        vaddubm        $tweak,$tweak,$tweak
-       vcipher         $out2,$out2,v24
-       vcipher         $out3,$out3,v24
-        vsldoi         $tmp,$tmp,$tmp,15
-       vcipher         $out4,$out4,v24
-       vcipher         $out5,$out5,v24
-
-       subfe.          r0,r0,r0                # borrow?-1:0
-        vand           $tmp,$tmp,$eighty7
-       vcipher         $out0,$out0,v25
-       vcipher         $out1,$out1,v25
-        vxor           $tweak,$tweak,$tmp
-       vcipher         $out2,$out2,v25
-       vcipher         $out3,$out3,v25
-        vxor           $in1,$twk1,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk1,$tweak,$rndkey0
-       vcipher         $out4,$out4,v25
-       vcipher         $out5,$out5,v25
-
-       and             r0,r0,$len
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-       vcipher         $out0,$out0,v26
-       vcipher         $out1,$out1,v26
-        vand           $tmp,$tmp,$eighty7
-       vcipher         $out2,$out2,v26
-       vcipher         $out3,$out3,v26
-        vxor           $tweak,$tweak,$tmp
-       vcipher         $out4,$out4,v26
-       vcipher         $out5,$out5,v26
-
-       add             $inp,$inp,r0            # $inp is adjusted in such a
-                                               # way that at exit from the
-                                               # loop inX-in5 are loaded
-                                               # with the last "words"
-        vxor           $in2,$twk2,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk2,$tweak,$rndkey0
-        vaddubm        $tweak,$tweak,$tweak
-       vcipher         $out0,$out0,v27
-       vcipher         $out1,$out1,v27
-        vsldoi         $tmp,$tmp,$tmp,15
-       vcipher         $out2,$out2,v27
-       vcipher         $out3,$out3,v27
-        vand           $tmp,$tmp,$eighty7
-       vcipher         $out4,$out4,v27
-       vcipher         $out5,$out5,v27
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-        vxor           $tweak,$tweak,$tmp
-       vcipher         $out0,$out0,v28
-       vcipher         $out1,$out1,v28
-        vxor           $in3,$twk3,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk3,$tweak,$rndkey0
-       vcipher         $out2,$out2,v28
-       vcipher         $out3,$out3,v28
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-       vcipher         $out4,$out4,v28
-       vcipher         $out5,$out5,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-        vand           $tmp,$tmp,$eighty7
-
-       vcipher         $out0,$out0,v29
-       vcipher         $out1,$out1,v29
-        vxor           $tweak,$tweak,$tmp
-       vcipher         $out2,$out2,v29
-       vcipher         $out3,$out3,v29
-        vxor           $in4,$twk4,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk4,$tweak,$rndkey0
-       vcipher         $out4,$out4,v29
-       vcipher         $out5,$out5,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-
-       vcipher         $out0,$out0,v30
-       vcipher         $out1,$out1,v30
-        vand           $tmp,$tmp,$eighty7
-       vcipher         $out2,$out2,v30
-       vcipher         $out3,$out3,v30
-        vxor           $tweak,$tweak,$tmp
-       vcipher         $out4,$out4,v30
-       vcipher         $out5,$out5,v30
-        vxor           $in5,$twk5,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk5,$tweak,$rndkey0
-
-       vcipherlast     $out0,$out0,$in0
-        lvx_u          $in0,$x00,$inp          # load next input block
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-       vcipherlast     $out1,$out1,$in1
-        lvx_u          $in1,$x10,$inp
-       vcipherlast     $out2,$out2,$in2
-        le?vperm       $in0,$in0,$in0,$leperm
-        lvx_u          $in2,$x20,$inp
-        vand           $tmp,$tmp,$eighty7
-       vcipherlast     $out3,$out3,$in3
-        le?vperm       $in1,$in1,$in1,$leperm
-        lvx_u          $in3,$x30,$inp
-       vcipherlast     $out4,$out4,$in4
-        le?vperm       $in2,$in2,$in2,$leperm
-        lvx_u          $in4,$x40,$inp
-        vxor           $tweak,$tweak,$tmp
-       vcipherlast     $tmp,$out5,$in5         # last block might be needed
-                                               # in stealing mode
-        le?vperm       $in3,$in3,$in3,$leperm
-        lvx_u          $in5,$x50,$inp
-        addi           $inp,$inp,0x60
-        le?vperm       $in4,$in4,$in4,$leperm
-        le?vperm       $in5,$in5,$in5,$leperm
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-        vxor           $out0,$in0,$twk0
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-        vxor           $out1,$in1,$twk1
-       le?vperm        $out3,$out3,$out3,$leperm
-       stvx_u          $out2,$x20,$out
-        vxor           $out2,$in2,$twk2
-       le?vperm        $out4,$out4,$out4,$leperm
-       stvx_u          $out3,$x30,$out
-        vxor           $out3,$in3,$twk3
-       le?vperm        $out5,$tmp,$tmp,$leperm
-       stvx_u          $out4,$x40,$out
-        vxor           $out4,$in4,$twk4
-       le?stvx_u       $out5,$x50,$out
-       be?stvx_u       $tmp, $x50,$out
-        vxor           $out5,$in5,$twk5
-       addi            $out,$out,0x60
-
-       mtctr           $rounds
-       beq             Loop_xts_enc6x          # did $len-=96 borrow?
-
-       addic.          $len,$len,0x60
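-       # restore the remaining byte count ($len += 0x60) and dispatch
-       # below on 0-5 leftover blocks (plus a possible stolen tail)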
-       beq             Lxts_enc6x_zero
-       cmpwi           $len,0x20
-       blt             Lxts_enc6x_one
-       nop
-       beq             Lxts_enc6x_two
-       cmpwi           $len,0x40
-       blt             Lxts_enc6x_three
-       nop
-       beq             Lxts_enc6x_four
-
-Lxts_enc6x_five:
-       vxor            $out0,$in1,$twk0
-       vxor            $out1,$in2,$twk1
-       vxor            $out2,$in3,$twk2
-       vxor            $out3,$in4,$twk3
-       vxor            $out4,$in5,$twk4
-
-       bl              _aesp8_xts_enc5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk5             # unused tweak
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$leperm
-       stvx_u          $out2,$x20,$out
-       vxor            $tmp,$out4,$twk5        # last block prep for stealing
-       le?vperm        $out4,$out4,$out4,$leperm
-       stvx_u          $out3,$x30,$out
-       stvx_u          $out4,$x40,$out
-       addi            $out,$out,0x50
-       bne             Lxts_enc6x_steal
-       b               Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_four:
-       vxor            $out0,$in2,$twk0
-       vxor            $out1,$in3,$twk1
-       vxor            $out2,$in4,$twk2
-       vxor            $out3,$in5,$twk3
-       vxor            $out4,$out4,$out4
-
-       bl              _aesp8_xts_enc5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk4             # unused tweak
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-       vxor            $tmp,$out3,$twk4        # last block prep for stealing
-       le?vperm        $out3,$out3,$out3,$leperm
-       stvx_u          $out2,$x20,$out
-       stvx_u          $out3,$x30,$out
-       addi            $out,$out,0x40
-       bne             Lxts_enc6x_steal
-       b               Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_three:
-       vxor            $out0,$in3,$twk0
-       vxor            $out1,$in4,$twk1
-       vxor            $out2,$in5,$twk2
-       vxor            $out3,$out3,$out3
-       vxor            $out4,$out4,$out4
-
-       bl              _aesp8_xts_enc5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk3             # unused tweak
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       vxor            $tmp,$out2,$twk3        # last block prep for stealing
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-       stvx_u          $out2,$x20,$out
-       addi            $out,$out,0x30
-       bne             Lxts_enc6x_steal
-       b               Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_two:
-       vxor            $out0,$in4,$twk0
-       vxor            $out1,$in5,$twk1
-       vxor            $out2,$out2,$out2
-       vxor            $out3,$out3,$out3
-       vxor            $out4,$out4,$out4
-
-       bl              _aesp8_xts_enc5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk2             # unused tweak
-       vxor            $tmp,$out1,$twk2        # last block prep for stealing
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       stvx_u          $out1,$x10,$out
-       addi            $out,$out,0x20
-       bne             Lxts_enc6x_steal
-       b               Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_one:
-       vxor            $out0,$in5,$twk0
-       nop
-Loop_xts_enc1x:
-       vcipher         $out0,$out0,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vcipher         $out0,$out0,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Loop_xts_enc1x
-
-       add             $inp,$inp,$taillen
-       cmpwi           $taillen,0
-       vcipher         $out0,$out0,v24
-
-       subi            $inp,$inp,16
-       vcipher         $out0,$out0,v25
-
-       lvsr            $inpperm,0,$taillen
-       vcipher         $out0,$out0,v26
-
-       lvx_u           $in0,0,$inp
-       vcipher         $out0,$out0,v27
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       vcipher         $out0,$out0,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-
-       vcipher         $out0,$out0,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-        vxor           $twk0,$twk0,v31
-
-       le?vperm        $in0,$in0,$in0,$leperm
-       vcipher         $out0,$out0,v30
-
-       vperm           $in0,$in0,$in0,$inpperm
-       vcipherlast     $out0,$out0,$twk0
-
-       vmr             $twk0,$twk1             # unused tweak
-       vxor            $tmp,$out0,$twk1        # last block prep for stealing
-       le?vperm        $out0,$out0,$out0,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       addi            $out,$out,0x10
-       bne             Lxts_enc6x_steal
-       b               Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_zero:
-       cmpwi           $taillen,0
-       beq             Lxts_enc6x_done
-
-       add             $inp,$inp,$taillen
-       subi            $inp,$inp,16
-       lvx_u           $in0,0,$inp
-       lvsr            $inpperm,0,$taillen     # $in5 is no more
-       le?vperm        $in0,$in0,$in0,$leperm
-       vperm           $in0,$in0,$in0,$inpperm
-       vxor            $tmp,$tmp,$twk0
-Lxts_enc6x_steal:
-       vxor            $in0,$in0,$twk0
-       vxor            $out0,$out0,$out0
-       vspltisb        $out1,-1
-       vperm           $out0,$out0,$out1,$inpperm
-       vsel            $out0,$in0,$tmp,$out0   # $tmp is last block, remember?
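-       # $out0 is now the stolen block: the $taillen trailing input bytes
-       # followed by the tail of the last full ciphertext block, already
-       # xored with the final tweak value in $twk0; the loop below moves
-       # the displaced ciphertext bytes into the output tail before $out0
-       # is encrypted one more time.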
-
-       subi            r30,$out,17             # last ciphertext block - 1
-       subi            $out,$out,16            # rewind $out over that block
-       mtctr           $taillen
-Loop_xts_enc6x_steal:
-       lbzu            r0,1(r30)               # move $taillen bytes of the
-       stb             r0,16(r30)              # last block to the output tail
-       bdnz            Loop_xts_enc6x_steal
-
-       li              $taillen,0
-       mtctr           $rounds
-       b               Loop_xts_enc1x          # one more time...
-
-.align 4
-Lxts_enc6x_done:
-       ${UCMP}i        $ivp,0
-       beq             Lxts_enc6x_ret
-
-       vxor            $tweak,$twk0,$rndkey0
-       le?vperm        $tweak,$tweak,$tweak,$leperm
-       stvx_u          $tweak,0,$ivp
-
-Lxts_enc6x_ret:
-       mtlr            r11
-       li              r10,`$FRAME+15`
-       li              r11,`$FRAME+31`
-       stvx            $seven,r10,$sp          # wipe copies of round keys
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-       stvx            $seven,r10,$sp
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-       stvx            $seven,r10,$sp
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-       stvx            $seven,r10,$sp
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-
-       mtspr           256,$vrsave             # restore VRSAVE
-       lvx             v20,r10,$sp             # ABI says so
-       addi            r10,r10,32
-       lvx             v21,r11,$sp
-       addi            r11,r11,32
-       lvx             v22,r10,$sp
-       addi            r10,r10,32
-       lvx             v23,r11,$sp
-       addi            r11,r11,32
-       lvx             v24,r10,$sp
-       addi            r10,r10,32
-       lvx             v25,r11,$sp
-       addi            r11,r11,32
-       lvx             v26,r10,$sp
-       addi            r10,r10,32
-       lvx             v27,r11,$sp
-       addi            r11,r11,32
-       lvx             v28,r10,$sp
-       addi            r10,r10,32
-       lvx             v29,r11,$sp
-       addi            r11,r11,32
-       lvx             v30,r10,$sp
-       lvx             v31,r11,$sp
-       $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
-       blr
-       .long           0
-       .byte           0,12,0x04,1,0x80,6,6,0
-       .long           0
-
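-# _aesp8_xts_enc5x runs $out0-$out4 through the remaining AES rounds in
-# parallel; callers pre-xor each live block with its tweak and zero the
-# unused slots, whose results are simply never stored. The load and
-# alignment of the final partial input block are folded into the round
-# schedule here as well.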
-.align 5
-_aesp8_xts_enc5x:
-       vcipher         $out0,$out0,v24
-       vcipher         $out1,$out1,v24
-       vcipher         $out2,$out2,v24
-       vcipher         $out3,$out3,v24
-       vcipher         $out4,$out4,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vcipher         $out0,$out0,v25
-       vcipher         $out1,$out1,v25
-       vcipher         $out2,$out2,v25
-       vcipher         $out3,$out3,v25
-       vcipher         $out4,$out4,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            _aesp8_xts_enc5x
-
-       add             $inp,$inp,$taillen
-       cmpwi           $taillen,0
-       vcipher         $out0,$out0,v24
-       vcipher         $out1,$out1,v24
-       vcipher         $out2,$out2,v24
-       vcipher         $out3,$out3,v24
-       vcipher         $out4,$out4,v24
-
-       subi            $inp,$inp,16
-       vcipher         $out0,$out0,v25
-       vcipher         $out1,$out1,v25
-       vcipher         $out2,$out2,v25
-       vcipher         $out3,$out3,v25
-       vcipher         $out4,$out4,v25
-        vxor           $twk0,$twk0,v31
-
-       vcipher         $out0,$out0,v26
-       lvsr            $inpperm,0,$taillen     # $in5 is no more
-       vcipher         $out1,$out1,v26
-       vcipher         $out2,$out2,v26
-       vcipher         $out3,$out3,v26
-       vcipher         $out4,$out4,v26
-        vxor           $in1,$twk1,v31
-
-       vcipher         $out0,$out0,v27
-       lvx_u           $in0,0,$inp
-       vcipher         $out1,$out1,v27
-       vcipher         $out2,$out2,v27
-       vcipher         $out3,$out3,v27
-       vcipher         $out4,$out4,v27
-        vxor           $in2,$twk2,v31
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       vcipher         $out0,$out0,v28
-       vcipher         $out1,$out1,v28
-       vcipher         $out2,$out2,v28
-       vcipher         $out3,$out3,v28
-       vcipher         $out4,$out4,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-        vxor           $in3,$twk3,v31
-
-       vcipher         $out0,$out0,v29
-       le?vperm        $in0,$in0,$in0,$leperm
-       vcipher         $out1,$out1,v29
-       vcipher         $out2,$out2,v29
-       vcipher         $out3,$out3,v29
-       vcipher         $out4,$out4,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-        vxor           $in4,$twk4,v31
-
-       vcipher         $out0,$out0,v30
-       vperm           $in0,$in0,$in0,$inpperm
-       vcipher         $out1,$out1,v30
-       vcipher         $out2,$out2,v30
-       vcipher         $out3,$out3,v30
-       vcipher         $out4,$out4,v30
-
-       vcipherlast     $out0,$out0,$twk0
-       vcipherlast     $out1,$out1,$in1
-       vcipherlast     $out2,$out2,$in2
-       vcipherlast     $out3,$out3,$in3
-       vcipherlast     $out4,$out4,$in4
-       blr
-        .long          0
-        .byte          0,12,0x14,0,0,0,0,0
-
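-# _aesp8_xts_decrypt6x mirrors the encrypt path: same stack frame,
-# key-schedule off-load and tweak pipeline, with vncipher/vncipherlast in
-# place of vcipher/vcipherlast, and with the last two tweaks consumed in
-# reverse order when ciphertext stealing is needed.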
-.align 5
-_aesp8_xts_decrypt6x:
-       $STU            $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
-       mflr            r11
-       li              r7,`$FRAME+8*16+15`
-       li              r3,`$FRAME+8*16+31`
-       $PUSH           r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
-       stvx            v20,r7,$sp              # ABI says so
-       addi            r7,r7,32
-       stvx            v21,r3,$sp
-       addi            r3,r3,32
-       stvx            v22,r7,$sp
-       addi            r7,r7,32
-       stvx            v23,r3,$sp
-       addi            r3,r3,32
-       stvx            v24,r7,$sp
-       addi            r7,r7,32
-       stvx            v25,r3,$sp
-       addi            r3,r3,32
-       stvx            v26,r7,$sp
-       addi            r7,r7,32
-       stvx            v27,r3,$sp
-       addi            r3,r3,32
-       stvx            v28,r7,$sp
-       addi            r7,r7,32
-       stvx            v29,r3,$sp
-       addi            r3,r3,32
-       stvx            v30,r7,$sp
-       stvx            v31,r3,$sp
-       li              r0,-1
-       stw             $vrsave,`$FRAME+21*16-4`($sp)   # save vrsave
-       li              $x10,0x10
-       $PUSH           r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       li              $x20,0x20
-       $PUSH           r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       li              $x30,0x30
-       $PUSH           r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       li              $x40,0x40
-       $PUSH           r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       li              $x50,0x50
-       $PUSH           r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       li              $x60,0x60
-       $PUSH           r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       li              $x70,0x70
-       mtspr           256,r0                  # claim all VRs in VRSAVE
-
-       subi            $rounds,$rounds,3       # -4 in total
-
-       lvx             $rndkey0,$x00,$key1     # load key schedule
-       lvx             v30,$x10,$key1
-       addi            $key1,$key1,0x20
-       lvx             v31,$x00,$key1
-       ?vperm          $rndkey0,$rndkey0,v30,$keyperm
-       addi            $key_,$sp,$FRAME+15
-       mtctr           $rounds
-
-Load_xts_dec_key:
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v30,$x10,$key1
-       addi            $key1,$key1,0x20
-       stvx            v24,$x00,$key_          # off-load round[1]
-       ?vperm          v25,v31,v30,$keyperm
-       lvx             v31,$x00,$key1
-       stvx            v25,$x10,$key_          # off-load round[2]
-       addi            $key_,$key_,0x20
-       bdnz            Load_xts_dec_key
-
-       lvx             v26,$x10,$key1
-       ?vperm          v24,v30,v31,$keyperm
-       lvx             v27,$x20,$key1
-       stvx            v24,$x00,$key_          # off-load round[3]
-       ?vperm          v25,v31,v26,$keyperm
-       lvx             v28,$x30,$key1
-       stvx            v25,$x10,$key_          # off-load round[4]
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       ?vperm          v26,v26,v27,$keyperm
-       lvx             v29,$x40,$key1
-       ?vperm          v27,v27,v28,$keyperm
-       lvx             v30,$x50,$key1
-       ?vperm          v28,v28,v29,$keyperm
-       lvx             v31,$x60,$key1
-       ?vperm          v29,v29,v30,$keyperm
-       lvx             $twk5,$x70,$key1        # borrow $twk5
-       ?vperm          v30,v30,v31,$keyperm
-       lvx             v24,$x00,$key_          # pre-load round[1]
-       ?vperm          v31,v31,$twk5,$keyperm
-       lvx             v25,$x10,$key_          # pre-load round[2]
-
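-       # Each tweak below is derived from the previous one by multiplying
-       # it by x in GF(2^128): vaddubm doubles every byte, vsrab extracts
-       # each byte's top bit as a 0x00/0xff mask, vsldoi rotates the mask
-       # into the neighbouring lane, and vand with $eighty7 reduces the
-       # carries (0x01 between bytes, 0x87 for the 128-bit overflow).
-       # Every $twkN is saved pre-xored with round-key 0, so a single vxor
-       # both whitens a block with its tweak and applies the first round key.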
-        vperm          $in0,$inout,$inptail,$inpperm
-        subi           $inp,$inp,31            # undo "caller"
-       vxor            $twk0,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out0,$in0,$twk0
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in1,$x10,$inp
-       vxor            $twk1,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in1,$in1,$in1,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out1,$in1,$twk1
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in2,$x20,$inp
-        andi.          $taillen,$len,15
-       vxor            $twk2,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in2,$in2,$in2,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out2,$in2,$twk2
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in3,$x30,$inp
-        sub            $len,$len,$taillen
-       vxor            $twk3,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in3,$in3,$in3,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out3,$in3,$twk3
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in4,$x40,$inp
-        subi           $len,$len,0x60
-       vxor            $twk4,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in4,$in4,$in4,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out4,$in4,$twk4
-       vxor            $tweak,$tweak,$tmp
-
-        lvx_u          $in5,$x50,$inp
-        addi           $inp,$inp,0x60
-       vxor            $twk5,$tweak,$rndkey0
-       vsrab           $tmp,$tweak,$seven      # next tweak value
-       vaddubm         $tweak,$tweak,$tweak
-       vsldoi          $tmp,$tmp,$tmp,15
-        le?vperm       $in5,$in5,$in5,$leperm
-       vand            $tmp,$tmp,$eighty7
-        vxor           $out5,$in5,$twk5
-       vxor            $tweak,$tweak,$tmp
-
-       vxor            v31,v31,$rndkey0
-       mtctr           $rounds
-       b               Loop_xts_dec6x
-
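-# Main decrypt loop: six blocks are kept in flight; next-iteration tweak
-# derivation and the branchless input-pointer rewind (subfe/and/add below)
-# are interleaved with the vncipher rounds to hide instruction latency.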
-.align 5
-Loop_xts_dec6x:
-       vncipher        $out0,$out0,v24
-       vncipher        $out1,$out1,v24
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-       vncipher        $out4,$out4,v24
-       vncipher        $out5,$out5,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vncipher        $out0,$out0,v25
-       vncipher        $out1,$out1,v25
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-       vncipher        $out4,$out4,v25
-       vncipher        $out5,$out5,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Loop_xts_dec6x
-
-       subic           $len,$len,96            # $len-=96
-        vxor           $in0,$twk0,v31          # xor with last round key
-       vncipher        $out0,$out0,v24
-       vncipher        $out1,$out1,v24
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk0,$tweak,$rndkey0
-        vaddubm        $tweak,$tweak,$tweak
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-        vsldoi         $tmp,$tmp,$tmp,15
-       vncipher        $out4,$out4,v24
-       vncipher        $out5,$out5,v24
-
-       subfe.          r0,r0,r0                # borrow?-1:0
-        vand           $tmp,$tmp,$eighty7
-       vncipher        $out0,$out0,v25
-       vncipher        $out1,$out1,v25
-        vxor           $tweak,$tweak,$tmp
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-        vxor           $in1,$twk1,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk1,$tweak,$rndkey0
-       vncipher        $out4,$out4,v25
-       vncipher        $out5,$out5,v25
-
-       and             r0,r0,$len              # r0 = borrow ? $len : 0
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-       vncipher        $out0,$out0,v26
-       vncipher        $out1,$out1,v26
-        vand           $tmp,$tmp,$eighty7
-       vncipher        $out2,$out2,v26
-       vncipher        $out3,$out3,v26
-        vxor           $tweak,$tweak,$tmp
-       vncipher        $out4,$out4,v26
-       vncipher        $out5,$out5,v26
-
-       add             $inp,$inp,r0            # adjust $inp so that at exit
-                                               # from the loop in0-in5 hold
-                                               # the last input blocks
-        vxor           $in2,$twk2,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk2,$tweak,$rndkey0
-        vaddubm        $tweak,$tweak,$tweak
-       vncipher        $out0,$out0,v27
-       vncipher        $out1,$out1,v27
-        vsldoi         $tmp,$tmp,$tmp,15
-       vncipher        $out2,$out2,v27
-       vncipher        $out3,$out3,v27
-        vand           $tmp,$tmp,$eighty7
-       vncipher        $out4,$out4,v27
-       vncipher        $out5,$out5,v27
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-        vxor           $tweak,$tweak,$tmp
-       vncipher        $out0,$out0,v28
-       vncipher        $out1,$out1,v28
-        vxor           $in3,$twk3,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk3,$tweak,$rndkey0
-       vncipher        $out2,$out2,v28
-       vncipher        $out3,$out3,v28
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-       vncipher        $out4,$out4,v28
-       vncipher        $out5,$out5,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-        vand           $tmp,$tmp,$eighty7
-
-       vncipher        $out0,$out0,v29
-       vncipher        $out1,$out1,v29
-        vxor           $tweak,$tweak,$tmp
-       vncipher        $out2,$out2,v29
-       vncipher        $out3,$out3,v29
-        vxor           $in4,$twk4,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk4,$tweak,$rndkey0
-       vncipher        $out4,$out4,v29
-       vncipher        $out5,$out5,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-
-       vncipher        $out0,$out0,v30
-       vncipher        $out1,$out1,v30
-        vand           $tmp,$tmp,$eighty7
-       vncipher        $out2,$out2,v30
-       vncipher        $out3,$out3,v30
-        vxor           $tweak,$tweak,$tmp
-       vncipher        $out4,$out4,v30
-       vncipher        $out5,$out5,v30
-        vxor           $in5,$twk5,v31
-        vsrab          $tmp,$tweak,$seven      # next tweak value
-        vxor           $twk5,$tweak,$rndkey0
-
-       vncipherlast    $out0,$out0,$in0
-        lvx_u          $in0,$x00,$inp          # load next input block
-        vaddubm        $tweak,$tweak,$tweak
-        vsldoi         $tmp,$tmp,$tmp,15
-       vncipherlast    $out1,$out1,$in1
-        lvx_u          $in1,$x10,$inp
-       vncipherlast    $out2,$out2,$in2
-        le?vperm       $in0,$in0,$in0,$leperm
-        lvx_u          $in2,$x20,$inp
-        vand           $tmp,$tmp,$eighty7
-       vncipherlast    $out3,$out3,$in3
-        le?vperm       $in1,$in1,$in1,$leperm
-        lvx_u          $in3,$x30,$inp
-       vncipherlast    $out4,$out4,$in4
-        le?vperm       $in2,$in2,$in2,$leperm
-        lvx_u          $in4,$x40,$inp
-        vxor           $tweak,$tweak,$tmp
-       vncipherlast    $out5,$out5,$in5
-        le?vperm       $in3,$in3,$in3,$leperm
-        lvx_u          $in5,$x50,$inp
-        addi           $inp,$inp,0x60
-        le?vperm       $in4,$in4,$in4,$leperm
-        le?vperm       $in5,$in5,$in5,$leperm
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-        vxor           $out0,$in0,$twk0
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-        vxor           $out1,$in1,$twk1
-       le?vperm        $out3,$out3,$out3,$leperm
-       stvx_u          $out2,$x20,$out
-        vxor           $out2,$in2,$twk2
-       le?vperm        $out4,$out4,$out4,$leperm
-       stvx_u          $out3,$x30,$out
-        vxor           $out3,$in3,$twk3
-       le?vperm        $out5,$out5,$out5,$leperm
-       stvx_u          $out4,$x40,$out
-        vxor           $out4,$in4,$twk4
-       stvx_u          $out5,$x50,$out
-        vxor           $out5,$in5,$twk5
-       addi            $out,$out,0x60
-
-       mtctr           $rounds
-       beq             Loop_xts_dec6x          # did $len-=96 borrow?
-
-       addic.          $len,$len,0x60          # restore $len (0..0x50 left)
-       beq             Lxts_dec6x_zero         # no full blocks left
-       cmpwi           $len,0x20
-       blt             Lxts_dec6x_one          # one block left
-       nop
-       beq             Lxts_dec6x_two          # two blocks left
-       cmpwi           $len,0x40
-       blt             Lxts_dec6x_three        # three blocks left
-       nop
-       beq             Lxts_dec6x_four         # four blocks left
-
-Lxts_dec6x_five:
-       vxor            $out0,$in1,$twk0
-       vxor            $out1,$in2,$twk1
-       vxor            $out2,$in3,$twk2
-       vxor            $out3,$in4,$twk3
-       vxor            $out4,$in5,$twk4
-
-       bl              _aesp8_xts_dec5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk5             # unused tweak
-       vxor            $twk1,$tweak,$rndkey0
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       vxor            $out0,$in0,$twk1
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$leperm
-       stvx_u          $out2,$x20,$out
-       le?vperm        $out4,$out4,$out4,$leperm
-       stvx_u          $out3,$x30,$out
-       stvx_u          $out4,$x40,$out
-       addi            $out,$out,0x50
-       bne             Lxts_dec6x_steal
-       b               Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_four:
-       vxor            $out0,$in2,$twk0
-       vxor            $out1,$in3,$twk1
-       vxor            $out2,$in4,$twk2
-       vxor            $out3,$in5,$twk3
-       vxor            $out4,$out4,$out4
-
-       bl              _aesp8_xts_dec5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk4             # unused tweak
-       vmr             $twk1,$twk5
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       vxor            $out0,$in0,$twk5
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-       le?vperm        $out3,$out3,$out3,$leperm
-       stvx_u          $out2,$x20,$out
-       stvx_u          $out3,$x30,$out
-       addi            $out,$out,0x40
-       bne             Lxts_dec6x_steal
-       b               Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_three:
-       vxor            $out0,$in3,$twk0
-       vxor            $out1,$in4,$twk1
-       vxor            $out2,$in5,$twk2
-       vxor            $out3,$out3,$out3
-       vxor            $out4,$out4,$out4
-
-       bl              _aesp8_xts_dec5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk3             # unused tweak
-       vmr             $twk1,$twk4
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       vxor            $out0,$in0,$twk4
-       le?vperm        $out2,$out2,$out2,$leperm
-       stvx_u          $out1,$x10,$out
-       stvx_u          $out2,$x20,$out
-       addi            $out,$out,0x30
-       bne             Lxts_dec6x_steal
-       b               Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_two:
-       vxor            $out0,$in4,$twk0
-       vxor            $out1,$in5,$twk1
-       vxor            $out2,$out2,$out2
-       vxor            $out3,$out3,$out3
-       vxor            $out4,$out4,$out4
-
-       bl              _aesp8_xts_dec5x
-
-       le?vperm        $out0,$out0,$out0,$leperm
-       vmr             $twk0,$twk2             # unused tweak
-       vmr             $twk1,$twk3
-       le?vperm        $out1,$out1,$out1,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       vxor            $out0,$in0,$twk3
-       stvx_u          $out1,$x10,$out
-       addi            $out,$out,0x20
-       bne             Lxts_dec6x_steal
-       b               Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_one:
-       vxor            $out0,$in5,$twk0
-       nop
-Loop_xts_dec1x:
-       vncipher        $out0,$out0,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vncipher        $out0,$out0,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Loop_xts_dec1x
-
-       subi            r0,$taillen,1
-       vncipher        $out0,$out0,v24
-
-       andi.           r0,r0,16                # r0 = $taillen ? 0 : 16
-       cmpwi           $taillen,0              # tail present?
-       vncipher        $out0,$out0,v25
-
-       sub             $inp,$inp,r0            # back up $inp if no tail
-       vncipher        $out0,$out0,v26
-
-       lvx_u           $in0,0,$inp
-       vncipher        $out0,$out0,v27
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       vncipher        $out0,$out0,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-
-       vncipher        $out0,$out0,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-        vxor           $twk0,$twk0,v31
-
-       le?vperm        $in0,$in0,$in0,$leperm
-       vncipher        $out0,$out0,v30
-
-       mtctr           $rounds
-       vncipherlast    $out0,$out0,$twk0
-
-       vmr             $twk0,$twk1             # unused tweak
-       vmr             $twk1,$twk2
-       le?vperm        $out0,$out0,$out0,$leperm
-       stvx_u          $out0,$x00,$out         # store output
-       addi            $out,$out,0x10
-       vxor            $out0,$in0,$twk2
-       bne             Lxts_dec6x_steal
-       b               Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_zero:
-       cmpwi           $taillen,0
-       beq             Lxts_dec6x_done
-
-       lvx_u           $in0,0,$inp
-       le?vperm        $in0,$in0,$in0,$leperm
-       vxor            $out0,$in0,$twk1
-Lxts_dec6x_steal:
-       vncipher        $out0,$out0,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vncipher        $out0,$out0,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            Lxts_dec6x_steal
-
-       add             $inp,$inp,$taillen
-       vncipher        $out0,$out0,v24
-
-       cmpwi           $taillen,0
-       vncipher        $out0,$out0,v25
-
-       lvx_u           $in0,0,$inp
-       vncipher        $out0,$out0,v26
-
-       lvsr            $inpperm,0,$taillen     # $in5 is no more
-       vncipher        $out0,$out0,v27
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       vncipher        $out0,$out0,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-
-       vncipher        $out0,$out0,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-        vxor           $twk1,$twk1,v31
-
-       le?vperm        $in0,$in0,$in0,$leperm
-       vncipher        $out0,$out0,v30
-
-       vperm           $in0,$in0,$in0,$inpperm
-       vncipherlast    $tmp,$out0,$twk1
-
-       le?vperm        $out0,$tmp,$tmp,$leperm
-       le?stvx_u       $out0,0,$out            # store next-to-last block
-       be?stvx_u       $tmp,0,$out             # (overwritten after stealing)
-
-       vxor            $out0,$out0,$out0
-       vspltisb        $out1,-1
-       vperm           $out0,$out0,$out1,$inpperm
-       vsel            $out0,$in0,$tmp,$out0
-       vxor            $out0,$out0,$twk0
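-       # $out0 is now the block to re-decrypt: the $taillen trailing
-       # ciphertext bytes followed by the tail of the block just stored,
-       # pre-xored with $twk0; on the decrypt side ciphertext stealing
-       # uses the later tweak ($twk1) first and the earlier one second.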
-
-       subi            r30,$out,1              # just-stored block - 1
-       mtctr           $taillen
-Loop_xts_dec6x_steal:
-       lbzu            r0,1(r30)               # move $taillen bytes of that
-       stb             r0,16(r30)              # block to the output tail
-       bdnz            Loop_xts_dec6x_steal
-
-       li              $taillen,0
-       mtctr           $rounds
-       b               Loop_xts_dec1x          # one more time...
-
-.align 4
-Lxts_dec6x_done:
-       ${UCMP}i        $ivp,0
-       beq             Lxts_dec6x_ret
-
-       vxor            $tweak,$twk0,$rndkey0
-       le?vperm        $tweak,$tweak,$tweak,$leperm
-       stvx_u          $tweak,0,$ivp
-
-Lxts_dec6x_ret:
-       mtlr            r11
-       li              r10,`$FRAME+15`
-       li              r11,`$FRAME+31`
-       stvx            $seven,r10,$sp          # wipe copies of round keys
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-       stvx            $seven,r10,$sp
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-       stvx            $seven,r10,$sp
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-       stvx            $seven,r10,$sp
-       addi            r10,r10,32
-       stvx            $seven,r11,$sp
-       addi            r11,r11,32
-
-       mtspr           256,$vrsave             # restore VRSAVE
-       lvx             v20,r10,$sp             # ABI says so
-       addi            r10,r10,32
-       lvx             v21,r11,$sp
-       addi            r11,r11,32
-       lvx             v22,r10,$sp
-       addi            r10,r10,32
-       lvx             v23,r11,$sp
-       addi            r11,r11,32
-       lvx             v24,r10,$sp
-       addi            r10,r10,32
-       lvx             v25,r11,$sp
-       addi            r11,r11,32
-       lvx             v26,r10,$sp
-       addi            r10,r10,32
-       lvx             v27,r11,$sp
-       addi            r11,r11,32
-       lvx             v28,r10,$sp
-       addi            r10,r10,32
-       lvx             v29,r11,$sp
-       addi            r11,r11,32
-       lvx             v30,r10,$sp
-       lvx             v31,r11,$sp
-       $POP            r26,`$FRAME+21*16+0*$SIZE_T`($sp)
-       $POP            r27,`$FRAME+21*16+1*$SIZE_T`($sp)
-       $POP            r28,`$FRAME+21*16+2*$SIZE_T`($sp)
-       $POP            r29,`$FRAME+21*16+3*$SIZE_T`($sp)
-       $POP            r30,`$FRAME+21*16+4*$SIZE_T`($sp)
-       $POP            r31,`$FRAME+21*16+5*$SIZE_T`($sp)
-       addi            $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
-       blr
-       .long           0
-       .byte           0,12,0x04,1,0x80,6,6,0
-       .long           0
-
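-# _aesp8_xts_dec5x is the decrypt-side counterpart of _aesp8_xts_enc5x:
-# five blocks through the remaining rounds, with the tail-block load and
-# alignment folded into the round schedule.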
-.align 5
-_aesp8_xts_dec5x:
-       vncipher        $out0,$out0,v24
-       vncipher        $out1,$out1,v24
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-       vncipher        $out4,$out4,v24
-       lvx             v24,$x20,$key_          # round[3]
-       addi            $key_,$key_,0x20
-
-       vncipher        $out0,$out0,v25
-       vncipher        $out1,$out1,v25
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-       vncipher        $out4,$out4,v25
-       lvx             v25,$x10,$key_          # round[4]
-       bdnz            _aesp8_xts_dec5x
-
-       subi            r0,$taillen,1
-       vncipher        $out0,$out0,v24
-       vncipher        $out1,$out1,v24
-       vncipher        $out2,$out2,v24
-       vncipher        $out3,$out3,v24
-       vncipher        $out4,$out4,v24
-
-       andi.           r0,r0,16                # r0 = $taillen ? 0 : 16
-       cmpwi           $taillen,0
-       vncipher        $out0,$out0,v25
-       vncipher        $out1,$out1,v25
-       vncipher        $out2,$out2,v25
-       vncipher        $out3,$out3,v25
-       vncipher        $out4,$out4,v25
-        vxor           $twk0,$twk0,v31
-
-       sub             $inp,$inp,r0            # back up $inp if no tail
-       vncipher        $out0,$out0,v26
-       vncipher        $out1,$out1,v26
-       vncipher        $out2,$out2,v26
-       vncipher        $out3,$out3,v26
-       vncipher        $out4,$out4,v26
-        vxor           $in1,$twk1,v31
-
-       vncipher        $out0,$out0,v27
-       lvx_u           $in0,0,$inp
-       vncipher        $out1,$out1,v27
-       vncipher        $out2,$out2,v27
-       vncipher        $out3,$out3,v27
-       vncipher        $out4,$out4,v27
-        vxor           $in2,$twk2,v31
-
-       addi            $key_,$sp,$FRAME+15     # rewind $key_
-       vncipher        $out0,$out0,v28
-       vncipher        $out1,$out1,v28
-       vncipher        $out2,$out2,v28
-       vncipher        $out3,$out3,v28
-       vncipher        $out4,$out4,v28
-       lvx             v24,$x00,$key_          # re-pre-load round[1]
-        vxor           $in3,$twk3,v31
-
-       vncipher        $out0,$out0,v29
-       le?vperm        $in0,$in0,$in0,$leperm
-       vncipher        $out1,$out1,v29
-       vncipher        $out2,$out2,v29
-       vncipher        $out3,$out3,v29
-       vncipher        $out4,$out4,v29
-       lvx             v25,$x10,$key_          # re-pre-load round[2]
-        vxor           $in4,$twk4,v31
-
-       vncipher        $out0,$out0,v30
-       vncipher        $out1,$out1,v30
-       vncipher        $out2,$out2,v30
-       vncipher        $out3,$out3,v30
-       vncipher        $out4,$out4,v30
-
-       vncipherlast    $out0,$out0,$twk0
-       vncipherlast    $out1,$out1,$in1
-       vncipherlast    $out2,$out2,$in2
-       vncipherlast    $out3,$out3,$in3
-       vncipherlast    $out4,$out4,$in4
-       mtctr           $rounds
-       blr
-        .long          0
-        .byte          0,12,0x14,0,0,0,0,0
-___
-}}     }}}
-
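-# Post-process $code: evaluate the `...` expressions, emit the constants
-# table as raw bytes in the order required by $flavour, and resolve the
-# endian-specific '?'-prefixed instructions (operand swaps on
-# little-endian).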
-my $consts=1;
-foreach(split("\n",$code)) {
-        s/\`([^\`]*)\`/eval($1)/geo;
-
-       # constants table endian-specific conversion
-       if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
-           my $conv=$3;
-           my @bytes=();
-
-           # convert to endian-agnostic format
-           if ($1 eq "long") {
-             foreach (split(/,\s*/,$2)) {
-               my $l = /^0/?oct:int;
-               push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
-             }
-           } else {
-               @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
-           }
-
-           # little-endian conversion
-           if ($flavour =~ /le$/o) {
-               SWITCH: for($conv)  {
-                   /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
-                   /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
-               }
-           }
-
-           # emit
-           print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
-           next;
-       }
-       $consts=0 if (m/Lconsts:/o);    # end of table
-
-       # instructions prefixed with '?' are endian-specific and need
-       # to be adjusted accordingly...
-       if ($flavour =~ /le$/o) {       # little-endian
-           s/le\?//o           or
-           s/be\?/#be#/o       or
-           s/\?lvsr/lvsl/o     or
-           s/\?lvsl/lvsr/o     or
-           s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
-           s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
-           s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
-       } else {                        # big-endian
-           s/le\?/#le#/o       or
-           s/be\?//o           or
-           s/\?([a-z]+)/$1/o;
-       }
-
-        print $_,"\n";
-}
-
-close STDOUT or die "error closing STDOUT: $!";
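-
-# The flavour argument selects ABI and endianness; a typical invocation,
-# assuming the usual perlasm setup, is:
-#   perl aesp8-ppc.pl linux-ppc64le > aesp8-ppc.S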