/*
 * Copyright (c) 2016, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
  24         .global _Copy_conjoint_words
  25         .global _Copy_disjoint_words
  26 
  27 s       .req    x0
  28 d       .req    x1
  29 count   .req    x2
  30 t0      .req    x3
  31 t1      .req    x4
  32 t2      .req    x5
  33 t3      .req    x6
  34 t4      .req    x7
  35 t5      .req    x8
  36 t6      .req    x9
  37 t7      .req    x10
  38 
  39         .align  6
  40 _Copy_disjoint_words:
  41         // Ensure 2 word aligned
  42         tbz     s, #3, fwd_copy_aligned
  43         ldr     t0, [s], #8
  44         str     t0, [d], #8
  45         sub     count, count, #1
  46 
  47 fwd_copy_aligned:
  48         // Bias s & d so we only pre index on the last copy
  49         sub     s, s, #16
  50         sub     d, d, #16
  51 
  52         ldp     t0, t1, [s, #16]
  53         ldp     t2, t3, [s, #32]
  54         ldp     t4, t5, [s, #48]
  55         ldp     t6, t7, [s, #64]!
  56 
  57         subs    count, count, #16
  58         blo     fwd_copy_drain
  59 
  60 fwd_copy_again:
  61         prfm    pldl1keep, [s, #256]
  62         stp     t0, t1, [d, #16]
  63         ldp     t0, t1, [s, #16]
  64         stp     t2, t3, [d, #32]
  65         ldp     t2, t3, [s, #32]
  66         stp     t4, t5, [d, #48]
  67         ldp     t4, t5, [s, #48]
  68         stp     t6, t7, [d, #64]!
  69         ldp     t6, t7, [s, #64]!
  70         subs    count, count, #8
  71         bhs     fwd_copy_again
  72 
  73 fwd_copy_drain:
  74         stp     t0, t1, [d, #16]
  75         stp     t2, t3, [d, #32]
  76         stp     t4, t5, [d, #48]
  77         stp     t6, t7, [d, #64]!
  78 
  79         // count is now -8..-1 for 0..7 words to copy
  80         adr     t0, 0f
  81         add     t0, t0, count, lsl #5
  82         br      t0
  83 
  84         .align  5
  85         ret                             // -8 == 0 words
  86         .align  5
  87         ldr     t0, [s, #16]            // -7 == 1 word
  88         str     t0, [d, #16]
  89         ret
  90         .align  5
  91         ldp     t0, t1, [s, #16]        // -6 = 2 words
  92         stp     t0, t1, [d, #16]
  93         ret
  94         .align  5
  95         ldp     t0, t1, [s, #16]        // -5 = 3 words
  96         ldr     t2, [s, #32]
  97         stp     t0, t1, [d, #16]
  98         str     t2, [d, #32]
  99         ret
 100         .align  5
 101         ldp     t0, t1, [s, #16]        // -4 = 4 words
 102         ldp     t2, t3, [s, #32]
 103         stp     t0, t1, [d, #16]
 104         stp     t2, t3, [d, #32]
 105         ret
 106         .align  5
 107         ldp     t0, t1, [s, #16]        // -3 = 5 words
 108         ldp     t2, t3, [s, #32]
 109         ldr     t4, [s, #48]
 110         stp     t0, t1, [d, #16]
 111         stp     t2, t3, [d, #32]
 112         str     t4, [d, #48]
 113         ret
 114         .align  5
 115         ldp     t0, t1, [s, #16]        // -2 = 6 words
 116         ldp     t2, t3, [s, #32]
 117         ldp     t4, t5, [s, #48]
 118         stp     t0, t1, [d, #16]
 119         stp     t2, t3, [d, #32]
 120         stp     t4, t5, [d, #48]
 121         ret
 122         .align  5
 123         ldp     t0, t1, [s, #16]        // -1 = 7 words
 124         ldp     t2, t3, [s, #32]
 125         ldp     t4, t5, [s, #48]
 126         ldr     t6, [s, #64]
 127         stp     t0, t1, [d, #16]
 128         stp     t2, t3, [d, #32]
 129         stp     t4, t5, [d, #48]
 130         str     t6, [d, #64]
 131         // Is always aligned here, code for 7 words is one instruction
 132         // too large so it just falls through.
 133         .align  5
 134 0:
 135         ret
 136 
 137         .align  6
 138 _Copy_conjoint_words:
 139         sub     t0, d, s
 140         cmp     t0, count, lsl #3
 141         bhs     _Copy_disjoint_words
 142 
 143         add     s, s, count, lsl #3
 144         add     d, d, count, lsl #3
 145 
 146         // Ensure 2 word aligned
 147         tbz     s, #3, bwd_copy_aligned
 148         ldr     t0, [s, #-8]!
 149         str     t0, [d, #-8]!
 150         sub     count, count, #1
 151 
 152 bwd_copy_aligned:
 153         ldp     t0, t1, [s, #-16]
 154         ldp     t2, t3, [s, #-32]
 155         ldp     t4, t5, [s, #-48]
 156         ldp     t6, t7, [s, #-64]!
 157 
 158         subs    count, count, #16
 159         blo     bwd_copy_drain
 160 
 161 bwd_copy_again:
 162         prfm    pldl1keep, [s, #-256]
 163         stp     t0, t1, [d, #-16]
 164         ldp     t0, t1, [s, #-16]
 165         stp     t2, t3, [d, #-32]
 166         ldp     t2, t3, [s, #-32]
 167         stp     t4, t5, [d, #-48]
 168         ldp     t4, t5, [s, #-48]
 169         stp     t6, t7, [d, #-64]!
 170         ldp     t6, t7, [s, #-64]!
 171         subs    count, count, #8
 172         bhs     bwd_copy_again
 173 
 174 bwd_copy_drain:
 175         stp     t0, t1, [d, #-16]
 176         stp     t2, t3, [d, #-32]
 177         stp     t4, t5, [d, #-48]
 178         stp     t6, t7, [d, #-64]!
 179 
 180         // count is now -8..-1 for 0..7 words to copy
 181         adr     t0, 0f
 182         add     t0, t0, count, lsl #5
 183         br      t0
 184 
 185         .align  5
 186         ret                             // -8 == 0 words
 187         .align  5
 188         ldr     t0, [s, #-8]            // -7 == 1 word
 189         str     t0, [d, #-8]
 190         ret
 191         .align  5
 192         ldp     t0, t1, [s, #-16]       // -6 = 2 words
 193         stp     t0, t1, [d, #-16]
 194         ret
 195         .align  5
 196         ldp     t0, t1, [s, #-16]       // -5 = 3 words
 197         ldr     t2, [s, #-24]
 198         stp     t0, t1, [d, #-16]
 199         str     t2, [d, #-24]
 200         ret
 201         .align  5
 202         ldp     t0, t1, [s, #-16]       // -4 = 4 words
 203         ldp     t2, t3, [s, #-32]
 204         stp     t0, t1, [d, #-16]
 205         stp     t2, t3, [d, #-32]
 206         ret
 207         .align  5
 208         ldp     t0, t1, [s, #-16]       // -3 = 5 words
 209         ldp     t2, t3, [s, #-32]
 210         ldr     t4, [s, #-40]
 211         stp     t0, t1, [d, #-16]
 212         stp     t2, t3, [d, #-32]
 213         str     t4, [d, #-40]
 214         ret
 215         .align  5
 216         ldp     t0, t1, [s, #-16]       // -2 = 6 words
 217         ldp     t2, t3, [s, #-32]
 218         ldp     t4, t5, [s, #-48]
 219         stp     t0, t1, [d, #-16]
 220         stp     t2, t3, [d, #-32]
 221         stp     t4, t5, [d, #-48]
 222         ret
 223         .align  5
 224         ldp     t0, t1, [s, #-16]       // -1 = 7 words
 225         ldp     t2, t3, [s, #-32]
 226         ldp     t4, t5, [s, #-48]
 227         ldr     t6, [s, #-56]
 228         stp     t0, t1, [d, #-16]
 229         stp     t2, t3, [d, #-32]
 230         stp     t4, t5, [d, #-48]
 231         str     t6, [d, #-56]
 232         // Is always aligned here, code for 7 words is one instruction
 233         // too large so it just falls through.
 234         .align  5
 235 0:
 236         ret