/*
 * Copyright (c) 2016, Linaro Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */
  24         .global _Copy_conjoint_words
  25         .global _Copy_disjoint_words
  26 
  27 s       .req    x0
  28 d       .req    x1
  29 count   .req    x2
  30 t0      .req    x3
  31 t1      .req    x4
  32 t2      .req    x5
  33 t3      .req    x6
  34 t4      .req    x7
  35 t5      .req    x8
  36 t6      .req    x9
  37 t7      .req    x10
  38 
  39         .align  6
  40 _Copy_disjoint_words:
  41         // Ensure 2 word aligned
  42         tbz     s, #3, fwd_copy_aligned
  43         ldr     t0, [s], #8
  44         str     t0, [d], #8
  45         sub     count, count, #1
  46 
  47 fwd_copy_aligned:
  48         ldp     t0, t1, [s, #0]
  49         ldp     t2, t3, [s, #16]
  50         ldp     t4, t5, [s, #32]
  51         ldp     t6, t7, [s, #48]!       // Source now biased by -16
  52 
  53         tbnz    d, #3, unal_fwd_copy
  54         sub     d, d, #16               // and bias dest
  55 
  56         subs    count, count, #16
  57         blo     fwd_copy_drain
  58 
  59 fwd_copy_again:
  60         prfm    pldl1keep, [s, #256]
  61         stp     t0, t1, [d, #16]
  62         ldp     t0, t1, [s, #16]
  63         stp     t2, t3, [d, #32]
  64         ldp     t2, t3, [s, #32]
  65         stp     t4, t5, [d, #48]
  66         ldp     t4, t5, [s, #48]
  67         stp     t6, t7, [d, #64]!
  68         ldp     t6, t7, [s, #64]!
  69         subs    count, count, #8
  70         bhs     fwd_copy_again
  71 
  72 fwd_copy_drain:
  73         stp     t0, t1, [d, #16]
  74         stp     t2, t3, [d, #32]
  75         stp     t4, t5, [d, #48]
  76         stp     t6, t7, [d, #64]!
  77 
  78         // count is now -8..-1 for 0..7 words to copy
  79         adr     t0, 0f
  80         add     t0, t0, count, lsl #5
  81         br      t0
  82 
  83         .align  5
  84         ret                             // -8 == 0 words
  85         .align  5
  86         ldr     t0, [s, #16]            // -7 == 1 word
  87         str     t0, [d, #16]
  88         ret
  89         .align  5
  90         ldp     t0, t1, [s, #16]        // -6 = 2 words
  91         stp     t0, t1, [d, #16]
  92         ret
  93         .align  5
  94         ldp     t0, t1, [s, #16]        // -5 = 3 words
  95         ldr     t2, [s, #32]
  96         stp     t0, t1, [d, #16]
  97         str     t2, [d, #32]
  98         ret
  99         .align  5
 100         ldp     t0, t1, [s, #16]        // -4 = 4 words
 101         ldp     t2, t3, [s, #32]
 102         stp     t0, t1, [d, #16]
 103         stp     t2, t3, [d, #32]
 104         ret
 105         .align  5
 106         ldp     t0, t1, [s, #16]        // -3 = 5 words
 107         ldp     t2, t3, [s, #32]
 108         ldr     t4, [s, #48]
 109         stp     t0, t1, [d, #16]
 110         stp     t2, t3, [d, #32]
 111         str     t4, [d, #48]
 112         ret
 113         .align  5
 114         ldp     t0, t1, [s, #16]        // -2 = 6 words
 115         ldp     t2, t3, [s, #32]
 116         ldp     t4, t5, [s, #48]
 117         stp     t0, t1, [d, #16]
 118         stp     t2, t3, [d, #32]
 119         stp     t4, t5, [d, #48]
 120         ret
 121         .align  5
 122         ldp     t0, t1, [s, #16]        // -1 = 7 words
 123         ldp     t2, t3, [s, #32]
 124         ldp     t4, t5, [s, #48]
 125         ldr     t6, [s, #64]
 126         stp     t0, t1, [d, #16]
 127         stp     t2, t3, [d, #32]
 128         stp     t4, t5, [d, #48]
 129         str     t6, [d, #64]
 130         // Is always aligned here, code for 7 words is one instruction
 131         // too large so it just falls through.
 132         .align  5
 133 0:
 134         ret
 135 
 136 unal_fwd_copy:
 137         // Bias dest so we only pre index on the last copy
 138         sub     d, d, #8
 139         subs    count, count, #16
 140         blo     unal_fwd_copy_drain
 141 
 142 unal_fwd_copy_again:
 143         prfm    pldl1keep, [s, #256]
 144         str     t0, [d, #8]
 145         stp     t1, t2, [d, #16]
 146         ldp     t0, t1, [s, #16]
 147         stp     t3, t4, [d, #32]
 148         ldp     t2, t3, [s, #32]
 149         stp     t5, t6, [d, #48]
 150         ldp     t4, t5, [s, #48]
 151         str     t7, [d, #64]!
 152         ldp     t6, t7, [s, #64]!
 153         subs    count, count, #8
 154         bhs     unal_fwd_copy_again
 155 
 156 unal_fwd_copy_drain:
 157         str     t0, [d, #8]
 158         stp     t1, t2, [d, #16]
 159         stp     t3, t4, [d, #32]
 160         stp     t5, t6, [d, #48]
 161         str     t7, [d, #64]!
 162 
 163         // count is now -8..-1 for 0..7 words to copy
 164         adr     t0, 0f
 165         add     t0, t0, count, lsl #5
 166         br      t0
 167 
 168         .align  5
 169         ret                             // -8 == 0 words
 170         .align  5
 171         ldr     t0, [s, #16]            // -7 == 1 word
 172         str     t0, [d, #8]
 173         ret
 174         .align  5
 175         ldp     t0, t1, [s, #16]        // -6 = 2 words
 176         str     t0, [d, #8]
 177         str     t1, [d, #16]
 178         ret
 179         .align  5
 180         ldp     t0, t1, [s, #16]        // -5 = 3 words
 181         ldr     t2, [s, #32]
 182         str     t0, [d, #8]
 183         stp     t1, t2, [d, #16]
 184         ret
 185         .align  5
 186         ldp     t0, t1, [s, #16]        // -4 = 4 words
 187         ldp     t2, t3, [s, #32]
 188         str     t0, [d, #8]
 189         stp     t1, t2, [d, #16]
 190         str     t3, [d, #32]
 191         ret
 192         .align  5
 193         ldp     t0, t1, [s, #16]        // -3 = 5 words
 194         ldp     t2, t3, [s, #32]
 195         ldr     t4, [s, #48]
 196         str     t0, [d, #8]
 197         stp     t1, t2, [d, #16]
 198         stp     t3, t4, [d, #32]
 199         ret
 200         .align  5
 201         ldp     t0, t1, [s, #16]        // -2 = 6 words
 202         ldp     t2, t3, [s, #32]
 203         ldp     t4, t5, [s, #48]
 204         str     t0, [d, #8]
 205         stp     t1, t2, [d, #16]
 206         stp     t3, t4, [d, #32]
 207         str     t5, [d, #48]
 208         ret
 209         .align  5
 210         ldp     t0, t1, [s, #16]        // -1 = 7 words
 211         ldp     t2, t3, [s, #32]
 212         ldp     t4, t5, [s, #48]
 213         ldr     t6, [s, #64]
 214         str     t0, [d, #8]
 215         stp     t1, t2, [d, #16]
 216         stp     t3, t4, [d, #32]
 217         stp     t5, t6, [d, #48]
 218         // Is always aligned here, code for 7 words is one instruction
 219         // too large so it just falls through.
 220         .align  5
 221 0:
 222         ret
 223 
 224         .align  6
 225 _Copy_conjoint_words:
 226         sub     t0, d, s
 227         cmp     t0, count, lsl #3
 228         bhs     _Copy_disjoint_words
 229 
 230         add     s, s, count, lsl #3
 231         add     d, d, count, lsl #3
 232 
 233         // Ensure 2 word aligned
 234         tbz     s, #3, bwd_copy_aligned
 235         ldr     t0, [s, #-8]!
 236         str     t0, [d, #-8]!
 237         sub     count, count, #1
 238 
 239 bwd_copy_aligned:
 240         ldp     t0, t1, [s, #-16]
 241         ldp     t2, t3, [s, #-32]
 242         ldp     t4, t5, [s, #-48]
 243         ldp     t6, t7, [s, #-64]!
 244 
 245         tbnz    d, #3, unal_bwd_copy
 246 
 247         subs    count, count, #16
 248         blo     bwd_copy_drain
 249 
 250 bwd_copy_again:
 251         prfum   pldl1keep, [s, #-256]
 252         stp     t0, t1, [d, #-16]
 253         ldp     t0, t1, [s, #-16]
 254         stp     t2, t3, [d, #-32]
 255         ldp     t2, t3, [s, #-32]
 256         stp     t4, t5, [d, #-48]
 257         ldp     t4, t5, [s, #-48]
 258         stp     t6, t7, [d, #-64]!
 259         ldp     t6, t7, [s, #-64]!
 260         subs    count, count, #8
 261         bhs     bwd_copy_again
 262 
 263 bwd_copy_drain:
 264         stp     t0, t1, [d, #-16]
 265         stp     t2, t3, [d, #-32]
 266         stp     t4, t5, [d, #-48]
 267         stp     t6, t7, [d, #-64]!
 268 
 269         // count is now -8..-1 for 0..7 words to copy
 270         adr     t0, 0f
 271         add     t0, t0, count, lsl #5
 272         br      t0
 273 
 274         .align  5
 275         ret                             // -8 == 0 words
 276         .align  5
 277         ldr     t0, [s, #-8]            // -7 == 1 word
 278         str     t0, [d, #-8]
 279         ret
 280         .align  5
 281         ldp     t0, t1, [s, #-16]       // -6 = 2 words
 282         stp     t0, t1, [d, #-16]
 283         ret
 284         .align  5
 285         ldp     t0, t1, [s, #-16]       // -5 = 3 words
 286         ldr     t2, [s, #-24]
 287         stp     t0, t1, [d, #-16]
 288         str     t2, [d, #-24]
 289         ret
 290         .align  5
 291         ldp     t0, t1, [s, #-16]       // -4 = 4 words
 292         ldp     t2, t3, [s, #-32]
 293         stp     t0, t1, [d, #-16]
 294         stp     t2, t3, [d, #-32]
 295         ret
 296         .align  5
 297         ldp     t0, t1, [s, #-16]       // -3 = 5 words
 298         ldp     t2, t3, [s, #-32]
 299         ldr     t4, [s, #-40]
 300         stp     t0, t1, [d, #-16]
 301         stp     t2, t3, [d, #-32]
 302         str     t4, [d, #-40]
 303         ret
 304         .align  5
 305         ldp     t0, t1, [s, #-16]       // -2 = 6 words
 306         ldp     t2, t3, [s, #-32]
 307         ldp     t4, t5, [s, #-48]
 308         stp     t0, t1, [d, #-16]
 309         stp     t2, t3, [d, #-32]
 310         stp     t4, t5, [d, #-48]
 311         ret
 312         .align  5
 313         ldp     t0, t1, [s, #-16]       // -1 = 7 words
 314         ldp     t2, t3, [s, #-32]
 315         ldp     t4, t5, [s, #-48]
 316         ldr     t6, [s, #-56]
 317         stp     t0, t1, [d, #-16]
 318         stp     t2, t3, [d, #-32]
 319         stp     t4, t5, [d, #-48]
 320         str     t6, [d, #-56]
 321         // Is always aligned here, code for 7 words is one instruction
 322         // too large so it just falls through.
 323         .align  5
 324 0:
 325         ret
 326 
 327 unal_bwd_copy:
 328         subs    count, count, #16
 329         blo     unal_bwd_copy_drain
 330 
 331 unal_bwd_copy_again:
 332         prfm    pldl1keep, [s, #-256]
 333         str     t1, [d, #-8]
 334         stp     t3, t0, [d, #-24]
 335         ldp     t0, t1, [s, #-16]
 336         stp     t5, t2, [d, #-40]
 337         ldp     t2, t3, [s, #-32]
 338         stp     t7, t4, [d, #-56]
 339         ldp     t4, t5, [s, #-48]
 340         str     t6, [d, #-64]!
 341         ldp     t6, t7, [s, #-64]!
 342         subs    count, count, #8
 343         bhs     unal_bwd_copy_again
 344 
 345 unal_bwd_copy_drain:
 346         str     t1, [d, #-8]
 347         stp     t3, t0, [d, #-24]
 348         stp     t5, t2, [d, #-40]
 349         stp     t7, t4, [d, #-56]
 350         str     t6, [d, #-64]!
 351 
 352         // count is now -8..-1 for 0..7 words to copy
 353         adr     t0, 0f
 354         add     t0, t0, count, lsl #5
 355         br      t0
 356 
 357         .align  5
 358         ret                             // -8 == 0 words
 359         .align  5
 360         ldr     t0, [s, #-8]            // -7 == 1 word
 361         str     t0, [d, #-8]
 362         ret
 363         .align  5
 364         ldp     t0, t1, [s, #-16]       // -6 = 2 words
 365         str     t1, [d, #-8]
 366         str     t0, [d, #-16]
 367         ret
 368         .align  5
 369         ldp     t0, t1, [s, #-16]       // -5 = 3 words
 370         ldr     t2, [s, #-24]
 371         str     t1, [d, #-8]
 372         stp     t2, t0, [d, #-24]
 373         ret
 374         .align  5
 375         ldp     t0, t1, [s, #-16]       // -4 = 4 words
 376         ldp     t2, t3, [s, #-32]
 377         str     t1, [d, #-8]
 378         stp     t3, t0, [d, #-24]
 379         str     t2, [d, #-32]
 380         ret
 381         .align  5
 382         ldp     t0, t1, [s, #-16]       // -3 = 5 words
 383         ldp     t2, t3, [s, #-32]
 384         ldr     t4, [s, #-40]
 385         str     t1, [d, #-8]
 386         stp     t3, t0, [d, #-24]
 387         stp     t4, t2, [d, #-40]
 388         ret
 389         .align  5
 390         ldp     t0, t1, [s, #-16]       // -2 = 6 words
 391         ldp     t2, t3, [s, #-32]
 392         ldp     t4, t5, [s, #-48]
 393         str     t1, [d, #-8]
 394         stp     t3, t0, [d, #-24]
 395         stp     t5, t2, [d, #-40]
 396         str     t4, [d, #-48]
 397         ret
 398         .align  5
 399         ldp     t0, t1, [s, #-16]       // -1 = 7 words
 400         ldp     t2, t3, [s, #-32]
 401         ldp     t4, t5, [s, #-48]
 402         ldr     t6, [s, #-56]
 403         str     t1, [d, #-8]
 404         stp     t3, t0, [d, #-24]
 405         stp     t5, t2, [d, #-40]
 406         stp     t6, t4, [d, #-56]
 407         // Is always aligned here, code for 7 words is one instruction
 408         // too large so it just falls through.
 409         .align  5
 410 0:
 411         ret