arch/c6x/lib/divu.S

   1 ;; SPDX-License-Identifier: GPL-2.0-or-later
   2 ;;  Copyright 2010  Free Software Foundation, Inc.
   3 ;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
   4 ;;
   5
   6 #include <linux/linkage.h>
   7
   8         ;; ABI considerations for the divide functions
   9         ;; The following registers are call-used:
  10         ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
  11         ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
  12         ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
  13         ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
  14         ;;
  15         ;; In our implementation, divu and remu are leaf functions,
  16         ;; while both divi and remi call into divu.
  17         ;; A0 is not clobbered by any of the functions.
  18         ;; divu does not clobber B2 either, which is taken advantage of
  19         ;; in remi.
  20         ;; divi uses B5 to hold the original return address during
  21         ;; the call to divu.
  22         ;; remi uses B2 and A5 to hold the input values during the
   23         ;; call to divu.  It stores B3 on the stack.
  24
  25         .text
   26 ENTRY(__c6xabi_divu)
   27         ;; Unsigned 32-bit divide.  In: A4 = dividend, B4 = divisor,
   28         ;; B3 = return address.  Out: A4 = quotient.  We use a series of
   29         ;; up to 31 subc instructions.  First, we find out how many leading
   30         ;; zero bits there are in the divisor.  This gives us both a shift
   31         ;; count for aligning (shifting) the divisor to the top bit, and
   32         ;; the number of times we have to execute subc.  At the end, we have
   33         ;; both the remainder and most of the quotient in A4.  The top bit
   34         ;; of the quotient is computed first and is placed in A2.
   35
   36         ;; Return immediately if the dividend is zero.
   37          mv     .s2x    A4, B1      ; B1 = dividend, used as predicate below
   38    [B1]  lmbd   .l2     1, B4, B1   ; dividend != 0: B1 = leading zero count of divisor
   39 || [!B1] b      .s2     B3      ; RETURN A (dividend == 0; takes effect at the note below)
   40 || [!B1] mvk    .d2     1, B4       ; dividend == 0: B4 = 1 (NOTE(review): presumably keeps the steps before RETURN A harmless - confirm)
   41          mv     .l1x    B1, A6      ; A6 = shift count, kept for the final fixup
   42 ||       shl    .s2     B4, B1, B4  ; B4 = divisor shifted left by the shift count
   43
   44         ;; The loop performs a maximum of 28 steps, so we do the
   45         ;; first 3 here.
   46          cmpltu .l1x    A4, B4, A2  ; A2 = 1 if dividend < aligned divisor
   47    [!A2] sub    .l1x    A4, B4, A4  ; otherwise subtract the aligned divisor
   48 ||       shru   .s2     B4, 1, B4   ; B4 = aligned divisor >> 1, used by every subc below
   49 ||       xor    .s1     1, A2, A2   ; A2 = top quotient bit (inverse of the compare)
   50
   51          shl    .s1     A2, 31, A2  ; move the top quotient bit up to bit 31
   52 || [B1]  subc   .l1x    A4,B4,A4    ; divide step, executed only while B1 != 0
   53 || [B1]  add    .s2     -1, B1, B1  ; B1 = number of steps still to do
   54    [B1]  subc   .l1x    A4,B4,A4    ; second step done ahead of the loop
   55 || [B1]  add    .s2     -1, B1, B1
   56
   57         ;; RETURN A may happen here (note: must happen before the next branch)
   58 _divu_loop:
   59          cmpgt  .l2     B1, 7, B0   ; B0 = 1 if B1 > 7, i.e. the 7 steps of this pass are not enough
   60 || [B1]  subc   .l1x    A4,B4,A4    ; step 1 of 7 in this pass
   61 || [B1]  add    .s2     -1, B1, B1
   62    [B1]  subc   .l1x    A4,B4,A4    ; step 2
   63 || [B1]  add    .s2     -1, B1, B1
   64 || [B0]  b      .s1     _divu_loop  ; go round again if needed; takes effect after step 7
   65    [B1]  subc   .l1x    A4,B4,A4    ; step 3
   66 || [B1]  add    .s2     -1, B1, B1
   67    [B1]  subc   .l1x    A4,B4,A4    ; step 4
   68 || [B1]  add    .s2     -1, B1, B1
   69    [B1]  subc   .l1x    A4,B4,A4    ; step 5
   70 || [B1]  add    .s2     -1, B1, B1
   71    [B1]  subc   .l1x    A4,B4,A4    ; step 6
   72 || [B1]  add    .s2     -1, B1, B1
   73    [B1]  subc   .l1x    A4,B4,A4    ; step 7
   74 || [B1]  add    .s2     -1, B1, B1
   75         ;; loop backwards branch happens here
   76
   77          ret    .s2     B3          ; return; the fixup below runs in its delay slots
   78 ||       mvk    .s1     32, A1      ; A1 = 32
   79          sub    .l1     A1, A6, A6  ; A6 = 32 - shift count
   80          shl    .s1     A4, A6, A4  ; move the quotient bits to the top, dropping the remainder
   81          shru   .s1     A4, 1, A4   ; free bit 31 for the top quotient bit
   82 ||       sub    .l1     A6, 1, A6   ; A6 = 31 - shift count
   83          or     .l1     A2, A4, A4  ; insert the top quotient bit held in A2
   84          shru   .s1     A4, A6, A4  ; right-align the quotient: A4 = result
   85          nop
   86 ENDPROC(__c6xabi_divu)