ddr: altera: Fix ad-hoc iterative division implementation
[oweals/u-boot.git] / drivers / ddr / altera / sequencer.c
index 1122cfa71dffea5e3feb874a438fe2de7737d921..28e32ff57501b797712d5fdfce159e832a401501 100644 (file)
@@ -305,7 +305,7 @@ static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
        scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
 }
 
-static void scc_mgr_set_dqs_io_in_delay(uint32_t write_group, uint32_t delay)
+static void scc_mgr_set_dqs_io_in_delay(uint32_t delay)
 {
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
@@ -321,8 +321,7 @@ static void scc_mgr_set_dq_out1_delay(uint32_t dq_in_group, uint32_t delay)
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
 }
 
-static void scc_mgr_set_dqs_out1_delay(uint32_t write_group,
-                                             uint32_t delay)
+static void scc_mgr_set_dqs_out1_delay(uint32_t delay)
 {
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, RW_MGR_MEM_DQ_PER_WRITE_DQS,
                    delay);
@@ -430,11 +429,19 @@ static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
        writel(0, &sdr_scc_mgr->update);
 }
 
-static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay)
+/**
+ * scc_mgr_set_oct_out1_delay() - Set OCT output delay
+ * @write_group:       Write group
+ * @delay:             Delay value
+ *
+ * This function sets the OCT output delay in SCC manager.
+ */
+static void scc_mgr_set_oct_out1_delay(const u32 write_group, const u32 delay)
 {
-       uint32_t read_group;
-       uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_OCT_OUT1_DELAY_OFFSET;
-
+       const int ratio = RW_MGR_MEM_IF_READ_DQS_WIDTH /
+                         RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
+       const int base = write_group * ratio;
+       int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
@@ -442,44 +449,54 @@ static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay)
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
-       for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH /
-            RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
-            read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH /
-            RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group)
-               writel(delay, addr + (read_group << 2));
+       for (i = 0; i < ratio; i++)
+               scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
 }
 
+/**
+ * scc_mgr_set_hhp_extras() - Set HHP extras.
+ *
+ * Load the fixed setting in the SCC manager HHP extras.
+ */
 static void scc_mgr_set_hhp_extras(void)
 {
        /*
         * Load the fixed setting in the SCC manager
-        * bits: 0:0 = 1'b1   - dqs bypass
-        * bits: 1:1 = 1'b1   - dq bypass
-        * bits: 4:2 = 3'b001   - rfifo_mode
-        * bits: 6:5 = 2'b01  - rfifo clock_select
-        * bits: 7:7 = 1'b0  - separate gating from ungating setting
-        * bits: 8:8 = 1'b0  - separate OE from Output delay setting
+        * bits: 0:0 = 1'b1     - DQS bypass
+        * bits: 1:1 = 1'b1     - DQ bypass
+        * bits: 4:2 = 3'b001   - rfifo_mode
+        * bits: 6:5 = 2'b01    - rfifo clock_select
+        * bits: 7:7 = 1'b0     - separate gating from ungating setting
+        * bits: 8:8 = 1'b0     - separate OE from Output delay setting
         */
-       uint32_t value = (0<<8) | (0<<7) | (1<<5) | (1<<2) | (1<<1) | (1<<0);
-       uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_GLOBALS_OFFSET;
+       const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
+                         (1 << 2) | (1 << 1) | (1 << 0);
+       const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
+                        SCC_MGR_HHP_GLOBALS_OFFSET |
+                        SCC_MGR_HHP_EXTRAS_OFFSET;
 
-       writel(value, addr + SCC_MGR_HHP_EXTRAS_OFFSET);
+       debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n",
+                  __func__, __LINE__);
+       writel(value, addr);
+       debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
+                  __func__, __LINE__);
 }
 
-/*
- * USER Zero all DQS config
- * TODO: maybe rename to scc_mgr_zero_dqs_config (or something)
+/**
+ * scc_mgr_zero_all() - Zero all DQS config
+ *
+ * Zero all DQS config.
  */
 static void scc_mgr_zero_all(void)
 {
-       uint32_t i, r;
+       int i, r;
 
        /*
         * USER Zero all DQS config settings, across all groups and all
         * shadow registers
         */
-       for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r +=
-            NUM_RANKS_PER_SHADOW_REG) {
+       for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
+            r += NUM_RANKS_PER_SHADOW_REG) {
                for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
                        /*
                         * The phases actually don't exist on a per-rank basis,
@@ -493,40 +510,35 @@ static void scc_mgr_zero_all(void)
 
                for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
                        scc_mgr_set_dqdqs_output_phase(i, 0);
-                       /* av/cv don't have out2 */
+                       /* Arria V/Cyclone V don't have out2. */
                        scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
                }
        }
 
-       /* multicast to all DQS group enables */
+       /* Multicast to all DQS group enables. */
        writel(0xff, &sdr_scc_mgr->dqs_ena);
        writel(0, &sdr_scc_mgr->update);
 }
 
-static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode)
+/**
+ * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
+ * @write_group:       Write group
+ *
+ * Set bypass mode and trigger SCC update.
+ */
+static void scc_set_bypass_mode(const u32 write_group)
 {
-       /* mode = 0 : Do NOT bypass - Half Rate Mode */
-       /* mode = 1 : Bypass - Full Rate Mode */
-
-       /* only need to set once for all groups, pins, dq, dqs, dm */
-       if (write_group == 0) {
-               debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", __func__,
-                          __LINE__);
-               scc_mgr_set_hhp_extras();
-               debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
-                         __func__, __LINE__);
-       }
-       /* multicast to all DQ enables */
+       /* Multicast to all DQ enables. */
        writel(0xff, &sdr_scc_mgr->dq_ena);
        writel(0xff, &sdr_scc_mgr->dm_ena);
 
-       /* update current DQS IO enable */
+       /* Update current DQS IO enable. */
        writel(0, &sdr_scc_mgr->dqs_io_ena);
 
-       /* update the DQS logic */
+       /* Update the DQS logic. */
        writel(write_group, &sdr_scc_mgr->dqs_ena);
 
-       /* hit update */
+       /* Hit update. */
        writel(0, &sdr_scc_mgr->update);
 }
 
@@ -553,43 +565,47 @@ static void scc_mgr_load_dqs_for_write_group(const u32 write_group)
                writel(base + i, &sdr_scc_mgr->dqs_ena);
 }
 
-static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin,
-                              int32_t out_only)
+/**
+ * scc_mgr_zero_group() - Zero all configs for a group
+ *
+ * Zero DQ, DM, DQS and OCT configs for a group.
+ */
+static void scc_mgr_zero_group(const u32 write_group, const int out_only)
 {
-       uint32_t i, r;
+       int i, r;
 
-       for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r +=
-               NUM_RANKS_PER_SHADOW_REG) {
-               /* Zero all DQ config settings */
+       for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
+            r += NUM_RANKS_PER_SHADOW_REG) {
+               /* Zero all DQ config settings. */
                for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
                        scc_mgr_set_dq_out1_delay(i, 0);
                        if (!out_only)
                                scc_mgr_set_dq_in_delay(i, 0);
                }
 
-               /* multicast to all DQ enables */
+               /* Multicast to all DQ enables. */
                writel(0xff, &sdr_scc_mgr->dq_ena);
 
-               /* Zero all DM config settings */
-               for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
+               /* Zero all DM config settings. */
+               for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                        scc_mgr_set_dm_out1_delay(i, 0);
-               }
 
-               /* multicast to all DM enables */
+               /* Multicast to all DM enables. */
                writel(0xff, &sdr_scc_mgr->dm_ena);
 
-               /* zero all DQS io settings */
+               /* Zero all DQS IO settings. */
                if (!out_only)
-                       scc_mgr_set_dqs_io_in_delay(write_group, 0);
-               /* av/cv don't have out2 */
-               scc_mgr_set_dqs_out1_delay(write_group, IO_DQS_OUT_RESERVE);
+                       scc_mgr_set_dqs_io_in_delay(0);
+
+               /* Arria V/Cyclone V don't have out2. */
+               scc_mgr_set_dqs_out1_delay(IO_DQS_OUT_RESERVE);
                scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
                scc_mgr_load_dqs_for_write_group(write_group);
 
-               /* multicast to all DQS IO enables (only 1) */
+               /* Multicast to all DQS IO enables (only 1 in total). */
                writel(0, &sdr_scc_mgr->dqs_io_ena);
 
-               /* hit update to zero everything */
+               /* Hit update to zero everything. */
                writel(0, &sdr_scc_mgr->update);
        }
 }
@@ -598,8 +614,7 @@ static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin,
  * apply and load a particular input delay for the DQ pins in a group
  * group_bgn is the index of the first dq pin (in the write group)
  */
-static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group,
-                                           uint32_t group_bgn, uint32_t delay)
+static void scc_mgr_apply_group_dq_in_delay(uint32_t group_bgn, uint32_t delay)
 {
        uint32_t i, p;
 
@@ -609,22 +624,24 @@ static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group,
        }
 }
 
-/* apply and load a particular output delay for the DQ pins in a group */
-static void scc_mgr_apply_group_dq_out1_delay(uint32_t write_group,
-                                             uint32_t group_bgn,
-                                             uint32_t delay1)
+/**
+ * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the DQ pins in a group
+ * @delay:             Delay value
+ *
+ * Apply and load a particular output delay for the DQ pins in a group.
+ */
+static void scc_mgr_apply_group_dq_out1_delay(const u32 delay)
 {
-       uint32_t i, p;
+       int i;
 
-       for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
-               scc_mgr_set_dq_out1_delay(i, delay1);
+       for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
+               scc_mgr_set_dq_out1_delay(i, delay);
                scc_mgr_load_dq(i);
        }
 }
 
 /* apply and load a particular output delay for the DM pins in a group */
-static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group,
-                                             uint32_t delay1)
+static void scc_mgr_apply_group_dm_out1_delay(uint32_t delay1)
 {
        uint32_t i;
 
@@ -639,114 +656,94 @@ static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group,
 static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
                                                    uint32_t delay)
 {
-       scc_mgr_set_dqs_out1_delay(write_group, delay);
+       scc_mgr_set_dqs_out1_delay(delay);
        scc_mgr_load_dqs_io();
 
        scc_mgr_set_oct_out1_delay(write_group, delay);
        scc_mgr_load_dqs_for_write_group(write_group);
 }
 
-/* apply a delay to the entire output side: DQ, DM, DQS, OCT */
-static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group,
-                                                 uint32_t group_bgn,
-                                                 uint32_t delay)
+/**
+ * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output side: DQ, DM, DQS, OCT
+ * @write_group:       Write group
+ * @delay:             Delay value
+ *
+ * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
+ */
+static void scc_mgr_apply_group_all_out_delay_add(const u32 write_group,
+                                                 const u32 delay)
 {
-       uint32_t i, p, new_delay;
-
-       /* dq shift */
-       for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
-               new_delay = READ_SCC_DQ_OUT2_DELAY;
-               new_delay += delay;
-
-               if (new_delay > IO_IO_OUT2_DELAY_MAX) {
-                       debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQ[%u,%u]:\
-                                  %u > %lu => %lu", __func__, __LINE__,
-                                  write_group, group_bgn, delay, i, p, new_delay,
-                                  (long unsigned int)IO_IO_OUT2_DELAY_MAX,
-                                  (long unsigned int)IO_IO_OUT2_DELAY_MAX);
-                       new_delay = IO_IO_OUT2_DELAY_MAX;
-               }
+       u32 i, new_delay;
 
+       /* DQ shift */
+       for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++)
                scc_mgr_load_dq(i);
-       }
-
-       /* dm shift */
-       for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
-               new_delay = READ_SCC_DM_IO_OUT2_DELAY;
-               new_delay += delay;
-
-               if (new_delay > IO_IO_OUT2_DELAY_MAX) {
-                       debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DM[%u]:\
-                                  %u > %lu => %lu\n",  __func__, __LINE__,
-                                  write_group, group_bgn, delay, i, new_delay,
-                                  (long unsigned int)IO_IO_OUT2_DELAY_MAX,
-                                  (long unsigned int)IO_IO_OUT2_DELAY_MAX);
-                       new_delay = IO_IO_OUT2_DELAY_MAX;
-               }
 
+       /* DM shift */
+       for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                scc_mgr_load_dm(i);
-       }
-
-       /* dqs shift */
-       new_delay = READ_SCC_DQS_IO_OUT2_DELAY;
-       new_delay += delay;
 
+       /* DQS shift */
+       new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
-               debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;"
-                          " adding %u to OUT1\n", __func__, __LINE__,
-                          write_group, group_bgn, delay, new_delay,
-                          IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
+               debug_cond(DLEVEL == 1,
+                          "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
+                          __func__, __LINE__, write_group, delay, new_delay,
+                          IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
-               scc_mgr_set_dqs_out1_delay(write_group, new_delay -
-                                          IO_IO_OUT2_DELAY_MAX);
-               new_delay = IO_IO_OUT2_DELAY_MAX;
+               new_delay -= IO_IO_OUT2_DELAY_MAX;
+               scc_mgr_set_dqs_out1_delay(new_delay);
        }
 
        scc_mgr_load_dqs_io();
 
-       /* oct shift */
-       new_delay = READ_SCC_OCT_OUT2_DELAY;
-       new_delay += delay;
-
+       /* OCT shift */
+       new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
        if (new_delay > IO_IO_OUT2_DELAY_MAX) {
-               debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;"
-                          " adding %u to OUT1\n", __func__, __LINE__,
-                          write_group, group_bgn, delay, new_delay,
-                          IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
+               debug_cond(DLEVEL == 1,
+                          "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
+                          __func__, __LINE__, write_group, delay,
+                          new_delay, IO_IO_OUT2_DELAY_MAX,
                           new_delay - IO_IO_OUT2_DELAY_MAX);
-               scc_mgr_set_oct_out1_delay(write_group, new_delay -
-                                          IO_IO_OUT2_DELAY_MAX);
-               new_delay = IO_IO_OUT2_DELAY_MAX;
+               new_delay -= IO_IO_OUT2_DELAY_MAX;
+               scc_mgr_set_oct_out1_delay(write_group, new_delay);
        }
 
        scc_mgr_load_dqs_for_write_group(write_group);
 }
 
-/*
- * USER apply a delay to the entire output side (DQ, DM, DQS, OCT)
- * and to all ranks
+/**
+ * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the entire output side to all ranks
+ * @write_group:       Write group
+ * @delay:             Delay value
+ *
+ * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
  */
-static void scc_mgr_apply_group_all_out_delay_add_all_ranks(
-       uint32_t write_group, uint32_t group_bgn, uint32_t delay)
+static void
+scc_mgr_apply_group_all_out_delay_add_all_ranks(const u32 write_group,
+                                               const u32 delay)
 {
-       uint32_t r;
+       int r;
 
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
-               r += NUM_RANKS_PER_SHADOW_REG) {
-               scc_mgr_apply_group_all_out_delay_add(write_group,
-                                                     group_bgn, delay);
+            r += NUM_RANKS_PER_SHADOW_REG) {
+               scc_mgr_apply_group_all_out_delay_add(write_group, delay);
                writel(0, &sdr_scc_mgr->update);
        }
 }
 
-/* optimization used to recover some slots in ddr3 inst_rom */
-/* could be applied to other protocols if we wanted to */
+/**
+ * set_jump_as_return() - Return instruction optimization
+ *
+ * Optimization used to recover some slots in ddr3 inst_rom. It could be
+ * applied to other protocols if we wanted to.
+ */
 static void set_jump_as_return(void)
 {
        /*
-        * to save space, we replace return with jump to special shared
+        * To save space, we replace return with jump to special shared
         * RETURN instruction so we set the counter to large value so that
-        * we always jump
+        * we always jump.
         */
        writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
        writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
@@ -847,12 +844,107 @@ static void delay_for_n_mem_clocks(const uint32_t clocks)
        debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
 }
 
-static void rw_mgr_mem_initialize(void)
+/**
+ * rw_mgr_mem_init_load_regs() - Load instruction registers
+ * @cntr0:     Counter 0 value
+ * @cntr1:     Counter 1 value
+ * @cntr2:     Counter 2 value
+ * @jump:      Jump instruction value
+ *
+ * Load instruction registers.
+ */
+static void rw_mgr_mem_init_load_regs(u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
 {
-       uint32_t r;
        uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                           RW_MGR_RUN_SINGLE_GROUP_OFFSET;
 
+       /* Load counters */
+       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
+              &sdr_rw_load_mgr_regs->load_cntr0);
+       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
+              &sdr_rw_load_mgr_regs->load_cntr1);
+       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
+              &sdr_rw_load_mgr_regs->load_cntr2);
+
+       /* Load jump address */
+       writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
+       writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
+       writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
+
+       /* Execute count instruction */
+       writel(jump, grpaddr);
+}
+
+/**
+ * rw_mgr_mem_load_user() - Load user calibration values
+ * @fin1:      Final instruction 1
+ * @fin2:      Final instruction 2
+ * @precharge: If 1, precharge the banks at the end
+ *
+ * Load user calibration values and optionally precharge the banks.
+ */
+static void rw_mgr_mem_load_user(const u32 fin1, const u32 fin2,
+                                const int precharge)
+{
+       u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
+                     RW_MGR_RUN_SINGLE_GROUP_OFFSET;
+       u32 r;
+
+       for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
+               if (param->skip_ranks[r]) {
+                       /* request to skip the rank */
+                       continue;
+               }
+
+               /* set rank */
+               set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
+
+               /* precharge all banks ... */
+               if (precharge)
+                       writel(RW_MGR_PRECHARGE_ALL, grpaddr);
+
+               /*
+                * USER Use Mirror-ed commands for odd ranks if address
+                * mirroring is on
+                */
+               if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
+                       set_jump_as_return();
+                       writel(RW_MGR_MRS2_MIRR, grpaddr);
+                       delay_for_n_mem_clocks(4);
+                       set_jump_as_return();
+                       writel(RW_MGR_MRS3_MIRR, grpaddr);
+                       delay_for_n_mem_clocks(4);
+                       set_jump_as_return();
+                       writel(RW_MGR_MRS1_MIRR, grpaddr);
+                       delay_for_n_mem_clocks(4);
+                       set_jump_as_return();
+                       writel(fin1, grpaddr);
+               } else {
+                       set_jump_as_return();
+                       writel(RW_MGR_MRS2, grpaddr);
+                       delay_for_n_mem_clocks(4);
+                       set_jump_as_return();
+                       writel(RW_MGR_MRS3, grpaddr);
+                       delay_for_n_mem_clocks(4);
+                       set_jump_as_return();
+                       writel(RW_MGR_MRS1, grpaddr);
+                       set_jump_as_return();
+                       writel(fin2, grpaddr);
+               }
+
+               if (precharge)
+                       continue;
+
+               set_jump_as_return();
+               writel(RW_MGR_ZQCL, grpaddr);
+
+               /* tZQinit = tDLLK = 512 ck cycles */
+               delay_for_n_mem_clocks(512);
+       }
+}
+
+static void rw_mgr_mem_initialize(void)
+{
        debug("%s:%d\n", __func__, __LINE__);
 
        /* The reset / cke part of initialization is broadcasted to all ranks */
@@ -882,25 +974,9 @@ static void rw_mgr_mem_initialize(void)
         * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
         * b = 6A
         */
-
-       /* Load counters */
-       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR0_VAL),
-              &sdr_rw_load_mgr_regs->load_cntr0);
-       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR1_VAL),
-              &sdr_rw_load_mgr_regs->load_cntr1);
-       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TINIT_CNTR2_VAL),
-              &sdr_rw_load_mgr_regs->load_cntr2);
-
-       /* Load jump address */
-       writel(RW_MGR_INIT_RESET_0_CKE_0,
-               &sdr_rw_load_jump_mgr_regs->load_jump_add0);
-       writel(RW_MGR_INIT_RESET_0_CKE_0,
-               &sdr_rw_load_jump_mgr_regs->load_jump_add1);
-       writel(RW_MGR_INIT_RESET_0_CKE_0,
-               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
-
-       /* Execute count instruction */
-       writel(RW_MGR_INIT_RESET_0_CKE_0, grpaddr);
+       rw_mgr_mem_init_load_regs(SEQ_TINIT_CNTR0_VAL, SEQ_TINIT_CNTR1_VAL,
+                                 SEQ_TINIT_CNTR2_VAL,
+                                 RW_MGR_INIT_RESET_0_CKE_0);
 
        /* indicate that memory is stable */
        writel(1, &phy_mgr_cfg->reset_mem_stbl);
@@ -919,73 +995,17 @@ static void rw_mgr_mem_initialize(void)
         * One possible solution is n = 2 , a = 131 , b = 256 => a = 83,
         * b = FF
         */
-
-       /* Load counters */
-       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR0_VAL),
-              &sdr_rw_load_mgr_regs->load_cntr0);
-       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR1_VAL),
-              &sdr_rw_load_mgr_regs->load_cntr1);
-       writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(SEQ_TRESET_CNTR2_VAL),
-              &sdr_rw_load_mgr_regs->load_cntr2);
-
-       /* Load jump address */
-       writel(RW_MGR_INIT_RESET_1_CKE_0,
-               &sdr_rw_load_jump_mgr_regs->load_jump_add0);
-       writel(RW_MGR_INIT_RESET_1_CKE_0,
-               &sdr_rw_load_jump_mgr_regs->load_jump_add1);
-       writel(RW_MGR_INIT_RESET_1_CKE_0,
-               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
-
-       writel(RW_MGR_INIT_RESET_1_CKE_0, grpaddr);
+       rw_mgr_mem_init_load_regs(SEQ_TRESET_CNTR0_VAL, SEQ_TRESET_CNTR1_VAL,
+                                 SEQ_TRESET_CNTR2_VAL,
+                                 RW_MGR_INIT_RESET_1_CKE_0);
 
        /* bring up clock enable */
 
        /* tXRP < 250 ck cycles */
        delay_for_n_mem_clocks(250);
 
-       for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
-               if (param->skip_ranks[r]) {
-                       /* request to skip the rank */
-                       continue;
-               }
-
-               /* set rank */
-               set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
-
-               /*
-                * USER Use Mirror-ed commands for odd ranks if address
-                * mirrorring is on
-                */
-               if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS2_MIRR, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS3_MIRR, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS1_MIRR, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS0_DLL_RESET_MIRR, grpaddr);
-               } else {
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS2, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS3, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS1, grpaddr);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS0_DLL_RESET, grpaddr);
-               }
-               set_jump_as_return();
-               writel(RW_MGR_ZQCL, grpaddr);
-
-               /* tZQinit = tDLLK = 512 ck cycles */
-               delay_for_n_mem_clocks(512);
-       }
+       rw_mgr_mem_load_user(RW_MGR_MRS0_DLL_RESET_MIRR, RW_MGR_MRS0_DLL_RESET,
+                            0);
 }
 
 /*
@@ -994,58 +1014,12 @@ static void rw_mgr_mem_initialize(void)
  */
 static void rw_mgr_mem_handoff(void)
 {
-       uint32_t r;
-       uint32_t grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
-                          RW_MGR_RUN_SINGLE_GROUP_OFFSET;
-
-       debug("%s:%d\n", __func__, __LINE__);
-       for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r++) {
-               if (param->skip_ranks[r])
-                       /* request to skip the rank */
-                       continue;
-               /* set rank */
-               set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_OFF);
-
-               /* precharge all banks ... */
-               writel(RW_MGR_PRECHARGE_ALL, grpaddr);
-
-               /* load up MR settings specified by user */
-
-               /*
-                * Use Mirror-ed commands for odd ranks if address
-                * mirrorring is on
-                */
-               if ((RW_MGR_MEM_ADDRESS_MIRRORING >> r) & 0x1) {
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS2_MIRR, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS3_MIRR, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS1_MIRR, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS0_USER_MIRR, grpaddr);
-               } else {
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS2, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS3, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS1, grpaddr);
-                       delay_for_n_mem_clocks(4);
-                       set_jump_as_return();
-                       writel(RW_MGR_MRS0_USER, grpaddr);
-               }
-               /*
-                * USER  need to wait tMOD (12CK or 15ns) time before issuing
-                * other commands, but we will have plenty of NIOS cycles before
-                * actual handoff so its okay.
-                */
-       }
+       rw_mgr_mem_load_user(RW_MGR_MRS0_USER_MIRR, RW_MGR_MRS0_USER, 1);
+       /*
+        * USER  need to wait tMOD (12CK or 15ns) time before issuing
+        * other commands, but we will have plenty of NIOS cycles before
+        * actual handoff so it's okay.
+        */
 }
 
 /*
@@ -1793,8 +1767,7 @@ rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay
 
        for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
             r += NUM_RANKS_PER_SHADOW_REG) {
-               for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS;
-                       i++, p++, d += delay_step) {
+               for (i = 0, p = test_bgn, d = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++, d += delay_step) {
                        debug_cond(DLEVEL == 1, "%s:%d rw_mgr_mem_calibrate_\
                                   vfifo_find_dqs_", __func__, __LINE__);
                        debug_cond(DLEVEL == 1, "en_phase_sweep_dq_in_delay: g=%u/%u ",
@@ -1920,7 +1893,7 @@ static uint32_t rw_mgr_mem_calibrate_vfifo_center(uint32_t rank_bgn,
        }
 
        /* Reset DQ delay chains to 0 */
-       scc_mgr_apply_group_dq_in_delay(write_group, test_bgn, 0);
+       scc_mgr_apply_group_dq_in_delay(test_bgn, 0);
        sticky_bit_chk = 0;
        for (i = RW_MGR_MEM_DQ_PER_READ_DQS - 1;; i--) {
                debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
@@ -2212,7 +2185,6 @@ static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
 {
        uint32_t p, d, rank_bgn, sr;
        uint32_t dtaps_per_ptap;
-       uint32_t tmp_delay;
        uint32_t bit_chk;
        uint32_t grp_calibrated;
        uint32_t write_group, write_test_bgn;
@@ -2227,14 +2199,8 @@ static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
        write_test_bgn = test_bgn;
 
        /* USER Determine number of delay taps for each phase tap */
-       dtaps_per_ptap = 0;
-       tmp_delay = 0;
-       while (tmp_delay < IO_DELAY_PER_OPA_TAP) {
-               dtaps_per_ptap++;
-               tmp_delay += IO_DELAY_PER_DQS_EN_DCHAIN_TAP;
-       }
-       dtaps_per_ptap--;
-       tmp_delay = 0;
+       dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP,
+                                     IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1;
 
        /* update info for sims */
        reg_file_set_group(read_group);
@@ -2252,8 +2218,8 @@ static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
                 * calibrated output side yet.
                 */
                if (d > 0) {
-                       scc_mgr_apply_group_all_out_delay_add_all_ranks
-                       (write_group, write_test_bgn, d);
+                       scc_mgr_apply_group_all_out_delay_add_all_ranks(
+                                                               write_group, d);
                }
 
                for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX && grp_calibrated == 0;
@@ -2332,7 +2298,7 @@ static uint32_t rw_mgr_mem_calibrate_vfifo(uint32_t read_group,
         * first case).
         */
        if (d > 2)
-               scc_mgr_zero_group(write_group, write_test_bgn, 1);
+               scc_mgr_zero_group(write_group, 1);
 
        return 1;
 }
@@ -2690,7 +2656,7 @@ static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
 
        /* Search for the left edge of the window for each bit */
        for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) {
-               scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, d);
+               scc_mgr_apply_group_dq_out1_delay(write_group, d);
 
                writel(0, &sdr_scc_mgr->update);
 
@@ -2739,7 +2705,7 @@ static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
        }
 
        /* Reset DQ delay chains to 0 */
-       scc_mgr_apply_group_dq_out1_delay(write_group, test_bgn, 0);
+       scc_mgr_apply_group_dq_out1_delay(0);
        sticky_bit_chk = 0;
        for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) {
                debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
@@ -2963,7 +2929,7 @@ static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
 
        /* Search for the/part of the window with DM shift */
        for (d = IO_IO_OUT1_DELAY_MAX; d >= 0; d -= DELTA_D) {
-               scc_mgr_apply_group_dm_out1_delay(write_group, d);
+               scc_mgr_apply_group_dm_out1_delay(d);
                writel(0, &sdr_scc_mgr->update);
 
                if (rw_mgr_mem_calibrate_write_test(rank_bgn, write_group, 1,
@@ -2996,7 +2962,7 @@ static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
 
 
        /* Reset DM delay chains to 0 */
-       scc_mgr_apply_group_dm_out1_delay(write_group, 0);
+       scc_mgr_apply_group_dm_out1_delay(0);
 
        /*
         * Check to see if the current window nudges up aganist 0 delay.
@@ -3078,7 +3044,7 @@ static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
        else
                dm_margin = left_edge[0] - mid;
 
-       scc_mgr_apply_group_dm_out1_delay(write_group, mid);
+       scc_mgr_apply_group_dm_out1_delay(mid);
        writel(0, &sdr_scc_mgr->update);
 
        debug_cond(DLEVEL == 2, "%s:%d dm_calib: left=%d right=%d mid=%d \
@@ -3314,11 +3280,14 @@ static uint32_t mem_calibrate(void)
 
        mem_config();
 
-       uint32_t bypass_mode = 0x1;
        for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
                writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
                          SCC_MGR_GROUP_COUNTER_OFFSET);
-               scc_set_bypass_mode(i, bypass_mode);
+               /* Only needed once to set all groups, pins, DQ, DQS, DM. */
+               if (i == 0)
+                       scc_mgr_set_hhp_extras();
+
+               scc_set_bypass_mode(i);
        }
 
        if ((dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
@@ -3353,8 +3322,7 @@ static uint32_t mem_calibrate(void)
 
                                writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
                                                    SCC_MGR_GROUP_COUNTER_OFFSET);
-                               scc_mgr_zero_group(write_group, write_test_bgn,
-                                                  0);
+                               scc_mgr_zero_group(write_group, 0);
 
                                for (read_group = write_group *
                                        RW_MGR_MEM_IF_READ_DQS_WIDTH /