ddr: altera: Add DDR2 support to Gen5 driver
[oweals/u-boot.git] drivers/ddr/altera/sequencer.c
// SPDX-License-Identifier: BSD-3-Clause
/*
 * Copyright Altera Corporation (C) 2012-2015
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include <errno.h>
#include <hang.h>
#include "sequencer.h"

static const struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
        (struct socfpga_sdr_rw_load_manager *)
                (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);
static const struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs
        = (struct socfpga_sdr_rw_load_jump_manager *)
                (SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);
static const struct socfpga_sdr_reg_file *sdr_reg_file =
        (struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;
static const struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
        (struct socfpga_sdr_scc_mgr *)
                (SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);
static const struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
        (struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;
static const struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
        (struct socfpga_phy_mgr_cfg *)
                (SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);
static const struct socfpga_data_mgr *data_mgr =
        (struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;
static const struct socfpga_sdr_ctrl *sdr_ctrl =
        (struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D         1
/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode,
 * where we skip everything except the bare minimum, we need a few of the
 * steps to be dynamic. In those cases, we either use the DYNAMIC_CALIB_STEPS
 * for the check, which is based on the RTL-supplied value, or we dynamically
 * compute the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
        STATIC_SKIP_DELAY_LOOPS)

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
        ((non_skip_value) & seq->skip_delay_mask)
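/*
 * Note: seq->skip_delay_mask is either all-ones or all-zero, so the macro
 * either passes the counter value through unchanged or forces it to 0.
 * For example, SKIP_DELAY_LOOP_VALUE_OR_ZERO(0x40) evaluates to 0x40 with
 * a mask of 0xff and to 0 with a mask of 0x0.
 */
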
bool dram_is_ddr(const u8 ddr)
{
        const struct socfpga_sdram_config *cfg = socfpga_get_sdram_config();
        const u8 type = (cfg->ctrl_cfg >> SDR_CTRLGRP_CTRLCFG_MEMTYPE_LSB) &
                        SDR_CTRLGRP_CTRLCFG_MEMTYPE_MASK;

        if (ddr == 2 && type == 1)      /* DDR2 */
                return true;

        if (ddr == 3 && type == 2)      /* DDR3 */
                return true;

        return false;
}
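
/*
 * For example, with the MEMTYPE field of ctrl_cfg programmed to 1 (DDR2),
 * dram_is_ddr(2) returns true and dram_is_ddr(3) returns false; any other
 * (ddr, type) combination is rejected.
 */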

static void set_failing_group_stage(struct socfpga_sdrseq *seq,
                                    u32 group, u32 stage, u32 substage)
{
        /*
         * Only set the global stage if there has not been any other
         * failing group.
         */
        if (seq->gbl.error_stage == CAL_STAGE_NIL) {
                seq->gbl.error_substage = substage;
                seq->gbl.error_stage = stage;
                seq->gbl.error_group = group;
        }
}

static void reg_file_set_group(u16 set_group)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff0000, set_group << 16);
}

static void reg_file_set_stage(u8 set_stage)
{
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xffff, set_stage & 0xff);
}

static void reg_file_set_sub_stage(u8 set_sub_stage)
{
        set_sub_stage &= 0xff;
        clrsetbits_le32(&sdr_reg_file->cur_stage, 0xff00, set_sub_stage << 8);
}

/**
 * phy_mgr_initialize() - Initialize PHY Manager
 *
 * Initialize PHY Manager.
 */
static void phy_mgr_initialize(struct socfpga_sdrseq *seq)
{
        u32 ratio;

        debug("%s:%d\n", __func__, __LINE__);
        /* Calibration has control over path to memory */
        /*
         * In Hard PHY this is a 2-bit control:
         * 0: AFI Mux Select
         * 1: DDIO Mux Select
         */
        writel(0x3, &phy_mgr_cfg->mux_sel);

        /* The memory clock is not yet stable; we begin initialization. */
        writel(0, &phy_mgr_cfg->reset_mem_stbl);

        /* Set all calibration status bits to zero. */
        writel(0, &phy_mgr_cfg->cal_status);

        writel(0, &phy_mgr_cfg->cal_debug_info);

        /* Init params only if we do NOT skip calibration. */
        if ((seq->dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL)
                return;

        ratio = seq->rwcfg->mem_dq_per_read_dqs /
                seq->rwcfg->mem_virtual_groups_per_read_dqs;
        seq->param.read_correct_mask_vg = (1 << ratio) - 1;
        seq->param.write_correct_mask_vg = (1 << ratio) - 1;
        seq->param.read_correct_mask = (1 << seq->rwcfg->mem_dq_per_read_dqs)
                - 1;
        seq->param.write_correct_mask = (1 << seq->rwcfg->mem_dq_per_write_dqs)
                - 1;
}
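
/*
 * For instance, assuming 8 DQ pins per read DQS and 2 virtual groups per
 * read DQS, ratio is 4, so read_correct_mask_vg is 0x0f while
 * read_correct_mask covers all 8 DQ bits (0xff).
 */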

/**
 * set_rank_and_odt_mask() - Set Rank and ODT mask
 * @rank:       Rank number
 * @odt_mode:   ODT mode, OFF or READ_WRITE
 *
 * Set Rank and ODT mask (On-Die Termination).
 */
static void set_rank_and_odt_mask(struct socfpga_sdrseq *seq,
                                  const u32 rank, const u32 odt_mode)
{
        u32 odt_mask_0 = 0;
        u32 odt_mask_1 = 0;
        u32 cs_and_odt_mask;

        if (odt_mode == RW_MGR_ODT_MODE_OFF) {
                odt_mask_0 = 0x0;
                odt_mask_1 = 0x0;
        } else {        /* RW_MGR_ODT_MODE_READ_WRITE */
                switch (seq->rwcfg->mem_number_of_ranks) {
                case 1: /* 1 Rank */
                        /* Read: ODT = 0 ; Write: ODT = 1 */
                        odt_mask_0 = 0x0;
                        odt_mask_1 = 0x1;
                        break;
                case 2: /* 2 Ranks */
                        if (seq->rwcfg->mem_number_of_cs_per_dimm == 1) {
                                /*
                                 * - Dual-Slot , Single-Rank (1 CS per DIMM)
                                 *   OR
                                 * - RDIMM, 4 total CS (2 CS per DIMM, 2 DIMM)
                                 *
                                 * Since MEM_NUMBER_OF_RANKS is 2, they
                                 * are both single rank with 2 CS each
                                 * (special for RDIMM).
                                 *
                                 * Read: Turn on ODT on the opposite rank
                                 * Write: Turn on ODT on all ranks
                                 */
                                odt_mask_0 = 0x3 & ~(1 << rank);
                                odt_mask_1 = 0x3;
                                if (dram_is_ddr(2))
                                        odt_mask_1 &= ~(1 << rank);
                        } else {
                                /*
                                 * - Single-Slot , Dual-Rank (2 CS per DIMM)
                                 *
                                 * Read: Turn off ODT on all ranks
                                 * Write: Turn on ODT on active rank
                                 */
                                odt_mask_0 = 0x0;
                                odt_mask_1 = 0x3 & (1 << rank);
                        }
                        break;
                case 4: /* 4 Ranks */
                        /*
                         * DDR3 Read, DDR2 Read/Write:
                         * ----------+-----------------------+
                         *           |         ODT           |
                         *           +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  0  |
                         *     1     |  1  |  0  |  0  |  0  |
                         *     2     |  0  |  0  |  0  |  1  |
                         *     3     |  0  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *
                         * DDR3 Write:
                         * ----------+-----------------------+
                         *           |         ODT           |
                         * Write To  +-----------------------+
                         *   Rank    |  3  |  2  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         *     0     |  0  |  1  |  0  |  1  |
                         *     1     |  1  |  0  |  1  |  0  |
                         *     2     |  0  |  1  |  0  |  1  |
                         *     3     |  1  |  0  |  1  |  0  |
                         * ----------+-----+-----+-----+-----+
                         */
                        switch (rank) {
                        case 0:
                                odt_mask_0 = 0x4;
                                if (dram_is_ddr(2))
                                        odt_mask_1 = 0x4;
                                else if (dram_is_ddr(3))
                                        odt_mask_1 = 0x5;
                                break;
                        case 1:
                                odt_mask_0 = 0x8;
                                if (dram_is_ddr(2))
                                        odt_mask_1 = 0x8;
                                else if (dram_is_ddr(3))
                                        odt_mask_1 = 0xA;
                                break;
                        case 2:
                                odt_mask_0 = 0x1;
                                if (dram_is_ddr(2))
                                        odt_mask_1 = 0x1;
                                else if (dram_is_ddr(3))
                                        odt_mask_1 = 0x5;
                                break;
                        case 3:
                                odt_mask_0 = 0x2;
                                if (dram_is_ddr(2))
                                        odt_mask_1 = 0x2;
                                else if (dram_is_ddr(3))
                                        odt_mask_1 = 0xA;
                                break;
                        }
                        break;
                }
        }

        cs_and_odt_mask = (0xFF & ~(1 << rank)) |
                          ((0xFF & odt_mask_0) << 8) |
                          ((0xFF & odt_mask_1) << 16);
        writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}
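
/*
 * Example encoding: selecting rank 0 with ODT off yields
 * cs_and_odt_mask = 0x0000fe (CS bit 0 active-low, both ODT masks clear).
 * Bits [7:0] carry the CS mask, [15:8] the read ODT mask (odt_mask_0)
 * and [23:16] the write ODT mask (odt_mask_1).
 */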

/**
 * scc_mgr_set() - Set SCC Manager register
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register.
 */
static void scc_mgr_set(u32 off, u32 grp, u32 val)
{
        writel(val, SDR_PHYGRP_SCCGRP_ADDRESS | off | (grp << 2));
}

/**
 * scc_mgr_initialize() - Initialize SCC Manager registers
 *
 * Initialize SCC Manager registers.
 */
static void scc_mgr_initialize(void)
{
        /*
         * Clear register file for HPS. 16 (2^4) is the size of the
         * full register file in the scc mgr:
         *      RFILE_DEPTH = 1 + log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
         *                             MEM_IF_READ_DQS_WIDTH - 1);
         */
        int i;

        for (i = 0; i < 16; i++) {
                debug_cond(DLEVEL >= 1, "%s:%d: Clearing SCC RFILE index %u\n",
                           __func__, __LINE__, i);
                scc_mgr_set(SCC_MGR_HHP_RFILE_OFFSET, i, 0);
        }
}

static void scc_mgr_set_dqdqs_output_phase(u32 write_group, u32 phase)
{
        scc_mgr_set(SCC_MGR_DQDQS_OUT_PHASE_OFFSET, write_group, phase);
}

static void scc_mgr_set_dqs_bus_in_delay(u32 read_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_DQS_IN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dqs_en_phase(u32 read_group, u32 phase)
{
        scc_mgr_set(SCC_MGR_DQS_EN_PHASE_OFFSET, read_group, phase);
}

static void scc_mgr_set_dqs_en_delay(u32 read_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_DQS_EN_DELAY_OFFSET, read_group, delay);
}

static void scc_mgr_set_dq_in_delay(u32 dq_in_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_io_in_delay(struct socfpga_sdrseq *seq,
                                        u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET,
                    seq->rwcfg->mem_dq_per_write_dqs, delay);
}

static void scc_mgr_set_dm_in_delay(struct socfpga_sdrseq *seq, u32 dm,
                                    u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_IN_DELAY_OFFSET,
                    seq->rwcfg->mem_dq_per_write_dqs + 1 + dm,
                    delay);
}

static void scc_mgr_set_dq_out1_delay(u32 dq_in_group, u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET, dq_in_group, delay);
}

static void scc_mgr_set_dqs_out1_delay(struct socfpga_sdrseq *seq,
                                       u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
                    seq->rwcfg->mem_dq_per_write_dqs, delay);
}

static void scc_mgr_set_dm_out1_delay(struct socfpga_sdrseq *seq, u32 dm,
                                      u32 delay)
{
        scc_mgr_set(SCC_MGR_IO_OUT1_DELAY_OFFSET,
                    seq->rwcfg->mem_dq_per_write_dqs + 1 + dm,
                    delay);
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(u32 dqs)
{
        writel(dqs, &sdr_scc_mgr->dqs_ena);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
        writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(u32 dq_in_group)
{
        writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(u32 dm)
{
        writel(dm, &sdr_scc_mgr->dm_ena);
}

/**
 * scc_mgr_set_all_ranks() - Set SCC Manager register for all ranks
 * @off:        Base offset in SCC Manager space
 * @grp:        Read/Write group
 * @val:        Value to be set
 * @update:     If non-zero, trigger SCC Manager update for all ranks
 *
 * This function sets the SCC Manager (Scan Chain Control Manager) register
 * and optionally triggers the SCC update for all ranks.
 */
static void scc_mgr_set_all_ranks(struct socfpga_sdrseq *seq,
                                  const u32 off, const u32 grp, const u32 val,
                                  const int update)
{
        u32 r;

        for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_set(off, grp, val);

                if (update || (r == 0)) {
                        writel(grp, &sdr_scc_mgr->dqs_ena);
                        writel(0, &sdr_scc_mgr->update);
                }
        }
}

static void scc_mgr_set_dqs_en_phase_all_ranks(struct socfpga_sdrseq *seq,
                                               u32 read_group, u32 phase)
{
        /*
         * Although the h/w doesn't support different phases per
         * shadow register, for simplicity our SCC manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * For efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(seq, SCC_MGR_DQS_EN_PHASE_OFFSET,
                              read_group, phase, 0);
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(struct socfpga_sdrseq *seq,
                                                     u32 write_group, u32 phase)
{
        /*
         * Although the h/w doesn't support different phases per
         * shadow register, for simplicity our SCC manager modeling
         * keeps different phase settings per shadow reg, and it's
         * important for us to keep them in sync to match h/w.
         * For efficiency, the scan chain update should occur only
         * once to sr0.
         */
        scc_mgr_set_all_ranks(seq, SCC_MGR_DQDQS_OUT_PHASE_OFFSET,
                              write_group, phase, 0);
}

static void scc_mgr_set_dqs_en_delay_all_ranks(struct socfpga_sdrseq *seq,
                                               u32 read_group, u32 delay)
{
        /*
         * In shadow register mode, the T11 settings are stored in
         * registers in the core, which are updated by the DQS_ENA
         * signals. Not issuing the SCC_MGR_UPD command allows us to
         * save lots of rank switching overhead, by calling
         * select_shadow_regs_for_update with update_scan_chains
         * set to 0.
         */
        scc_mgr_set_all_ranks(seq, SCC_MGR_DQS_EN_DELAY_OFFSET,
                              read_group, delay, 1);
}

/**
 * scc_mgr_set_oct_out1_delay() - Set OCT output delay
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * This function sets the OCT output delay in the SCC manager.
 */
static void scc_mgr_set_oct_out1_delay(struct socfpga_sdrseq *seq,
                                       const u32 write_group, const u32 delay)
{
        const int ratio = seq->rwcfg->mem_if_read_dqs_width /
                          seq->rwcfg->mem_if_write_dqs_width;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
        for (i = 0; i < ratio; i++)
                scc_mgr_set(SCC_MGR_OCT_OUT1_DELAY_OFFSET, base + i, delay);
}

/**
 * scc_mgr_set_hhp_extras() - Set HHP extras.
 *
 * Load the fixed setting in the SCC manager HHP extras.
 */
static void scc_mgr_set_hhp_extras(void)
{
        /*
         * Load the fixed setting in the SCC manager
         * bits: 0:0 = 1'b1     - DQS bypass
         * bits: 1:1 = 1'b1     - DQ bypass
         * bits: 4:2 = 3'b001   - rfifo_mode
         * bits: 6:5 = 2'b01    - rfifo clock_select
         * bits: 7:7 = 1'b0     - separate gating from ungating setting
         * bits: 8:8 = 1'b0     - separate OE from Output delay setting
         */
        const u32 value = (0 << 8) | (0 << 7) | (1 << 5) |
                          (1 << 2) | (1 << 1) | (1 << 0);
        const u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS |
                         SCC_MGR_HHP_GLOBALS_OFFSET |
                         SCC_MGR_HHP_EXTRAS_OFFSET;

        debug_cond(DLEVEL >= 1, "%s:%d Setting HHP Extras\n",
                   __func__, __LINE__);
        writel(value, addr);
        debug_cond(DLEVEL >= 1, "%s:%d Done Setting HHP Extras\n",
                   __func__, __LINE__);
}
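
/*
 * With the bit fields listed above, the programmed value works out to 0x27
 * (DQS bypass | DQ bypass | rfifo_mode 3'b001 | rfifo clock_select 2'b01).
 */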

/**
 * scc_mgr_zero_all() - Zero all DQS config
 *
 * Zero all DQS config.
 */
static void scc_mgr_zero_all(struct socfpga_sdrseq *seq)
{
        int i, r;

        /*
         * Zero all DQS config settings, across all groups and all
         * shadow registers.
         */
        for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
                        /*
                         * The phases actually don't exist on a per-rank basis,
                         * but there's no harm updating them several times, so
                         * let's keep the code simple.
                         */
                        scc_mgr_set_dqs_bus_in_delay(i,
                                                     seq->iocfg->dqs_in_reserve);
                        scc_mgr_set_dqs_en_phase(i, 0);
                        scc_mgr_set_dqs_en_delay(i, 0);
                }

                for (i = 0; i < seq->rwcfg->mem_if_write_dqs_width; i++) {
                        scc_mgr_set_dqdqs_output_phase(i, 0);
                        /* Arria V/Cyclone V don't have out2. */
                        scc_mgr_set_oct_out1_delay(seq, i,
                                                   seq->iocfg->dqs_out_reserve);
                }
        }

        /* Multicast to all DQS group enables. */
        writel(0xff, &sdr_scc_mgr->dqs_ena);
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_set_bypass_mode() - Set bypass mode and trigger SCC update
 * @write_group:        Write group
 *
 * Set bypass mode and trigger SCC update.
 */
static void scc_set_bypass_mode(const u32 write_group)
{
        /* Multicast to all DQ enables. */
        writel(0xff, &sdr_scc_mgr->dq_ena);
        writel(0xff, &sdr_scc_mgr->dm_ena);

        /* Update current DQS IO enable. */
        writel(0, &sdr_scc_mgr->dqs_io_ena);

        /* Update the DQS logic. */
        writel(write_group, &sdr_scc_mgr->dqs_ena);

        /* Hit update. */
        writel(0, &sdr_scc_mgr->update);
}

/**
 * scc_mgr_load_dqs_for_write_group() - Load DQS settings for Write Group
 * @write_group:        Write group
 *
 * Load DQS settings for Write Group, do not trigger SCC update.
 */
static void scc_mgr_load_dqs_for_write_group(struct socfpga_sdrseq *seq,
                                             const u32 write_group)
{
        const int ratio = seq->rwcfg->mem_if_read_dqs_width /
                          seq->rwcfg->mem_if_write_dqs_width;
        const int base = write_group * ratio;
        int i;
        /*
         * Load the setting in the SCC manager
         * Although OCT affects only write data, the OCT delay is controlled
         * by the DQS logic block which is instantiated once per read group.
         * For protocols where a write group consists of multiple read groups,
         * the setting must be set multiple times.
         */
        for (i = 0; i < ratio; i++)
                writel(base + i, &sdr_scc_mgr->dqs_ena);
}

/**
 * scc_mgr_zero_group() - Zero all configs for a group
 *
 * Zero DQ, DM, DQS and OCT configs for a group.
 */
static void scc_mgr_zero_group(struct socfpga_sdrseq *seq,
                               const u32 write_group, const int out_only)
{
        int i, r;

        for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                /* Zero all DQ config settings. */
                for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++) {
                        scc_mgr_set_dq_out1_delay(i, 0);
                        if (!out_only)
                                scc_mgr_set_dq_in_delay(i, 0);
                }

                /* Multicast to all DQ enables. */
                writel(0xff, &sdr_scc_mgr->dq_ena);

                /* Zero all DM config settings. */
                for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                        if (!out_only)
                                scc_mgr_set_dm_in_delay(seq, i, 0);
                        scc_mgr_set_dm_out1_delay(seq, i, 0);
                }

                /* Multicast to all DM enables. */
                writel(0xff, &sdr_scc_mgr->dm_ena);

                /* Zero all DQS IO settings. */
                if (!out_only)
                        scc_mgr_set_dqs_io_in_delay(seq, 0);

                /* Arria V/Cyclone V don't have out2. */
                scc_mgr_set_dqs_out1_delay(seq, seq->iocfg->dqs_out_reserve);
                scc_mgr_set_oct_out1_delay(seq, write_group,
                                           seq->iocfg->dqs_out_reserve);
                scc_mgr_load_dqs_for_write_group(seq, write_group);

                /* Multicast to all DQS IO enables (only 1 in total). */
                writel(0, &sdr_scc_mgr->dqs_io_ena);

                /* Hit update to zero everything. */
                writel(0, &sdr_scc_mgr->update);
        }
}

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(struct socfpga_sdrseq *seq,
                                            u32 group_bgn, u32 delay)
{
        u32 i, p;

        for (i = 0, p = group_bgn; i < seq->rwcfg->mem_dq_per_read_dqs;
             i++, p++) {
                scc_mgr_set_dq_in_delay(p, delay);
                scc_mgr_load_dq(p);
        }
}

/**
 * scc_mgr_apply_group_dq_out1_delay() - Apply and load an output delay for the
 * DQ pins in a group
 * @delay:              Delay value
 *
 * Apply and load a particular output delay for the DQ pins in a group.
 */
static void scc_mgr_apply_group_dq_out1_delay(struct socfpga_sdrseq *seq,
                                              const u32 delay)
{
        int i;

        for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++) {
                scc_mgr_set_dq_out1_delay(i, delay);
                scc_mgr_load_dq(i);
        }
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(struct socfpga_sdrseq *seq,
                                              u32 delay1)
{
        u32 i;

        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
                scc_mgr_set_dm_out1_delay(seq, i, delay1);
                scc_mgr_load_dm(i);
        }
}

/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(struct socfpga_sdrseq *seq,
                                                    u32 write_group, u32 delay)
{
        scc_mgr_set_dqs_out1_delay(seq, delay);
        scc_mgr_load_dqs_io();

        scc_mgr_set_oct_out1_delay(seq, write_group, delay);
        scc_mgr_load_dqs_for_write_group(seq, write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add() - Apply a delay to the entire output
 * side: DQ, DM, DQS, OCT
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * Apply a delay to the entire output side: DQ, DM, DQS, OCT.
 */
static void scc_mgr_apply_group_all_out_delay_add(struct socfpga_sdrseq *seq,
                                                  const u32 write_group,
                                                  const u32 delay)
{
        u32 i, new_delay;

        /* DQ shift */
        for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++)
                scc_mgr_load_dq(i);

        /* DM shift */
        for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++)
                scc_mgr_load_dm(i);

        /* DQS shift */
        new_delay = READ_SCC_DQS_IO_OUT2_DELAY + delay;
        if (new_delay > seq->iocfg->io_out2_delay_max) {
                debug_cond(DLEVEL >= 1,
                           "%s:%d (%u, %u) DQS: %u > %d; adding %u to OUT1\n",
                           __func__, __LINE__, write_group, delay, new_delay,
                           seq->iocfg->io_out2_delay_max,
                           new_delay - seq->iocfg->io_out2_delay_max);
                new_delay -= seq->iocfg->io_out2_delay_max;
                scc_mgr_set_dqs_out1_delay(seq, new_delay);
        }

        scc_mgr_load_dqs_io();

        /* OCT shift */
        new_delay = READ_SCC_OCT_OUT2_DELAY + delay;
        if (new_delay > seq->iocfg->io_out2_delay_max) {
                debug_cond(DLEVEL >= 1,
                           "%s:%d (%u, %u) OCT: %u > %d; adding %u to OUT1\n",
                           __func__, __LINE__, write_group, delay,
                           new_delay, seq->iocfg->io_out2_delay_max,
                           new_delay - seq->iocfg->io_out2_delay_max);
                new_delay -= seq->iocfg->io_out2_delay_max;
                scc_mgr_set_oct_out1_delay(seq, write_group, new_delay);
        }

        scc_mgr_load_dqs_for_write_group(seq, write_group);
}

/**
 * scc_mgr_apply_group_all_out_delay_add_all_ranks() - Apply a delay to the
 * entire output side to all ranks
 * @write_group:        Write group
 * @delay:              Delay value
 *
 * Apply a delay to the entire output side (DQ, DM, DQS, OCT) to all ranks.
 */
static void
scc_mgr_apply_group_all_out_delay_add_all_ranks(struct socfpga_sdrseq *seq,
                                                const u32 write_group,
                                                const u32 delay)
{
        int r;

        for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
             r += NUM_RANKS_PER_SHADOW_REG) {
                scc_mgr_apply_group_all_out_delay_add(seq, write_group, delay);
                writel(0, &sdr_scc_mgr->update);
        }
}

/**
 * set_jump_as_return() - Return instruction optimization
 *
 * Optimization used to recover some slots in the DDR3 inst_rom; it could be
 * applied to other protocols if we wanted to.
 */
static void set_jump_as_return(struct socfpga_sdrseq *seq)
{
        /*
         * To save space, we replace return with a jump to a special shared
         * RETURN instruction, and set the counter to a large value so that
         * the jump is always taken.
         */
        writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
        writel(seq->rwcfg->rreturn, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}

/**
 * delay_for_n_mem_clocks() - Delay for N memory clocks
 * @clocks:     Length of the delay
 *
 * Delay for N memory clocks.
 */
static void delay_for_n_mem_clocks(struct socfpga_sdrseq *seq,
                                   const u32 clocks)
{
        u32 afi_clocks;
        u16 c_loop;
        u8 inner;
        u8 outer;

        debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

        /* Scale (rounding up) to get afi clocks. */
        afi_clocks = DIV_ROUND_UP(clocks, seq->misccfg->afi_rate_ratio);
        if (afi_clocks) /* Temporary underflow protection */
                afi_clocks--;

        /*
         * Note, we don't bother accounting for being off a little
         * bit because of a few extra instructions in outer loops.
         * Note, the loops have a test at the end, and do the test
         * before the decrement, and so always perform the loop
         * one time more than the counter value.
         */
        c_loop = afi_clocks >> 16;
        outer = c_loop ? 0xff : (afi_clocks >> 8);
        inner = outer ? 0xff : afi_clocks;

        /*
         * ROM instructions are structured as follows:
         *
         *    IDLE_LOOP2: jnz cntr0, TARGET_A
         *    IDLE_LOOP1: jnz cntr1, TARGET_B
         *                return
         *
         * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
         * TARGET_B is set to IDLE_LOOP2 as well.
         *
         * If we have no outer loop, though, then we can use IDLE_LOOP1 only,
         * and set TARGET_B to IDLE_LOOP1 and skip IDLE_LOOP2 entirely.
         *
         * A little confusing, but it helps save precious space in the
         * inst_rom and sequencer rom, keeps the delays more accurate and
         * reduces overhead.
         */
        if (afi_clocks < 0x100) {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                       &sdr_rw_load_mgr_regs->load_cntr1);

                writel(seq->rwcfg->idle_loop1,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                writel(seq->rwcfg->idle_loop1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                          RW_MGR_RUN_SINGLE_GROUP_OFFSET);
        } else {
                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
                       &sdr_rw_load_mgr_regs->load_cntr0);

                writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
                       &sdr_rw_load_mgr_regs->load_cntr1);

                writel(seq->rwcfg->idle_loop2,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add0);

                writel(seq->rwcfg->idle_loop2,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);

                do {
                        writel(seq->rwcfg->idle_loop2,
                               SDR_PHYGRP_RWMGRGRP_ADDRESS |
                               RW_MGR_RUN_SINGLE_GROUP_OFFSET);
                } while (c_loop-- != 0);
        }
        debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}
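
/*
 * Example decomposition: with afi_clocks == 0x12344 after the initial
 * decrement, c_loop == 1 (so the do/while body runs twice, since the test
 * follows the write) and outer == inner == 0xff; any afi_clocks value below
 * 0x100 takes the single-counter IDLE_LOOP1 path instead.
 */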

static void delay_for_n_ns(struct socfpga_sdrseq *seq, const u32 ns)
{
        delay_for_n_mem_clocks(seq, (ns * seq->misccfg->afi_clk_freq *
                                seq->misccfg->afi_rate_ratio) / 1000);
}
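
/*
 * Sanity check of the conversion, assuming afi_clk_freq is in MHz: a 200 ns
 * delay on a 100 MHz AFI clock with a rate ratio of 2 requests
 * (200 * 100 * 2) / 1000 = 40 memory clocks.
 */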

/**
 * rw_mgr_mem_init_load_regs() - Load instruction registers
 * @cntr0:      Counter 0 value
 * @cntr1:      Counter 1 value
 * @cntr2:      Counter 2 value
 * @jump:       Jump instruction value
 *
 * Load instruction registers.
 */
static void rw_mgr_mem_init_load_regs(struct socfpga_sdrseq *seq,
                                      u32 cntr0, u32 cntr1, u32 cntr2, u32 jump)
{
        u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                      RW_MGR_RUN_SINGLE_GROUP_OFFSET;

        /* Load counters */
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr0),
               &sdr_rw_load_mgr_regs->load_cntr0);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr1),
               &sdr_rw_load_mgr_regs->load_cntr1);
        writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(cntr2),
               &sdr_rw_load_mgr_regs->load_cntr2);

        /* Load jump address */
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        writel(jump, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

        /* Execute count instruction */
        writel(jump, grpaddr);
}

/**
 * rw_mgr_mem_load_user_ddr2() - Load user calibration values for DDR2
 * @handoff:    Indicate whether this is initialization or handoff phase
 *
 * Load user calibration values and optionally precharge the banks.
 */
static void rw_mgr_mem_load_user_ddr2(struct socfpga_sdrseq *seq,
                                      const int handoff)
{
        u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        u32 r;

        for (r = 0; r < seq->rwcfg->mem_number_of_ranks; r++) {
                /* set rank */
                set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_OFF);

                /* precharge all banks ... */
                writel(seq->rwcfg->precharge_all, grpaddr);

                writel(seq->rwcfg->emr2, grpaddr);
                writel(seq->rwcfg->emr3, grpaddr);
                writel(seq->rwcfg->emr, grpaddr);

                if (handoff) {
                        writel(seq->rwcfg->mr_user, grpaddr);
                        continue;
                }

                writel(seq->rwcfg->mr_dll_reset, grpaddr);

                writel(seq->rwcfg->precharge_all, grpaddr);

                writel(seq->rwcfg->refresh, grpaddr);
                delay_for_n_ns(seq, 200);
                writel(seq->rwcfg->refresh, grpaddr);
                delay_for_n_ns(seq, 200);

                writel(seq->rwcfg->mr_calib, grpaddr);
                /* EMR_OCD_ENABLE; hardcoded, not yet part of seq->rwcfg. */
                writel(0x0b, grpaddr);
                writel(seq->rwcfg->emr, grpaddr);
                delay_for_n_mem_clocks(seq, 200);
        }
}

/**
 * rw_mgr_mem_load_user_ddr3() - Load user calibration values
 * @fin1:       Final instruction 1
 * @fin2:       Final instruction 2
 * @precharge:  If 1, precharge the banks at the end
 *
 * Load user calibration values and optionally precharge the banks.
 */
static void rw_mgr_mem_load_user_ddr3(struct socfpga_sdrseq *seq,
                                 const u32 fin1, const u32 fin2,
                                 const int precharge)
{
        u32 grpaddr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
                      RW_MGR_RUN_SINGLE_GROUP_OFFSET;
        u32 r;

        for (r = 0; r < seq->rwcfg->mem_number_of_ranks; r++) {
                /* set rank */
                set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_OFF);

                /* precharge all banks ... */
                if (precharge)
                        writel(seq->rwcfg->precharge_all, grpaddr);

                /*
                 * Use mirrored commands for odd ranks if address
                 * mirroring is on.
                 */
                if ((seq->rwcfg->mem_address_mirroring >> r) & 0x1) {
                        set_jump_as_return(seq);
                        writel(seq->rwcfg->mrs2_mirr, grpaddr);
                        delay_for_n_mem_clocks(seq, 4);
                        set_jump_as_return(seq);
                        writel(seq->rwcfg->mrs3_mirr, grpaddr);
                        delay_for_n_mem_clocks(seq, 4);
                        set_jump_as_return(seq);
                        writel(seq->rwcfg->mrs1_mirr, grpaddr);
                        delay_for_n_mem_clocks(seq, 4);
                        set_jump_as_return(seq);
                        writel(fin1, grpaddr);
                } else {
                        set_jump_as_return(seq);
                        writel(seq->rwcfg->mrs2, grpaddr);
                        delay_for_n_mem_clocks(seq, 4);
                        set_jump_as_return(seq);
                        writel(seq->rwcfg->mrs3, grpaddr);
                        delay_for_n_mem_clocks(seq, 4);
                        set_jump_as_return(seq);
                        writel(seq->rwcfg->mrs1, grpaddr);
                        set_jump_as_return(seq);
                        writel(fin2, grpaddr);
                }

                if (precharge)
                        continue;

                set_jump_as_return(seq);
                writel(seq->rwcfg->zqcl, grpaddr);

                /* tZQinit = tDLLK = 512 ck cycles */
                delay_for_n_mem_clocks(seq, 512);
        }
}

/**
 * rw_mgr_mem_load_user() - Load user calibration values
 * @fin1:       Final instruction 1
 * @fin2:       Final instruction 2
 * @precharge:  If 1, precharge the banks at the end
 *
 * Load user calibration values and optionally precharge the banks.
 */
static void rw_mgr_mem_load_user(struct socfpga_sdrseq *seq,
                                 const u32 fin1, const u32 fin2,
                                 const int precharge)
{
        if (dram_is_ddr(2))
                rw_mgr_mem_load_user_ddr2(seq, precharge);
        else if (dram_is_ddr(3))
                rw_mgr_mem_load_user_ddr3(seq, fin1, fin2, precharge);
        else
                hang();
}

/**
 * rw_mgr_mem_initialize() - Initialize RW Manager
 *
 * Initialize RW Manager.
 */
static void rw_mgr_mem_initialize(struct socfpga_sdrseq *seq)
{
        debug("%s:%d\n", __func__, __LINE__);

        /* The reset / cke part of initialization is broadcast to all ranks */
        if (dram_is_ddr(3)) {
                writel(RW_MGR_RANK_ALL, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                        RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
        }

        /*
         * Here's how you load registers for a loop:
         * Counters are located @ 0x800
         * Jump addresses are located @ 0xC00
         * For both, registers 0 to 3 are selected using bits 3 and 2, like
         * in 0x800, 0x804, 0x808, 0x80C and 0xC00, 0xC04, 0xC08, 0xC0C
         * I know this ain't pretty, but the Avalon bus throws away the 2
         * least significant bits
         */

        /* Start with memory RESET activated */

        /* tINIT = 200us */

        /*
         * 200us @ 266MHz (3.75 ns) ~ 54000 clock cycles
         * If a and b are the numbers of iterations in 2 nested loops
         * it takes the following number of cycles to complete the operation:
         * number_of_cycles = ((2 + n) * a + 2) * b
         * where n is the number of instructions in the inner loop
         * One possible solution is n = 0 , a = 256 , b = 106 => a = FF,
         * b = 6A
         */
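        /*
         * Check: ((2 + 0) * 256 + 2) * 106 = 54,484 cycles, comfortably
         * above the ~54,000 required.
         */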
        rw_mgr_mem_init_load_regs(seq, seq->misccfg->tinit_cntr0_val,
                                  seq->misccfg->tinit_cntr1_val,
                                  seq->misccfg->tinit_cntr2_val,
                                  seq->rwcfg->init_reset_0_cke_0);

        /* Indicate that memory is stable. */
        writel(1, &phy_mgr_cfg->reset_mem_stbl);

        if (dram_is_ddr(2)) {
                writel(seq->rwcfg->nop, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET);

                /* Bring up clock enable. */

                /* tXRP < 400 ck cycles */
                delay_for_n_ns(seq, 400);
        } else if (dram_is_ddr(3)) {
                /*
                 * Transition the RESET to high.
                 * Wait for 500us.
                 */

                /*
                 * 500us @ 266MHz (3.75 ns) ~ 134000 clock cycles
                 * If a and b are the numbers of iterations in 2 nested loops
                 * it takes the following number of cycles to complete the
                 * operation: number_of_cycles = ((2 + n) * a + 2) * b
                 * where n is the number of instructions in the inner loop
                 * One possible solution is
                 * n = 2 , a = 131 , b = 256 => a = 83, b = FF
                 */
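                /*
                 * Check: ((2 + 2) * 131 + 2) * 256 = 134,656 cycles,
                 * comfortably above the ~134,000 required.
                 */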
                rw_mgr_mem_init_load_regs(seq, seq->misccfg->treset_cntr0_val,
                                          seq->misccfg->treset_cntr1_val,
                                          seq->misccfg->treset_cntr2_val,
                                          seq->rwcfg->init_reset_1_cke_0);
                /* Bring up clock enable. */

                /* tXRP < 250 ck cycles */
                delay_for_n_mem_clocks(seq, 250);
        }

        rw_mgr_mem_load_user(seq, seq->rwcfg->mrs0_dll_reset_mirr,
                             seq->rwcfg->mrs0_dll_reset, 0);
}

/**
 * rw_mgr_mem_handoff() - Hand off the memory to user
 *
 * At the end of calibration we have to program the user settings in
 * and hand off the memory to the user.
 */
static void rw_mgr_mem_handoff(struct socfpga_sdrseq *seq)
{
        rw_mgr_mem_load_user(seq, seq->rwcfg->mrs0_user_mirr,
                             seq->rwcfg->mrs0_user, 1);
        /*
         * Need to wait tMOD (12CK or 15ns) time before issuing other
         * commands, but we will have plenty of NIOS cycles before actual
         * handoff so it's okay.
         */
}

/**
 * rw_mgr_mem_calibrate_write_test_issue() - Issue write test command
 * @group:      Write Group
 * @test_dm:    Test DM
 *
 * Issue write test command. Two variants are provided, one that just tests
 * a write pattern and another that tests datamask functionality.
 */
static void rw_mgr_mem_calibrate_write_test_issue(struct socfpga_sdrseq *seq,
                                                  u32 group, u32 test_dm)
{
        const u32 quick_write_mode =
                (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES) &&
                seq->misccfg->enable_super_quick_calibration;
        u32 mcc_instruction;
        u32 rw_wl_nop_cycles;

        /*
         * Set counter and jump addresses for the right
         * number of NOP cycles.
         * The number of supported NOP cycles can range from -1 to infinity.
         * Three different cases are handled:
         *
         * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
         *    mechanism will be used to insert the right number of NOPs.
         *
         * 2. For a number of NOP cycles equal to 0, the micro-instruction
         *    issuing the write command will jump straight to the
         *    micro-instruction that turns on DQS (for DDRx), or outputs write
         *    data (for RLD), skipping the NOP micro-instruction altogether.
         *
         * 3. A number of NOP cycles equal to -1 indicates that DQS must be
         *    turned on in the same micro-instruction that issues the write
         *    command. Then we need to directly jump to the micro-instruction
         *    that sends out the data.
         *
         * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
         *       (2 and 3). One jump-counter (0) is used to perform multiple
         *       write-read operations.
         *       One counter is left to issue this command in "multiple-group"
         *       mode.
         */

        rw_wl_nop_cycles = seq->gbl.rw_wl_nop_cycles;

        if (rw_wl_nop_cycles == -1) {
                /*
                 * CNTR 2 - We want to execute the special write operation that
                 * turns on DQS right away and then skip directly to the
                 * instruction that sends out the data. We set the counter to a
                 * large number so that the jump is always taken.
                 */
                writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

                /* CNTR 3 - Not used */
                if (test_dm) {
                        mcc_instruction = seq->rwcfg->lfsr_wr_rd_dm_bank_0_wl_1;
                        writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_data,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                        writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                } else {
                        mcc_instruction = seq->rwcfg->lfsr_wr_rd_bank_0_wl_1;
                        writel(seq->rwcfg->lfsr_wr_rd_bank_0_data,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                        writel(seq->rwcfg->lfsr_wr_rd_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                }
        } else if (rw_wl_nop_cycles == 0) {
                /*
                 * CNTR 2 - We want to skip the NOP operation and go straight
                 * to the DQS enable instruction. We set the counter to a large
                 * number so that the jump is always taken.
                 */
                writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);

                /* CNTR 3 - Not used */
                if (test_dm) {
                        mcc_instruction = seq->rwcfg->lfsr_wr_rd_dm_bank_0;
                        writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_dqs,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                } else {
                        mcc_instruction = seq->rwcfg->lfsr_wr_rd_bank_0;
                        writel(seq->rwcfg->lfsr_wr_rd_bank_0_dqs,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add2);
                }
        } else {
                /*
                 * CNTR 2 - In this case we want to execute the next instruction
                 * and NOT take the jump. So we set the counter to 0. The jump
                 * address doesn't count.
                 */
                writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
                writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);

                /*
                 * CNTR 3 - Set the nop counter to the number of cycles we
                 * need to loop for, minus 1.
                 */
                writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
                if (test_dm) {
                        mcc_instruction = seq->rwcfg->lfsr_wr_rd_dm_bank_0;
                        writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                } else {
                        mcc_instruction = seq->rwcfg->lfsr_wr_rd_bank_0;
                        writel(seq->rwcfg->lfsr_wr_rd_bank_0_nop,
                               &sdr_rw_load_jump_mgr_regs->load_jump_add3);
                }
        }

        writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
                  RW_MGR_RESET_READ_DATAPATH_OFFSET);

        if (quick_write_mode)
                writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
        else
                writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);

        writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);

        /*
         * CNTR 1 - This is used to ensure enough time elapses
         * for read data to come back.
         */
        writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);

        if (test_dm) {
                writel(seq->rwcfg->lfsr_wr_rd_dm_bank_0_wait,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        } else {
                writel(seq->rwcfg->lfsr_wr_rd_bank_0_wait,
                       &sdr_rw_load_jump_mgr_regs->load_jump_add1);
        }

        writel(mcc_instruction, (SDR_PHYGRP_RWMGRGRP_ADDRESS |
                                RW_MGR_RUN_SINGLE_GROUP_OFFSET) +
                                (group << 2));
}
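
/*
 * For example, with rw_wl_nop_cycles == 2 the code above loads CNTR 3 with
 * 1; since the RW manager performs a loop one more time than the counter
 * value (see delay_for_n_mem_clocks()), the NOP micro-instruction executes
 * twice before falling through to the DQS enable.
 */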

/**
 * rw_mgr_mem_calibrate_write_test() - Test writes, check for single/multiple
 * pass
 * @rank_bgn:           Rank number
 * @write_group:        Write Group
 * @use_dm:             Use DM
 * @all_correct:        All bits must be correct in the mask
 * @bit_chk:            Resulting bit mask after the test
 * @all_ranks:          Test all ranks
 *
 * Test writes, can check for a single bit pass or multiple bit pass.
 */
static int
rw_mgr_mem_calibrate_write_test(struct socfpga_sdrseq *seq,
                                const u32 rank_bgn, const u32 write_group,
                                const u32 use_dm, const u32 all_correct,
                                u32 *bit_chk, const u32 all_ranks)
{
        const u32 rank_end = all_ranks ?
                                seq->rwcfg->mem_number_of_ranks :
                                (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
        const u32 shift_ratio = seq->rwcfg->mem_dq_per_write_dqs /
                                seq->rwcfg->mem_virtual_groups_per_write_dqs;
        const u32 correct_mask_vg = seq->param.write_correct_mask_vg;

        u32 tmp_bit_chk, base_rw_mgr, group;
        int vg, r;

        *bit_chk = seq->param.write_correct_mask;

        for (r = rank_bgn; r < rank_end; r++) {
                /* Set rank */
                set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);

                tmp_bit_chk = 0;
                for (vg = seq->rwcfg->mem_virtual_groups_per_write_dqs - 1;
                     vg >= 0; vg--) {
                        /* Reset the FIFOs to get pointers to known state. */
                        writel(0, &phy_mgr_cmd->fifo_reset);

                        group = write_group *
                                seq->rwcfg->mem_virtual_groups_per_write_dqs
                                + vg;
                        rw_mgr_mem_calibrate_write_test_issue(seq, group,
                                                              use_dm);

                        base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
                        tmp_bit_chk <<= shift_ratio;
                        tmp_bit_chk |= (correct_mask_vg & ~(base_rw_mgr));
                }

                *bit_chk &= tmp_bit_chk;
        }

        set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
        if (all_correct) {
                debug_cond(DLEVEL >= 2,
                           "write_test(%u,%u,ALL) : %u == %u => %i\n",
                           write_group, use_dm, *bit_chk,
                           seq->param.write_correct_mask,
                           *bit_chk == seq->param.write_correct_mask);
                return *bit_chk == seq->param.write_correct_mask;
        } else {
                debug_cond(DLEVEL >= 2,
                           "write_test(%u,%u,ONE) : %u != %i => %i\n",
                           write_group, use_dm, *bit_chk, 0, *bit_chk != 0);
                return *bit_chk != 0x00;
        }
}
1348
1349 /**
1350  * rw_mgr_mem_calibrate_read_test_patterns() - Read back test patterns
1351  * @rank_bgn:   Rank number
1352  * @group:      Read/Write Group
1353  * @all_ranks:  Test all ranks
1354  *
1355  * Performs a guaranteed read on the patterns we are going to use during a
1356  * read test to ensure memory works.
1357  */
1358 static int
1359 rw_mgr_mem_calibrate_read_test_patterns(struct socfpga_sdrseq *seq,
1360                                         const u32 rank_bgn, const u32 group,
1361                                         const u32 all_ranks)
1362 {
1363         const u32 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1364                          RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1365         const u32 addr_offset =
1366                          (group * seq->rwcfg->mem_virtual_groups_per_read_dqs)
1367                          << 2;
1368         const u32 rank_end = all_ranks ?
1369                                 seq->rwcfg->mem_number_of_ranks :
1370                                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1371         const u32 shift_ratio = seq->rwcfg->mem_dq_per_read_dqs /
1372                                 seq->rwcfg->mem_virtual_groups_per_read_dqs;
1373         const u32 correct_mask_vg = seq->param.read_correct_mask_vg;
1374
1375         u32 tmp_bit_chk, base_rw_mgr, bit_chk;
1376         int vg, r;
1377         int ret = 0;
1378
1379         bit_chk = seq->param.read_correct_mask;
1380
1381         for (r = rank_bgn; r < rank_end; r++) {
1382                 /* Set rank */
1383                 set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);
1384
1385                 /* Load up a constant burst of read commands */
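                     /*
                      * The counter values (0x20 = 32 iterations here) set how
                      * many times the RW manager repeats each loop of the
                      * sequence; the jump registers point at the
                      * corresponding sequence entry points.
                      */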
1386                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1387                 writel(seq->rwcfg->guaranteed_read,
1388                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1389
1390                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1391                 writel(seq->rwcfg->guaranteed_read_cont,
1392                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1393
1394                 tmp_bit_chk = 0;
1395                 for (vg = seq->rwcfg->mem_virtual_groups_per_read_dqs - 1;
1396                      vg >= 0; vg--) {
1397                         /* Reset the FIFOs to get pointers to known state. */
1398                         writel(0, &phy_mgr_cmd->fifo_reset);
1399                         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1400                                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1401                         writel(seq->rwcfg->guaranteed_read,
1402                                addr + addr_offset + (vg << 2));
1403
1404                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1405                         tmp_bit_chk <<= shift_ratio;
1406                         tmp_bit_chk |= correct_mask_vg & ~base_rw_mgr;
1407                 }
1408
1409                 bit_chk &= tmp_bit_chk;
1410         }
1411
1412         writel(seq->rwcfg->clear_dqs_enable, addr + (group << 2));
1413
1414         set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
1415
1416         if (bit_chk != seq->param.read_correct_mask)
1417                 ret = -EIO;
1418
1419         debug_cond(DLEVEL >= 1,
1420                    "%s:%d test_load_patterns(%u,ALL) => (%u == %u) => %i\n",
1421                    __func__, __LINE__, group, bit_chk,
1422                    seq->param.read_correct_mask, ret);
1423
1424         return ret;
1425 }
1426
1427 /**
1428  * rw_mgr_mem_calibrate_read_load_patterns() - Load patterns for a read test
1429  * @seq:        Sequencer state
1430  * @rank_bgn:   Rank number
1431  * @all_ranks:  Test all ranks
1432  *
1433  * Load up the patterns we are going to use during a read test.
1434  */
1435 static void rw_mgr_mem_calibrate_read_load_patterns(struct socfpga_sdrseq *seq,
1436                                                     const u32 rank_bgn,
1437                                                     const int all_ranks)
1438 {
1439         const u32 rank_end = all_ranks ?
1440                         seq->rwcfg->mem_number_of_ranks :
1441                         (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1442         u32 r;
1443
1444         debug("%s:%d\n", __func__, __LINE__);
1445
1446         for (r = rank_bgn; r < rank_end; r++) {
1447                 /* set rank */
1448                 set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);
1449
1450                 /* Load up a constant burst of write commands */
1451                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr0);
1452
1453                 writel(seq->rwcfg->guaranteed_write_wait0,
1454                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1455
1456                 writel(0x20, &sdr_rw_load_mgr_regs->load_cntr1);
1457
1458                 writel(seq->rwcfg->guaranteed_write_wait1,
1459                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1460
1461                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr2);
1462
1463                 writel(seq->rwcfg->guaranteed_write_wait2,
1464                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1465
1466                 writel(0x04, &sdr_rw_load_mgr_regs->load_cntr3);
1467
1468                 writel(seq->rwcfg->guaranteed_write_wait3,
1469                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);
1470
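                     /*
                      * Counters 0/1 (0x20 = 32 iterations) and 2/3 (0x04 = 4
                      * iterations) size the wait loops of the guaranteed
                      * write sequence; the write below then kicks it off.
                      */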
1471                 writel(seq->rwcfg->guaranteed_write,
1472                        SDR_PHYGRP_RWMGRGRP_ADDRESS |
1473                        RW_MGR_RUN_SINGLE_GROUP_OFFSET);
1474         }
1475
1476         set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
1477 }
1478
1479 /**
1480  * rw_mgr_mem_calibrate_read_test() - Perform READ test on single rank
1481  * @rank_bgn:           Rank number
1482  * @group:              Read/Write group
1483  * @num_tries:          Number of retries of the test
1484  * @all_correct:        All bits must be correct in the mask
1485  * @bit_chk:            Resulting bit mask after the test
1486  * @all_groups:         Test all R/W groups
1487  * @all_ranks:          Test all ranks
1488  *
1489  * Try a read and see if it returns correct data back. The test has dummy
1490  * reads inserted into the mix to align DQS enable, and performs more
1491  * thorough checks than the regular read test.
1492  */
1493 static int
1494 rw_mgr_mem_calibrate_read_test(struct socfpga_sdrseq *seq,
1495                                const u32 rank_bgn, const u32 group,
1496                                const u32 num_tries, const u32 all_correct,
1497                                u32 *bit_chk,
1498                                const u32 all_groups, const u32 all_ranks)
1499 {
1500         const u32 rank_end = all_ranks ? seq->rwcfg->mem_number_of_ranks :
1501                 (rank_bgn + NUM_RANKS_PER_SHADOW_REG);
1502         const u32 quick_read_mode =
1503                 ((STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_SWEEPS) &&
1504                  seq->misccfg->enable_super_quick_calibration);
1505         u32 correct_mask_vg = seq->param.read_correct_mask_vg;
1506         u32 tmp_bit_chk;
1507         u32 base_rw_mgr;
1508         u32 addr;
1509
1510         int r, vg, ret;
1511
1512         *bit_chk = seq->param.read_correct_mask;
1513
1514         for (r = rank_bgn; r < rank_end; r++) {
1515                 /* set rank */
1516                 set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_READ_WRITE);
1517
1518                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr1);
1519
1520                 writel(seq->rwcfg->read_b2b_wait1,
1521                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
1522
1523                 writel(0x10, &sdr_rw_load_mgr_regs->load_cntr2);
1524                 writel(seq->rwcfg->read_b2b_wait2,
1525                        &sdr_rw_load_jump_mgr_regs->load_jump_add2);
1526
1527                 /* Quick mode: need at least two (1+1) reads to capture failures. */
1528                 if (quick_read_mode)
1529                         writel(0x1, &sdr_rw_load_mgr_regs->load_cntr0);
1530                 else if (all_groups)
1531                         writel(0x06, &sdr_rw_load_mgr_regs->load_cntr0);
1532                 else
1533                         writel(0x32, &sdr_rw_load_mgr_regs->load_cntr0);
1534
1535                 writel(seq->rwcfg->read_b2b,
1536                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
1537                 if (all_groups)
1538                         writel(seq->rwcfg->mem_if_read_dqs_width *
1539                                seq->rwcfg->mem_virtual_groups_per_read_dqs - 1,
1540                                &sdr_rw_load_mgr_regs->load_cntr3);
1541                 else
1542                         writel(0x0, &sdr_rw_load_mgr_regs->load_cntr3);
1543
1544                 writel(seq->rwcfg->read_b2b,
1545                        &sdr_rw_load_jump_mgr_regs->load_jump_add3);
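                     /*
                      * When all_groups is set, cntr3 above holds the
                      * remaining virtual read group count (total - 1), so
                      * the back-to-back read walks every group; otherwise it
                      * stays at zero and only a single group is exercised.
                      */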
1546
1547                 tmp_bit_chk = 0;
1548                 for (vg = seq->rwcfg->mem_virtual_groups_per_read_dqs - 1;
1549                      vg >= 0; vg--) {
1550                         /* Reset the FIFOs to get pointers to known state. */
1551                         writel(0, &phy_mgr_cmd->fifo_reset);
1552                         writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
1553                                   RW_MGR_RESET_READ_DATAPATH_OFFSET);
1554
1555                         if (all_groups) {
1556                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1557                                        RW_MGR_RUN_ALL_GROUPS_OFFSET;
1558                         } else {
1559                                 addr = SDR_PHYGRP_RWMGRGRP_ADDRESS |
1560                                        RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1561                         }
1562
1563                         writel(seq->rwcfg->read_b2b, addr +
1564                                ((group *
1565                                  seq->rwcfg->mem_virtual_groups_per_read_dqs +
1566                                  vg) << 2));
1567
1568                         base_rw_mgr = readl(SDR_PHYGRP_RWMGRGRP_ADDRESS);
1569                         tmp_bit_chk <<=
1570                                 seq->rwcfg->mem_dq_per_read_dqs /
1571                                 seq->rwcfg->mem_virtual_groups_per_read_dqs;
1572                         tmp_bit_chk |= correct_mask_vg & ~(base_rw_mgr);
1573                 }
1574
1575                 *bit_chk &= tmp_bit_chk;
1576         }
1577
1578         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
1579         writel(seq->rwcfg->clear_dqs_enable, addr + (group << 2));
1580
1581         set_rank_and_odt_mask(seq, 0, RW_MGR_ODT_MODE_OFF);
1582
1583         if (all_correct) {
1584                 ret = (*bit_chk == seq->param.read_correct_mask);
1585                 debug_cond(DLEVEL >= 2,
1586                            "%s:%d read_test(%u,ALL,%u) => (%u == %u) => %i\n",
1587                            __func__, __LINE__, group, all_groups, *bit_chk,
1588                            seq->param.read_correct_mask, ret);
1589         } else  {
1590                 ret = (*bit_chk != 0x00);
1591                 debug_cond(DLEVEL >= 2,
1592                            "%s:%d read_test(%u,ONE,%u) => (%u != %u) => %i\n",
1593                            __func__, __LINE__, group, all_groups, *bit_chk,
1594                            0, ret);
1595         }
1596
1597         return ret;
1598 }
1599
1600 /**
1601  * rw_mgr_mem_calibrate_read_test_all_ranks() - Perform READ test on all ranks
1602  * @grp:                Read/Write group
1603  * @num_tries:          Number of retries of the test
1604  * @all_correct:        All bits must be correct in the mask
1605  * @all_groups:         Test all R/W groups
1606  *
1607  * Perform a READ test across all memory ranks.
1608  */
1609 static int
1610 rw_mgr_mem_calibrate_read_test_all_ranks(struct socfpga_sdrseq *seq,
1611                                          const u32 grp, const u32 num_tries,
1612                                          const u32 all_correct,
1613                                          const u32 all_groups)
1614 {
1615         u32 bit_chk;
1616         return rw_mgr_mem_calibrate_read_test(seq, 0, grp, num_tries,
1617                                               all_correct, &bit_chk, all_groups,
1618                                               1);
1619 }
1620
1621 /**
1622  * rw_mgr_incr_vfifo() - Increase VFIFO value
1623  * @grp:        Read/Write group
1624  *
1625  * Increase VFIFO value.
1626  */
1627 static void rw_mgr_incr_vfifo(const u32 grp)
1628 {
1629         writel(grp, &phy_mgr_cmd->inc_vfifo_hard_phy);
1630 }
1631
1632 /**
1633  * rw_mgr_decr_vfifo() - Decrease VFIFO value
1634  * @grp:        Read/Write group
1635  *
1636  * Decrease VFIFO value.
1637  */
1638 static void rw_mgr_decr_vfifo(struct socfpga_sdrseq *seq, const u32 grp)
1639 {
1640         u32 i;
1641
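             /*
              * The hardware VFIFO pointer only increments, so emulate a
              * single decrement by wrapping around with size - 1 increments.
              */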
1642         for (i = 0; i < seq->misccfg->read_valid_fifo_size - 1; i++)
1643                 rw_mgr_incr_vfifo(grp);
1644 }
1645
1646 /**
1647  * find_vfifo_failing_read() - Push VFIFO to get a failing read
1648  * @grp:        Read/Write group
1649  *
1650  * Push VFIFO until a failing read happens.
1651  */
1652 static int find_vfifo_failing_read(struct socfpga_sdrseq *seq,
1653                                    const u32 grp)
1654 {
1655         u32 v, ret, fail_cnt = 0;
1656
1657         for (v = 0; v < seq->misccfg->read_valid_fifo_size; v++) {
1658                 debug_cond(DLEVEL >= 2, "%s:%d: vfifo %u\n",
1659                            __func__, __LINE__, v);
1660                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1661                                                                PASS_ONE_BIT, 0);
1662                 if (!ret) {
1663                         fail_cnt++;
1664
1665                         if (fail_cnt == 2)
1666                                 return v;
1667                 }
1668
1669                 /* Fiddle with FIFO. */
1670                 rw_mgr_incr_vfifo(grp);
1671         }
1672
1673         /* No failing read found! Something must have gone wrong. */
1674         debug_cond(DLEVEL >= 2, "%s:%d: vfifo failed\n", __func__, __LINE__);
1675         return 0;
1676 }
1677
1678 /**
1679  * sdr_find_phase_delay() - Find DQS enable phase or delay
1680  * @working:    If 1, look for working phase/delay, if 0, look for non-working
1681  * @delay:      If 1, look for delay, if 0, look for phase
1682  * @grp:        Read/Write group
1683  * @work:       Working window position
1684  * @work_inc:   Working window increment
1685  * @pd:         DQS Phase/Delay Iterator
1686  *
1687  * Find working or non-working DQS enable phase setting.
1688  */
1689 static int sdr_find_phase_delay(struct socfpga_sdrseq *seq, int working,
1690                                 int delay, const u32 grp, u32 *work,
1691                                 const u32 work_inc, u32 *pd)
1692 {
1693         const u32 max = delay ? seq->iocfg->dqs_en_delay_max :
1694                                 seq->iocfg->dqs_en_phase_max;
1695         u32 ret;
1696
1697         for (; *pd <= max; (*pd)++) {
1698                 if (delay)
1699                         scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, *pd);
1700                 else
1701                         scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, *pd);
1702
1703                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1704                                                                PASS_ONE_BIT, 0);
1705                 if (!working)
1706                         ret = !ret;
1707
1708                 if (ret)
1709                         return 0;
1710
1711                 if (work)
1712                         *work += work_inc;
1713         }
1714
1715         return -EINVAL;
1716 }

1717 /**
1718  * sdr_find_phase() - Find DQS enable phase
1719  * @working:    If 1, look for working phase, if 0, look for non-working phase
1720  * @grp:        Read/Write group
1721  * @work:       Working window position
1722  * @i:          Iterator
1723  * @p:          DQS Phase Iterator
1724  *
1725  * Find working or non-working DQS enable phase setting.
1726  */
1727 static int sdr_find_phase(struct socfpga_sdrseq *seq, int working,
1728                           const u32 grp, u32 *work, u32 *i, u32 *p)
1729 {
1730         const u32 end = seq->misccfg->read_valid_fifo_size + (working ? 0 : 1);
1731         int ret;
1732
1733         for (; *i < end; (*i)++) {
1734                 if (working)
1735                         *p = 0;
1736
1737                 ret = sdr_find_phase_delay(seq, working, 0, grp, work,
1738                                            seq->iocfg->delay_per_opa_tap, p);
1739                 if (!ret)
1740                         return 0;
1741
1742                 if (*p > seq->iocfg->dqs_en_phase_max) {
1743                         /* Fiddle with FIFO. */
1744                         rw_mgr_incr_vfifo(grp);
1745                         if (!working)
1746                                 *p = 0;
1747                 }
1748         }
1749
1750         return -EINVAL;
1751 }
1752
1753 /**
1754  * sdr_working_phase() - Find working DQS enable phase
1755  * @grp:        Read/Write group
1756  * @work_bgn:   Working window start position
1757  * @d:          dtaps output value
1758  * @p:          DQS Phase Iterator
1759  * @i:          Iterator
1760  *
1761  * Find working DQS enable phase setting.
1762  */
1763 static int sdr_working_phase(struct socfpga_sdrseq *seq, const u32 grp,
1764                              u32 *work_bgn, u32 *d, u32 *p, u32 *i)
1765 {
1766         const u32 dtaps_per_ptap = seq->iocfg->delay_per_opa_tap /
1767                                    seq->iocfg->delay_per_dqs_en_dchain_tap;
1768         int ret;
1769
1770         *work_bgn = 0;
1771
1772         for (*d = 0; *d <= dtaps_per_ptap; (*d)++) {
1773                 *i = 0;
1774                 scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, *d);
1775                 ret = sdr_find_phase(seq, 1, grp, work_bgn, i, p);
1776                 if (!ret)
1777                         return 0;
1778                 *work_bgn += seq->iocfg->delay_per_dqs_en_dchain_tap;
1779         }
1780
1781         /* Cannot find working solution */
1782         debug_cond(DLEVEL >= 2, "%s:%d find_dqs_en_phase: no vfifo/ptap/dtap\n",
1783                    __func__, __LINE__);
1784         return -EINVAL;
1785 }
1786
1787 /**
1788  * sdr_backup_phase() - Find DQS enable backup phase
1789  * @grp:        Read/Write group
1790  * @work_bgn:   Working window start position
1791  * @p:          DQS Phase Iterator
1792  *
1793  * Find DQS enable backup phase setting.
1794  */
1795 static void sdr_backup_phase(struct socfpga_sdrseq *seq, const u32 grp,
1796                              u32 *work_bgn, u32 *p)
1797 {
1798         u32 tmp_delay, d;
1799         int ret;
1800
1801         /* Special case code for backing up a phase */
1802         if (*p == 0) {
1803                 *p = seq->iocfg->dqs_en_phase_max;
1804                 rw_mgr_decr_vfifo(seq, grp);
1805         } else {
1806                 (*p)--;
1807         }
1808         tmp_delay = *work_bgn - seq->iocfg->delay_per_opa_tap;
1809         scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, *p);
1810
1811         for (d = 0; d <= seq->iocfg->dqs_en_delay_max && tmp_delay < *work_bgn;
1812              d++) {
1813                 scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, d);
1814
1815                 ret = rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1816                                                                PASS_ONE_BIT, 0);
1817                 if (ret) {
1818                         *work_bgn = tmp_delay;
1819                         break;
1820                 }
1821
1822                 tmp_delay += seq->iocfg->delay_per_dqs_en_dchain_tap;
1823         }
1824
1825         /* Restore VFIFO to old state before we decremented it (if needed). */
1826         (*p)++;
1827         if (*p > seq->iocfg->dqs_en_phase_max) {
1828                 *p = 0;
1829                 rw_mgr_incr_vfifo(grp);
1830         }
1831
1832         scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, 0);
1833 }
1834
1835 /**
1836  * sdr_nonworking_phase() - Find non-working DQS enable phase
1837  * @grp:        Read/Write group
1838  * @work_end:   Working window end position
1839  * @p:          DQS Phase Iterator
1840  * @i:          Iterator
1841  *
1842  * Find non-working DQS enable phase setting.
1843  */
1844 static int sdr_nonworking_phase(struct socfpga_sdrseq *seq,
1845                                 const u32 grp, u32 *work_end, u32 *p, u32 *i)
1846 {
1847         int ret;
1848
1849         (*p)++;
1850         *work_end += seq->iocfg->delay_per_opa_tap;
1851         if (*p > seq->iocfg->dqs_en_phase_max) {
1852                 /* Fiddle with FIFO. */
1853                 *p = 0;
1854                 rw_mgr_incr_vfifo(grp);
1855         }
1856
1857         ret = sdr_find_phase(seq, 0, grp, work_end, i, p);
1858         if (ret) {
1859                 /* Cannot see edge of failing read. */
1860                 debug_cond(DLEVEL >= 2, "%s:%d: end: failed\n",
1861                            __func__, __LINE__);
1862         }
1863
1864         return ret;
1865 }
1866
1867 /**
1868  * sdr_find_window_center() - Find center of the working DQS window.
1869  * @grp:        Read/Write group
1870  * @work_bgn:   First working settings
1871  * @work_end:   Last working settings
1872  *
1873  * Find center of the working DQS enable window.
1874  */
1875 static int sdr_find_window_center(struct socfpga_sdrseq *seq,
1876                                   const u32 grp, const u32 work_bgn,
1877                                   const u32 work_end)
1878 {
1879         u32 work_mid;
1880         int tmp_delay = 0;
1881         int i, p, d;
1882
1883         work_mid = (work_bgn + work_end) / 2;
1884
1885         debug_cond(DLEVEL >= 2, "work_bgn=%d work_end=%d work_mid=%d\n",
1886                    work_bgn, work_end, work_mid);
1887         /* Get the middle delay to be less than a VFIFO delay */
1888         tmp_delay = (seq->iocfg->dqs_en_phase_max + 1)
1889                 * seq->iocfg->delay_per_opa_tap;
1890
1891         debug_cond(DLEVEL >= 2, "vfifo ptap delay %d\n", tmp_delay);
1892         work_mid %= tmp_delay;
1893         debug_cond(DLEVEL >= 2, "new work_mid %d\n", work_mid);
1894
1895         tmp_delay = rounddown(work_mid, seq->iocfg->delay_per_opa_tap);
1896         if (tmp_delay > seq->iocfg->dqs_en_phase_max
1897                 * seq->iocfg->delay_per_opa_tap) {
1898                 tmp_delay = seq->iocfg->dqs_en_phase_max
1899                         * seq->iocfg->delay_per_opa_tap;
1900         }
1901         p = tmp_delay / seq->iocfg->delay_per_opa_tap;
1902
1903         debug_cond(DLEVEL >= 2, "new p %d, tmp_delay=%d\n", p, tmp_delay);
1904
1905         d = DIV_ROUND_UP(work_mid - tmp_delay,
1906                          seq->iocfg->delay_per_dqs_en_dchain_tap);
1907         if (d > seq->iocfg->dqs_en_delay_max)
1908                 d = seq->iocfg->dqs_en_delay_max;
1909         tmp_delay += d * seq->iocfg->delay_per_dqs_en_dchain_tap;
1910
1911         debug_cond(DLEVEL >= 2, "new d %d, tmp_delay=%d\n", d, tmp_delay);
1912
1913         scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, p);
1914         scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, d);
1915
1916         /*
1917          * Push VFIFO until we can successfully calibrate. We can do this
1918          * because the largest possible margin is 1 VFIFO cycle.
1919          */
1920         for (i = 0; i < seq->misccfg->read_valid_fifo_size; i++) {
1921                 debug_cond(DLEVEL >= 2, "find_dqs_en_phase: center\n");
1922                 if (rw_mgr_mem_calibrate_read_test_all_ranks(seq, grp, 1,
1923                                                              PASS_ONE_BIT,
1924                                                              0)) {
1925                         debug_cond(DLEVEL >= 2,
1926                                    "%s:%d center: found: ptap=%u dtap=%u\n",
1927                                    __func__, __LINE__, p, d);
1928                         return 0;
1929                 }
1930
1931                 /* Fiddle with FIFO. */
1932                 rw_mgr_incr_vfifo(grp);
1933         }
1934
1935         debug_cond(DLEVEL >= 2, "%s:%d center: failed.\n",
1936                    __func__, __LINE__);
1937         return -EINVAL;
1938 }
1939
1940 /**
1941  * rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase() - Find a good DQS enable
1942  * @seq:        Sequencer state
1943  * @grp:        Read/Write Group
1944  *
1945  * Find a good DQS enable to use.
1946  */
1947 static int
1948 rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(struct socfpga_sdrseq *seq,
1949                                              const u32 grp)
1950 {
1951         u32 d, p, i;
1952         u32 dtaps_per_ptap;
1953         u32 work_bgn, work_end;
1954         u32 found_passing_read, found_failing_read = 0, initial_failing_dtap;
1955         int ret;
1956
1957         debug("%s:%d %u\n", __func__, __LINE__, grp);
1958
1959         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
1960
1961         scc_mgr_set_dqs_en_delay_all_ranks(seq, grp, 0);
1962         scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, 0);
1963
1964         /* Step 0: Determine number of delay taps for each phase tap. */
1965         dtaps_per_ptap = seq->iocfg->delay_per_opa_tap /
1966                          seq->iocfg->delay_per_dqs_en_dchain_tap;
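             /*
              * Worked example with assumed values: delay_per_opa_tap =
              * 400 ps and delay_per_dqs_en_dchain_tap = 50 ps would give
              * dtaps_per_ptap = 400 / 50 = 8 delay taps per phase tap.
              */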
1967
1968         /* Step 1: First push vfifo until we get a failing read. */
1969         find_vfifo_failing_read(seq, grp);
1970
1971         /* Step 2: Find first working phase, increment in ptaps. */
1972         work_bgn = 0;
1973         ret = sdr_working_phase(seq, grp, &work_bgn, &d, &p, &i);
1974         if (ret)
1975                 return ret;
1976
1977         work_end = work_bgn;
1978
1979         /*
1980          * If d is 0 then the working window covers a phase tap and we can
1981          * follow the old procedure. Otherwise, we've found the beginning
1982          * and we need to increment the dtaps until we find the end.
1983          */
1984         if (d == 0) {
1985                 /*
1986                  * Step 3a: If we have room, back off by one and
1987                  *          increment in dtaps.
1988                  */
1989                 sdr_backup_phase(seq, grp, &work_bgn, &p);
1990
1991                 /*
1992                  * Step 4a: go forward from working phase to non working
1993                  * phase, increment in ptaps.
1994                  */
1995                 ret = sdr_nonworking_phase(seq, grp, &work_end, &p, &i);
1996                 if (ret)
1997                         return ret;
1998
1999                 /* Step 5a: Back off one from last, increment in dtaps. */
2000
2001                 /* Special case code for backing up a phase */
2002                 if (p == 0) {
2003                         p = seq->iocfg->dqs_en_phase_max;
2004                         rw_mgr_decr_vfifo(seq, grp);
2005                 } else {
2006                         p = p - 1;
2007                 }
2008
2009                 work_end -= seq->iocfg->delay_per_opa_tap;
2010                 scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, p);
2011
2012                 d = 0;
2013
2014                 debug_cond(DLEVEL >= 2, "%s:%d p: ptap=%u\n",
2015                            __func__, __LINE__, p);
2016         }
2017
2018         /* The dtap increment to find the failing edge is done here. */
2019         sdr_find_phase_delay(seq, 0, 1, grp, &work_end,
2020                              seq->iocfg->delay_per_dqs_en_dchain_tap, &d);
2021
2022         /* Go back to working dtap */
2023         if (d != 0)
2024                 work_end -= seq->iocfg->delay_per_dqs_en_dchain_tap;
2025
2026         debug_cond(DLEVEL >= 2,
2027                    "%s:%d p/d: ptap=%u dtap=%u end=%u\n",
2028                    __func__, __LINE__, p, d - 1, work_end);
2029
2030         if (work_end < work_bgn) {
2031                 /* nil range */
2032                 debug_cond(DLEVEL >= 2, "%s:%d end-2: failed\n",
2033                            __func__, __LINE__);
2034                 return -EINVAL;
2035         }
2036
2037         debug_cond(DLEVEL >= 2, "%s:%d found range [%u,%u]\n",
2038                    __func__, __LINE__, work_bgn, work_end);
2039
2040         /*
2041          * We need to calculate the number of dtaps that equal a ptap.
2042          * To do that we'll back up a ptap and re-find the edge of the
2043          * window using dtaps
2044          */
2045         debug_cond(DLEVEL >= 2, "%s:%d calculate dtaps_per_ptap for tracking\n",
2046                    __func__, __LINE__);
2047
2048         /* Special case code for backing up a phase */
2049         if (p == 0) {
2050                 p = seq->iocfg->dqs_en_phase_max;
2051                 rw_mgr_decr_vfifo(seq, grp);
2052                 debug_cond(DLEVEL >= 2, "%s:%d backed up cycle/phase: p=%u\n",
2053                            __func__, __LINE__, p);
2054         } else {
2055                 p = p - 1;
2056                 debug_cond(DLEVEL >= 2, "%s:%d backed up phase only: p=%u\n",
2057                            __func__, __LINE__, p);
2058         }
2059
2060         scc_mgr_set_dqs_en_phase_all_ranks(seq, grp, p);
2061
2062         /*
2063          * Increase dtap until we first see a passing read (in case the
2064          * window is smaller than a ptap), and then a failing read to
2065          * mark the edge of the window again.
2066          */
2067
2068         /* Find a passing read. */
2069         debug_cond(DLEVEL >= 2, "%s:%d find passing read\n",
2070                    __func__, __LINE__);
2071
2072         initial_failing_dtap = d;
2073
2074         found_passing_read = !sdr_find_phase_delay(seq, 1, 1, grp, NULL, 0, &d);
2075         if (found_passing_read) {
2076                 /* Find a failing read. */
2077                 debug_cond(DLEVEL >= 2, "%s:%d find failing read\n",
2078                            __func__, __LINE__);
2079                 d++;
2080                 found_failing_read = !sdr_find_phase_delay(seq, 0, 1, grp, NULL,
2081                                                            0, &d);
2082         } else {
2083                 debug_cond(DLEVEL >= 1,
2084                            "%s:%d failed to calculate dtaps per ptap. Fall back on static value\n",
2085                            __func__, __LINE__);
2086         }
2087
2088         /*
2089          * The dynamically calculated dtaps_per_ptap is only valid if we
2090          * found both a passing and a failing read. If we didn't, it means
2091          * d hit the max (seq->iocfg->dqs_en_delay_max) and dtaps_per_ptap
2092          * retains its statically calculated value.
2093          */
2094         if (found_passing_read && found_failing_read)
2095                 dtaps_per_ptap = d - initial_failing_dtap;
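             /*
              * Illustrative: if the first failing edge sat at dtap 2 and the
              * next failing edge is found at dtap 10, the measured
              * dtaps_per_ptap is 10 - 2 = 8.
              */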
2096
2097         writel(dtaps_per_ptap, &sdr_reg_file->dtaps_per_ptap);
2098         debug_cond(DLEVEL >= 2, "%s:%d dtaps_per_ptap=%u - %u = %u\n",
2099                    __func__, __LINE__, d, initial_failing_dtap, dtaps_per_ptap);
2100
2101         /* Step 6: Find the centre of the window. */
2102         ret = sdr_find_window_center(seq, grp, work_bgn, work_end);
2103
2104         return ret;
2105 }
2106
2107 /**
2108  * search_stop_check() - Check if the detected edge is valid
2109  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2110  * @d:                  DQS delay
2111  * @rank_bgn:           Rank number
2112  * @write_group:        Write Group
2113  * @read_group:         Read Group
2114  * @bit_chk:            Resulting bit mask after the test
2115  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2116  * @use_read_test:      Perform read test
2117  *
2118  * Test if the found edge is valid.
2119  */
2120 static u32 search_stop_check(struct socfpga_sdrseq *seq, const int write,
2121                              const int d, const int rank_bgn,
2122                              const u32 write_group, const u32 read_group,
2123                              u32 *bit_chk, u32 *sticky_bit_chk,
2124                              const u32 use_read_test)
2125 {
2126         const u32 ratio = seq->rwcfg->mem_if_read_dqs_width /
2127                           seq->rwcfg->mem_if_write_dqs_width;
2128         const u32 correct_mask = write ? seq->param.write_correct_mask :
2129                                          seq->param.read_correct_mask;
2130         const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2131                                     seq->rwcfg->mem_dq_per_read_dqs;
2132         u32 ret;
2133         /*
2134          * Stop searching when the read test doesn't pass AND when
2135          * we've seen a passing read on every bit.
2136          */
2137         if (write) {                    /* WRITE-ONLY */
2138                 ret = !rw_mgr_mem_calibrate_write_test(seq, rank_bgn,
2139                                                          write_group, 0,
2140                                                          PASS_ONE_BIT, bit_chk,
2141                                                          0);
2142         } else if (use_read_test) {     /* READ-ONLY */
2143                 ret = !rw_mgr_mem_calibrate_read_test(seq, rank_bgn, read_group,
2144                                                         NUM_READ_PB_TESTS,
2145                                                         PASS_ONE_BIT, bit_chk,
2146                                                         0, 0);
2147         } else {                        /* READ-ONLY */
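                     /*
                      * No read test available: reuse the write test and pick
                      * out the bits that belong to this read group.
                      * Illustrative (assumed) values: with ratio = 2 and
                      * per_dqs = 8, read group 5 under write group 2 shifts
                      * by 8 * (5 - 4) = 8, i.e. selects the second byte of
                      * the result.
                      */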
2148                 rw_mgr_mem_calibrate_write_test(seq, rank_bgn, write_group, 0,
2149                                                 PASS_ONE_BIT, bit_chk, 0);
2150                 *bit_chk = *bit_chk >> (per_dqs *
2151                         (read_group - (write_group * ratio)));
2152                 ret = (*bit_chk == 0);
2153         }
2154         *sticky_bit_chk = *sticky_bit_chk | *bit_chk;
2155         ret = ret && (*sticky_bit_chk == correct_mask);
2156         debug_cond(DLEVEL >= 2,
2157                    "%s:%d center(left): dtap=%u => %u == %u && %u\n",
2158                    __func__, __LINE__, d,
2159                    *sticky_bit_chk, correct_mask, ret);
2160         return ret;
2161 }
2162
2163 /**
2164  * search_left_edge() - Find left edge of DQ/DQS working phase
2165  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2166  * @rank_bgn:           Rank number
2167  * @write_group:        Write Group
2168  * @read_group:         Read Group
2169  * @test_bgn:           Rank number to begin the test
2170  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2171  * @left_edge:          Left edge of the DQ/DQS phase
2172  * @right_edge:         Right edge of the DQ/DQS phase
2173  * @use_read_test:      Perform read test
2174  *
2175  * Find left edge of DQ/DQS working phase.
2176  */
2177 static void search_left_edge(struct socfpga_sdrseq *seq, const int write,
2178                              const int rank_bgn, const u32 write_group,
2179                              const u32 read_group, const u32 test_bgn,
2180                              u32 *sticky_bit_chk, int *left_edge,
2181                              int *right_edge, const u32 use_read_test)
2182 {
2183         const u32 delay_max = write ? seq->iocfg->io_out1_delay_max :
2184                                       seq->iocfg->io_in_delay_max;
2185         const u32 dqs_max = write ? seq->iocfg->io_out1_delay_max :
2186                                     seq->iocfg->dqs_in_delay_max;
2187         const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2188                                     seq->rwcfg->mem_dq_per_read_dqs;
2189         u32 stop, bit_chk;
2190         int i, d;
2191
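             /*
              * Bookkeeping convention: delay_max + 1 means "edge not seen
              * yet". While a bit has no left edge, each failing delay d is
              * recorded as right_edge = -(d + 1); the negative value
              * preserves the last failing position for the window
              * calculations later on.
              */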
2192         for (d = 0; d <= dqs_max; d++) {
2193                 if (write)
2194                         scc_mgr_apply_group_dq_out1_delay(seq, d);
2195                 else
2196                         scc_mgr_apply_group_dq_in_delay(seq, test_bgn, d);
2197
2198                 writel(0, &sdr_scc_mgr->update);
2199
2200                 stop = search_stop_check(seq, write, d, rank_bgn, write_group,
2201                                          read_group, &bit_chk, sticky_bit_chk,
2202                                          use_read_test);
2203                 if (stop == 1)
2204                         break;
2205
2206                 /* stop != 1 */
2207                 for (i = 0; i < per_dqs; i++) {
2208                         if (bit_chk & 1) {
2209                                 /*
2210                                  * Remember a passing test as
2211                                  * the left_edge.
2212                                  */
2213                                 left_edge[i] = d;
2214                         } else {
2215                                 /*
2216                                  * If a left edge has not been seen
2217                                  * yet, then a future passing test
2218                                  * will mark this edge as the right
2219                                  * edge.
2220                                  */
2221                                 if (left_edge[i] == delay_max + 1)
2222                                         right_edge[i] = -(d + 1);
2223                         }
2224                         bit_chk >>= 1;
2225                 }
2226         }
2227
2228         /* Reset DQ delay chains to 0 */
2229         if (write)
2230                 scc_mgr_apply_group_dq_out1_delay(seq, 0);
2231         else
2232                 scc_mgr_apply_group_dq_in_delay(seq, test_bgn, 0);
2233
2234         *sticky_bit_chk = 0;
2235         for (i = per_dqs - 1; i >= 0; i--) {
2236                 debug_cond(DLEVEL >= 2,
2237                            "%s:%d vfifo_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2238                            __func__, __LINE__, i, left_edge[i],
2239                            i, right_edge[i]);
2240
2241                 /*
2242                  * Check for cases where we haven't found the left edge,
2243                  * which makes our assignment of the right edge invalid.
2244                  * Reset it to the illegal value.
2245                  */
2246                 if ((left_edge[i] == delay_max + 1) &&
2247                     (right_edge[i] != delay_max + 1)) {
2248                         right_edge[i] = delay_max + 1;
2249                         debug_cond(DLEVEL >= 2,
2250                                    "%s:%d vfifo_center: reset right_edge[%u]: %d\n",
2251                                    __func__, __LINE__, i, right_edge[i]);
2252                 }
2253
2254                 /*
2255                  * Reset sticky bit
2256                  * READ: except for bits where we have seen both
2257                  *       the left and right edge.
2258                  * WRITE: except for bits where we have seen the
2259                  *        left edge.
2260                  */
2261                 *sticky_bit_chk <<= 1;
2262                 if (write) {
2263                         if (left_edge[i] != delay_max + 1)
2264                                 *sticky_bit_chk |= 1;
2265                 } else {
2266                         if ((left_edge[i] != delay_max + 1) &&
2267                             (right_edge[i] != delay_max + 1))
2268                                 *sticky_bit_chk |= 1;
2269                 }
2270         }
2271 }
2272
2273 /**
2274  * search_right_edge() - Find right edge of DQ/DQS working phase
2275  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2276  * @rank_bgn:           Rank number
2277  * @write_group:        Write Group
2278  * @read_group:         Read Group
2279  * @start_dqs:          DQS start phase
2280  * @start_dqs_en:       DQS enable start phase
2281  * @sticky_bit_chk:     Resulting sticky bit mask after the test
2282  * @left_edge:          Left edge of the DQ/DQS phase
2283  * @right_edge:         Right edge of the DQ/DQS phase
2284  * @use_read_test:      Perform read test
2285  *
2286  * Find right edge of DQ/DQS working phase.
2287  */
2288 static int search_right_edge(struct socfpga_sdrseq *seq, const int write,
2289                              const int rank_bgn, const u32 write_group,
2290                              const u32 read_group, const int start_dqs,
2291                              const int start_dqs_en, u32 *sticky_bit_chk,
2292                              int *left_edge, int *right_edge,
2293                              const u32 use_read_test)
2294 {
2295         const u32 delay_max = write ? seq->iocfg->io_out1_delay_max :
2296                                       seq->iocfg->io_in_delay_max;
2297         const u32 dqs_max = write ? seq->iocfg->io_out1_delay_max :
2298                                     seq->iocfg->dqs_in_delay_max;
2299         const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2300                                     seq->rwcfg->mem_dq_per_read_dqs;
2301         u32 stop, bit_chk;
2302         int i, d;
2303
2304         for (d = 0; d <= dqs_max - start_dqs; d++) {
2305                 if (write) {    /* WRITE-ONLY */
2306                         scc_mgr_apply_group_dqs_io_and_oct_out1(seq,
2307                                                                 write_group,
2308                                                                 d + start_dqs);
2309                 } else {        /* READ-ONLY */
2310                         scc_mgr_set_dqs_bus_in_delay(read_group, d + start_dqs);
2311                         if (seq->iocfg->shift_dqs_en_when_shift_dqs) {
2312                                 u32 delay = d + start_dqs_en;
2313                                 if (delay > seq->iocfg->dqs_en_delay_max)
2314                                         delay = seq->iocfg->dqs_en_delay_max;
2315                                 scc_mgr_set_dqs_en_delay(read_group, delay);
2316                         }
2317                         scc_mgr_load_dqs(read_group);
2318                 }
2319
2320                 writel(0, &sdr_scc_mgr->update);
2321
2322                 stop = search_stop_check(seq, write, d, rank_bgn, write_group,
2323                                          read_group, &bit_chk, sticky_bit_chk,
2324                                          use_read_test);
2325                 if (stop == 1) {
2326                         if (write && (d == 0)) {        /* WRITE-ONLY */
2327                                 for (i = 0;
2328                                      i < seq->rwcfg->mem_dq_per_write_dqs;
2329                                      i++) {
2330                                         /*
2331                                          * d = 0 failed, but it passed when
2332                                          * testing the left edge, so it must be
2333                                          * marginal, set it to -1
2334                                          */
2335                                         if (right_edge[i] == delay_max + 1 &&
2336                                             left_edge[i] != delay_max + 1)
2337                                                 right_edge[i] = -1;
2338                                 }
2339                         }
2340                         break;
2341                 }
2342
2343                 /* stop != 1 */
2344                 for (i = 0; i < per_dqs; i++) {
2345                         if (bit_chk & 1) {
2346                                 /*
2347                                  * Remember a passing test as
2348                                  * the right_edge.
2349                                  */
2350                                 right_edge[i] = d;
2351                         } else {
2352                                 if (d != 0) {
2353                                         /*
2354                                          * If a right edge has not
2355                                          * been seen yet, then a future
2356                                          * passing test will mark this
2357                                          * edge as the left edge.
2358                                          */
2359                                         if (right_edge[i] == delay_max + 1)
2360                                                 left_edge[i] = -(d + 1);
2361                                 } else {
2362                                         /*
2363                                          * d = 0 failed, but it passed
2364                                          * when testing the left edge,
2365                                          * so it must be marginal, set
2366                                          * it to -1
2367                                          */
2368                                         if (right_edge[i] == delay_max + 1 &&
2369                                             left_edge[i] != delay_max + 1)
2370                                                 right_edge[i] = -1;
2371                                         /*
2372                                          * If a right edge has not been
2373                                          * seen yet, then a future
2374                                          * passing test will mark this
2375                                          * edge as the left edge.
2376                                          */
2377                                         else if (right_edge[i] == delay_max + 1)
2378                                                 left_edge[i] = -(d + 1);
2379                                 }
2380                         }
2381
2382                         debug_cond(DLEVEL >= 2, "%s:%d center[r,d=%u]: ",
2383                                    __func__, __LINE__, d);
2384                         debug_cond(DLEVEL >= 2,
2385                                    "bit_chk_test=%i left_edge[%u]: %d ",
2386                                    bit_chk & 1, i, left_edge[i]);
2387                         debug_cond(DLEVEL >= 2, "right_edge[%u]: %d\n", i,
2388                                    right_edge[i]);
2389                         bit_chk >>= 1;
2390                 }
2391         }
2392
2393         /* Check that all bits have a window */
2394         for (i = 0; i < per_dqs; i++) {
2395                 debug_cond(DLEVEL >= 2,
2396                    "%s:%d write_center: left_edge[%u]: %d right_edge[%u]: %d\n",
2397                            __func__, __LINE__, i, left_edge[i],
2398                            i, right_edge[i]);
2399                 if ((left_edge[i] == dqs_max + 1) ||
2400                     (right_edge[i] == dqs_max + 1))
2401                         return i + 1;   /* FIXME: If we fail, retval > 0 */
2402         }
2403
2404         return 0;
2405 }
2406
2407 /**
2408  * get_window_mid_index() - Find the best middle setting of DQ/DQS phase
2409  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2410  * @left_edge:          Left edge of the DQ/DQS phase
2411  * @right_edge:         Right edge of the DQ/DQS phase
2412  * @mid_min:            Best DQ/DQS phase middle setting
2413  *
2414  * Find index and value of the middle of the DQ/DQS working phase.
2415  */
2416 static int get_window_mid_index(struct socfpga_sdrseq *seq,
2417                                 const int write, int *left_edge,
2418                                 int *right_edge, int *mid_min)
2419 {
2420         const u32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2421                                     seq->rwcfg->mem_dq_per_read_dqs;
2422         int i, mid, min_index;
2423
2424         /* Find middle of window for each DQ bit */
2425         *mid_min = left_edge[0] - right_edge[0];
2426         min_index = 0;
2427         for (i = 1; i < per_dqs; i++) {
2428                 mid = left_edge[i] - right_edge[i];
2429                 if (mid < *mid_min) {
2430                         *mid_min = mid;
2431                         min_index = i;
2432                 }
2433         }
2434
2435         /*
2436          * -mid_min/2 represents the amount that we need to move DQS.
2437          * If mid_min is odd and positive we'll need to add one to make
2438          * sure the rounding in further calculations is correct (always
2439          * bias to the right), so just add 1 for all positive values.
2440          */
2441         if (*mid_min > 0)
2442                 (*mid_min)++;
2443         *mid_min = *mid_min / 2;
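             /*
              * Worked example (assumed values): if the smallest left/right
              * difference across all bits is left_edge = 12, right_edge = 5,
              * then mid_min = 7; being odd and positive it is bumped to 8
              * and halved to 4, biasing the subsequent DQS move right.
              */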
2444
2445         debug_cond(DLEVEL >= 1, "%s:%d vfifo_center: *mid_min=%d (index=%u)\n",
2446                    __func__, __LINE__, *mid_min, min_index);
2447         return min_index;
2448 }
2449
2450 /**
2451  * center_dq_windows() - Center the DQ/DQS windows
2452  * @write:              Perform read (Stage 2) or write (Stage 3) calibration
2453  * @left_edge:          Left edge of the DQ/DQS phase
2454  * @right_edge:         Right edge of the DQ/DQS phase
2455  * @mid_min:            Adjusted DQ/DQS phase middle setting
2456  * @orig_mid_min:       Original DQ/DQS phase middle setting
2457  * @min_index:          DQ/DQS phase middle setting index
2458  * @test_bgn:           Rank number to begin the test
2459  * @dq_margin:          Amount of shift for the DQ
2460  * @dqs_margin:         Amount of shift for the DQS
2461  *
2462  * Align the DQ/DQS windows in each group.
2463  */
2464 static void center_dq_windows(struct socfpga_sdrseq *seq,
2465                               const int write, int *left_edge, int *right_edge,
2466                               const int mid_min, const int orig_mid_min,
2467                               const int min_index, const int test_bgn,
2468                               int *dq_margin, int *dqs_margin)
2469 {
2470         const s32 delay_max = write ? seq->iocfg->io_out1_delay_max :
2471                                       seq->iocfg->io_in_delay_max;
2472         const s32 per_dqs = write ? seq->rwcfg->mem_dq_per_write_dqs :
2473                                     seq->rwcfg->mem_dq_per_read_dqs;
2474         const s32 delay_off = write ? SCC_MGR_IO_OUT1_DELAY_OFFSET :
2475                                       SCC_MGR_IO_IN_DELAY_OFFSET;
2476         const s32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | delay_off;
2477
2478         s32 temp_dq_io_delay1;
2479         int shift_dq, i, p;
2480
2481         /* Initialize data for export structures */
2482         *dqs_margin = delay_max + 1;
2483         *dq_margin  = delay_max + 1;
2484
2485         /* add delay to bring centre of all DQ windows to the same "level" */
2486         for (i = 0, p = test_bgn; i < per_dqs; i++, p++) {
2487                 /* Use values before divide by 2 to reduce round off error */
2488                 shift_dq = (left_edge[i] - right_edge[i] -
2489                         (left_edge[min_index] - right_edge[min_index]))/2  +
2490                         (orig_mid_min - mid_min);
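                     /*
                      * Illustrative: if this bit's asymmetry (left_edge -
                      * right_edge) exceeds that of the reference bit at
                      * min_index by 2 taps, shift_dq starts at 1, before the
                      * common (orig_mid_min - mid_min) correction and the
                      * range clamping below.
                      */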
2491
2492                 debug_cond(DLEVEL >= 2,
2493                            "vfifo_center: before: shift_dq[%u]=%d\n",
2494                            i, shift_dq);
2495
2496                 temp_dq_io_delay1 = readl(addr + (i << 2));
2497
2498                 if (shift_dq + temp_dq_io_delay1 > delay_max)
2499                         shift_dq = delay_max - temp_dq_io_delay1;
2500                 else if (shift_dq + temp_dq_io_delay1 < 0)
2501                         shift_dq = -temp_dq_io_delay1;
2502
2503                 debug_cond(DLEVEL >= 2,
2504                            "vfifo_center: after: shift_dq[%u]=%d\n",
2505                            i, shift_dq);
2506
2507                 if (write)
2508                         scc_mgr_set_dq_out1_delay(i,
2509                                                   temp_dq_io_delay1 + shift_dq);
2510                 else
2511                         scc_mgr_set_dq_in_delay(p,
2512                                                 temp_dq_io_delay1 + shift_dq);
2513
2514                 scc_mgr_load_dq(p);
2515
2516                 debug_cond(DLEVEL >= 2,
2517                            "vfifo_center: margin[%u]=[%d,%d]\n", i,
2518                            left_edge[i] - shift_dq + (-mid_min),
2519                            right_edge[i] + shift_dq - (-mid_min));
2520
2521                 /* To determine values for export structures */
2522                 if (left_edge[i] - shift_dq + (-mid_min) < *dq_margin)
2523                         *dq_margin = left_edge[i] - shift_dq + (-mid_min);
2524
2525                 if (right_edge[i] + shift_dq - (-mid_min) < *dqs_margin)
2526                         *dqs_margin = right_edge[i] + shift_dq - (-mid_min);
2527         }
2528 }
2529
2530 /**
2531  * rw_mgr_mem_calibrate_vfifo_center() - Per-bit deskew DQ and centering
2532  * @rank_bgn:           Rank number
2533  * @rw_group:           Read/Write Group
2534  * @test_bgn:           Rank at which the test begins
2535  * @use_read_test:      Perform a read test
2536  * @update_fom:         Update figure of merit (FOM)
2537  *
2538  * Per-bit deskew DQ and centering.
2539  */
2540 static int rw_mgr_mem_calibrate_vfifo_center(struct socfpga_sdrseq *seq,
2541                                              const u32 rank_bgn,
2542                                              const u32 rw_group,
2543                                              const u32 test_bgn,
2544                                              const int use_read_test,
2545                                              const int update_fom)
2546 {
2547         const u32 addr =
2548                 SDR_PHYGRP_SCCGRP_ADDRESS + SCC_MGR_DQS_IN_DELAY_OFFSET +
2549                 (rw_group << 2);
2550         /*
2551          * Store these as signed since there are comparisons with
2552          * signed numbers.
2553          */
2554         u32 sticky_bit_chk;
2555         s32 left_edge[seq->rwcfg->mem_dq_per_read_dqs];
2556         s32 right_edge[seq->rwcfg->mem_dq_per_read_dqs];
2557         s32 orig_mid_min, mid_min;
2558         s32 new_dqs, start_dqs, start_dqs_en = 0, final_dqs_en;
2559         s32 dq_margin, dqs_margin;
2560         int i, min_index;
2561         int ret;
2562
2563         debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2564
2565         start_dqs = readl(addr);
2566         if (seq->iocfg->shift_dqs_en_when_shift_dqs)
2567                 start_dqs_en = readl(addr - seq->iocfg->dqs_en_delay_offset);
2568
2569         /* Set the left and right edge of each bit to an illegal value, */
2570         /* using (seq->iocfg->io_in_delay_max + 1) as that illegal value. */
2571         sticky_bit_chk = 0;
2572         for (i = 0; i < seq->rwcfg->mem_dq_per_read_dqs; i++) {
2573                 left_edge[i]  = seq->iocfg->io_in_delay_max + 1;
2574                 right_edge[i] = seq->iocfg->io_in_delay_max + 1;
2575         }
2576
2577         /* Search for the left edge of the window for each bit */
2578         search_left_edge(seq, 0, rank_bgn, rw_group, rw_group, test_bgn,
2579                          &sticky_bit_chk,
2580                          left_edge, right_edge, use_read_test);
2581
2583         /* Search for the right edge of the window for each bit */
2584         ret = search_right_edge(seq, 0, rank_bgn, rw_group, rw_group,
2585                                 start_dqs, start_dqs_en,
2586                                 &sticky_bit_chk,
2587                                 left_edge, right_edge, use_read_test);
2588         if (ret) {
2589                 /*
2590                  * Restore delay chain settings before letting the loop
2591                  * in rw_mgr_mem_calibrate_vfifo to retry different
2592                  * dqs/ck relationships.
2593                  */
2594                 scc_mgr_set_dqs_bus_in_delay(rw_group, start_dqs);
2595                 if (seq->iocfg->shift_dqs_en_when_shift_dqs)
2596                         scc_mgr_set_dqs_en_delay(rw_group, start_dqs_en);
2597
2598                 scc_mgr_load_dqs(rw_group);
2599                 writel(0, &sdr_scc_mgr->update);
2600
2601                 debug_cond(DLEVEL >= 1,
2602                            "%s:%d vfifo_center: failed to find edge [%u]: %d %d\n",
2603                            __func__, __LINE__, i, left_edge[i], right_edge[i]);
2604                 if (use_read_test) {
2605                         set_failing_group_stage(seq, rw_group *
2606                                 seq->rwcfg->mem_dq_per_read_dqs + i,
2607                                 CAL_STAGE_VFIFO,
2608                                 CAL_SUBSTAGE_VFIFO_CENTER);
2609                 } else {
2610                         set_failing_group_stage(seq, rw_group *
2611                                 seq->rwcfg->mem_dq_per_read_dqs + i,
2612                                 CAL_STAGE_VFIFO_AFTER_WRITES,
2613                                 CAL_SUBSTAGE_VFIFO_CENTER);
2614                 }
2615                 return -EIO;
2616         }
2617
2618         min_index = get_window_mid_index(seq, 0, left_edge, right_edge,
2619                                          &mid_min);
2620
2621         /* Determine the amount we can change DQS (which is -mid_min) */
2622         orig_mid_min = mid_min;
2623         new_dqs = start_dqs - mid_min;
2624         if (new_dqs > seq->iocfg->dqs_in_delay_max)
2625                 new_dqs = seq->iocfg->dqs_in_delay_max;
2626         else if (new_dqs < 0)
2627                 new_dqs = 0;
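        /*
         * Illustrative example (hypothetical values, not from any real
         * device config): with start_dqs = 5, mid_min = -7 and
         * dqs_in_delay_max = 10, new_dqs = 5 - (-7) = 12 is clamped to
         * 10, and mid_min is recomputed below as 5 - 10 = -5, i.e. only
         * part of the requested shift is absorbed by the DQS delay.
         */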
2628
2629         mid_min = start_dqs - new_dqs;
2630         debug_cond(DLEVEL >= 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
2631                    mid_min, new_dqs);
2632
2633         if (seq->iocfg->shift_dqs_en_when_shift_dqs) {
2634                 if (start_dqs_en - mid_min > seq->iocfg->dqs_en_delay_max)
2635                         mid_min += start_dqs_en - mid_min -
2636                                    seq->iocfg->dqs_en_delay_max;
2637                 else if (start_dqs_en - mid_min < 0)
2638                         mid_min += start_dqs_en - mid_min;
2639         }
2640         new_dqs = start_dqs - mid_min;
2641
2642         debug_cond(DLEVEL >= 1,
2643                    "vfifo_center: start_dqs=%d start_dqs_en=%d new_dqs=%d mid_min=%d\n",
2644                    start_dqs,
2645                    seq->iocfg->shift_dqs_en_when_shift_dqs ? start_dqs_en : -1,
2646                    new_dqs, mid_min);
2647
2648         /* Add delay to bring centre of all DQ windows to the same "level". */
2649         center_dq_windows(seq, 0, left_edge, right_edge, mid_min, orig_mid_min,
2650                           min_index, test_bgn, &dq_margin, &dqs_margin);
2651
2652         /* Move DQS-en */
2653         if (seq->iocfg->shift_dqs_en_when_shift_dqs) {
2654                 final_dqs_en = start_dqs_en - mid_min;
2655                 scc_mgr_set_dqs_en_delay(rw_group, final_dqs_en);
2656                 scc_mgr_load_dqs(rw_group);
2657         }
2658
2659         /* Move DQS */
2660         scc_mgr_set_dqs_bus_in_delay(rw_group, new_dqs);
2661         scc_mgr_load_dqs(rw_group);
2662         debug_cond(DLEVEL >= 2,
2663                    "%s:%d vfifo_center: dq_margin=%d dqs_margin=%d\n",
2664                    __func__, __LINE__, dq_margin, dqs_margin);
2665
2666         /*
2667          * Do not remove this line as it makes sure all of our decisions
2668          * have been applied. Apply the update bit.
2669          */
2670         writel(0, &sdr_scc_mgr->update);
2671
2672         if ((dq_margin < 0) || (dqs_margin < 0))
2673                 return -EINVAL;
2674
2675         return 0;
2676 }
2677
2678 /**
2679  * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the
2680  * device
2681  * @rw_group:   Read/Write Group
2682  * @phase:      DQ/DQS phase
2683  *
2684  * Because initially no communication can be reliably performed with the memory
2685  * device, the sequencer uses a guaranteed write mechanism to write data into
2686  * the memory device.
2687  */
2688 static int rw_mgr_mem_calibrate_guaranteed_write(struct socfpga_sdrseq *seq,
2689                                                  const u32 rw_group,
2690                                                  const u32 phase)
2691 {
2692         int ret;
2693
2694         /* Set a particular DQ/DQS phase. */
2695         scc_mgr_set_dqdqs_output_phase_all_ranks(seq, rw_group, phase);
2696
2697         debug_cond(DLEVEL >= 1, "%s:%d guaranteed write: g=%u p=%u\n",
2698                    __func__, __LINE__, rw_group, phase);
2699
2700         /*
2701          * Altera EMI_RM 2015.05.04 :: Figure 1-25
2702          * Load up the patterns used by read calibration using the
2703          * current DQDQS phase.
2704          */
2705         rw_mgr_mem_calibrate_read_load_patterns(seq, 0, 1);
2706
2707         if (seq->gbl.phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
2708                 return 0;
2709
2710         /*
2711          * Altera EMI_RM 2015.05.04 :: Figure 1-26
2712          * Back-to-Back reads of the patterns used for calibration.
2713          */
2714         ret = rw_mgr_mem_calibrate_read_test_patterns(seq, 0, rw_group, 1);
2715         if (ret)
2716                 debug_cond(DLEVEL >= 1,
2717                            "%s:%d Guaranteed read test failed: g=%u p=%u\n",
2718                            __func__, __LINE__, rw_group, phase);
2719         return ret;
2720 }
2721
2722 /**
2723  * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
2724  * @rw_group:   Read/Write Group
2725  * @test_bgn:   Rank at which the test begins
2726  *
2727  * DQS enable calibration ensures reliable capture of the DQ signal without
2728  * glitches on the DQS line.
2729  */
2730 static int
2731 rw_mgr_mem_calibrate_dqs_enable_calibration(struct socfpga_sdrseq *seq,
2732                                             const u32 rw_group,
2733                                             const u32 test_bgn)
2734 {
2735         /*
2736          * Altera EMI_RM 2015.05.04 :: Figure 1-27
2737          * DQS and DQS Enable Signal Relationships.
2738          */
2739
2740         /* We start at zero, so we have one less DQ to divide among. */
2741         const u32 delay_step = seq->iocfg->io_in_delay_max /
2742                                (seq->rwcfg->mem_dq_per_read_dqs - 1);
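        /*
         * E.g. with io_in_delay_max = 31 and 8 DQ per read DQS
         * (illustrative values), delay_step = 31 / 7 = 4, so the DQ
         * input delays below fan out as 0, 4, 8, ..., 28 across the
         * group.
         */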
2743         int ret;
2744         u32 i, p, d, r;
2745
2746         debug("%s:%d (%u,%u)\n", __func__, __LINE__, rw_group, test_bgn);
2747
2748         /* Try different dq_in_delays since the DQ path is shorter than DQS. */
2749         for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
2750              r += NUM_RANKS_PER_SHADOW_REG) {
2751                 for (i = 0, p = test_bgn, d = 0;
2752                      i < seq->rwcfg->mem_dq_per_read_dqs;
2753                      i++, p++, d += delay_step) {
2754                         debug_cond(DLEVEL >= 1,
2755                                    "%s:%d: g=%u r=%u i=%u p=%u d=%u\n",
2756                                    __func__, __LINE__, rw_group, r, i, p, d);
2757
2758                         scc_mgr_set_dq_in_delay(p, d);
2759                         scc_mgr_load_dq(p);
2760                 }
2761
2762                 writel(0, &sdr_scc_mgr->update);
2763         }
2764
2765         /*
2766          * Try rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase across different
2767          * dq_in_delay values
2768          */
2769         ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase(seq, rw_group);
2770
2771         debug_cond(DLEVEL >= 1,
2772                    "%s:%d: g=%u found=%u; Resetting delay chain to zero\n",
2773                    __func__, __LINE__, rw_group, !ret);
2774
2775         for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
2776              r += NUM_RANKS_PER_SHADOW_REG) {
2777                 scc_mgr_apply_group_dq_in_delay(seq, test_bgn, 0);
2778                 writel(0, &sdr_scc_mgr->update);
2779         }
2780
2781         return ret;
2782 }
2783
2784 /**
2785  * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
2786  * @rw_group:           Read/Write Group
2787  * @test_bgn:           Rank at which the test begins
2788  * @use_read_test:      Perform a read test
2789  * @update_fom:         Update FOM
2790  *
2791  * The centering DQ/DQS stage attempts to align DQ and DQS signals on reads
2792  * within a group.
2793  */
2794 static int
2795 rw_mgr_mem_calibrate_dq_dqs_centering(struct socfpga_sdrseq *seq,
2796                                       const u32 rw_group, const u32 test_bgn,
2797                                       const int use_read_test,
2798                                       const int update_fom)
2799
2800 {
2801         int ret, grp_calibrated;
2802         u32 rank_bgn, sr;
2803
2804         /*
2805          * Altera EMI_RM 2015.05.04 :: Figure 1-28
2806          * Read per-bit deskew can be done on a per shadow register basis.
2807          */
2808         grp_calibrated = 1;
2809         for (rank_bgn = 0, sr = 0;
2810              rank_bgn < seq->rwcfg->mem_number_of_ranks;
2811              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
2812                 ret = rw_mgr_mem_calibrate_vfifo_center(seq, rank_bgn, rw_group,
2813                                                         test_bgn,
2814                                                         use_read_test,
2815                                                         update_fom);
2816                 if (!ret)
2817                         continue;
2818
2819                 grp_calibrated = 0;
2820         }
2821
2822         if (!grp_calibrated)
2823                 return -EIO;
2824
2825         return 0;
2826 }
2827
2828 /**
2829  * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
2830  * @rw_group:           Read/Write Group
2831  * @test_bgn:           Rank at which the test begins
2832  *
2833  * Stage 1: Calibrate the read valid prediction FIFO.
2834  *
2835  * This function implements UniPHY calibration Stage 1, as explained in
2836  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2837  *
2838  * - Read valid prediction consists of finding:
2839  *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
2840  *   - DQS input phase and DQS input delay (DQ/DQS Centering)
2841  * - We also do a per-bit deskew on the DQ lines.
2842  */
2843 static int rw_mgr_mem_calibrate_vfifo(struct socfpga_sdrseq *seq,
2844                                       const u32 rw_group, const u32 test_bgn)
2845 {
2846         u32 p, d;
2847         u32 dtaps_per_ptap;
2848         u32 failed_substage;
2849
2850         int ret;
2851
2852         debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2853
2854         /* Update info for sims */
2855         reg_file_set_group(rw_group);
2856         reg_file_set_stage(CAL_STAGE_VFIFO);
2857         reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
2858
2859         failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
2860
2861         /* USER Determine number of delay taps for each phase tap. */
2862         dtaps_per_ptap = DIV_ROUND_UP(seq->iocfg->delay_per_opa_tap,
2863                                       seq->iocfg->delay_per_dqs_en_dchain_tap)
2864                                       - 1;
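        /*
         * E.g. with delay_per_opa_tap = 2500 ps and
         * delay_per_dqs_en_dchain_tap = 500 ps (illustrative values),
         * dtaps_per_ptap = DIV_ROUND_UP(2500, 500) - 1 = 4 and the
         * loop below sweeps d = 0, 2, 4.
         */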
2865
2866         for (d = 0; d <= dtaps_per_ptap; d += 2) {
2867                 /*
2868                  * In RLDRAMX we may be messing with the delay of pins in
2869                  * the same write group but outside of the current read
2870                  * group; that is OK because we have not calibrated the
2871                  * output side yet.
2872                  */
2873                 if (d > 0) {
2874                         scc_mgr_apply_group_all_out_delay_add_all_ranks(seq,
2875                                                                         rw_group,
2876                                                                         d);
2877                 }
2878
2879                 for (p = 0; p <= seq->iocfg->dqdqs_out_phase_max; p++) {
2880                         /* 1) Guaranteed Write */
2881                         ret = rw_mgr_mem_calibrate_guaranteed_write(seq,
2882                                                                     rw_group,
2883                                                                     p);
2884                         if (ret)
2885                                 break;
2886
2887                         /* 2) DQS Enable Calibration */
2888                         ret = rw_mgr_mem_calibrate_dqs_enable_calibration(seq,
2889                                                                           rw_group,
2890                                                                           test_bgn);
2891                         if (ret) {
2892                                 failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
2893                                 continue;
2894                         }
2895
2896                         /* 3) Centering DQ/DQS */
2897                         /*
2898                          * If doing read-after-write calibration, do not
2899                          * update the FOM now; it is updated then.
2900                          */
2901                         ret = rw_mgr_mem_calibrate_dq_dqs_centering(seq,
2902                                                                     rw_group,
2903                                                                     test_bgn,
2904                                                                     1, 0);
2905                         if (ret) {
2906                                 failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
2907                                 continue;
2908                         }
2909
2910                         /* All done. */
2911                         goto cal_done_ok;
2912                 }
2913         }
2914
2915         /* Calibration Stage 1 failed. */
2916         set_failing_group_stage(seq, rw_group, CAL_STAGE_VFIFO,
2917                                 failed_substage);
2918         return 0;
2919
2920         /* Calibration Stage 1 completed OK. */
2921 cal_done_ok:
2922         /*
2923          * Reset the delay chains back to zero if they have moved > 1
2924          * (check for > 1 because the loop increments d even when the
2925          * first iteration passes).
2926          */
2927         if (d > 2)
2928                 scc_mgr_zero_group(seq, rw_group, 1);
2929
2930         return 1;
2931 }
2932
2933 /**
2934  * rw_mgr_mem_calibrate_vfifo_end() - DQ/DQS Centering.
2935  * @rw_group:           Read/Write Group
2936  * @test_bgn:           Rank at which the test begins
2937  *
2938  * Stage 3: DQ/DQS Centering.
2939  *
2940  * This function implements UniPHY calibration Stage 3, as explained in
2941  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2942  */
2943 static int rw_mgr_mem_calibrate_vfifo_end(struct socfpga_sdrseq *seq,
2944                                           const u32 rw_group,
2945                                           const u32 test_bgn)
2946 {
2947         int ret;
2948
2949         debug("%s:%d %u %u\n", __func__, __LINE__, rw_group, test_bgn);
2950
2951         /* Update info for sims. */
2952         reg_file_set_group(rw_group);
2953         reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
2954         reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
2955
2956         ret = rw_mgr_mem_calibrate_dq_dqs_centering(seq, rw_group, test_bgn, 0,
2957                                                     1);
2958         if (ret)
2959                 set_failing_group_stage(seq, rw_group,
2960                                         CAL_STAGE_VFIFO_AFTER_WRITES,
2961                                         CAL_SUBSTAGE_VFIFO_CENTER);
2962         return ret;
2963 }
2964
2965 /**
2966  * rw_mgr_mem_calibrate_lfifo() - Minimize latency
2967  *
2968  * Stage 4: Minimize latency.
2969  *
2970  * This function implements UniPHY calibration Stage 4, as explained in
2971  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
2972  * Calibrate LFIFO to find smallest read latency.
2973  */
2974 static u32 rw_mgr_mem_calibrate_lfifo(struct socfpga_sdrseq *seq)
2975 {
2976         int found_one = 0;
2977
2978         debug("%s:%d\n", __func__, __LINE__);
2979
2980         /* Update info for sims. */
2981         reg_file_set_stage(CAL_STAGE_LFIFO);
2982         reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
2983
2984         /* Load up the patterns used by read calibration for all ranks */
2985         rw_mgr_mem_calibrate_read_load_patterns(seq, 0, 1);
2986
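        /*
         * Walk the read latency down one step at a time until the read
         * test fails. E.g. (illustrative): starting at read_lat = 16
         * and first failing at 9 leaves curr_read_lat = 9, which the
         * fudge factor below turns into a final read_lat of 11.
         */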
2987         do {
2988                 writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
2989                 debug_cond(DLEVEL >= 2, "%s:%d lfifo: read_lat=%u\n",
2990                            __func__, __LINE__, seq->gbl.curr_read_lat);
2991
2992                 if (!rw_mgr_mem_calibrate_read_test_all_ranks(seq, 0,
2993                                                               NUM_READ_TESTS,
2994                                                               PASS_ALL_BITS, 1))
2995                         break;
2996
2997                 found_one = 1;
2998                 /*
2999                  * Reduce read latency and see if things are
3000                  * working correctly.
3001                  */
3002                 seq->gbl.curr_read_lat--;
3003         } while (seq->gbl.curr_read_lat > 0);
3004
3005         /* Reset the fifos to get pointers to known state. */
3006         writel(0, &phy_mgr_cmd->fifo_reset);
3007
3008         if (found_one) {
3009                 /* Add a fudge factor to the read latency that was determined */
3010                 seq->gbl.curr_read_lat += 2;
3011                 writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
3012                 debug_cond(DLEVEL >= 2,
3013                            "%s:%d lfifo: success: using read_lat=%u\n",
3014                            __func__, __LINE__, seq->gbl.curr_read_lat);
3015         } else {
3016                 set_failing_group_stage(seq, 0xff, CAL_STAGE_LFIFO,
3017                                         CAL_SUBSTAGE_READ_LATENCY);
3018
3019                 debug_cond(DLEVEL >= 2,
3020                            "%s:%d lfifo: failed at initial read_lat=%u\n",
3021                            __func__, __LINE__, seq->gbl.curr_read_lat);
3022         }
3023
3024         return found_one;
3025 }
3026
3027 /**
3028  * search_window() - Search for the window (or part of it) with DM/DQS shift
3029  * @search_dm:          If 1, search for the DM shift, if 0, search for DQS
3030  *                      shift
3031  * @rank_bgn:           Rank number
3032  * @write_group:        Write Group
3033  * @bgn_curr:           Current window begin
3034  * @end_curr:           Current window end
3035  * @bgn_best:           Current best window begin
3036  * @end_best:           Current best window end
3037  * @win_best:           Size of the best window
3038  * @new_dqs:            New DQS value (only applicable if search_dm = 0).
3039  *
3040  * Search for the window (or part of it) with DM/DQS shift.
3041  */
3042 static void search_window(struct socfpga_sdrseq *seq,
3043                           const int search_dm, const u32 rank_bgn,
3044                           const u32 write_group, int *bgn_curr, int *end_curr,
3045                           int *bgn_best, int *end_best, int *win_best,
3046                           int new_dqs)
3047 {
3048         u32 bit_chk;
3049         const int max = seq->iocfg->io_out1_delay_max - new_dqs;
3050         int d, di;
3051
3052         /* Search for the window (or part of it) with DM/DQS shift. */
3053         for (di = max; di >= 0; di -= DELTA_D) {
3054                 if (search_dm) {
3055                         d = di;
3056                         scc_mgr_apply_group_dm_out1_delay(seq, d);
3057                 } else {
3058                         /* For DQS, we go from 0...max */
3059                         d = max - di;
3060                         /*
3061                          * Note: This only shifts DQS, so we may be limiting
3062                          *       ourselves to the width of DQ unnecessarily.
3063                          */
3064                         scc_mgr_apply_group_dqs_io_and_oct_out1(seq,
3065                                                                 write_group,
3066                                                                 d + new_dqs);
3067                 }
3068
3069                 writel(0, &sdr_scc_mgr->update);
3070
3071                 if (rw_mgr_mem_calibrate_write_test(seq, rank_bgn, write_group,
3072                                                     1, PASS_ALL_BITS, &bit_chk,
3073                                                     0)) {
3074                         /* Set current end of the window. */
3075                         *end_curr = search_dm ? -d : d;
3076
3077                         /*
3078                          * If a starting edge of our window has not been
3079                          * seen yet, this is our current window start.
3080                          */
3081                         if (*bgn_curr == seq->iocfg->io_out1_delay_max + 1)
3082                                 *bgn_curr = search_dm ? -d : d;
3083
3084                         /*
3085                          * If the current window is bigger than the best
3086                          * seen so far, make it the new best.
3087                          */
3088                         if ((*end_curr - *bgn_curr + 1) > *win_best) {
3089                                 *win_best = *end_curr - *bgn_curr + 1;
3090                                 *bgn_best = *bgn_curr;
3091                                 *end_best = *end_curr;
3092                         }
3093                 } else {
3094                         /* We just saw a failing test. Reset temp edge. */
3095                         *bgn_curr = seq->iocfg->io_out1_delay_max + 1;
3096                         *end_curr = seq->iocfg->io_out1_delay_max + 1;
3097
3098                         /* Early exit is only applicable to DQS. */
3099                         if (search_dm)
3100                                 continue;
3101
3102                         /*
3103                          * Early exit optimization: if the remaining delay
3104                          * chain space is less than already seen largest
3105                          * window we can exit.
3106                          */
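                        /*
                         * E.g. (illustrative): with win_best = 20,
                         * io_out1_delay_max = 31, new_dqs = 4 and a
                         * failure at d = 10, at most 31 - 4 - 10 = 17
                         * passing taps remain, so a 20-tap window can
                         * no longer be beaten and the sweep stops.
                         */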
3107                         if (*win_best - 1 > seq->iocfg->io_out1_delay_max
3108                                 - new_dqs - d)
3109                                 break;
3110                 }
3111         }
3112 }
3113
3114 /**
3115  * rw_mgr_mem_calibrate_writes_center() - Center all windows
3116  * @rank_bgn:           Rank number
3117  * @write_group:        Write group
3118  * @test_bgn:           Rank at which the test begins
3119  *
3120  * Center all windows. Do per-bit-deskew to possibly increase size of
3121  * certain windows.
3122  */
3123 static int
3124 rw_mgr_mem_calibrate_writes_center(struct socfpga_sdrseq *seq,
3125                                    const u32 rank_bgn, const u32 write_group,
3126                                    const u32 test_bgn)
3127 {
3128         int i;
3129         u32 sticky_bit_chk;
3130         u32 min_index;
3131         int left_edge[seq->rwcfg->mem_dq_per_write_dqs];
3132         int right_edge[seq->rwcfg->mem_dq_per_write_dqs];
3133         int mid;
3134         int mid_min, orig_mid_min;
3135         int new_dqs, start_dqs;
3136         int dq_margin, dqs_margin, dm_margin;
3137         int bgn_curr = seq->iocfg->io_out1_delay_max + 1;
3138         int end_curr = seq->iocfg->io_out1_delay_max + 1;
3139         int bgn_best = seq->iocfg->io_out1_delay_max + 1;
3140         int end_best = seq->iocfg->io_out1_delay_max + 1;
3141         int win_best = 0;
3142
3143         int ret;
3144
3145         debug("%s:%d %u %u\n", __func__, __LINE__, write_group, test_bgn);
3146
3147         dm_margin = 0;
3148
3149         start_dqs = readl((SDR_PHYGRP_SCCGRP_ADDRESS |
3150                           SCC_MGR_IO_OUT1_DELAY_OFFSET) +
3151                           (seq->rwcfg->mem_dq_per_write_dqs << 2));
3152
3153         /* Per-bit deskew. */
3154
3155         /*
3156          * Set the left and right edge of each bit to an illegal value.
3157          * Use (seq->iocfg->io_out1_delay_max + 1) as an illegal value.
3158          */
3159         sticky_bit_chk = 0;
3160         for (i = 0; i < seq->rwcfg->mem_dq_per_write_dqs; i++) {
3161                 left_edge[i]  = seq->iocfg->io_out1_delay_max + 1;
3162                 right_edge[i] = seq->iocfg->io_out1_delay_max + 1;
3163         }
3164
3165         /* Search for the left edge of the window for each bit. */
3166         search_left_edge(seq, 1, rank_bgn, write_group, 0, test_bgn,
3167                          &sticky_bit_chk,
3168                          left_edge, right_edge, 0);
3169
3170         /* Search for the right edge of the window for each bit. */
3171         ret = search_right_edge(seq, 1, rank_bgn, write_group, 0,
3172                                 start_dqs, 0,
3173                                 &sticky_bit_chk,
3174                                 left_edge, right_edge, 0);
3175         if (ret) {
3176                 set_failing_group_stage(seq, test_bgn + ret - 1,
3177                                         CAL_STAGE_WRITES,
3178                                         CAL_SUBSTAGE_WRITES_CENTER);
3179                 return -EINVAL;
3180         }
3181
3182         min_index = get_window_mid_index(seq, 1, left_edge, right_edge,
3183                                          &mid_min);
3184
3185         /* Determine the amount we can change DQS (which is -mid_min). */
3186         orig_mid_min = mid_min;
3187         new_dqs = start_dqs;
3188         mid_min = 0;
3189         debug_cond(DLEVEL >= 1,
3190                    "%s:%d write_center: start_dqs=%d new_dqs=%d mid_min=%d\n",
3191                    __func__, __LINE__, start_dqs, new_dqs, mid_min);
3192
3193         /* Add delay to bring centre of all DQ windows to the same "level". */
3194         center_dq_windows(seq, 1, left_edge, right_edge, mid_min, orig_mid_min,
3195                           min_index, 0, &dq_margin, &dqs_margin);
3196
3197         /* Move DQS */
3198         scc_mgr_apply_group_dqs_io_and_oct_out1(seq, write_group, new_dqs);
3199         writel(0, &sdr_scc_mgr->update);
3200
3201         /* Centre DM */
3202         debug_cond(DLEVEL >= 2, "%s:%d write_center: DM\n", __func__, __LINE__);
3203
3204         /*
3205          * Set the left and right edge of each bit to an illegal value.
3206          * Use (seq->iocfg->io_out1_delay_max + 1) as an illegal value.
3207          */
3208         left_edge[0]  = seq->iocfg->io_out1_delay_max + 1;
3209         right_edge[0] = seq->iocfg->io_out1_delay_max + 1;
3210
3211         /* Search for the window (or part of it) with DM shift. */
3212         search_window(seq, 1, rank_bgn, write_group, &bgn_curr, &end_curr,
3213                       &bgn_best, &end_best, &win_best, 0);
3214
3215         /* Reset DM delay chains to 0. */
3216         scc_mgr_apply_group_dm_out1_delay(seq, 0);
3217
3218         /*
3219          * Check to see if the current window nudges up against 0 delay.
3220          * If so, we need to continue the search by shifting DQS;
3221          * otherwise the DQS search begins as a new search.
3222          */
3223         if (end_curr != 0) {
3224                 bgn_curr = seq->iocfg->io_out1_delay_max + 1;
3225                 end_curr = seq->iocfg->io_out1_delay_max + 1;
3226         }
3227
3228         /* Search for the window (or part of it) with DQS shifts. */
3229         search_window(seq, 0, rank_bgn, write_group, &bgn_curr, &end_curr,
3230                       &bgn_best, &end_best, &win_best, new_dqs);
3231
3232         /* Assign left and right edge for cal and reporting. */
3233         left_edge[0] = -1 * bgn_best;
3234         right_edge[0] = end_best;
3235
3236         debug_cond(DLEVEL >= 2, "%s:%d dm_calib: left=%d right=%d\n",
3237                    __func__, __LINE__, left_edge[0], right_edge[0]);
3238
3239         /* Move DQS (back to orig). */
3240         scc_mgr_apply_group_dqs_io_and_oct_out1(seq, write_group, new_dqs);
3241
3242         /* Move DM */
3243
3244         /* Find middle of window for the DM bit. */
3245         mid = (left_edge[0] - right_edge[0]) / 2;
3246
3247         /* Only move right, since we are not moving DQS/DQ. */
3248         if (mid < 0)
3249                 mid = 0;
3250
3251         /* dm_margin should fail if we never find a window. */
3252         if (win_best == 0)
3253                 dm_margin = -1;
3254         else
3255                 dm_margin = left_edge[0] - mid;
3256
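        /*
         * Illustrative example: with bgn_best = -10 and end_best = 4,
         * left_edge[0] = 10 and right_edge[0] = 4, giving
         * mid = (10 - 4) / 2 = 3 and dm_margin = 10 - 3 = 7.
         */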
3257         scc_mgr_apply_group_dm_out1_delay(seq, mid);
3258         writel(0, &sdr_scc_mgr->update);
3259
3260         debug_cond(DLEVEL >= 2,
3261                    "%s:%d dm_calib: left=%d right=%d mid=%d dm_margin=%d\n",
3262                    __func__, __LINE__, left_edge[0], right_edge[0],
3263                    mid, dm_margin);
3264         /* Export values. */
3265         seq->gbl.fom_out += dq_margin + dqs_margin;
3266
3267         debug_cond(DLEVEL >= 2,
3268                    "%s:%d write_center: dq_margin=%d dqs_margin=%d dm_margin=%d\n",
3269                    __func__, __LINE__, dq_margin, dqs_margin, dm_margin);
3270
3271         /*
3272          * Do not remove this line as it makes sure all of our
3273          * decisions have been applied.
3274          */
3275         writel(0, &sdr_scc_mgr->update);
3276
3277         if ((dq_margin < 0) || (dqs_margin < 0) || (dm_margin < 0))
3278                 return -EINVAL;
3279
3280         return 0;
3281 }
3282
3283 /**
3284  * rw_mgr_mem_calibrate_writes() - Write Calibration Part One
3285  * @rank_bgn:           Rank number
3286  * @group:              Read/Write Group
3287  * @test_bgn:           Rank at which the test begins
3288  *
3289  * Stage 2: Write Calibration Part One.
3290  *
3291  * This function implements UniPHY calibration Stage 2, as explained in
3292  * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
3293  */
3294 static int rw_mgr_mem_calibrate_writes(struct socfpga_sdrseq *seq,
3295                                        const u32 rank_bgn, const u32 group,
3296                                        const u32 test_bgn)
3297 {
3298         int ret;
3299
3300         /* Update info for sims */
3301         debug("%s:%d %u %u\n", __func__, __LINE__, group, test_bgn);
3302
3303         reg_file_set_group(group);
3304         reg_file_set_stage(CAL_STAGE_WRITES);
3305         reg_file_set_sub_stage(CAL_SUBSTAGE_WRITES_CENTER);
3306
3307         ret = rw_mgr_mem_calibrate_writes_center(seq, rank_bgn, group,
3308                                                  test_bgn);
3309         if (ret)
3310                 set_failing_group_stage(seq, group, CAL_STAGE_WRITES,
3311                                         CAL_SUBSTAGE_WRITES_CENTER);
3312
3313         return ret;
3314 }
3315
3316 /**
3317  * mem_precharge_and_activate() - Precharge all banks and activate
3318  *
3319  * Precharge all banks and activate row 0 in bank "000..." and bank "111...".
3320  */
3321 static void mem_precharge_and_activate(struct socfpga_sdrseq *seq)
3322 {
3323         int r;
3324
3325         for (r = 0; r < seq->rwcfg->mem_number_of_ranks; r++) {
3326                 /* Set rank. */
3327                 set_rank_and_odt_mask(seq, r, RW_MGR_ODT_MODE_OFF);
3328
3329                 /* Precharge all banks. */
3330                 writel(seq->rwcfg->precharge_all, SDR_PHYGRP_RWMGRGRP_ADDRESS |
3331                                              RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3332
3333                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr0);
3334                 writel(seq->rwcfg->activate_0_and_1_wait1,
3335                        &sdr_rw_load_jump_mgr_regs->load_jump_add0);
3336
3337                 writel(0x0F, &sdr_rw_load_mgr_regs->load_cntr1);
3338                 writel(seq->rwcfg->activate_0_and_1_wait2,
3339                        &sdr_rw_load_jump_mgr_regs->load_jump_add1);
3340
3341                 /* Activate rows. */
3342                 writel(seq->rwcfg->activate_0_and_1,
3343                        SDR_PHYGRP_RWMGRGRP_ADDRESS |
3344                        RW_MGR_RUN_SINGLE_GROUP_OFFSET);
3345         }
3346 }
3347
3348 /**
3349  * mem_init_latency() - Configure memory RLAT and WLAT settings
3350  *
3351  * Configure memory RLAT and WLAT parameters.
3352  */
3353 static void mem_init_latency(struct socfpga_sdrseq *seq)
3354 {
3355         /*
3356          * For AV/CV, LFIFO is hardened and always runs at full rate
3357          * so max latency in AFI clocks, used here, is correspondingly
3358          * smaller.
3359          */
3360         const u32 max_latency = (1 << seq->misccfg->max_latency_count_width)
3361                 - 1;
3362         u32 rlat, wlat;
3363
3364         debug("%s:%d\n", __func__, __LINE__);
3365
3366         /*
3367          * Read in write latency.
3368          * WL for Hard PHY does not include additive latency.
3369          */
3370         wlat = readl(&data_mgr->t_wl_add);
3371         wlat += readl(&data_mgr->mem_t_add);
3372
3373         seq->gbl.rw_wl_nop_cycles = wlat - 1;
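        /*
         * E.g. with t_wl_add = 5 and mem_t_add = 2 (illustrative
         * register values), wlat = 7 AFI clocks and rw_wl_nop_cycles =
         * 6, the idle-cycle count used by the sequences that honour
         * the write latency.
         */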
3374
3375         /* Read in read latency. */
3376         rlat = readl(&data_mgr->t_rl_add);
3377
3378         /* Set a pretty high read latency initially. */
3379         seq->gbl.curr_read_lat = rlat + 16;
3380         if (seq->gbl.curr_read_lat > max_latency)
3381                 seq->gbl.curr_read_lat = max_latency;
3382
3383         writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
3384
3385         /* Advertise write latency. */
3386         writel(wlat, &phy_mgr_cfg->afi_wlat);
3387 }
3388
3389 /**
3390  * mem_skip_calibrate() - Set VFIFO and LFIFO to instant-on settings
3391  *
3392  * Set VFIFO and LFIFO to instant-on settings in skip calibration mode.
3393  */
3394 static void mem_skip_calibrate(struct socfpga_sdrseq *seq)
3395 {
3396         u32 vfifo_offset;
3397         u32 i, j, r;
3398
3399         debug("%s:%d\n", __func__, __LINE__);
3400         /* Need to update every shadow register set used by the interface */
3401         for (r = 0; r < seq->rwcfg->mem_number_of_ranks;
3402              r += NUM_RANKS_PER_SHADOW_REG) {
3403                 /*
3404                  * Set output phase alignment settings appropriate for
3405                  * skip calibration.
3406                  */
3407                 for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
3408                         scc_mgr_set_dqs_en_phase(i, 0);
3409                         if (seq->iocfg->dll_chain_length == 6)
3410                                 scc_mgr_set_dqdqs_output_phase(i, 6);
3411                         else
3412                                 scc_mgr_set_dqdqs_output_phase(i, 7);
3413                         /*
3414                          * Case:33398
3415                          *
3416                          * Write data arrives to the I/O two cycles before write
3417                          * latency is reached (720 deg).
3418                          *   -> due to bit-slip in a/c bus
3419                          *   -> to allow board skew where dqs is longer than ck
3420                          *      -> how often can this happen!?
3421                          *      -> can claim back some ptaps for high freq
3422                          *       support if we can relax this, but i digress...
3423                          *
3424                          * The write_clk leads mem_ck by 90 deg
3425                          * The minimum ptap of the OPA is 180 deg
3426                          * Each ptap has (360 / IO_DLL_CHAIN_LENGTH) deg of delay
3427                          * The write_clk is always delayed by 2 ptaps
3428                          *
3429                          * Hence, to make DQS aligned to CK, we need to delay
3430                          * DQS by (in degrees):
3431                          *    720 - 90 - 180 -
3432                          *      2 * (360 / seq->iocfg->dll_chain_length)
3433                          *
3434                          * Dividing the above by
3435                          * (360 / seq->iocfg->dll_chain_length)
3436                          * gives us the number of ptaps, which simplifies to:
3437                          *
3438                          *    (1.25 * seq->iocfg->dll_chain_length - 2)
3439                          */
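                        /*
                         * E.g. for dll_chain_length = 6, the integer
                         * arithmetic below gives (125 * 6) / 100 - 2 =
                         * 7 - 2 = 5 ptaps; for a chain length of 8 it
                         * gives (125 * 8) / 100 - 2 = 10 - 2 = 8.
                         */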
3440                         scc_mgr_set_dqdqs_output_phase(i,
3441                                        ((125 * seq->iocfg->dll_chain_length)
3442                                        / 100) - 2);
3443                 }
3444                 writel(0xff, &sdr_scc_mgr->dqs_ena);
3445                 writel(0xff, &sdr_scc_mgr->dqs_io_ena);
3446
3447                 for (i = 0; i < seq->rwcfg->mem_if_write_dqs_width; i++) {
3448                         writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3449                                   SCC_MGR_GROUP_COUNTER_OFFSET);
3450                 }
3451                 writel(0xff, &sdr_scc_mgr->dq_ena);
3452                 writel(0xff, &sdr_scc_mgr->dm_ena);
3453                 writel(0, &sdr_scc_mgr->update);
3454         }
3455
3456         /* Compensate for simulation model behaviour */
3457         for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
3458                 scc_mgr_set_dqs_bus_in_delay(i, 10);
3459                 scc_mgr_load_dqs(i);
3460         }
3461         writel(0, &sdr_scc_mgr->update);
3462
3463         /*
3464          * Arria V has hard FIFOs that can only be initialized by
3465          * incrementing in the sequencer.
3466          */
3467         vfifo_offset = seq->misccfg->calib_vfifo_offset;
3468         for (j = 0; j < vfifo_offset; j++)
3469                 writel(0xff, &phy_mgr_cmd->inc_vfifo_hard_phy);
3470         writel(0, &phy_mgr_cmd->fifo_reset);
3471
3472         /*
3473          * For Arria V and Cyclone V with hard LFIFO, we get the skip-cal
3474          * setting from generation-time constant.
3475          */
3476         seq->gbl.curr_read_lat = seq->misccfg->calib_lfifo_offset;
3477         writel(seq->gbl.curr_read_lat, &phy_mgr_cfg->phy_rlat);
3478 }
3479
3480 /**
3481  * mem_calibrate() - Memory calibration entry point.
3482  *
3483  * Perform memory calibration.
3484  */
3485 static u32 mem_calibrate(struct socfpga_sdrseq *seq)
3486 {
3487         u32 i;
3488         u32 rank_bgn, sr;
3489         u32 write_group, write_test_bgn;
3490         u32 read_group, read_test_bgn;
3491         u32 run_groups, current_run;
3492         u32 failing_groups = 0;
3493         u32 group_failed = 0;
3494
3495         const u32 rwdqs_ratio = seq->rwcfg->mem_if_read_dqs_width /
3496                                 seq->rwcfg->mem_if_write_dqs_width;
3497
3498         debug("%s:%d\n", __func__, __LINE__);
3499
3500         /* Initialize the data settings */
3501         seq->gbl.error_substage = CAL_SUBSTAGE_NIL;
3502         seq->gbl.error_stage = CAL_STAGE_NIL;
3503         seq->gbl.error_group = 0xff;
3504         seq->gbl.fom_in = 0;
3505         seq->gbl.fom_out = 0;
3506
3507         /* Initialize WLAT and RLAT. */
3508         mem_init_latency(seq);
3509
3510         /* Initialize bit slips. */
3511         mem_precharge_and_activate(seq);
3512
3513         for (i = 0; i < seq->rwcfg->mem_if_read_dqs_width; i++) {
3514                 writel(i, SDR_PHYGRP_SCCGRP_ADDRESS |
3515                           SCC_MGR_GROUP_COUNTER_OFFSET);
3516                 /* Only needed once to set all groups, pins, DQ, DQS, DM. */
3517                 if (i == 0)
3518                         scc_mgr_set_hhp_extras();
3519
3520                 scc_set_bypass_mode(i);
3521         }
3522
3523         /* Calibration is skipped. */
3524         if ((seq->dyn_calib_steps & CALIB_SKIP_ALL) == CALIB_SKIP_ALL) {
3525                 /*
3526                  * Set VFIFO and LFIFO to instant-on settings in skip
3527                  * calibration mode.
3528                  */
3529                 mem_skip_calibrate(seq);
3530
3531                 /*
3532                  * Do not remove this line as it makes sure all of our
3533                  * decisions have been applied.
3534                  */
3535                 writel(0, &sdr_scc_mgr->update);
3536                 return 1;
3537         }
3538
3539         /* Calibration is not skipped. */
3540         for (i = 0; i < NUM_CALIB_REPEAT; i++) {
3541                 /*
3542                  * Zero all delay chain/phase settings for all
3543                  * groups and all shadow register sets.
3544                  */
3545                 scc_mgr_zero_all(seq);
3546
3547                 run_groups = ~0;
3548
3549                 for (write_group = 0, write_test_bgn = 0; write_group
3550                         < seq->rwcfg->mem_if_write_dqs_width; write_group++,
3551                         write_test_bgn += seq->rwcfg->mem_dq_per_write_dqs) {
3552                         /* Initialize the group failure */
3553                         group_failed = 0;
3554
3555                         current_run = run_groups & ((1 <<
3556                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP) - 1);
3557                         run_groups = run_groups >>
3558                                 RW_MGR_NUM_DQS_PER_WRITE_GROUP;
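                        /*
                         * E.g. with RW_MGR_NUM_DQS_PER_WRITE_GROUP = 1,
                         * the mask is 0x1 and run_groups (initially ~0)
                         * is consumed one bit per write group, so every
                         * group is run.
                         */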
3559
3560                         if (current_run == 0)
3561                                 continue;
3562
3563                         writel(write_group, SDR_PHYGRP_SCCGRP_ADDRESS |
3564                                             SCC_MGR_GROUP_COUNTER_OFFSET);
3565                         scc_mgr_zero_group(seq, write_group, 0);
3566
3567                         for (read_group = write_group * rwdqs_ratio,
3568                              read_test_bgn = 0;
3569                              read_group < (write_group + 1) * rwdqs_ratio;
3570                              read_group++,
3571                              read_test_bgn += seq->rwcfg->mem_dq_per_read_dqs) {
3572                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_VFIFO)
3573                                         continue;
3574
3575                                 /* Calibrate the VFIFO */
3576                                 if (rw_mgr_mem_calibrate_vfifo(seq, read_group,
3577                                                                read_test_bgn))
3578                                         continue;
3579
3580                                 if (!(seq->gbl.phy_debug_mode_flags &
3581                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3582                                         return 0;
3583
3584                                 /* The group failed, we're done. */
3585                                 goto grp_failed;
3586                         }
3587
3588                         /* Calibrate the output side */
3589                         for (rank_bgn = 0, sr = 0;
3590                              rank_bgn < seq->rwcfg->mem_number_of_ranks;
3591                              rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
3592                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3593                                         continue;
3594
3595                                 /* Not needed in quick mode! */
3596                                 if (STATIC_CALIB_STEPS &
3597                                     CALIB_SKIP_DELAY_SWEEPS)
3598                                         continue;
3599
3600                                 /* Calibrate WRITEs */
3601                                 if (!rw_mgr_mem_calibrate_writes(seq, rank_bgn,
3602                                                                  write_group,
3603                                                                  write_test_bgn))
3604                                         continue;
3605
3606                                 group_failed = 1;
3607                                 if (!(seq->gbl.phy_debug_mode_flags &
3608                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3609                                         return 0;
3610                         }
3611
3612                         /* Some group failed, we're done. */
3613                         if (group_failed)
3614                                 goto grp_failed;
3615
3616                         for (read_group = write_group * rwdqs_ratio,
3617                              read_test_bgn = 0;
3618                              read_group < (write_group + 1) * rwdqs_ratio;
3619                              read_group++,
3620                              read_test_bgn += seq->rwcfg->mem_dq_per_read_dqs) {
3621                                 if (STATIC_CALIB_STEPS & CALIB_SKIP_WRITES)
3622                                         continue;
3623
3624                                 if (!rw_mgr_mem_calibrate_vfifo_end(seq,
3625                                                                     read_group,
3626                                                                     read_test_bgn))
3627                                         continue;
3628
3629                                 if (!(seq->gbl.phy_debug_mode_flags &
3630                                       PHY_DEBUG_SWEEP_ALL_GROUPS))
3631                                         return 0;
3632
3633                                 /* The group failed, we're done. */
3634                                 goto grp_failed;
3635                         }
3636
3637                         /* No group failed, continue as usual. */
3638                         continue;
3639
3640 grp_failed:             /* A group failed, increment the counter. */
3641                         failing_groups++;
3642                 }
3643
3644                 /*
3645                  * USER If there are any failing groups then report
3646                  * the failure.
3647                  */
3648                 if (failing_groups != 0)
3649                         return 0;
3650
3651                 if (STATIC_CALIB_STEPS & CALIB_SKIP_LFIFO)
3652                         continue;
3653
3654                 /* Calibrate the LFIFO */
3655                 if (!rw_mgr_mem_calibrate_lfifo(seq))
3656                         return 0;
3657         }
3658
3659         /*
3660          * Do not remove this line as it makes sure all of our decisions
3661          * have been applied.
3662          */
3663         writel(0, &sdr_scc_mgr->update);
3664         return 1;
3665 }
3666
3667 /**
3668  * run_mem_calibrate() - Perform memory calibration
3669  *
3670  * This function triggers the entire memory calibration procedure.
3671  */
3672 static int run_mem_calibrate(struct socfpga_sdrseq *seq)
3673 {
3674         int pass;
3675         u32 ctrl_cfg;
3676
3677         debug("%s:%d\n", __func__, __LINE__);
3678
3679         /* Reset pass/fail status shown on afi_cal_success/fail */
3680         writel(PHY_MGR_CAL_RESET, &phy_mgr_cfg->cal_status);
3681
3682         /* Stop tracking manager. */
3683         ctrl_cfg = readl(&sdr_ctrl->ctrl_cfg);
3684         writel(ctrl_cfg & ~SDR_CTRLGRP_CTRLCFG_DQSTRKEN_MASK,
3685                &sdr_ctrl->ctrl_cfg);
3686
3687         phy_mgr_initialize(seq);
3688         rw_mgr_mem_initialize(seq);
3689
3690         /* Perform the actual memory calibration. */
3691         pass = mem_calibrate(seq);
3692
3693         mem_precharge_and_activate(seq);
3694         writel(0, &phy_mgr_cmd->fifo_reset);
3695
3696         /* Handoff. */
3697         rw_mgr_mem_handoff(seq);
3698         /*
3699          * In Hard PHY this is a 2-bit control:
3700          * 0: AFI Mux Select
3701          * 1: DDIO Mux Select
3702          */
3703         writel(0x2, &phy_mgr_cfg->mux_sel);
3704
3705         /* Start tracking manager. */
3706         writel(ctrl_cfg, &sdr_ctrl->ctrl_cfg);
3707
3708         return pass;
3709 }
3710
3711 /**
3712  * debug_mem_calibrate() - Report result of memory calibration
3713  * @pass:       Value indicating whether calibration passed or failed
3714  *
3715  * This function reports the results of the memory calibration
3716  * and writes debug information into the register file.
3717  */
3718 static void debug_mem_calibrate(struct socfpga_sdrseq *seq, int pass)
3719 {
3720         u32 debug_info;
3721
3722         if (pass) {
3723                 debug("%s: CALIBRATION PASSED\n", __FILE__);
3724
3725                 seq->gbl.fom_in /= 2;
3726                 seq->gbl.fom_out /= 2;
3727
3728                 if (seq->gbl.fom_in > 0xff)
3729                         seq->gbl.fom_in = 0xff;
3730
3731                 if (seq->gbl.fom_out > 0xff)
3732                         seq->gbl.fom_out = 0xff;
3733
3734                 /* Update the FOM in the register file */
3735                 debug_info = seq->gbl.fom_in;
3736                 debug_info |= seq->gbl.fom_out << 8;
3737                 writel(debug_info, &sdr_reg_file->fom);
3738
3739                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3740                 writel(PHY_MGR_CAL_SUCCESS, &phy_mgr_cfg->cal_status);
3741         } else {
3742                 debug("%s: CALIBRATION FAILED\n", __FILE__);
3743
3744                 debug_info = seq->gbl.error_stage;
3745                 debug_info |= seq->gbl.error_substage << 8;
3746                 debug_info |= seq->gbl.error_group << 16;
3747
3748                 writel(debug_info, &sdr_reg_file->failing_stage);
3749                 writel(debug_info, &phy_mgr_cfg->cal_debug_info);
3750                 writel(PHY_MGR_CAL_FAIL, &phy_mgr_cfg->cal_status);
3751
3752                 /* Update the failing group/stage in the register file */
3753                 debug_info = seq->gbl.error_stage;
3754                 debug_info |= seq->gbl.error_substage << 8;
3755                 debug_info |= seq->gbl.error_group << 16;
3756                 writel(debug_info, &sdr_reg_file->failing_stage);
3757         }
3758
3759         debug("%s: Calibration complete\n", __FILE__);
3760 }
3761
3762 /**
3763  * hc_initialize_rom_data() - Initialize ROM data
3764  *
3765  * Initialize ROM data.
3766  */
3767 static void hc_initialize_rom_data(void)
3768 {
3769         unsigned int nelem = 0;
3770         const u32 *rom_init;
3771         u32 i, addr;
3772
3773         socfpga_get_seq_inst_init(&rom_init, &nelem);
3774         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_INST_ROM_WRITE_OFFSET;
3775         for (i = 0; i < nelem; i++)
3776                 writel(rom_init[i], addr + (i << 2));
3777
3778         socfpga_get_seq_ac_init(&rom_init, &nelem);
3779         addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_AC_ROM_WRITE_OFFSET;
3780         for (i = 0; i < nelem; i++)
3781                 writel(rom_init[i], addr + (i << 2));
3782 }
3783
3784 /**
3785  * initialize_reg_file() - Initialize SDR register file
3786  *
3787  * Initialize SDR register file.
3788  */
3789 static void initialize_reg_file(struct socfpga_sdrseq *seq)
3790 {
3791         /* Initialize the register file with the correct data */
3792         writel(seq->misccfg->reg_file_init_seq_signature,
3793                &sdr_reg_file->signature);
3794         writel(0, &sdr_reg_file->debug_data_addr);
3795         writel(0, &sdr_reg_file->cur_stage);
3796         writel(0, &sdr_reg_file->fom);
3797         writel(0, &sdr_reg_file->failing_stage);
3798         writel(0, &sdr_reg_file->debug1);
3799         writel(0, &sdr_reg_file->debug2);
3800 }
3801
3802 /**
3803  * initialize_hps_phy() - Initialize HPS PHY
3804  *
3805  * Initialize HPS PHY.
3806  */
3807 static void initialize_hps_phy(void)
3808 {
3809         u32 reg;
3810         /*
3811          * Tracking also gets configured here because it's in the
3812          * same register.
3813          */
3814         u32 trk_sample_count = 7500;
3815         u32 trk_long_idle_sample_count = (10 << 16) | 100;
3816         /*
3817          * Format is number of outer loops in the 16 MSB, sample
3818          * count in 16 LSB.
3819          */
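        /* I.e. (10 << 16) | 100 == 0x000a0064: 10 outer loops of 100 samples. */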
3820
3821         reg = 0;
3822         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ACDELAYEN_SET(2);
3823         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQDELAYEN_SET(1);
3824         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSDELAYEN_SET(1);
3825         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_DQSLOGICDELAYEN_SET(1);
3826         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_RESETDELAYEN_SET(0);
3827         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_LPDDRDIS_SET(1);
3828         /*
3829          * This field selects the intrinsic latency to RDATA_EN/FULL path.
3830          * 00-bypass, 01- add 5 cycles, 10- add 10 cycles, 11- add 15 cycles.
3831          */
3832         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_ADDLATSEL_SET(0);
3833         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_SET(
3834                 trk_sample_count);
3835         writel(reg, &sdr_ctrl->phy_ctrl0);
3836
3837         reg = 0;
3838         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_SAMPLECOUNT_31_20_SET(
3839                 trk_sample_count >>
3840                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_0_SAMPLECOUNT_19_0_WIDTH);
3841         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_SET(
3842                 trk_long_idle_sample_count);
3843         writel(reg, &sdr_ctrl->phy_ctrl1);
3844
3845         reg = 0;
3846         reg |= SDR_CTRLGRP_PHYCTRL_PHYCTRL_2_LONGIDLESAMPLECOUNT_31_20_SET(
3847                 trk_long_idle_sample_count >>
3848                 SDR_CTRLGRP_PHYCTRL_PHYCTRL_1_LONGIDLESAMPLECOUNT_19_0_WIDTH);
3849         writel(reg, &sdr_ctrl->phy_ctrl2);
3850 }
3851
3852 /**
3853  * initialize_tracking() - Initialize tracking
3854  *
3855  * Initialize the register file with usable initial data.
3856  */
3857 static void initialize_tracking(struct socfpga_sdrseq *seq)
3858 {
3859         /*
3860          * Initialize the register file with the correct data.
3861          * Compute usable version of value in case we skip full
3862          * computation later.
3863          */
3864         writel(DIV_ROUND_UP(seq->iocfg->delay_per_opa_tap,
3865                             seq->iocfg->delay_per_dchain_tap) - 1,
3866                &sdr_reg_file->dtaps_per_ptap);
3867
3868         /* trk_sample_count */
3869         writel(7500, &sdr_reg_file->trk_sample_count);
3870
3871         /* longidle outer loop [15:0] */
3872         writel((10 << 16) | (100 << 0), &sdr_reg_file->trk_longidle);
3873
3874         /*
3875          * longidle sample count [31:24]
3876          * trfc, worst case of 933MHz 4Gb [23:16]
3877          * trcd, worst case [15:8]
3878          * vfifo wait [7:0]
3879          */
3880         writel((243 << 24) | (14 << 16) | (10 << 8) | (4 << 0),
3881                &sdr_reg_file->delays);
3882
3883         /* mux delay */
3884         if (dram_is_ddr(2)) {
3885                 writel(0, &sdr_reg_file->trk_rw_mgr_addr);
3886         } else if (dram_is_ddr(3)) {
3887                 writel((seq->rwcfg->idle << 24) |
3888                        (seq->rwcfg->activate_1 << 16) |
3889                        (seq->rwcfg->sgle_read << 8) |
3890                        (seq->rwcfg->precharge_all << 0),
3891                        &sdr_reg_file->trk_rw_mgr_addr);
3892         }
3893
3894         writel(seq->rwcfg->mem_if_read_dqs_width,
3895                &sdr_reg_file->trk_read_dqs_width);
3896
3897         /* trefi [7:0] */
3898         if (dram_is_ddr(2)) {
3899                 writel(1000 << 0, &sdr_reg_file->trk_rfsh);
3900         } else if (dram_is_ddr(3)) {
3901                 writel((seq->rwcfg->refresh_all << 24) | (1000 << 0),
3902                        &sdr_reg_file->trk_rfsh);
3903         }
3904 }
3905
3906 int sdram_calibration_full(struct socfpga_sdr *sdr)
3907 {
3908         u32 pass;
3909         struct socfpga_sdrseq seq;
3910
3911         /*
3912          * For size reasons, this file uses hard coded addresses.
3913          * Check if we are called with the correct address.
3914          */
3915         if (sdr != (struct socfpga_sdr *)SOCFPGA_SDR_ADDRESS)
3916                 return -ENODEV;
3917
3918         memset(&seq, 0, sizeof(seq));
3919
3920         seq.rwcfg = socfpga_get_sdram_rwmgr_config();
3921         seq.iocfg = socfpga_get_sdram_io_config();
3922         seq.misccfg = socfpga_get_sdram_misc_config();
3923
3924         /* Enable the calibration report by default. */
3925         seq.gbl.phy_debug_mode_flags |= PHY_DEBUG_ENABLE_CAL_RPT;
3926         /*
3927          * Only sweep all groups (regardless of fail state) by default.
3928          * Set the read test enabled by default.
3929          */
3930 #if DISABLE_GUARANTEED_READ
3931         seq.gbl.phy_debug_mode_flags |= PHY_DEBUG_DISABLE_GUARANTEED_READ;
3932 #endif
3933         /* Initialize the register file */
3934         initialize_reg_file(&seq);
3935
3936         /* Initialize any PHY CSR */
3937         initialize_hps_phy();
3938
3939         scc_mgr_initialize();
3940
3941         initialize_tracking(&seq);
3942
3943         debug("%s: Preparing to start memory calibration\n", __FILE__);
3944
3945         debug("%s:%d\n", __func__, __LINE__);
3946         debug_cond(DLEVEL >= 1,
3947                    "DDR3 FULL_RATE ranks=%u cs/dimm=%u dq/dqs=%u,%u vg/dqs=%u,%u ",
3948                    seq.rwcfg->mem_number_of_ranks,
3949                    seq.rwcfg->mem_number_of_cs_per_dimm,
3950                    seq.rwcfg->mem_dq_per_read_dqs,
3951                    seq.rwcfg->mem_dq_per_write_dqs,
3952                    seq.rwcfg->mem_virtual_groups_per_read_dqs,
3953                    seq.rwcfg->mem_virtual_groups_per_write_dqs);
3954         debug_cond(DLEVEL >= 1,
3955                    "dqs=%u,%u dq=%u dm=%u ptap_delay=%u dtap_delay=%u ",
3956                    seq.rwcfg->mem_if_read_dqs_width,
3957                    seq.rwcfg->mem_if_write_dqs_width,
3958                    seq.rwcfg->mem_data_width, seq.rwcfg->mem_data_mask_width,
3959                    seq.iocfg->delay_per_opa_tap,
3960                    seq.iocfg->delay_per_dchain_tap);
3961         debug_cond(DLEVEL >= 1, "dtap_dqsen_delay=%u, dll=%u",
3962                    seq.iocfg->delay_per_dqs_en_dchain_tap,
3963                    seq.iocfg->dll_chain_length);
3964         debug_cond(DLEVEL >= 1,
3965                    "max values: en_p=%u dqdqs_p=%u en_d=%u dqs_in_d=%u ",
3966                    seq.iocfg->dqs_en_phase_max, seq.iocfg->dqdqs_out_phase_max,
3967                    seq.iocfg->dqs_en_delay_max, seq.iocfg->dqs_in_delay_max);
3968         debug_cond(DLEVEL >= 1, "io_in_d=%u io_out1_d=%u io_out2_d=%u ",
3969                    seq.iocfg->io_in_delay_max, seq.iocfg->io_out1_delay_max,
3970                    seq.iocfg->io_out2_delay_max);
3971         debug_cond(DLEVEL >= 1, "dqs_in_reserve=%u dqs_out_reserve=%u\n",
3972                    seq.iocfg->dqs_in_reserve, seq.iocfg->dqs_out_reserve);
3973
3974         hc_initialize_rom_data();
3975
3976         /* update info for sims */
3977         reg_file_set_stage(CAL_STAGE_NIL);
3978         reg_file_set_group(0);
3979
3980         /*
3981          * Load global needed for those actions that require
3982          * some dynamic calibration support.
3983          */
3984         seq.dyn_calib_steps = STATIC_CALIB_STEPS;
3985         /*
3986          * Load global to allow dynamic selection of delay loop settings
3987          * based on calibration mode.
3988          */
3989         if (!(seq.dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS))
3990                 seq.skip_delay_mask = 0xff;
3991         else
3992                 seq.skip_delay_mask = 0x0;
3993
3994         pass = run_mem_calibrate(&seq);
3995         debug_mem_calibrate(&seq, pass);
3996         return pass;
3997 }