2 * Copyright (C) 2013, Intel Corporation
3 * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com>
5 * Ported from Intel released Quark UEFI BIOS
6 * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei
8 * SPDX-License-Identifier: Intel
12 #include <asm/arch/device.h>
13 #include <asm/arch/mrc.h>
14 #include <asm/arch/msg_port.h>
/*
 * Table of 64 Vref codes, indexed by setting number. Entries 0-31 count
 * down from 0x3f to 0x20 and entries 32-63 count up from 0x00 to 0x1f.
 */
19 static const uint8_t vref_codes[64] = {
20 /* lowest to highest */
21 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38,
22 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,
23 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
24 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20,
25 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
26 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
27 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
28 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
/*
 * Read-modify-write of a message port register.
 *
 * The current value is fetched with msg_port_read(unit, addr), the bits
 * selected by mask are cleared, and the result is OR-ed with a further
 * operand before being written back with msg_port_write().
 *
 * NOTE(review): the OR-ed operand sits on a line not shown in this excerpt;
 * presumably it is data restricted to mask - confirm.
 */
31 void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
33 msg_port_write(unit, addr,
34 (msg_port_read(unit, addr) & ~(mask)) |
/*
 * Read-modify-write of a message port register through the alternate
 * accessors msg_port_alt_read() and msg_port_alt_write().
 *
 * The bits selected by mask are cleared in the value read back, and the
 * result is OR-ed with a further operand before being written.
 *
 * NOTE(review): the OR-ed operand sits on a line not shown in this excerpt;
 * presumably it is data restricted to mask - confirm.
 */
38 void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask)
40 msg_port_alt_write(unit, addr,
41 (msg_port_alt_read(unit, addr) & ~(mask)) |
/*
 * Report a POST code made of a major and a minor byte.
 *
 * The visible part logs the code at D_INFO level as "POST: 0x" followed by
 * major (at least one hex digit) and minor (two hex digits, zero padded).
 * NOTE(review): any further reporting step is not shown in this excerpt.
 */
45 void mrc_post_code(uint8_t major, uint8_t minor)
47 /* send message to UART */
48 DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor);
55 /* Delay number of nanoseconds */
/*
 * Busy-wait for roughly ns nanoseconds using the time stamp counter.
 *
 * A target value is built from the current rdtsc() reading plus
 * get_tbclk_mhz() * ns / 1000 ticks, and the loop spins until rdtsc()
 * reaches it.
 *
 * NOTE(review): the product get_tbclk_mhz() * ns is formed before the
 * division; if both operands are 32 bits wide it can wrap for large ns.
 * Confirm the return type of get_tbclk_mhz().
 */
56 void delay_n(uint32_t ns)
58 /* 1000 MHz clock has 1ns period --> no conversion required */
59 uint64_t final_tsc = rdtsc();
61 final_tsc += ((get_tbclk_mhz() * ns) / 1000);
63 while (rdtsc() < final_tsc)
67 /* Delay number of microseconds */
/*
 * Delay helper taking a 32-bit count.
 *
 * NOTE(review): the parameter is named ms, but given the function name it
 * is presumably a microsecond count - confirm against the loop body, which
 * is not shown in this excerpt.
 */
68 void delay_u(uint32_t ms)
70 /* 64-bit math is not an option, just use loops */
75 /* Select Memory Manager as the source for PRI interface */
/*
 * Update the DCO register of MEM_CTLR: the register is read into dco with
 * msg_port_read() and dco is written back with msg_port_write().
 *
 * NOTE(review): the modification applied to dco between the read and the
 * write is not shown in this excerpt.
 */
76 void select_mem_mgr(void)
82 dco = msg_port_read(MEM_CTLR, DCO);
84 msg_port_write(MEM_CTLR, DCO, dco);
89 /* Select HTE as the source for PRI interface */
/*
 * Read the DCO register of MEM_CTLR into dco and write dco back.
 *
 * NOTE(review): neither the enclosing function signature nor the change
 * made to dco between these two statements is shown in this excerpt.
 */
96 dco = msg_port_read(MEM_CTLR, DCO);
98 msg_port_write(MEM_CTLR, DCO, dco);
105 * data should be formatted using DCMD_Xxxx macro or emrsXCommand structure
/*
 * Send one DRAM init command word.
 *
 * data is written to MSG_DATA_REG and 0 to MSG_CTRL_EXT_REG in the PCI
 * config space of QUARK_HOST_BRIDGE, then the transfer is started with
 * msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0). The write is logged at
 * D_REGWR level.
 */
107 void dram_init_command(uint32_t data)
109 pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data);
110 pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0);
111 msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0);
113 DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data);
116 /* Send DRAM wake command using special MCU side-band WAKE opcode */
/*
 * Issue the DRAM wake opcode by calling
 * msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0).
 * NOTE(review): other statements of this function are not shown here.
 */
117 void dram_wake_command(void)
121 msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0);
/*
 * Log the channel, rank and byte lane being trained at D_INFO level, each
 * printed as a hex number of at least one digit ("CHx RKx BLx").
 */
126 void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane)
128 /* send message to UART */
129 DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);
133 * This function will program the RCVEN delays
135 * (currently doesn't comprehend rank)
/*
 * Program the RCVEN delay of one byte lane from an absolute PI count.
 *
 * channel scales DDRIODQ_CH_OFFSET in every register address. Bit 0 of
 * byte_lane picks the BL0 or BL1 field/register and the remaining bits
 * scale DDRIODQ_BL_OFFSET. rank is only used for logging and error
 * reporting in the code shown.
 *
 * pi_count / HALF_CLK goes to the lane's pointer field in B01PTRCTL0, the
 * remainder goes to bits 24 and up of B0DLLPICODER0/B1DLLPICODER0, and the
 * deadband bits in B01DBCTL1 are updated from the remainder compared with
 * EARLY_DB and LATE_DB. A remainder above 0x3f is reported through
 * training_message() and mrc_post_code(0xee, 0xe0).
 */
137 void set_rcvn(uint8_t channel, uint8_t rank,
138 uint8_t byte_lane, uint32_t pi_count)
146 DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n",
147 channel, rank, byte_lane, pi_count);
150 * RDPTR (1/2 MCLK, 64 PIs)
151 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
152 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
154 reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
155 channel * DDRIODQ_CH_OFFSET;
/* odd lanes use the field at bit 20, even lanes the field at bit 8 */
156 msk = (byte_lane & 1) ? 0xf00000 : 0xf00;
157 temp = (byte_lane & 1) ? (pi_count / HALF_CLK) << 20 :
158 (pi_count / HALF_CLK) << 8;
159 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
161 /* Adjust PI_COUNT */
/* subtracts the half-clock part (low 4 bits of the quotient) just written */
162 pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
165 * PI (1/64 MCLK, 1 PIs)
166 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
167 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
169 reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
170 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
171 channel * DDRIODQ_CH_OFFSET);
173 temp = pi_count << 24;
174 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
178 * BL0/1 -> B01DBCTL1[08/11] (+1 select)
179 * BL0/1 -> B01DBCTL1[02/05] (enable)
181 reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
182 channel * DDRIODQ_CH_OFFSET;
/* NOTE(review): the statements guarded by the two conditions below are not shown */
187 msk |= (byte_lane & 1) ? (1 << 5) : (1 << 2);
188 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
192 msk |= (byte_lane & 1) ? (1 << 11) : (1 << 8);
193 if (pi_count < EARLY_DB)
196 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
/* the remaining PI value must fit the 0x00-0x3F field */
199 if (pi_count > 0x3f) {
200 training_message(channel, rank, byte_lane);
201 mrc_post_code(0xee, 0xe0);
208 * This function will return the current RCVEN delay on the given
209 * channel, rank, byte_lane as an absolute PI count.
211 * (currently doesn't comprehend rank)
/*
 * Read back the RCVEN delay of one byte lane as an absolute PI count.
 *
 * B01PTRCTL0 is read and shifted right by 20 (odd lane) or 8 (even lane),
 * and that value times HALF_CLK seeds pi_count. The lane's
 * B0DLLPICODER0/B1DLLPICODER0 register is then read for the PI part.
 * rank is not used in the code shown.
 *
 * NOTE(review): the field masking, the addition of the PI part and the
 * return statement are on lines not shown in this excerpt.
 */
213 uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane)
222 * RDPTR (1/2 MCLK, 64 PIs)
223 * BL0 -> B01PTRCTL0[11:08] (0x0-0xF)
224 * BL1 -> B01PTRCTL0[23:20] (0x0-0xF)
226 reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
227 channel * DDRIODQ_CH_OFFSET;
228 temp = msg_port_alt_read(DDRPHY, reg);
229 temp >>= (byte_lane & 1) ? 20 : 8;
232 /* Adjust PI_COUNT */
233 pi_count = temp * HALF_CLK;
236 * PI (1/64 MCLK, 1 PIs)
237 * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)
238 * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)
240 reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
241 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
242 channel * DDRIODQ_CH_OFFSET);
243 temp = msg_port_alt_read(DDRPHY, reg);
247 /* Adjust PI_COUNT */
256 * This function will program the RDQS delays based on an absolute
259 * (currently doesn't comprehend rank)
/*
 * Program the RDQS delay of one byte lane from an absolute PI count.
 *
 * The count is written unshifted into B0RXDQSPICODE (even lane) or
 * B1RXDQSPICODE (odd lane) at the channel and byte-lane-pair offset, using
 * mrc_alt_write_mask(). rank is only used for logging and error reporting
 * in the code shown.
 *
 * A count above 0x47 is reported through training_message() and
 * mrc_post_code(0xee, 0xe1).
 * NOTE(review): the value of msk is set on a line not shown here.
 */
261 void set_rdqs(uint8_t channel, uint8_t rank,
262 uint8_t byte_lane, uint32_t pi_count)
269 DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n",
270 channel, rank, byte_lane, pi_count);
274 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
275 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
277 reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
278 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
279 channel * DDRIODQ_CH_OFFSET);
281 temp = pi_count << 0;
282 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
284 /* error check (the count must not exceed 0x47) */
285 if (pi_count > 0x47) {
286 training_message(channel, rank, byte_lane);
287 mrc_post_code(0xee, 0xe1);
294 * This function will return the current RDQS delay on the given
295 * channel, rank, byte_lane as an absolute PI count.
297 * (currently doesn't comprehend rank)
/*
 * Read back the RDQS delay of one byte lane.
 *
 * B0RXDQSPICODE (even lane) or B1RXDQSPICODE (odd lane) is read at the
 * channel and byte-lane-pair offset, and the low 7 bits become pi_count.
 * rank is not used in the code shown.
 * NOTE(review): the return statement is not shown in this excerpt.
 */
299 uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
309 * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)
310 * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)
312 reg = (byte_lane & 1) ? B1RXDQSPICODE : B0RXDQSPICODE;
313 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
314 channel * DDRIODQ_CH_OFFSET);
315 temp = msg_port_alt_read(DDRPHY, reg);
317 /* Adjust PI_COUNT */
318 pi_count = temp & 0x7f;
326 * This function will program the WDQS delays based on an absolute
329 * (currently doesn't comprehend rank)
/*
 * Program the WDQS delay of one byte lane from an absolute PI count.
 *
 * channel scales DDRIODQ_CH_OFFSET in every register address. Bit 0 of
 * byte_lane picks the BL0 or BL1 field/register and the remaining bits
 * scale DDRIODQ_BL_OFFSET. rank is only used for logging and error
 * reporting in the code shown.
 *
 * pi_count / HALF_CLK goes to the lane's pointer field in B01PTRCTL0
 * (bit 16 for odd lanes, bit 4 for even lanes), the remainder goes to
 * bits 16 and up of B0DLLPICODER0/B1DLLPICODER0, and the deadband bits in
 * B01DBCTL1 are updated from the remainder compared with EARLY_DB and
 * LATE_DB. A remainder above 0x3f is reported through training_message()
 * and mrc_post_code(0xee, 0xe2).
 */
331 void set_wdqs(uint8_t channel, uint8_t rank,
332 uint8_t byte_lane, uint32_t pi_count)
340 DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n",
341 channel, rank, byte_lane, pi_count);
344 * RDPTR (1/2 MCLK, 64 PIs)
345 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
346 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
348 reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
349 channel * DDRIODQ_CH_OFFSET;
350 msk = (byte_lane & 1) ? 0xf0000 : 0xf0;
351 temp = pi_count / HALF_CLK;
352 temp <<= (byte_lane & 1) ? 16 : 4;
353 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
355 /* Adjust PI_COUNT */
/* subtracts the half-clock part (low 4 bits of the quotient) just written */
356 pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
359 * PI (1/64 MCLK, 1 PIs)
360 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
361 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
363 reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
364 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
365 channel * DDRIODQ_CH_OFFSET);
367 temp = pi_count << 16;
368 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
372 * BL0/1 -> B01DBCTL1[07/10] (+1 select)
373 * BL0/1 -> B01DBCTL1[01/04] (enable)
375 reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
376 channel * DDRIODQ_CH_OFFSET;
/* NOTE(review): the statements guarded by the two conditions below are not shown */
381 msk |= (byte_lane & 1) ? (1 << 4) : (1 << 1);
382 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
386 msk |= (byte_lane & 1) ? (1 << 10) : (1 << 7);
387 if (pi_count < EARLY_DB)
390 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
/* the remaining PI value must fit the 0x00-0x3F field */
393 if (pi_count > 0x3f) {
394 training_message(channel, rank, byte_lane);
395 mrc_post_code(0xee, 0xe2);
402 * This function will return the amount of WDQS delay on the given
403 * channel, rank, byte_lane as an absolute PI count.
405 * (currently doesn't comprehend rank)
/*
 * Read back the WDQS delay of one byte lane as an absolute PI count.
 *
 * B01PTRCTL0 is read and shifted right by 16 (odd lane) or 4 (even lane),
 * and that value times HALF_CLK seeds pi_count. The lane's
 * B0DLLPICODER0/B1DLLPICODER0 register is then read for the PI part.
 * rank is not used in the code shown.
 *
 * NOTE(review): the field masking, the addition of the PI part and the
 * return statement are on lines not shown in this excerpt.
 */
407 uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane)
416 * RDPTR (1/2 MCLK, 64 PIs)
417 * BL0 -> B01PTRCTL0[07:04] (0x0-0xF)
418 * BL1 -> B01PTRCTL0[19:16] (0x0-0xF)
420 reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
421 channel * DDRIODQ_CH_OFFSET;
422 temp = msg_port_alt_read(DDRPHY, reg);
423 temp >>= (byte_lane & 1) ? 16 : 4;
426 /* Adjust PI_COUNT */
427 pi_count = (temp * HALF_CLK);
430 * PI (1/64 MCLK, 1 PIs)
431 * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)
432 * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)
434 reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
435 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
436 channel * DDRIODQ_CH_OFFSET);
437 temp = msg_port_alt_read(DDRPHY, reg);
441 /* Adjust PI_COUNT */
450 * This function will program the WDQ delays based on an absolute
453 * (currently doesn't comprehend rank)
/*
 * Program the WDQ delay of one byte lane from an absolute PI count.
 *
 * channel scales DDRIODQ_CH_OFFSET in every register address. Bit 0 of
 * byte_lane picks the BL0 or BL1 field/register and the remaining bits
 * scale DDRIODQ_BL_OFFSET. rank is only used for logging and error
 * reporting in the code shown.
 *
 * pi_count / HALF_CLK goes to the lane's pointer field in B01PTRCTL0
 * (bit 12 for odd lanes, bit 0 for even lanes), the remainder goes to
 * bits 8 and up of B0DLLPICODER0/B1DLLPICODER0, and the deadband bits in
 * B01DBCTL1 are updated from the remainder compared with EARLY_DB and
 * LATE_DB. A remainder above 0x3f is reported through training_message()
 * and mrc_post_code(0xee, 0xe3).
 */
455 void set_wdq(uint8_t channel, uint8_t rank,
456 uint8_t byte_lane, uint32_t pi_count)
464 DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n",
465 channel, rank, byte_lane, pi_count);
468 * RDPTR (1/2 MCLK, 64 PIs)
469 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
470 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
472 reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
473 channel * DDRIODQ_CH_OFFSET;
474 msk = (byte_lane & 1) ? 0xf000 : 0xf;
475 temp = pi_count / HALF_CLK;
476 temp <<= (byte_lane & 1) ? 12 : 0;
477 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
479 /* Adjust PI_COUNT */
/* subtracts the half-clock part (low 4 bits of the quotient) just written */
480 pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
483 * PI (1/64 MCLK, 1 PIs)
484 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
485 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
487 reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
488 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
489 channel * DDRIODQ_CH_OFFSET);
491 temp = pi_count << 8;
492 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
496 * BL0/1 -> B01DBCTL1[06/09] (+1 select)
497 * BL0/1 -> B01DBCTL1[00/03] (enable)
499 reg = B01DBCTL1 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
500 channel * DDRIODQ_CH_OFFSET;
/* NOTE(review): the statements guarded by the two conditions below are not shown */
505 msk |= (byte_lane & 1) ? (1 << 3) : (1 << 0);
506 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
510 msk |= (byte_lane & 1) ? (1 << 9) : (1 << 6);
511 if (pi_count < EARLY_DB)
514 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
/* the remaining PI value must fit the 0x00-0x3F field */
517 if (pi_count > 0x3f) {
518 training_message(channel, rank, byte_lane);
519 mrc_post_code(0xee, 0xe3);
526 * This function will return the amount of WDQ delay on the given
527 * channel, rank, byte_lane as an absolute PI count.
529 * (currently doesn't comprehend rank)
/*
 * Read back the WDQ delay of one byte lane as an absolute PI count.
 *
 * B01PTRCTL0 is read and shifted right by 12 (odd lane) or 0 (even lane),
 * and that value times HALF_CLK seeds pi_count. The lane's
 * B0DLLPICODER0/B1DLLPICODER0 register is then read for the PI part.
 * rank is not used in the code shown.
 *
 * NOTE(review): the field masking, the addition of the PI part and the
 * return statement are on lines not shown in this excerpt.
 */
531 uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane)
540 * RDPTR (1/2 MCLK, 64 PIs)
541 * BL0 -> B01PTRCTL0[03:00] (0x0-0xF)
542 * BL1 -> B01PTRCTL0[15:12] (0x0-0xF)
544 reg = B01PTRCTL0 + (byte_lane >> 1) * DDRIODQ_BL_OFFSET +
545 channel * DDRIODQ_CH_OFFSET;
546 temp = msg_port_alt_read(DDRPHY, reg);
547 temp >>= (byte_lane & 1) ? 12 : 0;
550 /* Adjust PI_COUNT */
551 pi_count = temp * HALF_CLK;
554 * PI (1/64 MCLK, 1 PIs)
555 * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)
556 * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)
558 reg = (byte_lane & 1) ? B1DLLPICODER0 : B0DLLPICODER0;
559 reg += ((byte_lane >> 1) * DDRIODQ_BL_OFFSET +
560 channel * DDRIODQ_CH_OFFSET);
561 temp = msg_port_alt_read(DDRPHY, reg);
565 /* Adjust PI_COUNT */
574 * This function will program the WCMD delays based on an absolute
/*
 * Program the WCMD delay of a channel from an absolute PI count.
 *
 * All register addresses are offset by channel * DDRIOCCC_CH_OFFSET.
 * pi_count / HALF_CLK is written to CMDPTRREG, the remainder is replicated
 * into four byte positions and written to both CMDDLLPICODER1 and
 * CMDDLLPICODER0, and CMDCFGREG0 is updated from the remainder compared
 * with EARLY_DB and LATE_DB. The error path calls
 * mrc_post_code(0xee, 0xe4).
 *
 * NOTE(review): the mask values, the shift applied to the pointer value,
 * the guarded deadband statements and the error condition are on lines not
 * shown in this excerpt.
 */
577 void set_wcmd(uint8_t channel, uint32_t pi_count)
586 * RDPTR (1/2 MCLK, 64 PIs)
587 * CMDPTRREG[11:08] (0x0-0xF)
589 reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
591 temp = pi_count / HALF_CLK;
593 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
595 /* Adjust PI_COUNT */
/* subtracts the half-clock part (low 4 bits of the quotient) just written */
596 pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
599 * PI (1/64 MCLK, 1 PIs)
600 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
601 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
602 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
603 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
604 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
605 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
606 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
607 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
609 reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
/* the same PI value is placed at bits 24, 16, 8 and 0 */
611 temp = (pi_count << 24) | (pi_count << 16) |
612 (pi_count << 8) | (pi_count << 0);
614 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
615 reg = CMDDLLPICODER0 + channel * DDRIOCCC_CH_OFFSET; /* PO */
616 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
620 * CMDCFGREG0[17] (+1 select)
621 * CMDCFGREG0[16] (enable)
623 reg = CMDCFGREG0 + channel * DDRIOCCC_CH_OFFSET;
629 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
634 if (pi_count < EARLY_DB)
637 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
641 mrc_post_code(0xee, 0xe4);
647 * This function will return the amount of WCMD delay on the given
648 * channel as an absolute PI count.
/*
 * Read back the WCMD delay of a channel as an absolute PI count.
 *
 * CMDPTRREG is read at the channel offset and the pointer value times
 * HALF_CLK seeds pi_count. CMDDLLPICODER1 is then read for the PI part.
 *
 * NOTE(review): the shifts and masks applied to each value read, the
 * addition of the PI part and the return statement are on lines not shown
 * in this excerpt.
 */
650 uint32_t get_wcmd(uint8_t channel)
659 * RDPTR (1/2 MCLK, 64 PIs)
660 * CMDPTRREG[11:08] (0x0-0xF)
662 reg = CMDPTRREG + channel * DDRIOCCC_CH_OFFSET;
663 temp = msg_port_alt_read(DDRPHY, reg);
667 /* Adjust PI_COUNT */
668 pi_count = temp * HALF_CLK;
671 * PI (1/64 MCLK, 1 PIs)
672 * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)
673 * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)
674 * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)
675 * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)
676 * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)
677 * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)
678 * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)
679 * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)
681 reg = CMDDLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
682 temp = msg_port_alt_read(DDRPHY, reg);
686 /* Adjust PI_COUNT */
695 * This function will program the WCLK delays based on an absolute
/*
 * Program the WCLK delay of a channel from an absolute PI count.
 *
 * All register addresses are offset by channel * DDRIOCCC_CH_OFFSET.
 * pi_count / HALF_CLK is written to CCPTRREG at bits 12 and 8, the
 * remainder is written at bits 16 and 8 of ECCB1DLLPICODER0 through
 * ECCB1DLLPICODER3, and CCCFGREG1 is updated from the remainder compared
 * with EARLY_DB and LATE_DB. The error path calls
 * mrc_post_code(0xee, 0xe5).
 *
 * NOTE(review): the mask values, the guarded deadband statements and the
 * error condition are on lines not shown in this excerpt.
 */
698 void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count)
707 * RDPTR (1/2 MCLK, 64 PIs)
708 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
709 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
711 reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
713 temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);
714 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
716 /* Adjust PI_COUNT */
/* subtracts the half-clock part (low 4 bits of the quotient) just written */
717 pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
720 * PI (1/64 MCLK, 1 PIs)
721 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
722 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
/*
 * NOTE(review): both arms of this and the following rank ternaries name
 * the same register, so rank does not change which register is written.
 */
724 reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
725 reg += (channel * DDRIOCCC_CH_OFFSET);
727 temp = (pi_count << 16) | (pi_count << 8);
728 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
730 reg = rank ? ECCB1DLLPICODER1 : ECCB1DLLPICODER1;
731 reg += (channel * DDRIOCCC_CH_OFFSET);
732 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
734 reg = rank ? ECCB1DLLPICODER2 : ECCB1DLLPICODER2;
735 reg += (channel * DDRIOCCC_CH_OFFSET);
736 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
738 reg = rank ? ECCB1DLLPICODER3 : ECCB1DLLPICODER3;
739 reg += (channel * DDRIOCCC_CH_OFFSET);
740 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
744 * CCCFGREG1[11:08] (+1 select)
745 * CCCFGREG1[03:00] (enable)
747 reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
753 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
758 if (pi_count < EARLY_DB)
761 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
765 mrc_post_code(0xee, 0xe5);
771 * This function will return the amount of WCLK delay on the given
772 * channel, rank as an absolute PI count.
/*
 * Read back the WCLK delay of a channel/rank as an absolute PI count.
 *
 * CCPTRREG is read at the channel offset and shifted right by 12 when rank
 * is non-zero or by 8 when it is zero; that value times HALF_CLK seeds
 * pi_count. ECCB1DLLPICODER0 is then read and shifted right by 16 (rank
 * non-zero) or 8 (rank zero) for the PI part.
 *
 * NOTE(review): both arms of the rank ternary that picks the PI register
 * are identical. The field masking, the addition of the PI part and the
 * return statement are on lines not shown in this excerpt.
 */
774 uint32_t get_wclk(uint8_t channel, uint8_t rank)
783 * RDPTR (1/2 MCLK, 64 PIs)
784 * CCPTRREG[15:12] -> CLK1 (0x0-0xF)
785 * CCPTRREG[11:08] -> CLK0 (0x0-0xF)
787 reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
788 temp = msg_port_alt_read(DDRPHY, reg);
789 temp >>= rank ? 12 : 8;
792 /* Adjust PI_COUNT */
793 pi_count = temp * HALF_CLK;
796 * PI (1/64 MCLK, 1 PIs)
797 * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)
798 * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)
800 reg = rank ? ECCB1DLLPICODER0 : ECCB1DLLPICODER0;
801 reg += (channel * DDRIOCCC_CH_OFFSET);
802 temp = msg_port_alt_read(DDRPHY, reg);
803 temp >>= rank ? 16 : 8;
814 * This function will program the WCTL delays based on an absolute
817 * (currently doesn't comprehend rank)
/*
 * Program the WCTL delay of a channel from an absolute PI count.
 *
 * All register addresses are offset by channel * DDRIOCCC_CH_OFFSET.
 * pi_count / HALF_CLK is written to CCPTRREG at bits 28 and 24, the
 * remainder is written at bit 24 of ECCB1DLLPICODER0 through
 * ECCB1DLLPICODER3, and CCCFGREG1 is updated from the remainder compared
 * with EARLY_DB and LATE_DB. The error path calls
 * mrc_post_code(0xee, 0xe6). rank is not used in the code shown.
 *
 * NOTE(review): the mask values, the guarded deadband statements and the
 * error condition are on lines not shown in this excerpt.
 */
819 void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count)
828 * RDPTR (1/2 MCLK, 64 PIs)
829 * CCPTRREG[31:28] (0x0-0xF)
830 * CCPTRREG[27:24] (0x0-0xF)
832 reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
834 temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);
835 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
837 /* Adjust PI_COUNT */
/* subtracts the half-clock part (low 4 bits of the quotient) just written */
838 pi_count -= ((pi_count / HALF_CLK) & 0xf) * HALF_CLK;
841 * PI (1/64 MCLK, 1 PIs)
842 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
843 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
845 reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
847 temp = (pi_count << 24);
848 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
850 reg = ECCB1DLLPICODER1 + channel * DDRIOCCC_CH_OFFSET;
851 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
853 reg = ECCB1DLLPICODER2 + channel * DDRIOCCC_CH_OFFSET;
854 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
856 reg = ECCB1DLLPICODER3 + channel * DDRIOCCC_CH_OFFSET;
857 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
861 * CCCFGREG1[13:12] (+1 select)
862 * CCCFGREG1[05:04] (enable)
864 reg = CCCFGREG1 + channel * DDRIOCCC_CH_OFFSET;
870 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))
875 if (pi_count < EARLY_DB)
878 mrc_alt_write_mask(DDRPHY, reg, temp, msk);
882 mrc_post_code(0xee, 0xe6);
888 * This function will return the amount of WCTL delay on the given
889 * channel, rank as an absolute PI count.
891 * (currently doesn't comprehend rank)
/*
 * Read back the WCTL delay of a channel as an absolute PI count.
 *
 * CCPTRREG is read at the channel offset and the pointer value times
 * HALF_CLK seeds pi_count. ECCB1DLLPICODER0 is then read for the PI part.
 * rank is not used in the code shown.
 *
 * NOTE(review): the shifts and masks applied to each value read, the
 * addition of the PI part and the return statement are on lines not shown
 * in this excerpt.
 */
893 uint32_t get_wctl(uint8_t channel, uint8_t rank)
902 * RDPTR (1/2 MCLK, 64 PIs)
903 * CCPTRREG[31:28] (0x0-0xF)
904 * CCPTRREG[27:24] (0x0-0xF)
906 reg = CCPTRREG + channel * DDRIOCCC_CH_OFFSET;
907 temp = msg_port_alt_read(DDRPHY, reg);
911 /* Adjust PI_COUNT */
912 pi_count = temp * HALF_CLK;
915 * PI (1/64 MCLK, 1 PIs)
916 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
917 * ECCB1DLLPICODER?[29:24] (0x00-0x3F)
919 reg = ECCB1DLLPICODER0 + channel * DDRIOCCC_CH_OFFSET;
920 temp = msg_port_alt_read(DDRPHY, reg);
924 /* Adjust PI_COUNT */
933 * This function will program the internal Vref setting in a given
934 * byte lane in a given channel.
/*
 * Program the Vref code of one byte lane.
 *
 * setting indexes vref_codes[]; the looked-up code is shifted left by 2 and
 * written under mask 0xfc into B0VREFCTL (even lane) or B1VREFCTL (odd
 * lane), offset by channel * DDRIODQ_CH_OFFSET and
 * (byte_lane >> 1) * DDRIODQ_BL_OFFSET.
 *
 * NOTE(review): no range check on setting is visible before it indexes the
 * 64-entry table - confirm callers keep it below 64.
 */
936 void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting)
938 uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
942 DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n",
943 channel, byte_lane, setting);
945 mrc_alt_write_mask(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
946 (byte_lane >> 1) * DDRIODQ_BL_OFFSET,
947 vref_codes[setting] << 2, 0xfc);
950 * need to wait ~300ns for Vref to settle
951 * (check that this is necessary)
955 /* ??? may need to clear pointers ??? */
961 * This function will return the internal Vref setting for the given
962 * channel, byte_lane.
/*
 * Look up the Vref setting currently programmed for one byte lane.
 *
 * B0VREFCTL (even lane) or B1VREFCTL (odd lane) is read at the channel and
 * byte-lane-pair offset, and vref_codes[] is scanned for an entry equal to
 * the value in temp. ret_val starts at half the table size
 * (sizeof(vref_codes) / 2).
 *
 * NOTE(review): the adjustment of temp after the read, the action taken on
 * a match and the return statement are on lines not shown in this excerpt.
 */
964 uint32_t get_vref(uint8_t channel, uint8_t byte_lane)
967 uint32_t ret_val = sizeof(vref_codes) / 2;
968 uint32_t reg = (byte_lane & 0x1) ? B1VREFCTL : B0VREFCTL;
973 temp = msg_port_alt_read(DDRPHY, reg + channel * DDRIODQ_CH_OFFSET +
974 (byte_lane >> 1) * DDRIODQ_BL_OFFSET);
978 for (j = 0; j < sizeof(vref_codes); j++) {
979 if (vref_codes[j] == temp) {
991 * This function will return a 32-bit address in the desired
/*
 * Compute a test address for the given channel and rank.
 *
 * The offset starts at 32MB and grows by 256MB per rank. "ILLEGAL CHANNEL"
 * and "ILLEGAL RANK" are logged at D_ERROR level.
 *
 * NOTE(review): the conditions guarding the two error messages and the
 * return statement are on lines not shown in this excerpt.
 */
994 uint32_t get_addr(uint8_t channel, uint8_t rank)
996 uint32_t offset = 32 * 1024 * 1024; /* 32MB */
998 /* Begin product specific code */
1000 DPF(D_ERROR, "ILLEGAL CHANNEL\n");
1005 DPF(D_ERROR, "ILLEGAL RANK\n");
1009 /* use 256MB lowest density as per DRP == 0x0003 */
1010 offset += rank * (256 * 1024 * 1024);
1016 * This function will sample the DQTRAINSTS registers in the given
1017 * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.
1019 * It will return an encoded 32-bit data in which each bit corresponds to
1020 * the sampled value on the byte lane.
/*
 * Sample a status register repeatedly and majority-vote one bit per byte
 * lane.
 *
 * For each byte lane group (pairs of lanes, with the lane count halved when
 * mrc_params->channel_width is X16), SAMPLE_SIZE samples are taken: each
 * one runs hte_mem_op() on the address from get_addr(channel, rank) and
 * then reads a DDRPHY register at the group and channel offset.
 * mrc_params->first_run is cleared after the first hte_mem_op() call.
 *
 * The bits tested per lane are bit 1 (BL0) and bit 0 (BL1) when rcvn is
 * true, otherwise bit 9 (BL0) and bit 8 (BL1). Bit (bl + bl_grp * 2) of
 * the result is set when the lane saw more ones than zeros.
 *
 * NOTE(review): the register name, the counter increments and the return
 * statement are on lines not shown in this excerpt.
 */
1022 uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel,
1023 uint8_t rank, bool rcvn)
1025 uint8_t j; /* just a counter */
1026 uint8_t bl; /* which BL in the module (always 2 per module) */
1027 uint8_t bl_grp; /* which BL module */
1028 /* byte lane divisor */
1029 uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1030 uint32_t msk[2]; /* BLx in module */
1031 /* DQTRAINSTS register contents for each sample */
1032 uint32_t sampled_val[SAMPLE_SIZE];
1033 uint32_t num_0s; /* tracks the number of '0' samples */
1034 uint32_t num_1s; /* tracks the number of '1' samples */
1035 uint32_t ret_val = 0x00; /* assume all '0' samples */
1036 uint32_t address = get_addr(channel, rank);
1038 /* initialise msk[] */
1039 msk[0] = rcvn ? (1 << 1) : (1 << 9); /* BL0 */
1040 msk[1] = rcvn ? (1 << 0) : (1 << 8); /* BL1 */
1042 /* cycle through each byte lane group */
1043 for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) {
1044 /* take SAMPLE_SIZE samples */
1045 for (j = 0; j < SAMPLE_SIZE; j++) {
1046 hte_mem_op(address, mrc_params->first_run,
1048 mrc_params->first_run = 0;
1051 * record the contents of the proper
1052 * DQTRAINSTS register
1054 sampled_val[j] = msg_port_alt_read(DDRPHY,
1056 bl_grp * DDRIODQ_BL_OFFSET +
1057 channel * DDRIODQ_CH_OFFSET);
1061 * look for a majority value (SAMPLE_SIZE / 2) + 1
1062 * on the byte lane and set that value in the corresponding
1065 for (bl = 0; bl < 2; bl++) {
1066 num_0s = 0x00; /* reset '0' tracker for byte lane */
1067 num_1s = 0x00; /* reset '1' tracker for byte lane */
1068 for (j = 0; j < SAMPLE_SIZE; j++) {
1069 if (sampled_val[j] & msk[bl])
/* a tie leaves the lane bit at its initial 0 */
1074 if (num_1s > num_0s)
1075 ret_val |= (1 << (bl + bl_grp * 2));
1080 * "ret_val.0" contains the status of BL0
1081 * "ret_val.1" contains the status of BL1
1082 * "ret_val.2" contains the status of BL2
1088 /* This function will find the rising edge transition on RCVN or WDQS */
/*
 * Search for the rising edge on RCVN (rcvn true) or WDQS (rcvn false) for
 * every active byte lane of a channel/rank, updating delay[] in place.
 *
 * Phase 1: SAMPLE_CNT coarse samples are taken, each with every lane's
 * delay advanced by sample * SAMPLE_DLY, and sample_dqs() results are
 * stored. Phase 2: per lane, the sampled bits form a transition pattern
 * (first sample in the most significant bit) that selects a starting delay
 * and a FORWARD or BACKWARD search direction; an unexpected pattern leads
 * to mrc_post_code(0xee, 0xee). Phase 3: sample_dqs() is repeated, with
 * lanes that have not yet reached their edge being reprogrammed, until all
 * edges are found. Finally a DCMD_PREA(rank) command is issued and the
 * delays are logged.
 *
 * The lane count is NUM_BYTE_LANES, halved when mrc_params->channel_width
 * is X16. mrc_params->first_run is set to 1 before sampling starts.
 *
 * NOTE(review): several branch headers and the per-step delay changes in
 * phase 3 are on lines not shown in this excerpt. The final log line always
 * prints delay[0] through delay[3].
 */
1089 void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[],
1090 uint8_t channel, uint8_t rank, bool rcvn)
1092 bool all_edges_found; /* determines stop condition */
1093 bool direction[NUM_BYTE_LANES]; /* direction indicator */
1094 uint8_t sample; /* sample counter */
1095 uint8_t bl; /* byte lane counter */
1096 /* byte lane divisor */
1097 uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1098 uint32_t sample_result[SAMPLE_CNT]; /* results of sample_dqs() */
1100 uint32_t transition_pattern;
1104 /* select hte and request initial configuration */
1106 mrc_params->first_run = 1;
1108 /* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */
1109 for (sample = 0; sample < SAMPLE_CNT; sample++) {
1110 /* program the desired delays for sample */
1111 for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) {
1112 /* increase sample delay by 26 PI (0.2 CLK) */
1114 set_rcvn(channel, rank, bl,
1115 delay[bl] + sample * SAMPLE_DLY);
1117 set_wdqs(channel, rank, bl,
1118 delay[bl] + sample * SAMPLE_DLY);
1122 /* take samples (Tsample_i) */
1123 sample_result[sample] = sample_dqs(mrc_params,
1124 channel, rank, rcvn);
1127 "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",
1128 rcvn ? "RCVN" : "WDQS", channel, rank, sample,
1129 sample * SAMPLE_DLY, sample_result[sample]);
1133 * This pattern will help determine where we landed and ultimately
1134 * how to place RCVEN/WDQS.
1136 for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1137 /* build transition_pattern (MSB is 1st sample) */
1138 transition_pattern = 0;
1139 for (sample = 0; sample < SAMPLE_CNT; sample++) {
1140 transition_pattern |=
1141 ((sample_result[sample] & (1 << bl)) >> bl) <<
1142 (SAMPLE_CNT - 1 - sample);
1145 DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);
1148 * set up to look for rising edge based on
1149 * transition_pattern
1151 switch (transition_pattern) {
1152 case 0: /* sampled 0->0->0 */
1153 /* move forward from T3 looking for 0->1 */
1154 delay[bl] += 2 * SAMPLE_DLY;
1155 direction[bl] = FORWARD;
1157 case 1: /* sampled 0->0->1 */
1158 case 5: /* sampled 1->0->1 (bad duty cycle) *HSD#237503* */
1159 /* move forward from T2 looking for 0->1 */
1160 delay[bl] += 1 * SAMPLE_DLY;
1161 direction[bl] = FORWARD;
1163 case 2: /* sampled 0->1->0 (bad duty cycle) *HSD#237503* */
1164 case 3: /* sampled 0->1->1 */
1165 /* move forward from T1 looking for 0->1 */
1166 delay[bl] += 0 * SAMPLE_DLY;
1167 direction[bl] = FORWARD;
1169 case 4: /* sampled 1->0->0 (assumes BL8, HSD#234975) */
1170 /* move forward from T3 looking for 0->1 */
1171 delay[bl] += 2 * SAMPLE_DLY;
1172 direction[bl] = FORWARD;
1174 case 6: /* sampled 1->1->0 */
1175 case 7: /* sampled 1->1->1 */
1176 /* move backward from T1 looking for 1->0 */
1177 delay[bl] += 0 * SAMPLE_DLY;
1178 direction[bl] = BACKWARD;
1181 mrc_post_code(0xee, 0xee);
1185 /* program delays */
1187 set_rcvn(channel, rank, bl, delay[bl]);
1189 set_wdqs(channel, rank, bl, delay[bl]);
1193 * Based on the observed transition pattern on the byte lane,
1194 * begin looking for a rising edge with single PI granularity.
1197 all_edges_found = true; /* assume all byte lanes passed */
1199 temp = sample_dqs(mrc_params, channel, rank, rcvn);
1200 /* check each byte lane for the proper edge */
1201 for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1202 if (temp & (1 << bl)) {
1204 if (direction[bl] == BACKWARD) {
1206 * keep looking for edge
1209 all_edges_found = false;
1212 set_rcvn(channel, rank,
1215 set_wdqs(channel, rank,
1221 if (direction[bl] == FORWARD) {
1223 * keep looking for edge
1226 all_edges_found = false;
1229 set_rcvn(channel, rank,
1232 set_wdqs(channel, rank,
1238 } while (!all_edges_found);
1240 /* restore DDR idle state */
1241 dram_init_command(DCMD_PREA(rank));
1243 DPF(D_TRN, "Delay %03X %03X %03X %03X\n",
1244 delay[0], delay[1], delay[2], delay[3]);
1250 * This function will return a 32 bit mask that will be used to
1251 * check for byte lane failures.
/*
 * Build the bit mask used to check results for byte lane failures.
 *
 * One bit is set for every multiple of NUM_BYTE_LANES below MAX_BYTE_LANES
 * (the shift amount (j / NUM_BYTE_LANES) * NUM_BYTE_LANES equals j for the
 * values the loop visits). When mrc_params->channel_width is X16, the mask
 * is additionally OR-ed with itself shifted left by 2.
 *
 * NOTE(review): the return statement is not shown in this excerpt.
 */
1253 uint32_t byte_lane_mask(struct mrc_params *mrc_params)
1256 uint32_t ret_val = 0x00;
1259 * set ret_val based on NUM_BYTE_LANES such that you will check
1260 * only BL0 in result
1262 * (each bit in result represents a byte lane)
1264 for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)
1265 ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));
1269 * need to adjust the mask for 16-bit mode
1271 if (mrc_params->channel_width == X16)
1272 ret_val |= (ret_val << 2);
1278 * Check memory executing simple write/read/verify at the specified address.
1280 * Bits in the result indicate failure on specific byte lane.
/*
 * Run hte_basic_write_read() at the given address and log its result at
 * D_TRN level.
 *
 * first_run starts at 0. When mrc_params->hte_setup is set, the flag is
 * cleared before the test runs.
 *
 * NOTE(review): the rest of the hte_setup branch, the remaining arguments
 * of hte_basic_write_read() and the return statement are on lines not
 * shown in this excerpt.
 */
1282 uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address)
1284 uint32_t result = 0;
1285 uint8_t first_run = 0;
1287 if (mrc_params->hte_setup) {
1288 mrc_params->hte_setup = 0;
1293 result = hte_basic_write_read(mrc_params, address, first_run,
1296 DPF(D_TRN, "check_rw_coarse result is %x\n", result);
1302 * Check memory executing write/read/verify of many data patterns
1303 * at the specified address. Bits in the result indicate failure
1304 * on specific byte lane.
/*
 * Run hte_write_stress_bit_lanes() at the given address and log its result
 * at D_TRN level.
 *
 * first_run starts at 0. When mrc_params->hte_setup is set, the flag is
 * cleared before the test runs.
 *
 * NOTE(review): the rest of the hte_setup branch and the return statement
 * are on lines not shown in this excerpt.
 */
1306 uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address)
1309 uint8_t first_run = 0;
1311 if (mrc_params->hte_setup) {
1312 mrc_params->hte_setup = 0;
1317 result = hte_write_stress_bit_lanes(mrc_params, address, first_run);
1319 DPF(D_TRN, "check_bls_ex result is %x\n", result);
1325 * 32-bit LFSR with characteristic polynomial: X^32 + X^22 + X^2 + X^1
1327 * The function takes pointer to previous 32 bit value and
1328 * modifies it to next value.
/*
 * Advance a 32-bit LFSR state by 32 single-bit steps.
 *
 * Each step computes a feedback bit as 1 XOR-ed with bits 0, 1, 2 and 22 of
 * the state, shifts the state right by one and inserts the feedback bit at
 * bit 31.
 *
 * NOTE(review): the load of the state from lfsr_ptr and the store back are
 * on lines not shown in this excerpt.
 */
1330 void lfsr32(uint32_t *lfsr_ptr)
1338 for (i = 0; i < 32; i++) {
1339 bit = 1 ^ (lfsr & 1);
1340 bit = bit ^ ((lfsr & 2) >> 1);
1341 bit = bit ^ ((lfsr & 4) >> 2);
1342 bit = bit ^ ((lfsr & 0x400000) >> 22);
1344 lfsr = ((lfsr >> 1) | (bit << 31));
1350 /* Clear the pointers in every byte lane of every channel */
/*
 * For every channel below NUM_CHANNELS and every byte lane below
 * NUM_BYTE_LANES, issue two masked DDRPHY writes at the channel and
 * byte-lane-pair offset: first with data ~(1 << 8), then with data
 * (1 << 8), both under mask (1 << 8).
 *
 * The address uses bl >> 1, so the two lanes of a pair target the same
 * register address.
 *
 * NOTE(review): the base register name of each write is on a line not
 * shown in this excerpt; presumably the pair of writes drives bit 8 low and
 * then high - confirm.
 */
1351 void clear_pointers(void)
1358 for (channel = 0; channel < NUM_CHANNELS; channel++) {
1359 for (bl = 0; bl < NUM_BYTE_LANES; bl++) {
1360 mrc_alt_write_mask(DDRPHY,
1362 channel * DDRIODQ_CH_OFFSET +
1363 (bl >> 1) * DDRIODQ_BL_OFFSET,
1364 ~(1 << 8), (1 << 8));
1366 mrc_alt_write_mask(DDRPHY,
1368 channel * DDRIODQ_CH_OFFSET +
1369 (bl >> 1) * DDRIODQ_BL_OFFSET,
1370 (1 << 8), (1 << 8));
/*
 * Print one row of the timing table for a channel/rank at D_INFO level.
 *
 * A row label (RCVN, WDQS, WDQx, RDQS, VREF, WCMD, WCTL or WCLK) with the
 * channel and rank is printed first, followed by one value per byte lane
 * for bl below NUM_BYTE_LANES / bl_divisor, obtained from the matching
 * getter (get_rcvn, get_wdqs, get_wdq, get_rdqs, get_vref, get_wcmd,
 * get_wctl or get_wclk).
 *
 * NOTE(review): the rest of the parameter list and the selection logic
 * that maps algo to a label and getter are on lines not shown in this
 * excerpt; bl_divisor is presumably one of the hidden parameters.
 */
1377 static void print_timings_internal(uint8_t algo, uint8_t channel, uint8_t rank,
1384 DPF(D_INFO, "\nRCVN[%02d:%02d]", channel, rank);
1387 DPF(D_INFO, "\nWDQS[%02d:%02d]", channel, rank);
1390 DPF(D_INFO, "\nWDQx[%02d:%02d]", channel, rank);
1393 DPF(D_INFO, "\nRDQS[%02d:%02d]", channel, rank);
1396 DPF(D_INFO, "\nVREF[%02d:%02d]", channel, rank);
1399 DPF(D_INFO, "\nWCMD[%02d:%02d]", channel, rank);
1402 DPF(D_INFO, "\nWCTL[%02d:%02d]", channel, rank);
1405 DPF(D_INFO, "\nWCLK[%02d:%02d]", channel, rank);
1411 for (bl = 0; bl < NUM_BYTE_LANES / bl_divisor; bl++) {
1414 DPF(D_INFO, " %03d", get_rcvn(channel, rank, bl));
1417 DPF(D_INFO, " %03d", get_wdqs(channel, rank, bl));
1420 DPF(D_INFO, " %03d", get_wdq(channel, rank, bl));
1423 DPF(D_INFO, " %03d", get_rdqs(channel, rank, bl));
1426 DPF(D_INFO, " %03d", get_vref(channel, bl));
1429 DPF(D_INFO, " %03d", get_wcmd(channel));
1432 DPF(D_INFO, " %03d", get_wctl(channel, rank));
1435 DPF(D_INFO, " %03d", get_wclk(channel, rank));
/*
 * Print the full timing table at D_INFO level.
 *
 * After a three-line header, the function iterates over every algo below
 * MAX_ALGOS, every channel below NUM_CHANNELS whose bit is set in
 * mrc_params->channel_enables, and every rank below NUM_RANKS that passes
 * the mrc_params->rank_enables test, calling print_timings_internal() for
 * each combination. A closing separator line is printed at the end.
 *
 * bl_divisor is 2 when mrc_params->channel_width is X16, otherwise 1.
 * NOTE(review): the rank_enables bit test and the remaining arguments of
 * print_timings_internal() are on lines not shown in this excerpt.
 */
1443 void print_timings(struct mrc_params *mrc_params)
1448 uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1;
1450 DPF(D_INFO, "\n---------------------------");
1451 DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");
1452 DPF(D_INFO, "\n===========================");
1454 for (algo = 0; algo < MAX_ALGOS; algo++) {
1455 for (channel = 0; channel < NUM_CHANNELS; channel++) {
1456 if (mrc_params->channel_enables & (1 << channel)) {
1457 for (rank = 0; rank < NUM_RANKS; rank++) {
1458 if (mrc_params->rank_enables &
1460 print_timings_internal(algo,
1469 DPF(D_INFO, "\n---------------------------");