Hi Akshay, On 21 May 2014 23:33, Akshay Saraswat <aksha...@samsung.com> wrote: > Sometimes Read DQ and DQS are not in phase. Since, this > phase shift differs from board to board, we need to > calibrate it at DRAM init phase, that's read DQ calibration. > This patch adds SW Read DQ calibration routine to compensate > this skew.
The English could be a bit better here - also suggest formatting to 72 columns or more. > > Signed-off-by: Alim Akhtar <alim.akh...@samsung.com> > Signed-off-by: Akshay Saraswat <aksha...@samsung.com> > --- > arch/arm/cpu/armv7/exynos/dmc_init_ddr3.c | 244 > +++++++++++++++++++++++++++++- > arch/arm/cpu/armv7/exynos/exynos5_setup.h | 5 +- > arch/arm/include/asm/arch-exynos/dmc.h | 3 + > arch/arm/include/asm/arch-exynos/power.h | 4 +- > 4 files changed, 252 insertions(+), 4 deletions(-) > > diff --git a/arch/arm/cpu/armv7/exynos/dmc_init_ddr3.c > b/arch/arm/cpu/armv7/exynos/dmc_init_ddr3.c > index 0654c6a..a4f6099 100644 > --- a/arch/arm/cpu/armv7/exynos/dmc_init_ddr3.c > +++ b/arch/arm/cpu/armv7/exynos/dmc_init_ddr3.c > @@ -6,6 +6,7 @@ > * SPDX-License-Identifier: GPL-2.0+ > */ > > +#include <common.h> > #include <config.h> > #include <asm/io.h> > #include <asm/arch/clock.h> > @@ -16,7 +17,12 @@ > #include "exynos5_setup.h" > #include "clock_init.h" > > -#define TIMEOUT 10000 > +#define TIMEOUT 10000 Is this in milliseconds? If so TIMEOUT_MS, or maybe microseconds, TIMEOUT _US > +#define NUM_BYTE_LANES 4 > +#define DEFAULT_DQS 8 > +#define DEFAULT_DQS_X4 0x08080808 What does this mean? > +#define FALSE 0 > +#define TRUE 1 You can just use true and false in U-Boot now (the bool type). > > #ifdef CONFIG_EXYNOS5250 > static void reset_phy_ctrl(void) > @@ -222,6 +228,219 @@ int ddr3_mem_ctrl_init(int reset) > #endif > > #ifdef CONFIG_EXYNOS5420 > +/* > + * RAM address to use in the test. > + * > + * We'll use 4 words at this address and 4 at this address + 0x80 (Ares > + * interleaves channels every 128 bytes). This will allow us to evaluate > all of > + * the chips in a 1 chip per channel (2GB) system and half the chips in a 2 > + * chip per channel (4GB) system. We can't test the 2nd chip since we need > to > + * do tests before the 2nd chip is enabled. Looking at the 2nd chip isn't > + * critical because the 1st and 2nd chip have very similar timings (they'd > + * better have similar timings, since there's only a single adjustment that > is > + * shared by both chips). > + */ > +const unsigned int test_addr = 0x20000000; Isn't this CONFIG_SYS_SDRAM_BASE? > + > +/* Test pattern with which RAM will be tested */ > +static const unsigned int test_pattern[] = { > + 0x5A5A5A5A, > + 0xA5A5A5A5, > + 0xF0F0F0F0, > + 0x0F0F0F0F, Lower case hex throughout? > +}; > + > +/* /** Should fix for each function. > + * This function is a test vector for sw read leveling, > + * it compares the read data with the written data. > + * > + * @param ch DMC channel number > + * @param byte_lane which DQS byte offset, > + * possible values are 0,1,2,3 > + * @returns TRUE if memory was good, FALSE if not. @return > + */ > +static int dmc_valid_window_test_vector(int ch, int byte_lane) > +{ > + unsigned int read_data; > + unsigned int mask; > + int i; > + > + mask = 0xFF << (8 * byte_lane); > + > + for (i = 0; i < ARRAY_SIZE(test_pattern); i++) { > + read_data = readl(test_addr + i*4 + ch*0x80); Spaces around * operators > + if ((read_data & mask) != (test_pattern[i] & mask)) > + return FALSE; > + } > + > + return TRUE; > +} > + > +/* > + * This function returns current read offset value. > + * > + * @param phy_ctrl pointer to the current phy controller > + */ > +static unsigned int dmc_get_read_offset_value(struct exynos5420_phy_control > + *phy_ctrl) > +{ > + return readl(&phy_ctrl->phy_con4); > +} > + > +/* > + * This function performs resync, so that slave DLL is updated. > + * > + * @param phy_ctrl pointer to the current phy controller > + */ > +static void ddr_phy_set_do_resync(struct exynos5420_phy_control *phy_ctrl) > +{ > + setbits_le32(&phy_ctrl->phy_con10, PHY_CON10_CTRL_OFFSETR3); > + clrbits_le32(&phy_ctrl->phy_con10, PHY_CON10_CTRL_OFFSETR3); > +} > + > +/* > + * This function sets read offset value register with 'offset'. > + * > + * ...we also call call ddr_phy_set_do_resync(). > + * > + * @param phy_ctrl pointer to the current phy controller > + * @param offset offset to read DQS > + */ > +static void dmc_set_read_offset_value(struct exynos5420_phy_control > *phy_ctrl, > + unsigned int offset) > +{ > + writel(offset, &phy_ctrl->phy_con4); > + ddr_phy_set_do_resync(phy_ctrl); > +} > + > +/* > + * Convert a 2s complement byte to a byte with a sign bit. > + * > + * NOTE: you shouldn't use normal math on the number returned by this > function. > + * As an example, -10 = 0xf6. After this function -10 = 0x8a. If you > wanted > + * to do math and get the average of 10 and -10 (should be 0): > + * 0x8a + 0xa = 0x94 (-108) > + * 0x94 / 2 = 0xca (-54) > + * ...and 0xca = sign bit plus 0x4a, or -74 > + * > + * Also note that you lose the ability to represent -128 since there are two > + * representations of 0. > + * > + * @param b The byte to convert in two's complement. > + * @returns The 7-bit value + sign bit. > + */ > + > +unsigned char make_signed_byte(signed char b) > +{ > + if (b < 0) > + return 0x80 | -b; > + else > + return b; > +} > + > +/* > + * Test various shifts starting at 'start' and going to 'end'. > + * > + * For each byte lane, we'll walk through shift starting at 'start' and going > + * to 'end' (inclusive). When we are finally able to read the test pattern > + * we'll store the value in the results array. > + * > + * @param phy_ctrl pointer to the current phy controller > + * @param ch channel number > + * @param start the start shift. -127 to 127 > + * @param end the end shift. -127 to 127 > + * @param results we'll store results for each byte lane. > + */ > + > +void test_shifts(struct exynos5420_phy_control *phy_ctrl, int ch, > + int start, int end, int results[NUM_BYTE_LANES]) > +{ > + int incr = (start < end) ? 1 : -1; > + int byte_lane; > + > + for (byte_lane = 0; byte_lane < NUM_BYTE_LANES; byte_lane++) { > + int shift; > + > + dmc_set_read_offset_value(phy_ctrl, DEFAULT_DQS_X4); > + results[byte_lane] = DEFAULT_DQS; > + > + for (shift = start; shift != (end + incr); shift += incr) { > + unsigned int byte_offsetr; > + unsigned int offsetr; > + > + byte_offsetr = make_signed_byte(shift); > + > + offsetr = dmc_get_read_offset_value(phy_ctrl); > + offsetr &= ~(0xFF << (8 * byte_lane)); > + offsetr |= (byte_offsetr << (8 * byte_lane)); > + dmc_set_read_offset_value(phy_ctrl, offsetr); > + > + if (dmc_valid_window_test_vector(ch, byte_lane)) { > + results[byte_lane] = shift; > + break; > + } > + } > + } > +} > + > +/* > + * This function performs SW read leveling to compensate DQ-DQS skew at > + * receiver it first finds the optimal read offset value on each DQS > + * then applies the value to PHY. > + * > + * Read offset value has its min margin and max margin. If read offset > + * value exceeds its min or max margin, read data will have corruption. > + * To avoid this we are doing sw read leveling. > + * > + * SW read leveling is: > + * 1> Finding offset value's left_limit and right_limit > + * 2> and calculate its center value > + * 3> finally programs that center value to PHY > + * 4> then PHY gets its optimal offset value. > + * > + * @param phy_ctrl pointer to the current phy controller > + * @param ch channel number > + * @param coarse_lock_val The coarse lock value read from PHY_CON13. > + * (0 - 0x7f) > + */ > +static void software_find_read_offset(struct exynos5420_phy_control > *phy_ctrl, > + int ch, unsigned int coarse_lock_val) > +{ > + unsigned int offsetr_cent; > + int byte_lane; > + int left_limit; > + int right_limit; > + int left[NUM_BYTE_LANES]; > + int right[NUM_BYTE_LANES]; > + int i; > + > + /* Fill the memory with test patterns */ > + for (i = 0; i < ARRAY_SIZE(test_pattern); i++) > + writel(test_pattern[i], test_addr + i*4 + ch*0x80); > + > + /* Figure out the limits we'll test with; keep -127 < limit < 127 */ > + left_limit = DEFAULT_DQS - coarse_lock_val; > + right_limit = DEFAULT_DQS + coarse_lock_val; > + if (right_limit > 127) > + right_limit = 127; > + > + /* Fill in the location where reads were OK from left and right */ > + test_shifts(phy_ctrl, ch, left_limit, right_limit, left); > + test_shifts(phy_ctrl, ch, right_limit, left_limit, right); > + > + /* Make a final value by taking the center between the left and right > */ > + offsetr_cent = 0; > + for (byte_lane = 0; byte_lane < NUM_BYTE_LANES; byte_lane++) { > + int temp_center; > + unsigned int vmwc; > + > + temp_center = (left[byte_lane] + right[byte_lane]) / 2; > + vmwc = make_signed_byte(temp_center); > + offsetr_cent |= vmwc << (8 * byte_lane); > + } > + dmc_set_read_offset_value(phy_ctrl, offsetr_cent); > +} > + > int ddr3_mem_ctrl_init(int reset) > { > struct exynos5420_clock *clk = > @@ -231,6 +450,7 @@ int ddr3_mem_ctrl_init(int reset) > struct exynos5420_phy_control *phy0_ctrl, *phy1_ctrl; > struct exynos5420_dmc *drex0, *drex1; > struct exynos5420_tzasc *tzasc0, *tzasc1; > + struct exynos5_power *pmu; > struct mem_timings *mem; > uint32_t val, n_lock_r, n_lock_w_phy0, n_lock_w_phy1; > uint32_t lock0_info, lock1_info; > @@ -247,6 +467,7 @@ int ddr3_mem_ctrl_init(int reset) > tzasc1 = (struct exynos5420_tzasc *)(samsung_get_base_dmc_tzasc() > + DMC_OFFSET); > mem = clock_get_mem_timings(); > + pmu = (struct exynos5_power *)EXYNOS5420_POWER_BASE; > > /* Enable PAUSE for DREX */ > setbits_le32(&clk->pause, ENABLE_BIT); > @@ -566,6 +787,27 @@ int ddr3_mem_ctrl_init(int reset) > setbits_le32(&phy1_ctrl->phy_con2, DLL_DESKEW_EN); > } > > + /* > + * Do software read leveling > + * > + * Do this before we turn on auto refresh since the auto refresh can > + * be in conflict with the resync operation that's part of setting > + * read leveling. > + */ > + if (!reset) { > + /* restore calibrated value after resume */ > + dmc_set_read_offset_value(phy0_ctrl, readl(&pmu->pmu_spare1)); > + dmc_set_read_offset_value(phy1_ctrl, readl(&pmu->pmu_spare2)); > + } else { > + software_find_read_offset(phy0_ctrl, 0, > + CTRL_LOCK_COARSE(lock0_info)); > + software_find_read_offset(phy1_ctrl, 1, > + CTRL_LOCK_COARSE(lock1_info)); > + /* save calibrated value to restore after resume */ > + writel(dmc_get_read_offset_value(phy0_ctrl), > &pmu->pmu_spare1); > + writel(dmc_get_read_offset_value(phy1_ctrl), > &pmu->pmu_spare2); > + } > + > /* Send PALL command */ > dmc_config_prech(mem, &drex0->directcmd); > dmc_config_prech(mem, &drex1->directcmd); > diff --git a/arch/arm/cpu/armv7/exynos/exynos5_setup.h > b/arch/arm/cpu/armv7/exynos/exynos5_setup.h > index 1cf9caf..f4ee2a2 100644 > --- a/arch/arm/cpu/armv7/exynos/exynos5_setup.h > +++ b/arch/arm/cpu/armv7/exynos/exynos5_setup.h > @@ -282,8 +282,11 @@ > #define PHY_CON12_VAL 0x10107F50 > #define CTRL_START (1 << 6) > #define CTRL_DLL_ON (1 << 5) > +#define CTRL_LOCK_COARSE_OFFSET (10) Don't need () > +#define CTRL_LOCK_COARSE_MASK (0x7F << CTRL_LOCK_COARSE_OFFSET) > +#define CTRL_LOCK_COARSE(x) (((x) & CTRL_LOCK_COARSE_MASK) >> \ > + CTRL_LOCK_COARSE_OFFSET) > #define CTRL_FORCE_MASK (0x7F << 8) > -#define CTRL_LOCK_COARSE_MASK (0x7F << 10) > #define CTRL_FINE_LOCKED 0x7 > > #define CTRL_OFFSETD_RESET_VAL 0x8 > diff --git a/arch/arm/include/asm/arch-exynos/dmc.h > b/arch/arm/include/asm/arch-exynos/dmc.h > index d78536d..ec3f9b6 100644 > --- a/arch/arm/include/asm/arch-exynos/dmc.h > +++ b/arch/arm/include/asm/arch-exynos/dmc.h > @@ -467,6 +467,9 @@ enum mem_manuf { > /* PHY_CON1 register fields */ > #define PHY_CON1_RDLVL_RDDATA_ADJ_SHIFT 0 > > +/* PHY_CON4 rgister fields */ > +#define PHY_CON10_CTRL_OFFSETR3 (1 << 24) > + > /* PHY_CON12 register fields */ > #define PHY_CON12_CTRL_START_POINT_SHIFT 24 > #define PHY_CON12_CTRL_INC_SHIFT 16 > diff --git a/arch/arm/include/asm/arch-exynos/power.h > b/arch/arm/include/asm/arch-exynos/power.h > index c9609a2..bcef43f 100644 > --- a/arch/arm/include/asm/arch-exynos/power.h > +++ b/arch/arm/include/asm/arch-exynos/power.h > @@ -906,8 +906,8 @@ struct exynos5420_power { > unsigned int sysip_dat3; > unsigned char res11[0xe0]; > unsigned int pmu_spare0; > - unsigned int pmu_spare1; > - unsigned int pmu_spare2; > + unsigned int pmu_spare1; /* Store PHY0_CON4 for read leveling */ > + unsigned int pmu_spare2; /* Store PHY1_CON4 for read leveling */ > unsigned int pmu_spare3; > unsigned char res12[0x4]; > unsigned int cg_status0; > -- > 1.8.3.2 > Regards, Simon _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot