Attached is a proposed bulk write optimization for mips_m4k file transfers. The motivation was to speed up image loading on the mips_m4k target so that its transfer times are similar to those of the xscale target.
Files modified: M 2399 src/target/mips32.h M 2399 src/target/mips_ejtag.c M 2399 src/target/mips32_pracc.c M 2399 src/target/mips_ejtag.h M 2399 src/target/mips32_pracc.h M 2399 src/target/mips_m4k.c Summary: Bulk transfers are currently done with word accesses using the PrAcc data register. This optimization uses a working area, if available, to load a download routine into RAM. The download routine uses EJTAG FASTDATA transfers to transfer the data. This greatly decreases load times because: - code fetches no longer go through the PrAcc register; the code executes in RAM. - the stack is also in the working area. - using only the FASTDATA register minimizes JTAG access. - the call to jtag_execute_queue() is made only after building the entire buffer transfer queue. The idea is taken from the xscale debug_handler, which is loaded into a mini instruction cache. The mips_m4k does not have a convenient place to load a download routine like the xscale's mini IC. Instead, this patch has been tested with a working area in system RAM. This requires that the external memory controller be initialized, most commonly in a board script. The alternative is to lock enough ways in the instruction cache to make an internal code area, but the system RAM approach is simpler and allows full use of the cache for the program being debugged. If a working area is not defined, the code will work as before; bulk writes will issue a series of word writes that use the PrAcc data register. This change affects only bulk memory writes; byte, short and word accesses still use the PrAcc data register. There is support for FASTDATA bulk reads. However, there is currently no bulk_read_memory equivalent of bulk_write_memory. I have probably missed some problems with this approach or its methods. Still, this code builds and runs, and the result is that file transfer times decrease dramatically. 
> load_image boot-ram.bin 0xa0500000 mips32_pracc_fastdata_xfer using 0xa0001000 for write handler 253424 byte written at address 0xa0500000 downloaded 253424 byte in 3.110119s - David
Index: src/target/mips32.h =================================================================== --- src/target/mips32.h (revision 2399) +++ src/target/mips32.h (working copy) @@ -78,6 +78,7 @@ #define MIPS32_OP_ADDI 0x08 #define MIPS32_OP_AND 0x24 #define MIPS32_OP_COP0 0x10 +#define MIPS32_OP_JR 0x08 #define MIPS32_OP_LUI 0x0F #define MIPS32_OP_LW 0x23 #define MIPS32_OP_LBU 0x24 @@ -104,6 +105,7 @@ #define MIPS32_B(off) MIPS32_BEQ(0, 0, off) #define MIPS32_BEQ(src,tar,off) MIPS32_I_INST(MIPS32_OP_BEQ, src, tar, off) #define MIPS32_BNE(src,tar,off) MIPS32_I_INST(MIPS32_OP_BNE, src, tar, off) +#define MIPS32_JR(reg) MIPS32_R_INST(0, reg, 0, 0, 0, MIPS32_OP_JR) #define MIPS32_MFC0(gpr, cpr, sel) MIPS32_R_INST(MIPS32_OP_COP0, MIPS32_COP0_MF, gpr, cpr, 0, sel) #define MIPS32_MTC0(gpr,cpr, sel) MIPS32_R_INST(MIPS32_OP_COP0, MIPS32_COP0_MT, gpr, cpr, 0, sel) #define MIPS32_LBU(reg, off, base) MIPS32_I_INST(MIPS32_OP_LBU, base, reg, off) Index: src/target/mips_ejtag.c =================================================================== --- src/target/mips_ejtag.c (revision 2399) +++ src/target/mips_ejtag.c (working copy) @@ -4,6 +4,8 @@ * * * Copyright (C) 2008 by David T.L. Wong * * * + * Copyright (C) 2009 by David N. 
Claffey <dnclaf...@gmail.com> * + * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * @@ -146,6 +148,46 @@ return ERROR_OK; } +int mips_ejtag_fastdata_scan(mips_ejtag_t *ejtag_info, int write, uint32_t *data) +{ + jtag_tap_t *tap; + tap = ejtag_info->tap; + + if (tap==NULL) + return ERROR_FAIL; + + scan_field_t fields[2]; + uint8_t spracc = 0; + uint8_t t[4] = { 0,0,0,0 }; + + /* fastdata 1-bit register */ + fields[0].tap = tap; + fields[0].num_bits = 1; + fields[0].out_value = &spracc; + fields[0].in_value = NULL; + + /* processor access data register 32 bit */ + fields[1].tap = tap; + fields[1].num_bits = 32; + fields[1].out_value = t; + + if (write) + { + fields[1].in_value = NULL; + buf_set_u32(t, 0, 32, *data); + } + else + { + fields[1].in_value = (uint8_t *) data; + } + + jtag_add_dr_scan(2, fields, jtag_get_end_state()); + + keep_alive(); + + return ERROR_OK; +} + int mips_ejtag_step_enable(mips_ejtag_t *ejtag_info) { uint32_t code[] = { Index: src/target/mips32_pracc.c =================================================================== --- src/target/mips32_pracc.c (revision 2399) +++ src/target/mips32_pracc.c (working copy) @@ -4,6 +4,8 @@ * * * Copyright (C) 2008 by David T.L. Wong * * * + * Copyright (C) 2009 by David N. Claffey <dnclaf...@gmail.com> * + * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * @@ -774,6 +776,153 @@ return retval; } +/* fastdata upload/download requires an initialized working area + to load the download code; it should not be called otherwise + fetch order from the fastdata area + 1. start addr + 2. end addr + 3. data ... 
+ */ +int mips32_pracc_fastdata_xfer( mips_ejtag_t *ejtag_info, struct working_area_s *source, + int write, uint32_t addr, int count, uint32_t *buf) +{ + uint32_t handler_code[] = { + /* caution when editing, table is modified below */ + /* r15 points to the start of this code */ + MIPS32_SW(8,MIPS32_FASTDATA_HANDLER_SIZE - 4,15), + MIPS32_SW(9,MIPS32_FASTDATA_HANDLER_SIZE - 8,15), + MIPS32_SW(10,MIPS32_FASTDATA_HANDLER_SIZE - 12,15), + MIPS32_SW(11,MIPS32_FASTDATA_HANDLER_SIZE - 16,15), + /* start of fastdata area in t0 */ + MIPS32_LUI(8,UPPER16(MIPS32_PRACC_FASTDATA_AREA)), + MIPS32_ORI(8,8,LOWER16(MIPS32_PRACC_FASTDATA_AREA)), + MIPS32_LW(9,0,8), /* start addr in t1 */ + MIPS32_LW(10,0,8), /* end addr to t2 */ +//loop: + /* 8 */ MIPS32_LW(11,0,0), /* lw t3,[t8 | r9] */ + /* 9 */ MIPS32_SW(11,0,0), /* sw t3,[r9 | r8] */ + MIPS32_BNE(10,9,NEG16(3)), /* bne $t2,t1,loop */ + MIPS32_ADDI(9,9,4), /* addi t1,t1,4 */ + + MIPS32_LW(8,MIPS32_FASTDATA_HANDLER_SIZE - 4,15), + MIPS32_LW(9,MIPS32_FASTDATA_HANDLER_SIZE - 8,15), + MIPS32_LW(10,MIPS32_FASTDATA_HANDLER_SIZE - 12,15), + MIPS32_LW(11,MIPS32_FASTDATA_HANDLER_SIZE - 16,15), + + MIPS32_LUI(15,UPPER16(MIPS32_PRACC_TEXT)), + MIPS32_ORI(15,15,LOWER16(MIPS32_PRACC_TEXT)), + MIPS32_JR(15), /* jr start */ + MIPS32_MFC0(15,31,0), /* move COP0 DeSave to $15 */ + MIPS32_NOP, + }; + + uint32_t jmp_code[] = { + MIPS32_MTC0(15,31,0), /* move $15 to COP0 DeSave */ + /* 1 */ MIPS32_LUI(15,0), /* addr of working area added below */ + /* 2 */ MIPS32_ORI(15,15,0), /* addr of working area added below */ + MIPS32_JR(15), /* jump to ram program */ + MIPS32_NOP, + }; + +#define JMP_CODE_SIZE (sizeof(jmp_code)/sizeof(jmp_code[0])) +#define HANDLER_CODE_SIZE sizeof(handler_code)/sizeof(handler_code[0]) + + int retval, i; + uint32_t val, ejtag_ctrl, address; + + if(source->size < MIPS32_FASTDATA_HANDLER_SIZE) + return ERROR_TARGET_RESOURCE_NOT_AVAILABLE; + + if(write) + { + handler_code[8] = MIPS32_LW(11,0,8); /* load data from probe at 
fastdata area */ + handler_code[9] = MIPS32_SW(11,0,9); /* store data to RAM @ r9 */ + } + else + { + handler_code[8] = MIPS32_LW(11,0,9); /* load data from RAM @ r9 */ + handler_code[9] = MIPS32_SW(11,0,8); /* store data to probe at fastdata area */ + } + + /* write program into RAM */ + mips32_pracc_write_mem32(ejtag_info, source->address, HANDLER_CODE_SIZE, handler_code); + + /* quick verify RAM is working */ + mips32_pracc_read_u32(ejtag_info, source->address, &val); + if(val != handler_code[0]) + { + LOG_ERROR("fastdata handler verify failed\n"); + return ERROR_TARGET_RESOURCE_NOT_AVAILABLE; + } + + LOG_INFO("%s using 0x%.8x for write handler\n", __func__, source->address); + + jmp_code[1] |= UPPER16(source->address); + jmp_code[2] |= LOWER16(source->address); + + for (i = 0; i < (int) JMP_CODE_SIZE; i++) + { + if ((retval = wait_for_pracc_rw(ejtag_info, &ejtag_ctrl)) != ERROR_OK) + return retval; + + mips_ejtag_set_instr(ejtag_info, EJTAG_INST_DATA, NULL); + mips_ejtag_drscan_32(ejtag_info, &jmp_code[i]); + + /* Clear the access pending bit (let the processor eat!) 
*/ + + ejtag_ctrl = ejtag_info->ejtag_ctrl & ~EJTAG_CTRL_PRACC; + mips_ejtag_set_instr(ejtag_info, EJTAG_INST_CONTROL, NULL); + mips_ejtag_drscan_32(ejtag_info, &ejtag_ctrl); + + } + + if ((retval = wait_for_pracc_rw(ejtag_info, &ejtag_ctrl)) != ERROR_OK) + return retval; + + /* next fetch to dmseg should be in FASTDATA_AREA, check */ + address = 0; + mips_ejtag_set_instr(ejtag_info, EJTAG_INST_ADDRESS, NULL); + mips_ejtag_drscan_32(ejtag_info, &address); + + if( address != MIPS32_PRACC_FASTDATA_AREA) + return ERROR_FAIL; + + /* Send the load start address */ + val = addr; + mips_ejtag_set_instr(ejtag_info, EJTAG_INST_FASTDATA, NULL); + mips_ejtag_fastdata_scan(ejtag_info, 1, &val); + + /* Send the load end address */ + val = addr + (count - 1)*4; + mips_ejtag_set_instr(ejtag_info, EJTAG_INST_FASTDATA, NULL); + mips_ejtag_fastdata_scan(ejtag_info, 1, &val); + + for(i = 0; i < count; i++) + { + /* Send the data out using fastdata (clears the access pending bit) */ + if ((retval = mips_ejtag_fastdata_scan(ejtag_info, write, buf++)) != ERROR_OK) + return retval; + } + + if ((retval = jtag_execute_queue()) != ERROR_OK) + { + LOG_ERROR("fastdata load failed"); + return retval; + } + + if ((retval = wait_for_pracc_rw(ejtag_info, &ejtag_ctrl)) != ERROR_OK) + return retval; + + address = 0; + mips_ejtag_set_instr(ejtag_info, EJTAG_INST_ADDRESS, NULL); + mips_ejtag_drscan_32(ejtag_info, &address); + + if(address != MIPS32_PRACC_TEXT) + LOG_ERROR("mini program did not return to start\n"); + + return retval; +} + int mips32_pracc_write_regs(mips_ejtag_t *ejtag_info, uint32_t *regs) { uint32_t code[] = { Index: src/target/mips_ejtag.h =================================================================== --- src/target/mips_ejtag.h (revision 2399) +++ src/target/mips_ejtag.h (working copy) @@ -112,6 +112,7 @@ extern int mips_ejtag_get_impcode(mips_ejtag_t *ejtag_info, uint32_t *impcode); extern int mips_ejtag_get_idcode(mips_ejtag_t *ejtag_info, uint32_t *idcode); extern int 
mips_ejtag_drscan_32(mips_ejtag_t *ejtag_info, uint32_t *data); +extern int mips_ejtag_fastdata_scan(mips_ejtag_t *ejtag_info, int write, uint32_t *data); extern int mips_ejtag_init(mips_ejtag_t *ejtag_info); extern int mips_ejtag_config_step(mips_ejtag_t *ejtag_info, int enable_step); Index: src/target/mips32_pracc.h =================================================================== --- src/target/mips32_pracc.h (revision 2399) +++ src/target/mips32_pracc.h (working copy) @@ -24,6 +24,8 @@ #include "mips_ejtag.h" +#define MIPS32_PRACC_FASTDATA_AREA 0xFF200000 +#define MIPS32_PRACC_FASTDATA_SIZE 16 #define MIPS32_PRACC_TEXT 0xFF200200 //#define MIPS32_PRACC_STACK 0xFF2FFFFC #define MIPS32_PRACC_STACK 0xFF204000 @@ -32,6 +34,7 @@ #define MIPS32_PRACC_PARAM_OUT (MIPS32_PRACC_PARAM_IN + MIPS32_PRACC_PARAM_IN_SIZE) #define MIPS32_PRACC_PARAM_OUT_SIZE 0x1000 +#define MIPS32_FASTDATA_HANDLER_SIZE 0x80 #define UPPER16(uint32_t) (uint32_t >> 16) #define LOWER16(uint32_t) (uint32_t & 0xFFFF) #define NEG16(v) (((~(v)) + 1) & 0xFFFF) @@ -39,6 +42,8 @@ extern int mips32_pracc_read_mem(mips_ejtag_t *ejtag_info, uint32_t addr, int size, int count, void *buf); extern int mips32_pracc_write_mem(mips_ejtag_t *ejtag_info, uint32_t addr, int size, int count, void *buf); +extern int mips32_pracc_fastdata_xfer( mips_ejtag_t *ejtag_info, struct working_area_s *source, + int write, uint32_t addr, int count, uint32_t *buf); extern int mips32_pracc_read_mem8(mips_ejtag_t *ejtag_info, uint32_t addr, int count, uint8_t *buf); extern int mips32_pracc_read_mem16(mips_ejtag_t *ejtag_info, uint32_t addr, int count, uint16_t *buf); Index: src/target/mips_m4k.c =================================================================== --- src/target/mips_m4k.c (revision 2399) +++ src/target/mips_m4k.c (working copy) @@ -4,6 +4,8 @@ * * * Copyright (C) 2008 by David T.L. Wong * * * + * Copyright (C) 2009 by David N. 
Claffey <dnclaf...@gmail.com> * + * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * @@ -906,7 +908,49 @@ int mips_m4k_bulk_write_memory(target_t *target, uint32_t address, uint32_t count, uint8_t *buffer) { - return mips_m4k_write_memory(target, address, 4, count, buffer); + mips32_common_t *mips32 = target->arch_info; + mips_ejtag_t *ejtag_info = &mips32->ejtag_info; + struct working_area_s *source; + int retval; + int write = 1; + + LOG_DEBUG("address: 0x%8.8x, count: 0x%8.8x", address, count); + + if (target->state != TARGET_HALTED) + { + LOG_WARNING("target not halted"); + return ERROR_TARGET_NOT_HALTED; + } + + /* check alignment */ + if (address & 0x3u) + return ERROR_TARGET_UNALIGNED_ACCESS; + + /* Get memory for block write handler */ + retval = target_alloc_working_area(target, MIPS32_FASTDATA_HANDLER_SIZE, &source); + if (retval != ERROR_OK) + { + LOG_WARNING("No working area available, falling back to non-bulk write"); + return mips_m4k_write_memory(target, address, 4, count, buffer); + }; + + /* TAP data register is loaded LSB first (little endian) */ + if (target->endianness == TARGET_BIG_ENDIAN) + { + uint32_t i, t32; + for(i = 0; i < (count*4); i+=4) + { + t32 = be_to_h_u32((uint8_t *) &buffer[i]); + h_u32_to_le(&buffer[i], t32); + } + } + + retval = mips32_pracc_fastdata_xfer(ejtag_info, source, write, address, count, (uint32_t *) buffer); + + if (source) + target_free_working_area(target, source); + + return retval; } int mips_m4k_checksum_memory(target_t *target, uint32_t address, uint32_t size, uint32_t *checksum)
_______________________________________________ Openocd-development mailing list Openocd-development@lists.berlios.de https://lists.berlios.de/mailman/listinfo/openocd-development