Forum Discussion

Altera_Forum's avatar
Altera_Forum
Icon for Honored Contributor rankHonored Contributor
15 years ago

MMC SPI Core with FIFO

Hi,

MMC/SD card access using Altera's standard SPI IP core is quite slow because the core has only a one-byte Tx buffer and a one-byte Rx buffer, so the driver can send and receive only one byte at a time. A simple way to improve this is to add FIFO buffers.

Uploading the modified IP directly would conflict with Altera's copyright, so I will only show the idea behind the modification; if you need it, please apply the changes yourself.

First, open the SPI core 'mmc_spi.v' generated by SOPC Builder in your editor and change the declaration of the signal RRDY from a reg to a wire, like this:


  //reg              RRDY; 
  wire             RRDY;   
and add some signals like this:


  wire       tx_fifo_data_out;        
  wire       tx_fifo_count; 
  wire                 tx_fifo_empty; 
  wire                 tx_fifo_full; 
 
  wire       rx_fifo_data_out;        
  wire       rx_fifo_count; 
  wire                 rx_fifo_empty; 
  wire                 rx_fifo_full;
.

Also add the FIFO buffers somewhere in the SPI module, like this:


mmc_spi_tx_fifio the_mmc_spi_tx_fifio ( 
    .aclr    (~reset_n), 
    .clock    (clk), 
    .data    (data_from_cpu), 
    .rdreq    (write_shift_reg), 
    .sclr    (status_wr_strobe), 
    .wrreq    (write_tx_holding), 
    .empty    (tx_fifo_empty), 
    .full    (tx_fifo_full), 
    .q        (tx_fifo_data_out), 
    .usedw    (tx_fifo_count) 
    ); 
 
mmc_spi_rx_fifo the_mmc_spi_rx_fifo ( 
    .aclr    (~reset_n), 
    .clock    (clk), 
    .data    (shift_reg), 
    .rdreq    (data_rd_strobe), 
    .sclr    (status_wr_strobe), 
    .wrreq    ((state == 17) & (slowcount == 0)), 
    .empty    (rx_fifo_empty), 
    .full    (rx_fifo_full), 
    .q        (rx_fifo_data_out), 
    .usedw    (rx_fifo_count) 
    ); 
(the files for these buffers are included in the attached gz file) and modify the logic like this:


//  assign TMT = ~transmitting & ~tx_holding_primed; 
  assign TMT = ~transmitting & tx_fifo_empty; 
  assign p1_data_to_cpu = ((mem_addr == 2))? spi_status : 
    ((mem_addr == 3))? spi_control : 
    ((mem_addr == 6))? endofpacketvalue_reg : 
    ((mem_addr == 5))? spi_slave_select_reg : 
//    rx_holding_reg; 
    {rx_fifo_count & ~rx_fifo_empty, rx_fifo_data_out}; 
     
 
  // As long as there's an empty spot somewhere, 
  //it's safe to write data. 
//  assign TRDY = ~(transmitting & tx_holding_primed); 
  assign TRDY = ~tx_fifo_count; 
 
 
  // Enable write to shift register. 
//  assign write_shift_reg = tx_holding_primed & ~transmitting; 
  assign write_shift_reg = ~tx_fifo_empty & ~transmitting; 
 
  always @(posedge clk or negedge reset_n) 
    begin 
      if (reset_n == 0) 
        begin 
          shift_reg <= 0; 
          rx_holding_reg <= 0; 
          EOP <= 0; 
//          RRDY <= 0; 
          ROE <= 0; 
          TOE <= 0; 
          tx_holding_reg <= 0; 
          tx_holding_primed <= 0; 
          transmitting <= 0; 
          SCLK_reg <= 0; 
          MISO_reg <= 0; 
        end 
 
          if (write_shift_reg) 
            begin 
//              shift_reg <= tx_holding_reg; 
              shift_reg <= tx_fifo_data_out; 
              transmitting <= 1; 
            end 
          if (write_shift_reg & ~write_tx_holding) 
              // Clear tx_holding_primed 
              tx_holding_primed <= 0; 
 
//          if (data_rd_strobe) 
//              // On data read, clear the RRDY bit. 
//              RRDY <= 0; 
 
          if (status_wr_strobe) 
            begin 
              // On status write, clear all status bits (ignore the data). 
              EOP <= 0; 
 
//              RRDY <= 0; 
              ROE <= 0; 
              TOE <= 0; 
            end 
          if (slowclock) 
            begin 
              if (state == 17) 
                begin 
                  transmitting <= 0; 
//                  RRDY <= 1; 
                  rx_holding_reg <= shift_reg; 
                  SCLK_reg <= 0; 
//                  if (RRDY) 
                  if (rx_fifo_full) 
                      ROE <= 1; 
                end 
.

The register map is changed as follows:


//Register map: 
//addr      register                        type 
//0         rdata count | read data     r 
//1                          write data    w 
//2                          status        r/w 
//3                          control       r/w 
//4                   reserved 
//5                          slave-enable  r/w 
//6                    end-of-packet-value r/w
.

Note that the upper 8 bits of register 0 indicate the number of received data bytes.

This improved core can still be driven by Thomas Chou's standard driver, but to take full advantage of the FIFOs, the driver must be rewritten. Unfortunately, the driver must be shared with standard SPI cores (which have no FIFO buffers), so I added a new SPI mode, 'SPI_FIFO',


struct spi_device {
    struct device        dev;
    struct spi_master    *master;
    u32            max_speed_hz;
    u8            chip_select;
    u8            mode;
# define    SPI_CPHA    0x01            /* clock phase */
# define    SPI_CPOL    0x02            /* clock polarity */
# define    SPI_MODE_0    (0|0)            /* (original MicroWire) */
# define    SPI_MODE_1    (0|SPI_CPHA)
# define    SPI_MODE_2    (SPI_CPOL|0)
# define    SPI_MODE_3    (SPI_CPOL|SPI_CPHA)
# define    SPI_CS_HIGH    0x04            /* chipselect active high? */
# define    SPI_LSB_FIRST    0x08            /* per-word bits-on-wire */
# define    SPI_3WIRE    0x10            /* SI/SO signals shared */
# define    SPI_LOOP    0x20            /* loopback mode */
# define    SPI_FIFO    0x40            /* FIFO mode */
    u8            bits_per_word;
    int            irq;
in the file 'spi.h', and added code in the file 'mmc_spi.c' to set it. For details, please refer to the attached files.

With these improvements, the peak transfer rate on my NEEK is up to 60 times faster than with the standard core.

Kazu

2 Replies

  • Altera_Forum's avatar
    Altera_Forum
    Icon for Honored Contributor rankHonored Contributor

    Hi,

    I forgot to mention that the signal 'TRDY' is now treated as the threshold indicator of the Tx FIFO buffer, so the logic around 'TRDY' needs special attention. For example,

    
    //  assign readyfordata = TRDY; 
      assign readyfordata = ~tx_fifo_full; 
    //  assign write_tx_holding = data_wr_strobe & TRDY; 
      assign write_tx_holding = data_wr_strobe; 
    //          if (data_wr_strobe & ~TRDY) 
              if (data_wr_strobe & tx_fifo_full) 
                  // You wrote when I wasn't ready. 
                  TOE <= 1; 
    
    etc.

    There was also a bug in the driver 'altspi.c':

    # ifdef MMC_SPI_FIFO
        if (hw->mode == SPI_FIFO) { 
            if (hw->txd_count < hw->len) {
                if (spsta & ALTERA_SPI_STATUS_TRDY_MSK) {        
                    txd_limit = ((hw->len - hw->txd_count) > MMC_SPI_FIFO_DEPTH / 2) ? hw->txd_count + MMC_SPI_FIFO_DEPTH / 2 : hw->len;  
                    for (count = hw->txd_count; count < txd_limit; count++, hw->txd_count++)
                        writel(hw_txbyte(hw, count), hw->base + ALTERA_SPI_TXDATA);
                } else {
                    count = hw->txd_count++;   // <--- Here!
                    writel(hw_txbyte(hw, count), hw->base + ALTERA_SPI_TXDATA);
                }
            } else {
                if (hw->count == hw->len) {
                    complete(&hw->done);
                }
            }
        } else {
    
    Kazu
  • Altera_Forum's avatar
    Altera_Forum
    Icon for Honored Contributor rankHonored Contributor

    Thank you for the really informative post! Several years and versions of Quartus have gone by and I was hoping you might be willing to update the instructions for a current system?

    I created a project in Quartus and then a working Nios system in QSYS with a spi port. I then added the two files from your tgz into the submodules directory and edited my spi.v file as per your instructions to generate the following diff:

    
    @@ -85,3 +85,3 @@ module ft3_proto_qsys_fpga_spi0 (
       reg              ROE;
    -  reg              RRDY;
    +  wire              RRDY;
       wire             SCLK;
    @@ -138,2 +138,39 @@ module ft3_proto_qsys_fpga_spi0 (
       wire             write_tx_holding;
    +
    +  wire     tx_fifo_data_out;
    +  wire     tx_fifo_count;
    +  wire             tx_fifo_empty;
    +  wire             tx_fifo_full;
    +
    +  wire     rx_fifo_data_out;
    +  wire     rx_fifo_count;
    +  wire             rx_fifo_empty;
    +  wire             rx_fifo_full;
    +
    +  mmc_spi_tx_fifio the_mmc_spi_tx_fifio (
    +    .aclr     (~reset_n),
    +    .clock    (clk),
    +    .data     (data_from_cpu),
    +    .rdreq    (write_shift_reg),
    +    .sclr     (status_wr_strobe),
    +    .wrreq    (write_tx_holding),
    +    .empty    (tx_fifo_empty),
    +    .full     (tx_fifo_full),
    +    .q        (tx_fifo_data_out),
    +    .usedw    (tx_fifo_count)
    +    );
    +
    +mmc_spi_rx_fifo the_mmc_spi_rx_fifo (
    +    .aclr    (~reset_n),
    +    .clock    (clk),
    +    .data    (shift_reg),
    +    .rdreq    (data_rd_strobe),
    +    .sclr    (status_wr_strobe),
    +    .wrreq    ((state == 17) & (slowcount == 0)),
    +    .empty    (rx_fifo_empty),
    +    .full    (rx_fifo_full),
    +    .q        (rx_fifo_data_out),
    +    .usedw    (rx_fifo_count)
    +    );
    +
       //spi_control_port, which is an e_avalon_slave
    @@ -185,3 +222,4 @@ module ft3_proto_qsys_fpga_spi0 (
       assign endofpacketvalue_wr_strobe = wr_strobe & (mem_addr == 6);
    -  assign TMT = ~transmitting & ~tx_holding_primed;
    +//  assign TMT = ~transmitting & ~tx_holding_primed;
    +  assign TMT = ~transmitting & tx_fifo_empty;
       assign E = ROE | TOE;
    @@ -192,3 +230,4 @@ module ft3_proto_qsys_fpga_spi0 (
       // Ready to accept streaming data.
    -  assign readyfordata = TRDY;
    +//  assign readyfordata = TRDY;
    +  assign readyfordata = ~tx_fifo_full;
    @@ -286,3 +325,4 @@ module ft3_proto_qsys_fpga_spi0 (
         ((mem_addr == 5))? spi_slave_select_reg :
    -    rx_holding_reg;
    +//    rx_holding_reg;
    +    {rx_fifo_count & ~rx_fifo_empty, rx_fifo_data_out};
    @@ -324,9 +364,12 @@ module ft3_proto_qsys_fpga_spi0 (
       //it's safe to write data.
    -  assign TRDY = ~(transmitting & tx_holding_primed);
    +//  assign TRDY = ~(transmitting & tx_holding_primed);
    +  assign TRDY = ~tx_fifo_count;
       // Enable write to tx_holding_register.
    -  assign write_tx_holding = data_wr_strobe & TRDY;
    +//  assign write_tx_holding = data_wr_strobe & TRDY;
    +  assign write_tx_holding = data_wr_strobe;
       // Enable write to shift register.
    -  assign write_shift_reg = tx_holding_primed & ~transmitting;
    +//  assign write_shift_reg = tx_holding_primed & ~transmitting;
    +  assign write_shift_reg = ~tx_fifo_empty & ~transmitting;
    @@ -339,3 +382,3 @@ module ft3_proto_qsys_fpga_spi0 (
               EOP <= 0;
    -          RRDY <= 0;
    +//          RRDY <= 0;
               ROE <= 0;
    @@ -355,3 +398,4 @@ module ft3_proto_qsys_fpga_spi0 (
                 end
    -          if (data_wr_strobe & ~TRDY)
    +//          if (data_wr_strobe & ~TRDY)
    +          if (data_wr_strobe & tx_fifo_full)
                   // You wrote when I wasn't ready.
    @@ -364,3 +408,4 @@ module ft3_proto_qsys_fpga_spi0 (
                 begin
    -              shift_reg <= tx_holding_reg;
    +//              shift_reg <= tx_holding_reg;
    +              shift_reg <= tx_fifo_data_out;
                   transmitting <= 1;
    @@ -371,5 +416,5 @@ module ft3_proto_qsys_fpga_spi0 (
    -          if (data_rd_strobe)
    -              // On data read, clear the RRDY bit.
    -              RRDY <= 0;
    +//          if (data_rd_strobe)
    +//              // On data read, clear the RRDY bit.
    +//              RRDY <= 0;
    @@ -380,3 +425,3 @@ module ft3_proto_qsys_fpga_spi0 (
    -              RRDY <= 0;
    +//              RRDY <= 0;
                   ROE <= 0;
    @@ -389,6 +434,7 @@ module ft3_proto_qsys_fpga_spi0 (
                       transmitting <= 0;
    -                  RRDY <= 1;
    +//                  RRDY <= 1;
                       rx_holding_reg <= shift_reg;
                       SCLK_reg <= 0;
    -                  if (RRDY)
    +//                  if (RRDY)
    +                  if (rx_fifo_full)
                           ROE <= 1;
    

    I have two questions:

    1) All I have to do now is re-compile the main project in Quartus (16) and then use it?

    2) Is there a more modern/current fifo I should be using instead? I noticed that lpm_fifo is still supported for compatibility but isn't the normal wizard generated FIFO anymore.

    Thank you again,

    Hunter