Forum Discussion

K606's avatar
K606
Icon for Contributor rankContributor
22 days ago

Transceiver data corruption

I am trying to externally loopback a simple data-stream using the GTS on the Agilex 5, over an external QSFP loopback module.

The GTS is configured as follows:

External clock chip:

  • Outputs 156.25 MHz clock verified using an oscilloscope.

System PLL:

  • Outputs 125 MHz to the GTS.

GTS:

  • Basic PMA Direct
  • System PLL freq: 125 MHz
  • PMA speed: 1250 Mbps
  • PMA width: 10
  • TX/RX PLL/CDR: 156.25 MHz
  • TX/RX core interface FIFO: single width
  • TX/RX clock: System PLL clock /1

The RTL used to transfer data over TX:

// External-loopback test harness for the GTS Direct PHY instance inside gts_top.
// Generates a short repeating byte pattern, runs it through an 8b/10b encoder and
// drives the encoded 10-bit word onto the low bits of the 80-bit TX parallel bus.
// The RX parallel bus is only kept (noprune) so it can be observed with a logic
// analyzer; nothing in this module consumes it.
//
// Ports:
//   CPU_RESET_n              - active-low external reset
//   REFCLK                   - reference clock fed to the TX and RX clock bridges
//   gts_o_tx_serial_data(_n) - TX serial pair
//   gts_i_rx_serial_data(_n) - RX serial pair
module top(
    input  CPU_RESET_n,
    input  REFCLK,
    output gts_o_tx_serial_data,
    output gts_o_tx_serial_data_n,
    input  gts_i_rx_serial_data,
    input  gts_i_rx_serial_data_n
);

// ---------------------------------------------------------------------------
// Declarations. Every net is declared before its first use so that no
// identifier is ever resolved as an implicit net.
// ---------------------------------------------------------------------------

// reset sequencer signals
logic srcss_bank1_rs_grant_o;
logic srcss_bank1_rs_priority;
logic srcss_bank1_rc_rs_req;
logic srcss_bank1_pma_cu_clk_o;

// system pll signals
logic gts_systempll_refclk_rdy;

// gts
logic gts_pma_cu_clk_i;
logic gts_tx_reset, gts_rx_reset;
logic gts_tx_reset_ack, gts_rx_reset_ack;
logic gts_tx_ready, gts_rx_ready;
logic tx_coreclkin, rx_coreclkin;
(* noprune *) logic gts_tx_clkout, gts_rx_clkout;
logic gts_rs_grant_i;
logic gts_rc_rs_req_o;
(* noprune *) logic gts_tx_pll_locked                 /* synthesis keep */;
(* noprune *) logic gts_rx_is_lockedtodata            /* synthesis keep */;
(* noprune *) logic gts_rx_is_lockedtoref             /* synthesis keep */;
logic o_refclk2core;
(* noprune *) logic  [79:0] gts_i_tx_parallel_data    /* synthesis keep */;
(* noprune *) logic  [79:0] gts_o_rx_parallel_data    /* synthesis keep */;

// ---------------------------------------------------------------------------
// Static wiring
// ---------------------------------------------------------------------------

// The reset sequencer supplies the PMA CU clock and the grant back to the PHY;
// the PHY's request is forwarded to the reset sequencer.
assign gts_pma_cu_clk_i        = srcss_bank1_pma_cu_clk_o;
assign gts_rs_grant_i          = srcss_bank1_rs_grant_o;
assign srcss_bank1_rs_priority = '0;
assign srcss_bank1_rc_rs_req   = gts_rc_rs_req_o;

// Core-side clocks are looped back from the PHY's own clock outputs.
assign tx_coreclkin = gts_tx_clkout;
assign rx_coreclkin = gts_rx_clkout;

// Reference clock is reported as always ready.
assign gts_systempll_refclk_rdy = 1'b1;

// Drive the PHY datapath resets. Previously these nets had no driver at all,
// so the PHY reset inputs were left floating/undefined.
// NOTE(review): the PHY also exposes gts_tx_reset_ack / gts_rx_reset_ack. If the
// vendor reset protocol requires holding reset until the ack is seen, replace
// these assigns with a small handshake FSM - confirm against the user guide.
assign gts_tx_reset = ~CPU_RESET_n;
assign gts_rx_reset = ~CPU_RESET_n;

gts_top u0 (
      // gts
    // Both clock bridges take the module's REFCLK port. (The original connected
    // an undeclared name here, leaving REFCLK unused.)
    .gts_top_clock_bridge_rx_in_clk_clk                              (REFCLK),
    .gts_top_clock_bridge_tx_in_clk_clk                              (REFCLK),
    .intel_directphy_gts_0_i_pma_cu_clk_clk                          (gts_pma_cu_clk_i),
    .intel_directphy_gts_0_i_tx_reset_tx_reset                       (gts_tx_reset),
    .intel_directphy_gts_0_i_rx_reset_rx_reset                       (gts_rx_reset),
    .intel_directphy_gts_0_o_tx_reset_ack_tx_reset_ack               (gts_tx_reset_ack),
    .intel_directphy_gts_0_o_rx_reset_ack_rx_reset_ack               (gts_rx_reset_ack),
    .intel_directphy_gts_0_o_tx_ready_tx_ready                       (gts_tx_ready),
    .intel_directphy_gts_0_o_rx_ready_rx_ready                       (gts_rx_ready),
    .intel_directphy_gts_0_i_tx_coreclkin_clk                        (tx_coreclkin),
    .intel_directphy_gts_0_i_rx_coreclkin_clk                        (rx_coreclkin),
    .intel_directphy_gts_0_o_tx_clkout_clk                           (gts_tx_clkout),
    .intel_directphy_gts_0_o_rx_clkout_clk                           (gts_rx_clkout),
    .intel_directphy_gts_0_i_src_rs_grant_src_rs_grant               (gts_rs_grant_i),
    .intel_directphy_gts_0_o_src_rs_req_src_rs_req                   (gts_rc_rs_req_o),
    .intel_directphy_gts_0_o_tx_serial_data_o_tx_serial_data         (gts_o_tx_serial_data),
    .intel_directphy_gts_0_o_tx_serial_data_n_o_tx_serial_data_n     (gts_o_tx_serial_data_n),
    .intel_directphy_gts_0_i_rx_serial_data_i_rx_serial_data         (gts_i_rx_serial_data),
    .intel_directphy_gts_0_i_rx_serial_data_n_i_rx_serial_data_n     (gts_i_rx_serial_data_n),
    .intel_directphy_gts_0_o_tx_pll_locked_o_tx_pll_locked           (gts_tx_pll_locked),
    .intel_directphy_gts_0_o_rx_is_lockedtodata_o_rx_is_lockedtodata (gts_rx_is_lockedtodata),
    .intel_directphy_gts_0_o_rx_is_lockedtoref_o_rx_is_lockedtoref   (gts_rx_is_lockedtoref),
    .intel_directphy_gts_0_o_refclk2core_o_refclk2core               (o_refclk2core),
    .intel_directphy_gts_0_i_tx_parallel_data_i_tx_parallel_data     (gts_i_tx_parallel_data),
    .intel_directphy_gts_0_o_rx_parallel_data_o_rx_parallel_data     (gts_o_rx_parallel_data),

    // reset sequencer signals
    .intel_srcss_gts_0_o_src_rs_grant_src_rs_grant                   (srcss_bank1_rs_grant_o),
    .intel_srcss_gts_0_i_src_rs_priority_src_rs_priority             (srcss_bank1_rs_priority),
    .intel_srcss_gts_0_i_src_rs_req_src_rs_req                       (srcss_bank1_rc_rs_req),
    .intel_srcss_gts_0_o_pma_cu_clk_clk                              (srcss_bank1_pma_cu_clk_o),

    // system pll signals
    .intel_systemclk_gts_0_i_refclk_rdy_data                         (gts_systempll_refclk_rdy)
);

// synchronise the external reset into the gts_tx_clkout domain
logic gts_tx_system_reset;
altera_reset_synchronizer #(
    .ASYNC_RESET (1),
    .DEPTH       (2)
) gts_tx_rst_sync (
    .reset_in  (~CPU_RESET_n),
    .clk       (gts_tx_clkout),
    .reset_out (gts_tx_system_reset)
);

// generate test data stream:
// a free-running 8-bit counter; counts 0..3 emit 3C, 7F, 11, 07 and every
// other count emits 00, so the pattern repeats every 256 clocks.
logic [7:0] counter;
logic [7:0] test_stream;
always_ff @(posedge gts_tx_clkout or posedge gts_tx_system_reset) begin
    if (gts_tx_system_reset) begin
        counter       <= 8'b0;
        test_stream   <= 8'b0;
    end else begin
        counter <= counter + 1;
        case (counter)
            8'd0: test_stream <= 8'h3C;
            8'd1: test_stream <= 8'h7F;
            8'd2: test_stream <= 8'h11;
            8'd3: test_stream <= 8'h07;
            default: test_stream <= 8'h00;
        endcase
    end
end

// detect and transform idle data, and mark control symbols:
// a 00 byte is replaced by BC, and any byte matching one of the twelve listed
// values is flagged as a control symbol. Note that the test value 3C is itself
// in this list, so it is flagged as a control symbol as well.
logic [7:0] idle_data_transform;
logic control_symbol_detect;
always_comb begin
    idle_data_transform = (test_stream == 8'h00) ? 8'hBC : test_stream;
    control_symbol_detect = (idle_data_transform == 8'h1C) ||
                (idle_data_transform == 8'h3C) ||
                (idle_data_transform == 8'h5C) ||
                (idle_data_transform == 8'h7C) ||
                (idle_data_transform == 8'h9C) ||
                (idle_data_transform == 8'hBC) ||
                (idle_data_transform == 8'hDC) ||
                (idle_data_transform == 8'hFC) ||
                (idle_data_transform == 8'hF7) ||
                (idle_data_transform == 8'hFB) ||
                (idle_data_transform == 8'hFD) ||
                (idle_data_transform == 8'hFE);
end

// pipeline combinational logic to ensure timings are met
logic [7:0] idle_data_transform_r;
logic control_symbol_detect_r;
always_ff @ (posedge gts_tx_clkout or posedge gts_tx_system_reset) begin
    if(gts_tx_system_reset) begin
        idle_data_transform_r <= 8'b0;
        control_symbol_detect_r <= 1'b0;
    end else begin
        idle_data_transform_r <= idle_data_transform;
        control_symbol_detect_r <= control_symbol_detect;
    end
end

// --- 8b/10b Encoding ---
// https://libsv.readthedocs.io/en/latest/encoder_8b10b.html
// NOTE(review): o_code_err is captured in code_error but never checked here.
logic [9:0] encoded_out;
logic       code_error;
encoder_tx encoder_inst (
    .i_clk      (gts_tx_clkout),
    .i_reset_n  (~gts_tx_system_reset),
    .i_en       (1'b1),
    .i_8b       (idle_data_transform_r),
    .i_ctrl     (control_symbol_detect_r),
    .o_10b      (encoded_out),
    .o_code_err (code_error)
);

// pipeline encoded outputs to ensure timing is met
logic [9:0] encoded_out_r;
always_ff @(posedge gts_tx_clkout or posedge gts_tx_system_reset) begin
      if (gts_tx_system_reset) encoded_out_r <= 10'b0;
      else encoded_out_r <= encoded_out;
end

// send data over TX:
// once the PHY reports tx_ready and the TX PLL is locked (registered one cycle),
// drive the 80-bit bus with bit 79 = 1, bit 38 = 1, bits [9:0] = encoded word
// and every other bit 0; otherwise drive all zeros.
// NOTE(review): bits 79 and 38 are presumably the interface's write-enable /
// data-valid controls for this PMA width - confirm against the PHY user guide.
logic data_path_rdy_tx;
always_ff @(posedge gts_tx_clkout or posedge gts_tx_system_reset) begin
    if (gts_tx_system_reset) begin
      gts_i_tx_parallel_data <= 80'b0;
      data_path_rdy_tx       <= 0;
    end
    else begin
      data_path_rdy_tx  <= gts_tx_ready && gts_tx_pll_locked;
      if (data_path_rdy_tx)
            gts_i_tx_parallel_data <= {1'b1, 39'b0, 1'b0, 1'b1, 28'b0, encoded_out_r};
      else
            gts_i_tx_parallel_data <= 80'b0;
    end
end

endmodule

This RTL passes timing standalone, but when Signal Tap is added, it produces timing warnings.

In Signal Tap I take the following measurements:

Instance TX:

  • data: gts_i_tx_parallel_data[79:0]
  • clock domain: gts_tx_clkout

Instance RX:

  • data: gts_o_rx_parallel_data[79:0]
  • clock domain: gts_rx_clkout

The issue I am seeing is intermittent failures upon bitstream reconfiguration:

On the TX side, after the encoder has encoded, the TX data reads as follows:

  • (EXPECTED): ... 283, 17C, 283, 17C, 183, 335, 0B1, 347, 283, 17C, 283, 17C, ...

This is the expected pattern on the RX side (post-framing)

However, in my experiments so far, I have found that it only sometimes works:

Here are the framing results after 5 different re-flashes:

  1. (FAILURE): ... 283, 17C, 283, 17C, 383, 135, 0B1, 347, 083, 37C, 283, 17C, ...
  2. (FAILURE): ... 283, 17C, 283, 17C, 383, 135, 0B1, 347, 083, 37C, 283, 17C, ...
  3. (FAILURE): ... 283, 17C, 283, 17D, 183, 335, 0B1, 346, 283, 17C, 283, 17C, ...
  4. (SUCCESS): ... 283, 17C, 283, 17C, 183, 335, 0B1, 347, 283, 17C, 283, 17C, ...
  5. (FAILURE): ... 283, 17C, 283, 175, 1B1, 307, 083, 37C, 283, 17C, 283, 17C, ...

If anyone has any idea of what else to try, it would be much appreciated!

3 Replies

  • CheepinC_altera's avatar
    CheepinC_altera
    Icon for Regular Contributor rankRegular Contributor

    Hi,

     

    Thank you for your question regarding implementing a simple test design with the Agilex 5 GTS transceiver.

     

    To help you get started quickly, I recommend referring to the IP-generated example design described in the GTS Transceiver PHY User Guide under the section “GTS PMA/FEC Direct PHY IP Example Design.” You can select an example design that is closest to your target configuration, verify its functionality, and then proceed with customizing it to meet your specific requirements. Generally the example designs support simulation and hardware.

     

    Please let me know if you have any further questions or need additional clarification. Thank you.

    • K606's avatar
      K606
      Icon for Contributor rankContributor

      Hi CheepinC_altera​,

      Thanks for your note! I have indeed looked into this already.

      As far as I can tell, I think it must be an issue with the reset sequence, as the data does seem to loop back correctly every so often. This may indicate an issue with the CDR lock, right? Especially as Signal Tap reports that the data on the TX parallel bus is as expected each time.

      I am hoping someone might notice what I have done differently enough from the reference designs to cause this intermittent issue.
      I have combed through both my design above and the reference design a few times and seem to keep missing whatever the key difference is.

      Many thanks

      • CheepinC_altera's avatar
        CheepinC_altera
        Icon for Regular Contributor rankRegular Contributor

        Hi,

         

        One possible cause of intermittent bit errors could be related to signal integrity issues. Have you had a chance to enable serial loopback and check if the issue persists? This step can help isolate whether the problem is due to signal integrity or something else.