0%

FPGA数字信号处理之FIR滤波器

本节主要介绍了FIR滤波器的并行与串行verilog实现,及其量化方法。

FIR滤波器设计原理


FIR滤波器的硬件实现

1.基于累加器的FIR滤波器设计

  • 以4阶FIR滤波器为例,采用FPGA实现累加器组成的FIR滤波器电路,有:
    $$
    \begin{aligned}
    y(n) &= x(n) + x(n-1) + x(n-2) + x(n-3) + x(n-4) \\
    h(n) &= \{1,1,1,1,1\}
    \end{aligned}
    $$

  • 其中,输入信号为$x(t)=\sin(40\pi t) + \sin(4\pi t)$,两个分量的频率分别为$20\,\mathrm{Hz}$和$2\,\mathrm{Hz}$;设采样频率为$f_s = 100\,\mathrm{Hz}$,它同时也是FPGA的系统时钟频率

  • FPGA设计的系统框图如下:

    image-20231120181039172

2.源文件

  • accumulator_fir.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    // Five-tap moving-sum FIR filter (every coefficient equals 1):
    //   yout(n) = xin(n) + xin(n-1) + xin(n-2) + xin(n-3) + xin(n-4)
    module accumulator_fir(
        input                clk,   // system clock (100 Hz per the original note); one sample per rising edge
        input  signed [8:0]  xin,   // current input sample
        output signed [11:0] yout   // filtered output
    );

        // Four-stage delay line: dN holds the input sample from N clock cycles ago.
        reg signed [8:0] d1, d2, d3, d4;

        // Shift the whole delay line by one stage on every rising clock edge.
        always @(posedge clk)
            {d4, d3, d2, d1} <= {d3, d2, d1, xin};

        // Add the current sample to the four delayed samples. All operands are
        // signed, so each one is sign-extended to the 12-bit width of yout first.
        assign yout = xin + d1 + d2 + d3 + d4;

    endmodule
  • fir_test_data.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    // Test-signal source for the accumulator FIR demo: registers the sum of the
    // outputs of two DDS cores (a 2 Hz and a 20 Hz sine per the original notes).
    module fir_test_data(
        input                   clk,
        output                  out_valid_sin2Hz,   // tvalid of the 2 Hz DDS
        output                  out_valid_sin20Hz,  // tvalid of the 20 Hz DDS
        output reg signed [8:0] dout                // registered sum of both sines
    );

        // Both DDS outputs must be declared signed (or have their top bit extended
        // by hand) so that the addition below sign-extends them correctly.
        wire signed [7:0] sine_hi;  // 20 Hz component
        wire signed [7:0] sine_lo;  // 2 Hz component

        // 2 Hz sine generator.
        // NOTE(review): presumably 1311 / 2^16 * 100 Hz ~= 2 Hz, which assumes a
        // 16-bit phase accumulator in the DDS IP -- confirm against the IP settings.
        dds_compiler_0 u1(
            .m_axis_data_tdata   (sine_lo),
            .m_axis_data_tvalid  (out_valid_sin2Hz),
            .s_axis_config_tdata (16'd1311),
            .s_axis_config_tvalid(1'b1),
            .aclk                (clk)
        );

        // 20 Hz sine generator (presumably 13107 / 2^16 * 100 Hz ~= 20 Hz; same assumption).
        dds_compiler_0 u2(
            .m_axis_data_tdata   (sine_hi),
            .m_axis_data_tvalid  (out_valid_sin20Hz),
            .s_axis_config_tdata (16'd13107),
            .s_axis_config_tvalid(1'b1),
            .aclk                (clk)
        );

        // Two 8-bit signed operands summed into a 9-bit register cannot overflow.
        always @(posedge clk)
            dout <= sine_lo + sine_hi;

    endmodule
  • FIR_accumulator_design.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    // Top level of the accumulator FIR demo: wires the two-tone test-signal
    // generator into the moving-sum FIR filter.
    module FIR_accumulator_design(
        input                clk,
        output               out_valid_sin2Hz,
        output               out_valid_sin20Hz,
        output signed [11:0] yout
    );

        // Two-tone test signal travelling from the generator to the filter.
        wire signed [8:0] mixed_tone;

        // Generator of the superimposed two-frequency signal.
        fir_test_data u1(
            .dout             (mixed_tone),
            .out_valid_sin20Hz(out_valid_sin20Hz),
            .out_valid_sin2Hz (out_valid_sin2Hz),
            .clk              (clk)
        );

        // FIR filter built from an accumulator (sum of the last five samples).
        accumulator_fir u2(
            .yout(yout),
            .xin (mixed_tone),
            .clk (clk)
        );

    endmodule

3.Testbench

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
`timescale 1ms / 1ms

// Testbench for FIR_accumulator_design. It only supplies the clock; the
// stimulus is generated inside the design under test.
module FIR_accumulator_tb;

    wire signed [11:0] yout;
    wire               out_valid_sin20Hz;
    wire               out_valid_sin2Hz;
    reg                clk_100Hz;

    // Start the clock low.
    initial begin
        clk_100Hz = 0;
        #100;
    end

    // With a 1 ms time unit, toggling every 5 units gives a 10 ms period (100 Hz).
    always begin
        #5 clk_100Hz <= ~clk_100Hz;
    end

    FIR_accumulator_design u1(
        .yout             (yout),
        .out_valid_sin20Hz(out_valid_sin20Hz),
        .out_valid_sin2Hz (out_valid_sin2Hz),
        .clk              (clk_100Hz)
    );

endmodule
  • 仿真结果:

    Snipaste_2023-11-20_17-51-29


FIR系数的量化方法

1.量化原则

  • 归一化处理(除以系数的最大绝对值)$\rightarrow$ 乘以 $2^{B-1}-1$ $\rightarrow$ 四舍五入取整(实际设计可能稍有出入:不除以最大绝对值,方便后期还原,详情见IP核那一节)
    • 若量化位宽为$B$ bit,那么乘以$2^{B-1}-1$的原因是:$B$ bit有符号数的范围是$-2^{B-1}\sim 2^{B-1}-1$,归一化后绝对值最大的系数恰好映射到最大正整数$2^{B-1}-1$
  • 输出与输入相比,需要增加的位宽:所有系数的绝对值之和$<2^{B_{pad}}$,$B_{pad}$为增加的位宽

2.MATLAB代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
% ---- Filter specification ----
fs=2000;   % sampling frequency in Hz
fc=200;    % low-pass cutoff frequency in Hz handed to fir1
           % NOTE(review): the original note calls this the 3 dB cutoff, but fir1
           % documents its cutoff as the -6 dB point -- confirm which is intended.
N=16;      % filter length (number of taps)

% Blackman window, transposed to a row vector
window=blackman(N)';

% Design the FIR filter with fir1: order N-1, cutoff normalised to fs/2
b=fir1(N-1,fc*2/fs,window);

% ---- Coefficient quantisation ----
% Normalise by the largest absolute coefficient, then scale to the largest
% positive integer of a B-bit signed number. Rounding is done separately so
% that the unrounded values stay available in Q_h, as in the original script.
scale_to_bits=@(B) b/max(abs(b))*(2^(B-1)-1);

B=8;                    % 8-bit quantisation
Q_h=scale_to_bits(B);
Q_h8=round(Q_h);

B=12;                   % 12-bit quantisation
Q_h=scale_to_bits(B);
Q_h12=round(Q_h)        % no semicolon: print the quantised coefficients

% Sum of absolute coefficient values (printed); it determines the bit growth
% of the filter output relative to the input.
abs_sum=sum(abs(Q_h12))

% ---- Magnitude responses in dB from a 1024-point FFT ----
mag_dB=@(h) 20*log10(abs(fft(h,1024)));
m_b=mag_dB(b);
m_8=mag_dB(Q_h8);
m_12=mag_dB(Q_h12);

% Normalise every response so that its peak sits at 0 dB
m_b=m_b-max(m_b);
m_8=m_8-max(m_8);
m_12=m_12-max(m_12);

% Frequency axis in Hz covering 0 .. fs/2
x_f=[0:(fs/length(m_b)):fs/2];

% Keep only the non-negative-frequency half of each response
keep=1:length(x_f);
mb=m_b(keep);
m8=m_8(keep);
m12=m_12(keep);

% Plot the three magnitude responses on shared axes
plot(x_f,mb,'-',x_f,m8,'--',x_f,m12,'-.');
xlabel('频率(Hz)');
ylabel('幅度(dB)');
legend('未量化','8bit量化','12bit量化');
grid on;
  • 不同量化位宽对应$h(n)$幅频特性的比较:

    untitled


并行FIR滤波的硬件实现

1.基于并行FIR滤波器的设计

  • 由于经matlab中fir1函数设计的滤波器都具有线性相位,$h(n)$的系数是对称的,即$h(k)=h(N-1-k)$($N$为滤波器长度,$k=0,1,\dots,N-1$),因此可以先把对称位置上的两个输入数据相加,再与系数相乘,乘法器数量减半。对于此对称结构,有如下实现框图:

    image-20231120213503803
  • 全并行处理:时钟频率与数据速率相同

2.源文件

  • FirParallel.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    // Fully parallel, 16-tap, symmetric-coefficient (linear-phase) FIR low-pass filter.
    //
    // One input sample is accepted and one output sample is produced on every
    // rising edge of clk. Because h(k) = h(15-k), samples at mirrored tap
    // positions are added first, so only 8 multipliers are needed.
    //
    // Pipeline: shift register -> pre-adders -> 8 x mult IP -> two partial sums -> Yout.
    module FirParallel(
        input                    clk,   // system clock (2000 Hz per the original note)
        input      signed [7:0]  Xin,   // input sample
        output reg signed [21:0] Yout   // filtered output
    );

        integer k, m;

        // 16-deep input shift register; Xin_Reg[0] is the newest sample.
        reg signed [7:0] Xin_Reg [15:0];
        always @(posedge clk) begin
            Xin_Reg[0] <= Xin;
            for (k = 1; k < 16; k = k + 1)
                Xin_Reg[k] <= Xin_Reg[k-1];
        end

        // Eight pre-adders combine the samples that share a coefficient.
        // Adding two 8-bit signed values needs 9 bits.
        // Non-blocking assignments are required here: the multipliers sample
        // Xin_Add on the same clock edge, and a blocking assignment would make
        // the value they see depend on simulator event ordering.
        reg signed [8:0] Xin_Add [7:0];
        always @(posedge clk) begin
            for (m = 0; m < 8; m = m + 1)
                Xin_Add[m] <= Xin_Reg[m] + Xin_Reg[15-m];
        end

        // Eight instances of the signed multiplier IP core "mult"
        // (2 pipeline stages per the original note). B carries the 12-bit
        // quantised coefficients c0..c7.
        wire signed [20:0] Mout [7:0];
        mult u0 (
            .CLK (clk),
            .A   (Xin_Add[0]),
            .B   (12'sd0),
            .P   (Mout[0]));

        mult u1 (
            .CLK (clk),
            .A   (Xin_Add[1]),
            .B   (-12'sd7),
            .P   (Mout[1]));

        mult u2 (
            .CLK (clk),
            .A   (Xin_Add[2]),
            .B   (-12'sd15),
            .P   (Mout[2]));

        mult u3 (
            .CLK (clk),
            .A   (Xin_Add[3]),
            .B   (12'sd46),
            .P   (Mout[3]));

        mult u4 (
            .CLK (clk),
            .A   (Xin_Add[4]),
            .B   (12'sd307),
            .P   (Mout[4]));

        mult u5 (
            .CLK (clk),
            .A   (Xin_Add[5]),
            .B   (12'sd850),
            .P   (Mout[5]));

        mult u6 (
            .CLK (clk),
            .A   (Xin_Add[6]),
            .B   (12'sd1545),
            .P   (Mout[6]));

        mult u7 (
            .CLK (clk),
            .A   (Xin_Add[7]),
            .B   (12'sd2047),
            .P   (Mout[7]));

        // Two-stage adder pipeline for the eight products.
        // The partial sums are 22 bits wide: with |Xin_Add| <= 256 and the
        // coefficients of sum2 adding up to 4749, sum2 can reach 1,215,744 in
        // magnitude, which does not fit in 21 signed bits (max 1,048,575).
        // Because the destination is 22 bits, the 21-bit products are
        // sign-extended to 22 bits before the additions are performed.
        reg signed [21:0] sum1, sum2;
        always @(posedge clk) begin
            sum1 <= Mout[0] + Mout[1] + Mout[2] + Mout[3];
            sum2 <= Mout[4] + Mout[5] + Mout[6] + Mout[7];
            Yout <= sum1 + sum2;
        end

    endmodule
  • FirParallel_test_data.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    // Test-signal source for the FIR demos: registers the sum of the outputs of
    // two DDS cores (a 100 Hz and a 500 Hz sine per the original notes).
    module FirParallel_test_data(
        input                   clk,                 // 2000 Hz per the original note
        output                  out_valid_sin100Hz,  // tvalid of the 100 Hz DDS
        output                  out_valid_sin500Hz,  // tvalid of the 500 Hz DDS
        output reg signed [7:0] dout                 // registered sum of both sines
    );

        // Original author's note: declaring these wires 7 bits wide raises an
        // error in ModelSim but not in the Vivado simulator, although the IP core
        // is actually configured for 7-bit output.
        // NOTE(review): the 8-bit sum below relies on each sine staying within a
        // 7-bit range; with two full-scale 8-bit operands it would wrap -- confirm
        // the DDS output width.
        wire signed [7:0] tone_500;
        wire signed [7:0] tone_100;

        // 100 Hz sine generator.
        // NOTE(review): presumably 3277 / 2^16 * 2000 Hz ~= 100 Hz, which assumes a
        // 16-bit phase accumulator in the DDS IP -- confirm against the IP settings.
        dds_for_FirParallel u1(
            .m_axis_data_tdata   (tone_100),
            .m_axis_data_tvalid  (out_valid_sin100Hz),
            .s_axis_config_tdata (16'd3277),
            .s_axis_config_tvalid(1'b1),
            .aclk                (clk)
        );

        // 500 Hz sine generator (presumably 16384 / 2^16 * 2000 Hz = 500 Hz; same assumption).
        dds_for_FirParallel u2(
            .m_axis_data_tdata   (tone_500),
            .m_axis_data_tvalid  (out_valid_sin500Hz),
            .s_axis_config_tdata (16'd16384),
            .s_axis_config_tvalid(1'b1),
            .aclk                (clk)
        );

        // Register the superposition of the two tones.
        always @(posedge clk)
            dout <= tone_100 + tone_500;

    endmodule
  • FirParallel_design.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    // Top level of the parallel FIR demo: wires the two-tone test-signal
    // generator into the parallel linear-phase FIR filter.
    module FirParallel_design(
        input                clk,
        output               out_valid_sin100Hz,
        output               out_valid_sin500Hz,
        output signed [21:0] yout
    );

        // Two-tone test signal travelling from the generator to the filter.
        wire signed [7:0] mixed_tone;

        // Generator of the superimposed two-frequency signal.
        FirParallel_test_data u1(
            .dout              (mixed_tone),
            .out_valid_sin500Hz(out_valid_sin500Hz),
            .out_valid_sin100Hz(out_valid_sin100Hz),
            .clk               (clk)
        );

        // Parallel linear-phase FIR filter.
        FirParallel u2(
            .Yout(yout),
            .Xin (mixed_tone),
            .clk (clk)
        );

    endmodule

3.Testbench

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// The delay below is written in nanoseconds, so the time unit is fixed here
// instead of being inherited from whichever file was compiled earlier.
`timescale 1ns / 1ps

// Testbench for FirParallel_design. It only supplies the clock; the stimulus
// is generated inside the design under test.
module FirParallel_design_tb;

    reg clk;
    wire out_valid_sin100Hz;
    wire out_valid_sin500Hz;
    wire signed [21:0] yout;

    FirParallel_design u1(
        .clk(clk),
        .out_valid_sin100Hz(out_valid_sin100Hz),
        .out_valid_sin500Hz(out_valid_sin500Hz),
        .yout(yout)
    );

    // Start the clock low.
    initial begin
        clk = 0;
        #100;
    end

    // Half period 250000 ns -> period 500 us -> 2000 Hz system clock.
    always #250000 clk <= ~clk;

endmodule
  • 结果如下:

    Snipaste_2023-11-20_21-08-07


串行FIR滤波器的硬件实现*

1.基于串行结构的FIR滤波器设计

  • 核心思想:速度换面积

  • 与并行FIR滤波器一致,实现16个抽头系数的低通滤波器。由于系数对称,每输出一个数据需要完成8次乘加运算;此时只有一个乘法器,每个时钟周期只能完成一次乘加,因此每得到一个输出需要8个时钟周期,并且移位寄存器中的16个输入数据在这8个时钟周期内必须保持不变。所以数据周期是滤波器时钟周期的8倍,即滤波器时钟频率是数据速率的8倍(本例中为16kHz对2kHz)

  • FPGA设计的系统框图如下:

    image-20231121135956807

  • 下述代码中有一段用for循环实现移位操作值得指出,个人认为很精彩:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    // Store the incoming samples in the shift register Xin_Reg.
    reg [7:0] Xin_Reg[15:0];
    // Loop indices are integers: a 4-bit index can never fail "i < 16", so it
    // could not be used to reach element 15 in the reset loop.
    integer i,j;
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            // Clear all 16 registers, including Xin_Reg[15].
            for (i=0; i<16; i=i+1)
                Xin_Reg[i]<=8'd0;
        end
        else begin
            if (count==7) begin
                // The loop only writes elements 1..15, so the assignment to
                // Xin_Reg[0] may be placed before or after it with the same result.
                for (j=0; j<15; j=j+1)
                    Xin_Reg[j+1] <= Xin_Reg[j];
                Xin_Reg[0] <= Xin;
            end
        end
    end

2.源文件

  • FirFullSerial.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    // Fully serial, 16-tap, symmetric-coefficient FIR low-pass filter.
    //
    // A single pre-adder and a single multiplier are time-shared: the filter
    // clock runs 8 times faster than the data rate, and during each data period
    // the 8 mirrored tap pairs are added, multiplied by their coefficient and
    // accumulated one after another.
    module FirFullSerial(
        input                    rst,   // reset, active high (asynchronous)
        input                    clk,   // filter clock, 16 kHz per the original note
        input      signed [7:0]  Xin,   // input samples arriving at 2 kHz
        output reg signed [21:0] Yout   // filtered output
    );

        reg  signed [11:0] coe;    // current 12-bit quantised coefficient
        wire signed [8:0]  add_s;  // pre-adder result: two 8-bit samples need 9 bits

        // Signed adder IP core. Its operands are the samples sign-extended by one
        // bit; the result is 9 bits. No pipeline delay per the original note.
        reg signed [8:0] add_a;
        reg signed [8:0] add_b;
        adder u2 (
            .A (add_a),
            .B (add_b),
            .S (add_s));

        // 3-bit counter with a period of 8 filter clocks, i.e. one input-data period.
        reg [2:0] count = 0;
        always @(posedge clk or posedge rst)
            if (rst)
                count <= 3'd0;
            else
                count <= count + 1;

        // Input shift register; a new sample is taken once per counter period.
        reg [7:0] Xin_Reg[15:0];
        // Loop indices are integers: a 4-bit index can never fail "i < 16", so it
        // could not be used to reach element 15 in the reset loop.
        integer i, j;
        always @(posedge clk or posedge rst) begin
            if (rst) begin
                // Clear all 16 registers. Leaving Xin_Reg[15] uncleared makes the
                // first pre-adder operand unknown in simulation until 16 samples
                // have been shifted in.
                for (i = 0; i < 16; i = i + 1)
                    Xin_Reg[i] <= 8'd0;
            end
            else if (count == 7) begin
                // The loop only writes elements 1..15, so the position of the
                // Xin_Reg[0] assignment relative to it does not matter.
                for (j = 0; j < 15; j = j + 1)
                    Xin_Reg[j+1] <= Xin_Reg[j];
                Xin_Reg[0] <= Xin;
            end
        end

        // Select, per counter value, the mirrored sample pair for the pre-adder
        // and the matching coefficient for the multiplier. Each sample is
        // sign-extended to 9 bits by repeating its MSB so the addition cannot
        // overflow. Only one adder is used.
        always @(posedge clk or posedge rst) begin
            if (rst) begin
                add_a <= 9'd0;
                add_b <= 9'd0;
                coe   <= 12'd0;
            end
            else begin
                case (count)
                    3'd0: begin
                        add_a <= {Xin_Reg[0][7],  Xin_Reg[0]};
                        add_b <= {Xin_Reg[15][7], Xin_Reg[15]};
                        coe   <= 12'd0;     // c0
                    end
                    3'd1: begin
                        add_a <= {Xin_Reg[1][7],  Xin_Reg[1]};
                        add_b <= {Xin_Reg[14][7], Xin_Reg[14]};
                        coe   <= -12'd7;    // c1
                    end
                    3'd2: begin
                        add_a <= {Xin_Reg[2][7],  Xin_Reg[2]};
                        add_b <= {Xin_Reg[13][7], Xin_Reg[13]};
                        coe   <= -12'd15;   // c2
                    end
                    3'd3: begin
                        add_a <= {Xin_Reg[3][7],  Xin_Reg[3]};
                        add_b <= {Xin_Reg[12][7], Xin_Reg[12]};
                        coe   <= 12'd46;    // c3
                    end
                    3'd4: begin
                        add_a <= {Xin_Reg[4][7],  Xin_Reg[4]};
                        add_b <= {Xin_Reg[11][7], Xin_Reg[11]};
                        coe   <= 12'd307;   // c4
                    end
                    3'd5: begin
                        add_a <= {Xin_Reg[5][7],  Xin_Reg[5]};
                        add_b <= {Xin_Reg[10][7], Xin_Reg[10]};
                        coe   <= 12'd850;   // c5
                    end
                    3'd6: begin
                        add_a <= {Xin_Reg[6][7],  Xin_Reg[6]};
                        add_b <= {Xin_Reg[9][7],  Xin_Reg[9]};
                        coe   <= 12'd1545;  // c6
                    end
                    default: begin
                        add_a <= {Xin_Reg[7][7],  Xin_Reg[7]};
                        add_b <= {Xin_Reg[8][7],  Xin_Reg[8]};
                        coe   <= 12'd2047;  // c7
                    end
                endcase
            end
        end

        // The multiplier IP is called at 8 times the data rate. With 16 symmetric
        // taps, all 8 coefficient-by-data products are computed within one data
        // period. Signed multiplier IP core, 1 pipeline stage per the original note.
        wire signed [20:0] Mout;
        mult2 u11 (
            .clk (clk),
            .a   (add_s),
            .b   (coe),
            .p   (Mout)
        );

        // Accumulate the products and publish the filtered sample.
        // Because of the register and multiplier latency, the product of tap
        // pair 0 is present when count == 2: at that point the finished sum of
        // the previous 8 products is copied to Yout and the accumulator restarts
        // from the current product. (The original author recommends confirming
        // such latencies by behavioural simulation.)
        reg signed [21:0] sum;
        always @(posedge clk or posedge rst) begin
            if (rst) begin
                sum  <= 22'd0;
                Yout <= 22'd0;
            end
            else begin
                if (count == 2) begin
                    Yout <= sum;   // all 8 multiply-accumulates are complete
                    sum  <= Mout;  // first product of the next output sample
                end
                else
                    sum <= sum + Mout;
            end
        end

    endmodule
  • FirFullSerial_design.v

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    // Top level of the serial FIR demo: the two-tone test-signal generator runs
    // on the data clock and feeds the serial FIR filter, which runs on the
    // faster filter clock.
    // NOTE(review): xin is produced in the clk_data domain and sampled in the
    // clk_fir domain; this presumably relies on both clocks being derived from
    // one source -- confirm before using outside this testbench.
    module FirFullSerial_design(
        input                rst,
        input                clk_data, // 2 kHz
        input                clk_fir,  // 16 kHz
        output signed [21:0] dout      // signed, matching FirFullSerial.Yout
    );

        // Declared signed to match the generator output and the filter input.
        wire signed [7:0] xin;

        // Generator of the superimposed two-frequency signal; its valid outputs
        // are left unconnected.
        FirParallel_test_data u1(
            .clk(clk_data),
            .out_valid_sin100Hz(),
            .out_valid_sin500Hz(),
            .dout(xin)
        );

        // Serial FIR filter.
        FirFullSerial u2(
            .clk(clk_fir),
            .rst(rst),
            .Xin(xin),
            .Yout(dout)
        );

    endmodule

3.Testbench

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
// The delays below are written in nanoseconds, so the time unit is fixed here
// instead of being inherited from whichever file was compiled earlier.
`timescale 1ns / 1ps

// Testbench for FirFullSerial_design. It generates the reset and the two
// related clocks (16 kHz filter clock, 2 kHz data clock) from a 32 kHz base.
module FirFullSerial_tb;

    // Inputs
    reg rst;
    reg clk_data;
    reg clk_fir;

    // Outputs (signed so that waveform viewers show the filtered sine correctly)
    wire signed [21:0] dout;

    // Instantiate the Unit Under Test (UUT)
    FirFullSerial_design uut (
        .rst(rst),
        .clk_data(clk_data),
        .clk_fir(clk_fir),
        .dout(dout)
    );

    reg clk_32k;
    initial begin
        // Initialize inputs
        rst = 1;
        clk_data = 0;
        clk_fir = 0;
        clk_32k = 0;

        // Hold reset for 80 us (a little more than one 16 kHz clock period)
        #80000;
        rst = 0;
        // Add stimulus here
    end

    // 32 kHz base clock: half period 15625 ns -> period 31.25 us
    always #15625 clk_32k <= !clk_32k;

    // Divide the base clock: bit 0 of the counter toggles at 16 kHz and bit 3
    // at 2 kHz. Both derived clocks are registered on the same base-clock edge.
    reg [3:0] cn=0;
    always @(posedge clk_32k) begin
        cn <= cn + 1;
        clk_fir <= cn[0];  // 16 kHz
        clk_data <= cn[3]; // 2 kHz
    end

endmodule
  • 结果如下:

    image-20231121141829792

    image-20231121141755650


Reference

欢迎来到ssy的世界