我正在实现一个 AXI4-Stream 模块。该模块使用三个 DSP 硬核(DSP48E1,参见 Xilinx UG479)。为了使模块能以 150 MHz 的频率运行,我把设计做成了流水线,数据依次流经每个 DSP。
在这段代码中,我选择在单个进程(process)里用一个 for 循环来实现流水线。需要说明的是,我已经在 Xilinx 7 系列器件上对该设计做过仿真和测试,到目前为止它工作得很好。
与其在单个进程中使用 for 循环,改用多个独立的进程是否会更好?
我喜欢这种编写流水线的方式,因为借助 for 循环和 std_logic_vector 数组(用作移位寄存器),它为我节省了时间。
然而,这是一种好的编码方式吗(就频率、功耗、FPGA 资源利用率等而言)?更广泛地说,它是良好的设计实践,还是在某些场合下存在局限?我想弄清楚这种选择的利弊及其原因。
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
library ieee_proposed;
use ieee_proposed.fixed_pkg.all;
-- AXI4-Stream stage that takes 24-bit pixels on its slave side and emits
-- 8-bit pixels on its master side. The TUSER/TLAST/TVALID sideband flags are
-- forwarded from input to output together with the data.
entity slave_AXIStream_RGBtoGray is
    port (
        -- Clock and reset (RESETN is sampled on the rising clock edge, active low)
        CLK           : in  std_logic;
        RESETN        : in  std_logic;

        -- Slave side (towards the upstream block)
        S_AXIS_TREADY : out std_logic;                      -- this block accepts data
        S_AXIS_TDATA  : in  std_logic_vector(23 downto 0);  -- pixel in: red 23..16, green 15..8, blue 7..0
        S_AXIS_TUSER  : in  std_logic;                      -- first pixel of a frame
        S_AXIS_TLAST  : in  std_logic;                      -- last pixel of a line
        S_AXIS_TVALID : in  std_logic;                      -- input data is valid

        -- Master side (towards the downstream blocks)
        M_AXIS_TREADY : in  std_logic;                      -- downstream blocks are ready
        M_AXIS_TDATA  : out std_logic_vector(7 downto 0);   -- pixel out
        M_AXIS_TUSER  : out std_logic;                      -- first pixel of a frame
        M_AXIS_TLAST  : out std_logic;                      -- last pixel of a line
        M_AXIS_TVALID : out std_logic                       -- output data is valid
    );
end entity slave_AXIStream_RGBtoGray;
architecture Behavioral of slave_AXIStream_RGBtoGray is
-- Component declaration for the DSP macro that performs P = A*B + C.
-- All data ports carry signed, two's complement values.
component dsp48E1_macro
    port (
        CLK  : in  std_logic;
        CE   : in  std_logic;                       -- presumably clock enable; tied to main_ready below
        SCLR : in  std_logic;                       -- presumably synchronous clear; tied to "not RESETN" below
        A    : in  std_logic_vector(14 downto 0);   -- multiplier operand (15 bits)
        B    : in  std_logic_vector(14 downto 0);   -- multiplier operand (15 bits)
        C    : in  std_logic_vector(29 downto 0);   -- addend (30 bits)
        P    : out std_logic_vector(30 downto 0)    -- result (31 bits)
    );
end component;
-- main signals
-- Internal copy of S_AXIS_TREADY. It is also wired to the CE input of the three
-- DSP instances and gates every shift of the delay registers in main_process.
signal main_ready : std_logic;
-- DSP48E1 signals
-- P outputs of DSP_A, DSP_B and DSP_C. The lower 30 bits of dsp_A_out and
-- dsp_B_out feed the C input of the next DSP; bits 19..12 of dsp_C_out become
-- the output pixel.
signal dsp_A_out : std_logic_vector(30 downto 0);
signal dsp_B_out : std_logic_vector(30 downto 0);
signal dsp_C_out : std_logic_vector(30 downto 0);
-- Out = coef_red * component_red + coef_green * component_green + coef_blue * component_blue
-- sum of the factors <= 1
-- Each coefficient is built as an unsigned fixed-point value with index range
-- 8 downto -6 (15 bits, 6 of them fractional) and then flattened to a 15-bit vector.
constant coef_red : std_logic_vector(14 downto 0) := to_slv(to_ufixed (0.29, 8, -6));
constant coef_green : std_logic_vector(14 downto 0) := to_slv(to_ufixed (0.59, 8, -6));
constant coef_blue : std_logic_vector(14 downto 0) := to_slv(to_ufixed (0.11, 8, -6));
-- Pipeline
-- NOTE(review): no statement in this architecture reads this signal.
signal pipeline_step : integer range 0 to 13;
-- Delay-line types: 9 entries (used for blue, tapped at index 8 by DSP_C) and
-- 5 entries (used for green, tapped at index 4 by DSP_B).
type array_component_2DSP is array(0 to 8) of std_logic_vector(7 downto 0);
type array_component_1DSP is array(0 to 4) of std_logic_vector(7 downto 0);
-- Red needs a single register because it feeds DSP_A directly.
signal component_red_dsp : std_logic_vector(7 downto 0);
signal component_green_dsp : array_component_1DSP;
signal component_blue_dsp : array_component_2DSP;
-- Sideband flags travel through 13-bit shift registers; index 0 is loaded from
-- the slave port and index 12 is consumed by the output stage.
signal DV_step_dsp : std_logic_vector(0 to 12); -- data valid
signal SOF_step_dsp : std_logic_vector(0 to 12); -- start of frame
signal EOL_step_dsp : std_logic_vector(0 to 12); -- end of line
-- Last step
-- Output registers; they drive the M_AXIS_* ports directly.
signal component_mono_step_out : std_logic_vector(7 downto 0);
signal DV_step_out : std_logic; -- data valid
signal SOF_step_out : std_logic; -- start of frame
signal EOL_step_out : std_logic; -- end of line
-- Holding registers: they capture the pipeline result in the cycle the output
-- stage stalls and are moved to the output registers when it resumes.
signal data_stored : std_logic_vector(7 downto 0);
signal DV_stored : std_logic; -- data valid
signal SOF_stored : std_logic; -- start of frame
signal EOL_stored : std_logic; -- end of line
begin
-- Master-side ports are driven straight from the output registers.
M_AXIS_TVALID <= DV_step_out;
M_AXIS_TDATA  <= component_mono_step_out;
M_AXIS_TUSER  <= SOF_step_out;
M_AXIS_TLAST  <= EOL_step_out;
-- Upstream back-pressure follows the internal pipeline enable.
S_AXIS_TREADY <= main_ready;
-- First multiply-add stage: coef_red * red, with a zero addend.
DSP_A: dsp48E1_macro
    port map (
        CLK  => CLK,
        CE   => main_ready,                            -- advances only while the pipeline is enabled
        SCLR => not RESETN,
        A    => '0' & component_red_dsp & "000000",    -- sign bit 0, 8-bit sample, 6 zero LSBs
        B    => coef_red,
        C    => (others => '0'),
        P    => dsp_A_out
    );
-- Second multiply-add stage: coef_green * green + result of DSP_A.
DSP_B: dsp48E1_macro
    port map (
        CLK  => CLK,
        CE   => main_ready,                               -- advances only while the pipeline is enabled
        SCLR => not RESETN,
        A    => '0' & component_green_dsp(4) & "000000",  -- last entry of the green delay line
        B    => coef_green,
        C    => dsp_A_out(29 downto 0),                   -- top bit of the 31-bit P is not forwarded
        P    => dsp_B_out
    );
-- Third multiply-add stage: coef_blue * blue + result of DSP_B.
DSP_C: dsp48E1_macro
    port map (
        CLK  => CLK,
        CE   => main_ready,                              -- advances only while the pipeline is enabled
        SCLR => not RESETN,
        A    => '0' & component_blue_dsp(8) & "000000",  -- last entry of the blue delay line
        B    => coef_blue,
        C    => dsp_B_out(29 downto 0),                  -- top bit of the 31-bit P is not forwarded
        P    => dsp_C_out
    );
-- main_process: clocked process with a synchronous, active-low reset.
--
-- It does two things on every rising edge of CLK:
--   1. While main_ready = '1', it captures the incoming pixel and shifts the
--      delay lines (green, blue) and the sideband shift registers (data valid,
--      start of frame, end of line) by one position.
--   2. It runs the output stage: when the output register is free
--      (M_AXIS_TREADY = '1' or the output is not valid) it is reloaded, either
--      from the end of the pipeline or from the holding registers; otherwise
--      the pipeline is stopped and the value leaving it is parked in the
--      holding registers so it is not lost.
--
-- Changes compared with the FOR/CASE formulation this replaces:
--   * The shifts are written as explicit loops sized from the array bounds.
--     The former loop parameter shadowed the architecture signal
--     pipeline_step, so the reset assignment to that signal had no effect on
--     anything and is not made here.
--   * data_stored is cleared on reset like every other register, so
--     M_AXIS_TDATA no longer shows 'U' in simulation right after reset.
--   * A default "DV_step_out <= '0'" that was always overwritten is removed.
main_process: process(CLK)
begin
    if rising_edge(CLK) then
        if RESETN = '0' then
            main_ready              <= '0';
            component_red_dsp       <= (others => '0');
            component_green_dsp     <= (others => (others => '0'));
            component_blue_dsp      <= (others => (others => '0'));
            DV_step_dsp             <= (others => '0');
            SOF_step_dsp            <= (others => '0');
            EOL_step_dsp            <= (others => '0');
            component_mono_step_out <= (others => '0');
            DV_step_out             <= '0';
            SOF_step_out            <= '0';
            EOL_step_out            <= '0';
            data_stored             <= (others => '0');
            DV_stored               <= '0';
            SOF_stored              <= '0';
            EOL_stored              <= '0';
        else
            -----------------------------------------------------------------
            -- 1. Input capture and delay lines (frozen while main_ready = '0')
            -----------------------------------------------------------------
            if main_ready = '1' then
                -- Stage 0: sample the slave port.
                component_red_dsp      <= S_AXIS_TDATA(23 downto 16);
                component_green_dsp(0) <= S_AXIS_TDATA(15 downto 8);
                component_blue_dsp(0)  <= S_AXIS_TDATA(7 downto 0);
                DV_step_dsp(0)         <= S_AXIS_TVALID;
                SOF_step_dsp(0)        <= S_AXIS_TUSER;
                EOL_step_dsp(0)        <= S_AXIS_TLAST;

                -- Green delay line (its last entry feeds DSP_B).
                for stage in 1 to component_green_dsp'high loop
                    component_green_dsp(stage) <= component_green_dsp(stage - 1);
                end loop;

                -- Blue delay line (its last entry feeds DSP_C).
                for stage in 1 to component_blue_dsp'high loop
                    component_blue_dsp(stage) <= component_blue_dsp(stage - 1);
                end loop;

                -- Sideband flags travel alongside the data.
                for stage in 1 to DV_step_dsp'high loop
                    DV_step_dsp(stage)  <= DV_step_dsp(stage - 1);
                    SOF_step_dsp(stage) <= SOF_step_dsp(stage - 1);
                    EOL_step_dsp(stage) <= EOL_step_dsp(stage - 1);
                end loop;
            end if;

            -----------------------------------------------------------------
            -- 2. Output stage with holding registers
            -----------------------------------------------------------------
            if M_AXIS_TREADY = '1' or DV_step_out = '0' then
                -- The output register is free: (re)enable the pipeline.
                main_ready <= '1';
                if main_ready = '1' then
                    -- Pipeline was running: take its newest result.
                    -- The A operands and the coefficients each carry 6
                    -- fractional bits, so the product has 12 of them and
                    -- bits 19..12 are the 8 integer bits (fraction truncated).
                    component_mono_step_out <= dsp_C_out(19 downto 12);
                    DV_step_out             <= DV_step_dsp(DV_step_dsp'high);
                    SOF_step_out            <= SOF_step_dsp(SOF_step_dsp'high);
                    EOL_step_out            <= EOL_step_dsp(EOL_step_dsp'high);
                else
                    -- Pipeline was stopped: release the parked value first
                    -- and mark the holding registers as empty.
                    component_mono_step_out <= data_stored;
                    DV_step_out             <= DV_stored;
                    SOF_step_out            <= SOF_stored;
                    EOL_step_out            <= EOL_stored;
                    DV_stored               <= '0';
                end if;
            else
                -- Output is valid but not accepted: stop the pipeline.
                main_ready <= '0';
                if main_ready = '1' then
                    -- The pipeline still advances in this cycle, so park the
                    -- value that is leaving it.
                    data_stored <= dsp_C_out(19 downto 12);
                    DV_stored   <= DV_step_dsp(DV_step_dsp'high);
                    SOF_stored  <= SOF_step_dsp(SOF_step_dsp'high);
                    EOL_stored  <= EOL_step_dsp(EOL_step_dsp'high);
                end if;
            end if;
        end if;
    end if;
end process;
end Behavioral;

发布于 2016-07-25 16:37:55
比起复杂的 VHDL 进程,我更喜欢 generate 语句。用进程来描述时,流水线的连接错误不会暴露出来,因为在同一个进程内不会产生多重驱动(最后一次赋值生效)。而在 generate 描述中,这类错误会产生多重驱动,工具(综合和仿真)能够检测出来。
您可以尝试为乘累加(MACC)运算编写通用的 VHDL 代码。如果 Xilinx 的工具能够推断出正确的硬件(即使用 DSP48E* 硬核及其内嵌加法器),那么这是更好的长期方案。每一代、每个器件系列都有各自的 DSPxxEy 硬核,因此使用通用 VHDL 代码可以提高可维护性和可移植性。(另一方面,综合工具也以“忘记”原本能够识别的写法而闻名。)
组件(component)语法已经“过时”。您可以省去组件声明,改用如下方式进行实例化:
DSP_A: entity work.dsp48E1_macro
  port map (
    -- ...
  );

如果该宏被编译到了与当前设计单元所在库(work)不同的库中,请将 work 替换为正确的库名。
您可能需要把一些魔数(12、13、14、19、30 等)替换为常量或泛型(generic)参数,并复用这些常量,或由它们计算出其他数值。这样,如果您决定扩大数值范围,只需修改几个常量即可,而不必重新梳理整个算法。
generate 示例

下面是我编写的两个 generate 示例:
https://codereview.stackexchange.com/questions/135868
复制相似问题