Efficient Signed Multiplier with good timing












0
















I am building a signed multiplier. For it I have created a half adder,
a full adder, a ripple-carry adder and, finally, the multiplier itself.
The code is shown below. How can I make it faster to achieve better timing?
My final task is to build an FIR filter running at 100 MHz. The filter
performs many multiplications (using my multiplier).
Can you help me improve my design with an optimization technique
such as pipelining, parallelism, or something else?




   -- half adder
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Half adder: adds two single bits; there is no carry input.
--   a, b : operand bits
--   sum  : a xor b  (low bit of a + b)
--   cout : a and b  (carry out of a + b)
entity half_adder is
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end half_adder;

architecture version1 of half_adder is
begin
sum <= a xor b;
cout <= a and b;
end version1;



-- full adder
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- One-bit full adder assembled from two half adders and an OR gate.
--   a, b : operand bits
--   cin  : carry input
--   sum  : low bit of a + b + cin
--   cout : carry out of a + b + cin
entity full_adder is
port(
a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end full_adder;


architecture structural of full_adder is
component half_adder
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end component;
-- s1/c1: sum and carry of a + b; c2: carry of cin + s1
signal s1, c1, c2 : std_logic ;
begin -- structural
-- first stage: a + b
half_adder1 : half_adder
port map (
a => a, b => b,
sum => s1, cout => c1);
-- second stage: add the carry input to the first-stage sum
half_adder2 : half_adder
port map (
a => cin, b => s1,
sum => sum, cout => c2);
-- a carry leaves the cell if either stage produced one
cout <= c1 or c2;

end structural ;



-- ripple carry adder 40 bit
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Ripple-carry adder made of `width` full_adder cells (40 by default).
--   a, b : operands, width bits each
--   cin  : carry into bit 0
--   sum  : low `width` bits of a + b + cin
--   cout : carry out of the most significant cell
entity rca40bit is
generic (
width: integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end rca40bit;

architecture Behavioral of rca40bit is
component full_adder
port(
a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end component;
-- carry chain: s(i) is the carry into cell i, s(width) the final carry out
signal s: std_logic_vector(width downto 0);

begin
s(0)<=cin;

-- one full adder per bit; the carry of cell i feeds cell i+1
FA:for i in 0 to width-1 generate
FA_i:full_adder
port map
(
a=>a(i),b=>b(i),cin=>s(i),sum=>sum(i),cout=>s(i+1)
);

end generate;

cout<=s(width);

end Behavioral;



-- Multiplier: the partial products are generated first and then summed
-- by a chain of ripple-carry adders (rca40bit).
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;

-- Two's-complement multiplier using the Baugh-Wooley arrangement:
-- every partial-product bit that involves exactly one operand sign bit is
-- inverted, and the constant 2**(m-1) + 2**(n-1) + 2**(m+n-1) is added.
-- The result is a * b modulo 2**(m+n).
--   a    : multiplicand, m bits, signed
--   b    : multiplier,   n bits, signed
--   prod : product, bit1 downto 0, signed
entity signed_mult is
generic (

m : integer := 24; -- width of the multiplicand a

n : integer := 16 ; -- width of the multiplier b

bit1 :integer := 39 -- index of the product MSB; must equal m + n - 1

);


port(

a : in std_logic_vector(m - 1 downto 0) ;
b : in std_logic_vector(n - 1 downto 0) ;
prod : out std_logic_vector(bit1 downto 0)

);
end entity signed_mult;

architecture Behavioral of signed_mult is

-- The width generic is declared so the adder can follow bit1.
component rca40bit
generic (
width : integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end component;

subtype word_t is std_logic_vector(bit1 downto 0);
type word_array_t is array (natural range <>) of word_t;

-- Baugh-Wooley correction constant 2**(m-1) + 2**(n-1) + 2**(m+n-1).
-- The three one-hot terms are added arithmetically so that m = n
-- (both low terms on the same bit) is handled as well.
function correction_word return word_t is
variable one_hot : unsigned(bit1 downto 0);
variable total : unsigned(bit1 downto 0) := (others => '0');
begin
one_hot := (others => '0');
one_hot(m - 1) := '1';
total := total + one_hot;

one_hot := (others => '0');
one_hot(n - 1) := '1';
total := total + one_hot;

one_hot := (others => '0');
one_hot(m + n - 1) := '1';
total := total + one_hot;

return std_logic_vector(total);
end function correction_word;

-- Partial product of x with bit `row` of the multiplier, shifted left by
-- `row` positions and zero-filled elsewhere.
-- A term is inverted when exactly one of its two factors is a sign bit:
--   rows 0 .. n-2 : only the x(m-1) term is inverted
--   row  n-1      : every term except the x(m-1) term is inverted
function partial_row (
x : std_logic_vector(m - 1 downto 0);
y_bit : std_logic;
row : natural
) return word_t is
variable result : word_t := (others => '0');
variable term : std_logic;
begin
for i in 0 to m - 1 loop
term := x(i) and y_bit;
if (i = m - 1) /= (row = n - 1) then
term := not term;
end if;
result(i + row) := term;
end loop;
return result;
end function partial_row;

constant CORRECTION : word_t := correction_word;

-- pp(j)  : partial product for multiplier bit j
-- acc(j) : running sum before row j is added; acc(n) is the product
signal pp : word_array_t(0 to n - 1);
signal acc : word_array_t(0 to n);

begin

-- The row indexing and the correction constant assume a full-width product.
assert bit1 = m + n - 1
report "signed_mult: generic bit1 must equal m + n - 1"
severity failure;

-- Seed the adder chain with the correction constant.
acc(0) <= CORRECTION;

rows : for j in 0 to n - 1 generate

pp(j) <= partial_row(a, b(j), j);

-- acc(j+1) = acc(j) + pp(j); the carry out is discarded (modulo 2**(m+n))
add_row : rca40bit
generic map (width => bit1 + 1)
port map (a => acc(j), b => pp(j), cin => '0', sum => acc(j + 1), cout => open);

end generate rows;

prod <= acc(n);

end Behavioral;



Filter Design




library IEEE;
USE IEEE.STD_LOGIC_1164.ALL;
USE IEEE.NUMERIC_STD.ALL;

-- FIR filter with FIR_ORDER + 1 taps and fixed coefficients.
--   clk      : clock, all registers update on the rising edge
--   rst      : synchronous reset, active when '0'
--   data_in  : input sample, registered before use
--   data_out : registered output, the upper DATA_WIDTH bits of the
--              (DATA_WIDTH + COEFF_WIDTH)-bit accumulated sum
-- PIPELINE_DEPTH is part of the interface but is not used by this architecture.
entity your_filter is
Generic (
constant PIPELINE_DEPTH : positive := 2;
constant DATA_WIDTH : positive := 24;
constant FIR_ORDER : positive := 34;
constant COEFF_WIDTH : positive := 16
);
Port (
clk : in STD_LOGIC;
rst : in STD_LOGIC;
data_in : in STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);
data_out : out STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0)
);
end your_filter;

-- Transposed-form implementation. With x the registered input sample:
--   tap_sum(k)       = coeff(k) * x + partial(k)      for k < FIR_ORDER
--   tap_sum(ORDER)   = coeff(ORDER) * x
--   partial(k) (reg) <= tap_sum(k + 1)
--   data_out   (reg) <= upper bits of tap_sum(0)
-- This yields sum over k of coeff(k) * x delayed by k samples, the same
-- sequence as a direct-form delay line, but only one multiplier and one
-- adder sit between any two registers.
architecture Behavioral of your_filter is

-- NOTE(review): no entity named ripple_ca is visible in this file; it is
-- presumably a ripple-carry adder with this port list -- confirm the binding.
component ripple_ca is
port(
a : in std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
b : in std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
cout : out std_logic
);
end component;

component signed_mult is
port(

a : in std_logic_vector(DATA_WIDTH - 1 downto 0) ;
b : in std_logic_vector(COEFF_WIDTH - 1 downto 0) ;
prod : out std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0)

);

end component;

constant ACC_WIDTH : positive := DATA_WIDTH + COEFF_WIDTH;

subtype acc_t is std_logic_vector(ACC_WIDTH-1 downto 0);
type acc_array_t is array (natural range <>) of acc_t;

type COEFF_ARRAY_TYPE is array (0 to FIR_ORDER) of signed(COEFF_WIDTH-1 downto 0);
type COEFF_ARRAY1 is array (0 to FIR_ORDER) of std_logic_vector(COEFF_WIDTH-1 downto 0);

constant coeff_array : COEFF_ARRAY_TYPE := (
"0000000101011111",
"0000000001100001",
"0000000000110011",
"1111111111011010",
"1111111101011111",
"1111111011011001",
"1111111001100101",
"1111111000100101",
"1111111000111011",
"1111111010111111",
"1111111110111001",
"0000000100100000",
"0000001011011000",
"0000010010110011",
"0000011001111010",
"0000011111110010",
"0000100011101011",
"0000100101000010",
"0000100011101011",
"0000011111110010",
"0000011001111010",
"0000010010110011",
"0000001011011000",
"0000000100100000",
"1111111110111001",
"1111111010111111",
"1111111000111011",
"1111111000100101",
"1111111001100101",
"1111111011011001",
"1111111101011111",
"1111111111011010",
"0000000000110011",
"0000000001100001",
"0000000101011111"
);

-- coefficients as std_logic_vector, the type the multiplier ports expect
signal coeff : COEFF_ARRAY1;
-- registered input sample, shared by every multiplier
signal data_in_reg : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);
-- product(k) = coeff(k) * data_in_reg
signal product : acc_array_t(0 to FIR_ORDER);
-- combinational sum at tap k
signal tap_sum : acc_array_t(0 to FIR_ORDER);
-- registered partial sums between taps
signal partial : acc_array_t(0 to FIR_ORDER-1);
signal next_data_out : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);

begin

--synchronous process: every register in the filter
sync_proc : process (clk)
begin
if rising_edge(clk) then
if rst = '0' then
partial <= (others=>(others=>'0'));
data_in_reg <= (others=>'0');
data_out <= (others=>'0');
else
for k in 0 to FIR_ORDER-1 loop
partial(k) <= tap_sum(k+1);
end loop;
data_in_reg <= data_in;
data_out <= next_data_out;
end if;
end if;
end process;

-- one multiplier per coefficient, all fed by the same registered sample
taps : for k in 0 to FIR_ORDER generate

coeff(k) <= std_logic_vector(coeff_array(k));

mult_k : signed_mult
port map ( a => data_in_reg, b => coeff(k), prod => product(k) );

end generate taps;

-- one adder per registered partial sum; each carry out is left open so
-- that no signal ends up with more than one driver
adders : for k in 0 to FIR_ORDER-1 generate

add_k : ripple_ca
port map ( a => partial(k), b => product(k), cin => '0', sum => tap_sum(k), cout => open );

end generate adders;

-- the last tap has nothing behind it, so its sum is just its product
tap_sum(FIR_ORDER) <= product(FIR_ORDER);

-- drop the COEFF_WIDTH low-order bits of the full-precision result
next_data_out <= tap_sum(0)(ACC_WIDTH-1 downto COEFF_WIDTH);


end Behavioral;









share|improve this question




















  • 3





    Is this an academic exercise? usually the fastest results come from using the dedicated multipliers on the chip. In your code, how do you expect to get any FMax without a clock?

    – Tricky
    Nov 24 '18 at 19:50













  • Your current method ("I have created a half adder, a full adder...") does not match your target of good timing. For fast results you should not build your own adders/multipliers but let the synthesis tool handle it.

    – Oldfart
    Nov 24 '18 at 19:56











  • @Tricky Yes, it is an academic exercise. I have not uploaded the timing constraint file (.xdc), but it defines a 10 ns clock, which means 100 MHz.

    – Zohaib Ramzan
    Nov 24 '18 at 20:56











  • @Oldfart My course subject is advanced vlsi whose requirement is to make fast components by students not by synthesis.

    – Zohaib Ramzan
    Nov 24 '18 at 20:59











  • But your design has no clock - so it won't matter what's in the XDC file, as you have nothing to time.

    – Tricky
    Nov 24 '18 at 23:05
















0
















I am building a signed multiplier. For it I have created a half adder,
a full adder, a ripple-carry adder and, finally, the multiplier itself.
The code is shown below. How can I make it faster to achieve better timing?
My final task is to build an FIR filter running at 100 MHz. The filter
performs many multiplications (using my multiplier).
Can you help me improve my design with an optimization technique
such as pipelining, parallelism, or something else?




   -- half adder
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Half adder: adds two single bits; there is no carry input.
--   a, b : operand bits
--   sum  : a xor b  (low bit of a + b)
--   cout : a and b  (carry out of a + b)
entity half_adder is
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end half_adder;

architecture version1 of half_adder is
begin
sum <= a xor b;
cout <= a and b;
end version1;



-- full adder
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- One-bit full adder assembled from two half adders and an OR gate.
--   a, b : operand bits
--   cin  : carry input
--   sum  : low bit of a + b + cin
--   cout : carry out of a + b + cin
entity full_adder is
port(
a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end full_adder;


architecture structural of full_adder is
component half_adder
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end component;
-- s1/c1: sum and carry of a + b; c2: carry of cin + s1
signal s1, c1, c2 : std_logic ;
begin -- structural
-- first stage: a + b
half_adder1 : half_adder
port map (
a => a, b => b,
sum => s1, cout => c1);
-- second stage: add the carry input to the first-stage sum
half_adder2 : half_adder
port map (
a => cin, b => s1,
sum => sum, cout => c2);
-- a carry leaves the cell if either stage produced one
cout <= c1 or c2;

end structural ;



-- ripple carry adder 40 bit
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Ripple-carry adder made of `width` full_adder cells (40 by default).
--   a, b : operands, width bits each
--   cin  : carry into bit 0
--   sum  : low `width` bits of a + b + cin
--   cout : carry out of the most significant cell
entity rca40bit is
generic (
width: integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end rca40bit;

architecture Behavioral of rca40bit is
component full_adder
port(
a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end component;
-- carry chain: s(i) is the carry into cell i, s(width) the final carry out
signal s: std_logic_vector(width downto 0);

begin
s(0)<=cin;

-- one full adder per bit; the carry of cell i feeds cell i+1
FA:for i in 0 to width-1 generate
FA_i:full_adder
port map
(
a=>a(i),b=>b(i),cin=>s(i),sum=>sum(i),cout=>s(i+1)
);

end generate;

cout<=s(width);

end Behavioral;



-- Multiplier: the partial products are generated first and then summed
-- by a chain of ripple-carry adders (rca40bit).
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;

-- Two's-complement multiplier using the Baugh-Wooley arrangement:
-- every partial-product bit that involves exactly one operand sign bit is
-- inverted, and the constant 2**(m-1) + 2**(n-1) + 2**(m+n-1) is added.
-- The result is a * b modulo 2**(m+n).
--   a    : multiplicand, m bits, signed
--   b    : multiplier,   n bits, signed
--   prod : product, bit1 downto 0, signed
entity signed_mult is
generic (

m : integer := 24; -- width of the multiplicand a

n : integer := 16 ; -- width of the multiplier b

bit1 :integer := 39 -- index of the product MSB; must equal m + n - 1

);


port(

a : in std_logic_vector(m - 1 downto 0) ;
b : in std_logic_vector(n - 1 downto 0) ;
prod : out std_logic_vector(bit1 downto 0)

);
end entity signed_mult;

architecture Behavioral of signed_mult is

-- The width generic is declared so the adder can follow bit1.
component rca40bit
generic (
width : integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end component;

subtype word_t is std_logic_vector(bit1 downto 0);
type word_array_t is array (natural range <>) of word_t;

-- Baugh-Wooley correction constant 2**(m-1) + 2**(n-1) + 2**(m+n-1).
-- The three one-hot terms are added arithmetically so that m = n
-- (both low terms on the same bit) is handled as well.
function correction_word return word_t is
variable one_hot : unsigned(bit1 downto 0);
variable total : unsigned(bit1 downto 0) := (others => '0');
begin
one_hot := (others => '0');
one_hot(m - 1) := '1';
total := total + one_hot;

one_hot := (others => '0');
one_hot(n - 1) := '1';
total := total + one_hot;

one_hot := (others => '0');
one_hot(m + n - 1) := '1';
total := total + one_hot;

return std_logic_vector(total);
end function correction_word;

-- Partial product of x with bit `row` of the multiplier, shifted left by
-- `row` positions and zero-filled elsewhere.
-- A term is inverted when exactly one of its two factors is a sign bit:
--   rows 0 .. n-2 : only the x(m-1) term is inverted
--   row  n-1      : every term except the x(m-1) term is inverted
function partial_row (
x : std_logic_vector(m - 1 downto 0);
y_bit : std_logic;
row : natural
) return word_t is
variable result : word_t := (others => '0');
variable term : std_logic;
begin
for i in 0 to m - 1 loop
term := x(i) and y_bit;
if (i = m - 1) /= (row = n - 1) then
term := not term;
end if;
result(i + row) := term;
end loop;
return result;
end function partial_row;

constant CORRECTION : word_t := correction_word;

-- pp(j)  : partial product for multiplier bit j
-- acc(j) : running sum before row j is added; acc(n) is the product
signal pp : word_array_t(0 to n - 1);
signal acc : word_array_t(0 to n);

begin

-- The row indexing and the correction constant assume a full-width product.
assert bit1 = m + n - 1
report "signed_mult: generic bit1 must equal m + n - 1"
severity failure;

-- Seed the adder chain with the correction constant.
acc(0) <= CORRECTION;

rows : for j in 0 to n - 1 generate

pp(j) <= partial_row(a, b(j), j);

-- acc(j+1) = acc(j) + pp(j); the carry out is discarded (modulo 2**(m+n))
add_row : rca40bit
generic map (width => bit1 + 1)
port map (a => acc(j), b => pp(j), cin => '0', sum => acc(j + 1), cout => open);

end generate rows;

prod <= acc(n);

end Behavioral;



Filter Design




library IEEE;
USE IEEE.STD_LOGIC_1164.ALL;
USE IEEE.NUMERIC_STD.ALL;

entity your_filter is
Generic (
constant PIPELINE_DEPTH : positive := 2;
constant DATA_WIDTH : positive := 24;
constant FIR_ORDER : positive := 34;
constant COEFF_WIDTH : positive := 16
);
Port (
clk : in STD_LOGIC;
rst : in STD_LOGIC;
data_in : in STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);
data_out : out STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0)
);
end your_filter;

-- Direct-form FIR filter.
--
-- A registered copy of data_in (data_in_reg) feeds a FIR_ORDER-deep delay
-- line (reg).  Tap 0 is data_in_reg and tap k (k >= 1) is reg(k-1).  Every tap
-- is multiplied by coeff_array(k) with a signed_mult instance and the products
-- are summed by a chain of ripple-carry adders.  The upper DATA_WIDTH bits of
-- the sum are registered onto data_out.
--
-- Changes with respect to the earlier revision:
--   * every adder carry-out used to drive the single signal c1 (multiple
--     drivers on one net); the carry-outs are now left open;
--   * the adder component is rca40bit, the ripple-carry adder entity defined
--     in this file (the previously declared ripple_ca has no entity here);
--   * the first adder, which only added zero, is gone;
--   * widths are passed to the sub-components through generic maps.
--
-- NOTE(review): the multipliers and the whole adder chain sit between two
-- register stages.  Inserting pipeline registers would shorten that path but
-- changes the latency, so it is deliberately not done here.
architecture Behavioral of your_filter is

-- Width of every product and of the accumulation chain.
constant ACC_WIDTH : positive := DATA_WIDTH + COEFF_WIDTH;

component rca40bit is
generic (
width : integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end component;

component signed_mult is
generic (
m : integer := 24;
n : integer := 16;
bit1 : integer := 39
);
port(
a : in std_logic_vector(m - 1 downto 0) ;
b : in std_logic_vector(n - 1 downto 0) ;
prod : out std_logic_vector(bit1 downto 0)
);
end component;

type COEFF_ARRAY_TYPE is array (0 to FIR_ORDER) of signed(COEFF_WIDTH-1 downto 0);
type COEFF_ARRAY1 is array (0 to FIR_ORDER) of std_logic_vector(COEFF_WIDTH-1 downto 0);
type REG_TYPE2 is array (0 to FIR_ORDER-1) of std_logic_vector (DATA_WIDTH-1 downto 0);
type TAP_ARRAY is array (0 to FIR_ORDER) of std_logic_vector (DATA_WIDTH-1 downto 0);
type ACC_ARRAY is array (0 to FIR_ORDER) of std_logic_vector(ACC_WIDTH-1 downto 0);

signal data_in_reg : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0); -- registered input sample
signal reg : REG_TYPE2; -- delay line, reg(0) is the newest delayed sample
signal tap : TAP_ARRAY; -- tap(0) = data_in_reg, tap(k) = reg(k-1)
signal coeff : COEFF_ARRAY1; -- coefficients as std_logic_vector
signal tempmult : ACC_ARRAY; -- tempmult(k) = tap(k) * coeff(k)
signal sum1 : ACC_ARRAY; -- sum1(k) = tempmult(0) + ... + tempmult(k)
signal data_out_temp : std_logic_vector(ACC_WIDTH-1 downto 0);

-- The table holds FIR_ORDER+1 = 35 entries; it only matches FIR_ORDER = 34.
constant coeff_array : COEFF_ARRAY_TYPE := (
"0000000101011111",
"0000000001100001",
"0000000000110011",
"1111111111011010",
"1111111101011111",
"1111111011011001",
"1111111001100101",
"1111111000100101",
"1111111000111011",
"1111111010111111",
"1111111110111001",
"0000000100100000",
"0000001011011000",
"0000010010110011",
"0000011001111010",
"0000011111110010",
"0000100011101011",
"0000100101000010",
"0000100011101011",
"0000011111110010",
"0000011001111010",
"0000010010110011",
"0000001011011000",
"0000000100100000",
"1111111110111001",
"1111111010111111",
"1111111000111011",
"1111111000100101",
"1111111001100101",
"1111111011011001",
"1111111101011111",
"1111111111011010",
"0000000000110011",
"0000000001100001",
"0000000101011111"
);

begin

-- Registers: input sample, delay line and output, with synchronous
-- active-low reset.
sync_proc : process (clk)
begin
if rising_edge(clk) then
if rst = '0' then
reg <= (others=>(others=>'0'));
data_in_reg <= (others=>'0');
data_out <= (others=>'0');
else
reg(0) <= data_in_reg;
for i in 1 to FIR_ORDER-1 loop
reg(i) <= reg(i-1);
end loop;
data_in_reg <= data_in;
-- keep the upper DATA_WIDTH bits of the accumulated sum
data_out <= data_out_temp(ACC_WIDTH-1 downto COEFF_WIDTH);
end if;
end if;
end process;

-- Collect the multiplier inputs into one array.
tap(0) <= data_in_reg;
delayed_taps : for i in 0 to FIR_ORDER-1 generate
tap(i+1) <= reg(i);
end generate delayed_taps;

-- One multiplier per tap.
multipliers : for k in 0 to FIR_ORDER generate
coeff(k) <= std_logic_vector(coeff_array(k));
mult_k : signed_mult
generic map ( m => DATA_WIDTH, n => COEFF_WIDTH, bit1 => ACC_WIDTH-1 )
port map ( a => tap(k), b => coeff(k), prod => tempmult(k) );
end generate multipliers;

-- Accumulate the products; the sum wraps modulo 2**ACC_WIDTH.
sum1(0) <= tempmult(0);
adders : for k in 1 to FIR_ORDER generate
add_k : rca40bit
generic map ( width => ACC_WIDTH )
port map ( a => sum1(k-1), b => tempmult(k), cin => '0', sum => sum1(k), cout => open );
end generate adders;

data_out_temp <= sum1(FIR_ORDER);

end Behavioral;









share|improve this question




















  • 3





    Is this an academic exercise? Usually the fastest results come from using the dedicated multipliers on the chip. In your code, how do you expect to get any Fmax without a clock?

    – Tricky
    Nov 24 '18 at 19:50













  • Your current method ("I have created half adder, full adder....") does not match your goal of good timing. For fast results you should not build your own adders/multipliers but let the synthesis tool handle it.

    – Oldfart
    Nov 24 '18 at 19:56











  • @Tricky Yes, it is an academic exercise. I have not uploaded the timing constraint file (.xdc), but it defines a 10 ns clock, which means 100 MHz.

    – Zohaib Ramzan
    Nov 24 '18 at 20:56











  • @Oldfart My course is Advanced VLSI, and its requirement is that students build fast components themselves rather than leaving it to synthesis.

    – Zohaib Ramzan
    Nov 24 '18 at 20:59











  • But your design has no clock, so it won't matter what's in the XDC file, as you have nothing to time.

    – Tricky
    Nov 24 '18 at 23:05














0












0








0









I am building a signed multiplier. For that I have created a half adder,
a full adder,
a ripple-carry adder and then finally the multiplier. The code is shown below. How can I make it faster to achieve better timing? My
final task is to build a FIR filter running at 100 MHz. This
filter performs multiple multiplications (using my multiplier).
Can you help me improve my design with an optimization
technique such as pipelining, parallelism, or something else?




   -- half adder
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Half adder: adds two single bits.
--   a, b : operand bits
--   sum  : a xor b
--   cout : a and b (carry out)
-- The context clause above is required: std_logic is declared in
-- ieee.std_logic_1164 and is not visible without it.
entity half_adder is
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end half_adder;

architecture version1 of half_adder is
begin
sum <= a xor b;
cout <= a and b;
end version1;



-- full adder




    library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Full adder built from two half adders and an OR gate.
--   a, b : operand bits
--   cin  : carry in
--   sum  : a xor b xor cin
--   cout : carry out, set when either half adder produces a carry
-- The context clause above is required for std_logic to be visible.
entity full_adder is

port(

a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end full_adder;


architecture structural of full_adder is
component half_adder
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end component;
-- s1 : a xor b; c1 : carry of a + b; c2 : carry of cin + s1
signal s1, c1, c2 : std_logic ;
begin -- structural
-- first stage: add the two operand bits
half_adder1 : half_adder
port map (
a => a, b => b,
sum => s1, cout => c1);
-- second stage: add the carry in to the partial sum
half_adder2 : half_adder
port map (
a =>cin, b => s1,
sum => sum, cout => c2);
cout <= c1 or c2;

end structural ;



-- ripple carry adder 40 bit




    library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Ripple-carry adder, `width` bits wide (40 by default).
--   a, b : operands
--   cin  : carry into bit 0
--   sum  : a + b + cin, truncated to `width` bits
--   cout : carry out of the most significant bit
-- The context clause above is required for std_logic / std_logic_vector.
entity rca40bit is
generic (
width: integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end rca40bit;

architecture Behavioral of rca40bit is
component full_adder
port(

a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end component;
-- carry(i) is the carry into bit i; carry(width) is the final carry out
signal carry: std_logic_vector(width downto 0);

begin
carry(0)<=cin;
-- one full adder per bit, each passing its carry to the next bit
FA:for i in 0 to width-1 generate
FA_i:full_adder
port map
(
a=>a(i),b=>b(i),cin=>carry(i),sum=>sum(i),cout=>carry(i+1)
);

end generate;

cout<=carry(width);

end Behavioral;



-- Multiplier: I calculate the partial products first, then add them with the RCA adder.




    library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;

-- Combinational signed (two's-complement) multiplier, Baugh-Wooley form.
--
-- Generics
--   m    : width of the multiplicand a
--   n    : width of the multiplier b
--   bit1 : MSB index of the product and of every adder; must equal m+n-1
-- Ports
--   a, b : two's-complement operands
--   prod : a * b as an (m+n)-bit two's-complement number
--
-- Method: row j holds the bit products a(i) and b(j) shifted left by j.
-- Every bit product that contains exactly one of the two sign bits
-- (a(m-1) or b(n-1)) is inverted, and the constant
--   2**(m-1) + 2**(n-1) + 2**(m+n-1)
-- is added.  Modulo 2**(m+n) this equals the signed product.  The constant
-- seeds the adder chain, so n ripple-carry adders are needed in total.
entity signed_mult is
generic (

m : integer := 24; -- Multiplicand

n : integer := 16 ; -- multiplier;

bit1 :integer := 39 -- size of the adder

);


port(

a : in std_logic_vector(m - 1 downto 0) ;
b : in std_logic_vector(n - 1 downto 0) ;
prod : out std_logic_vector(bit1 downto 0)

);
end entity signed_mult;

architecture Behavioral of signed_mult is

-- Ripple-carry adder; width is set per instance through the generic map.
component rca40bit
generic (
width : integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end component;

subtype word is std_logic_vector(bit1 downto 0);
type word_array is array (natural range <>) of word;

-- Baugh-Wooley correction constant 2**(m-1) + 2**(n-1) + 2**(m+n-1).
-- When m = n the two low terms coincide and sum to 2**m.
function bw_constant return word is
variable k : word := (others => '0');
begin
k(bit1) := '1';
if m = n then
k(m) := '1';
else
k(m-1) := '1';
k(n-1) := '1';
end if;
return k;
end function bw_constant;

signal pp : word_array(0 to n-1); -- shifted partial-product rows
signal acc : word_array(0 to n); -- running sums, acc(0) is the constant

begin

assert bit1 = m + n - 1
report "signed_mult: bit1 must equal m+n-1"
severity failure;

-- Partial-product rows.  Bits outside the m-bit window of a row are '0'.
pp_gen : process (a, b)
variable row : word;
variable term : std_logic;
begin
for j in 0 to n-1 loop
row := (others => '0');
for i in 0 to m-1 loop
term := a(i) and b(j);
-- invert the terms that involve exactly one sign bit
if (i = m-1) xor (j = n-1) then
term := not term;
end if;
row(i+j) := term;
end loop;
pp(j) <= row;
end loop;
end process pp_gen;

acc(0) <= bw_constant;

-- Add the rows one after another; carries out of the MSB are discarded
-- because the result is taken modulo 2**(m+n).
add_rows : for j in 0 to n-1 generate
add_j : rca40bit
generic map ( width => bit1 + 1 )
port map ( a => acc(j), b => pp(j), cin => '0', sum => acc(j+1), cout => open );
end generate add_rows;

prod <= acc(n);

end Behavioral;



Filter Design




library IEEE;
USE IEEE.STD_LOGIC_1164.ALL;
USE IEEE.NUMERIC_STD.ALL;

-- FIR filter top level: one sample in and one sample out per clock.
--
-- Generics
--   PIPELINE_DEPTH : NOTE(review): appears unused by the architecture - confirm.
--   DATA_WIDTH     : width in bits of data_in and data_out.
--   FIR_ORDER      : filter order; the coefficient table is indexed 0 to FIR_ORDER.
--   COEFF_WIDTH    : width in bits of each coefficient.
--
-- Ports
--   clk      : clock; all registers update on the rising edge.
--   rst      : synchronous reset, asserted when '0'.
--   data_in  : input sample.
--   data_out : registered output sample.
entity your_filter is
    generic (
        PIPELINE_DEPTH : positive := 2;
        DATA_WIDTH     : positive := 24;
        FIR_ORDER      : positive := 34;
        COEFF_WIDTH    : positive := 16
    );
    port (
        clk      : in  std_logic;
        rst      : in  std_logic;
        data_in  : in  std_logic_vector(DATA_WIDTH - 1 downto 0);
        data_out : out std_logic_vector(DATA_WIDTH - 1 downto 0)
    );
end entity your_filter;

-- Direct-form FIR filter.
--
-- A registered copy of data_in (data_in_reg) feeds a FIR_ORDER-deep delay
-- line (reg).  Tap 0 is data_in_reg and tap k (k >= 1) is reg(k-1).  Every tap
-- is multiplied by coeff_array(k) with a signed_mult instance and the products
-- are summed by a chain of ripple-carry adders.  The upper DATA_WIDTH bits of
-- the sum are registered onto data_out.
--
-- Changes with respect to the earlier revision:
--   * every adder carry-out used to drive the single signal c1 (multiple
--     drivers on one net); the carry-outs are now left open;
--   * the adder component is rca40bit, the ripple-carry adder entity defined
--     in this file (the previously declared ripple_ca has no entity here);
--   * the first adder, which only added zero, is gone;
--   * widths are passed to the sub-components through generic maps.
--
-- NOTE(review): the multipliers and the whole adder chain sit between two
-- register stages.  Inserting pipeline registers would shorten that path but
-- changes the latency, so it is deliberately not done here.
architecture Behavioral of your_filter is

-- Width of every product and of the accumulation chain.
constant ACC_WIDTH : positive := DATA_WIDTH + COEFF_WIDTH;

component rca40bit is
generic (
width : integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end component;

component signed_mult is
generic (
m : integer := 24;
n : integer := 16;
bit1 : integer := 39
);
port(
a : in std_logic_vector(m - 1 downto 0) ;
b : in std_logic_vector(n - 1 downto 0) ;
prod : out std_logic_vector(bit1 downto 0)
);
end component;

type COEFF_ARRAY_TYPE is array (0 to FIR_ORDER) of signed(COEFF_WIDTH-1 downto 0);
type COEFF_ARRAY1 is array (0 to FIR_ORDER) of std_logic_vector(COEFF_WIDTH-1 downto 0);
type REG_TYPE2 is array (0 to FIR_ORDER-1) of std_logic_vector (DATA_WIDTH-1 downto 0);
type TAP_ARRAY is array (0 to FIR_ORDER) of std_logic_vector (DATA_WIDTH-1 downto 0);
type ACC_ARRAY is array (0 to FIR_ORDER) of std_logic_vector(ACC_WIDTH-1 downto 0);

signal data_in_reg : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0); -- registered input sample
signal reg : REG_TYPE2; -- delay line, reg(0) is the newest delayed sample
signal tap : TAP_ARRAY; -- tap(0) = data_in_reg, tap(k) = reg(k-1)
signal coeff : COEFF_ARRAY1; -- coefficients as std_logic_vector
signal tempmult : ACC_ARRAY; -- tempmult(k) = tap(k) * coeff(k)
signal sum1 : ACC_ARRAY; -- sum1(k) = tempmult(0) + ... + tempmult(k)
signal data_out_temp : std_logic_vector(ACC_WIDTH-1 downto 0);

-- The table holds FIR_ORDER+1 = 35 entries; it only matches FIR_ORDER = 34.
constant coeff_array : COEFF_ARRAY_TYPE := (
"0000000101011111",
"0000000001100001",
"0000000000110011",
"1111111111011010",
"1111111101011111",
"1111111011011001",
"1111111001100101",
"1111111000100101",
"1111111000111011",
"1111111010111111",
"1111111110111001",
"0000000100100000",
"0000001011011000",
"0000010010110011",
"0000011001111010",
"0000011111110010",
"0000100011101011",
"0000100101000010",
"0000100011101011",
"0000011111110010",
"0000011001111010",
"0000010010110011",
"0000001011011000",
"0000000100100000",
"1111111110111001",
"1111111010111111",
"1111111000111011",
"1111111000100101",
"1111111001100101",
"1111111011011001",
"1111111101011111",
"1111111111011010",
"0000000000110011",
"0000000001100001",
"0000000101011111"
);

begin

-- Registers: input sample, delay line and output, with synchronous
-- active-low reset.
sync_proc : process (clk)
begin
if rising_edge(clk) then
if rst = '0' then
reg <= (others=>(others=>'0'));
data_in_reg <= (others=>'0');
data_out <= (others=>'0');
else
reg(0) <= data_in_reg;
for i in 1 to FIR_ORDER-1 loop
reg(i) <= reg(i-1);
end loop;
data_in_reg <= data_in;
-- keep the upper DATA_WIDTH bits of the accumulated sum
data_out <= data_out_temp(ACC_WIDTH-1 downto COEFF_WIDTH);
end if;
end if;
end process;

-- Collect the multiplier inputs into one array.
tap(0) <= data_in_reg;
delayed_taps : for i in 0 to FIR_ORDER-1 generate
tap(i+1) <= reg(i);
end generate delayed_taps;

-- One multiplier per tap.
multipliers : for k in 0 to FIR_ORDER generate
coeff(k) <= std_logic_vector(coeff_array(k));
mult_k : signed_mult
generic map ( m => DATA_WIDTH, n => COEFF_WIDTH, bit1 => ACC_WIDTH-1 )
port map ( a => tap(k), b => coeff(k), prod => tempmult(k) );
end generate multipliers;

-- Accumulate the products; the sum wraps modulo 2**ACC_WIDTH.
sum1(0) <= tempmult(0);
adders : for k in 1 to FIR_ORDER generate
add_k : rca40bit
generic map ( width => ACC_WIDTH )
port map ( a => sum1(k-1), b => tempmult(k), cin => '0', sum => sum1(k), cout => open );
end generate adders;

data_out_temp <= sum1(FIR_ORDER);

end Behavioral;









share|improve this question

















I am building a signed multiplier. For that I have created a half adder,
a full adder,
a ripple-carry adder and then finally the multiplier. The code is shown below. How can I make it faster to achieve better timing? My
final task is to build a FIR filter running at 100 MHz. This
filter performs multiple multiplications (using my multiplier).
Can you help me improve my design with an optimization
technique such as pipelining, parallelism, or something else?




   -- half adder
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Half adder: adds two single bits.
--   a, b : operand bits
--   sum  : a xor b
--   cout : a and b (carry out)
-- The context clause above is required: std_logic is declared in
-- ieee.std_logic_1164 and is not visible without it.
entity half_adder is
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end half_adder;

architecture version1 of half_adder is
begin
sum <= a xor b;
cout <= a and b;
end version1;



-- full adder




    library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Full adder built from two half adders and an OR gate.
--   a, b : operand bits
--   cin  : carry in
--   sum  : a xor b xor cin
--   cout : carry out, set when either half adder produces a carry
-- The context clause above is required for std_logic to be visible.
entity full_adder is

port(

a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end full_adder;


architecture structural of full_adder is
component half_adder
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end component;
-- s1 : a xor b; c1 : carry of a + b; c2 : carry of cin + s1
signal s1, c1, c2 : std_logic ;
begin -- structural
-- first stage: add the two operand bits
half_adder1 : half_adder
port map (
a => a, b => b,
sum => s1, cout => c1);
-- second stage: add the carry in to the partial sum
half_adder2 : half_adder
port map (
a =>cin, b => s1,
sum => sum, cout => c2);
cout <= c1 or c2;

end structural ;



-- ripple carry adder 40 bit




    library IEEE;
use IEEE.STD_LOGIC_1164.ALL;

-- Ripple-carry adder, `width` bits wide (40 by default).
--   a, b : operands
--   cin  : carry into bit 0
--   sum  : a + b + cin, truncated to `width` bits
--   cout : carry out of the most significant bit
-- The context clause above is required for std_logic / std_logic_vector.
entity rca40bit is
generic (
width: integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end rca40bit;

architecture Behavioral of rca40bit is
component full_adder
port(

a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end component;
-- carry(i) is the carry into bit i; carry(width) is the final carry out
signal carry: std_logic_vector(width downto 0);

begin
carry(0)<=cin;
-- one full adder per bit, each passing its carry to the next bit
FA:for i in 0 to width-1 generate
FA_i:full_adder
port map
(
a=>a(i),b=>b(i),cin=>carry(i),sum=>sum(i),cout=>carry(i+1)
);

end generate;

cout<=carry(width);

end Behavioral;



-- Multiplier: I calculate the partial products first, then add them with the RCA adder.




    library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;

-- Combinational signed (two's-complement) multiplier, Baugh-Wooley form.
--
-- Generics
--   m    : width of the multiplicand a
--   n    : width of the multiplier b
--   bit1 : MSB index of the product and of every adder; must equal m+n-1
-- Ports
--   a, b : two's-complement operands
--   prod : a * b as an (m+n)-bit two's-complement number
--
-- Method: row j holds the bit products a(i) and b(j) shifted left by j.
-- Every bit product that contains exactly one of the two sign bits
-- (a(m-1) or b(n-1)) is inverted, and the constant
--   2**(m-1) + 2**(n-1) + 2**(m+n-1)
-- is added.  Modulo 2**(m+n) this equals the signed product.  The constant
-- seeds the adder chain, so n ripple-carry adders are needed in total.
entity signed_mult is
generic (

m : integer := 24; -- Multiplicand

n : integer := 16 ; -- multiplier;

bit1 :integer := 39 -- size of the adder

);


port(

a : in std_logic_vector(m - 1 downto 0) ;
b : in std_logic_vector(n - 1 downto 0) ;
prod : out std_logic_vector(bit1 downto 0)

);
end entity signed_mult;

architecture Behavioral of signed_mult is

-- Ripple-carry adder; width is set per instance through the generic map.
component rca40bit
generic (
width : integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end component;

subtype word is std_logic_vector(bit1 downto 0);
type word_array is array (natural range <>) of word;

-- Baugh-Wooley correction constant 2**(m-1) + 2**(n-1) + 2**(m+n-1).
-- When m = n the two low terms coincide and sum to 2**m.
function bw_constant return word is
variable k : word := (others => '0');
begin
k(bit1) := '1';
if m = n then
k(m) := '1';
else
k(m-1) := '1';
k(n-1) := '1';
end if;
return k;
end function bw_constant;

signal pp : word_array(0 to n-1); -- shifted partial-product rows
signal acc : word_array(0 to n); -- running sums, acc(0) is the constant

begin

assert bit1 = m + n - 1
report "signed_mult: bit1 must equal m+n-1"
severity failure;

-- Partial-product rows.  Bits outside the m-bit window of a row are '0'.
pp_gen : process (a, b)
variable row : word;
variable term : std_logic;
begin
for j in 0 to n-1 loop
row := (others => '0');
for i in 0 to m-1 loop
term := a(i) and b(j);
-- invert the terms that involve exactly one sign bit
if (i = m-1) xor (j = n-1) then
term := not term;
end if;
row(i+j) := term;
end loop;
pp(j) <= row;
end loop;
end process pp_gen;

acc(0) <= bw_constant;

-- Add the rows one after another; carries out of the MSB are discarded
-- because the result is taken modulo 2**(m+n).
add_rows : for j in 0 to n-1 generate
add_j : rca40bit
generic map ( width => bit1 + 1 )
port map ( a => acc(j), b => pp(j), cin => '0', sum => acc(j+1), cout => open );
end generate add_rows;

prod <= acc(n);

end Behavioral;



Filter Design




library IEEE;
USE IEEE.STD_LOGIC_1164.ALL;
USE IEEE.NUMERIC_STD.ALL;

-- FIR filter top level: one sample in and one sample out per clock.
--
-- Generics
--   PIPELINE_DEPTH : NOTE(review): appears unused by the architecture - confirm.
--   DATA_WIDTH     : width in bits of data_in and data_out.
--   FIR_ORDER      : filter order; the coefficient table is indexed 0 to FIR_ORDER.
--   COEFF_WIDTH    : width in bits of each coefficient.
--
-- Ports
--   clk      : clock; all registers update on the rising edge.
--   rst      : synchronous reset, asserted when '0'.
--   data_in  : input sample.
--   data_out : registered output sample.
entity your_filter is
    generic (
        PIPELINE_DEPTH : positive := 2;
        DATA_WIDTH     : positive := 24;
        FIR_ORDER      : positive := 34;
        COEFF_WIDTH    : positive := 16
    );
    port (
        clk      : in  std_logic;
        rst      : in  std_logic;
        data_in  : in  std_logic_vector(DATA_WIDTH - 1 downto 0);
        data_out : out std_logic_vector(DATA_WIDTH - 1 downto 0)
    );
end entity your_filter;

architecture Behavioral of your_filter is

  -- Adder for two full-precision (DATA_WIDTH+COEFF_WIDTH bit) words.
  -- The component has no clock port, so it is used here as a purely
  -- combinational element; its implementation lives outside this architecture.
  component ripple_ca is
    port (
      a    : in  std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
      b    : in  std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
      cin  : in  std_logic;
      sum  : out std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
      cout : out std_logic
    );
  end component;

  -- Multiplier of a DATA_WIDTH-bit sample by a COEFF_WIDTH-bit coefficient,
  -- producing a full-width product. Also clockless, hence combinational here.
  -- NOTE(review): presumably two's-complement signed, per its name - confirm
  -- against the signed_mult entity.
  component signed_mult is
    port (
      a    : in  std_logic_vector(DATA_WIDTH - 1 downto 0);
      b    : in  std_logic_vector(COEFF_WIDTH - 1 downto 0);
      prod : out std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0)
    );
  end component;

  -- Coefficient storage, first as signed constants, then as plain vectors
  -- for the component ports.
  type COEFF_ARRAY_TYPE is array (0 to FIR_ORDER) of signed(COEFF_WIDTH-1 downto 0);
  type COEFF_ARRAY1     is array (0 to FIR_ORDER) of std_logic_vector(COEFF_WIDTH-1 downto 0);

  -- Delay line of input samples.
  type REG_TYPE2        is array (0 to FIR_ORDER-1) of std_logic_vector(DATA_WIDTH-1 downto 0);

  -- Full-precision words: one per product and one per partial sum.
  type WIDE_ARRAY_TYPE  is array (0 to FIR_ORDER) of std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);

  signal coeff         : COEFF_ARRAY1;
  signal reg           : REG_TYPE2;        -- registered delay line
  signal next_reg      : REG_TYPE2;        -- delay line contents for the next clock
  signal reg1          : REG_TYPE2;        -- copy of reg fed to the multipliers
  signal tempmult      : WIDE_ARRAY_TYPE;  -- tempmult(i): product of tap i (see generate below)
  signal sum1          : WIDE_ARRAY_TYPE;  -- sum1(i): running sum of tempmult(0 .. i-1)
  signal data_out_temp : std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
  signal data_in_reg   : std_logic_vector(DATA_WIDTH - 1 downto 0);
  signal next_data_out : std_logic_vector(DATA_WIDTH - 1 downto 0);

  -- Carry-in for every adder. Never driven, so it keeps its initial '0'.
  signal c             : std_logic := '0';

  constant coeff_array : COEFF_ARRAY_TYPE := (
    "0000000101011111",
    "0000000001100001",
    "0000000000110011",
    "1111111111011010",
    "1111111101011111",
    "1111111011011001",
    "1111111001100101",
    "1111111000100101",
    "1111111000111011",
    "1111111010111111",
    "1111111110111001",
    "0000000100100000",
    "0000001011011000",
    "0000010010110011",
    "0000011001111010",
    "0000011111110010",
    "0000100011101011",
    "0000100101000010",
    "0000100011101011",
    "0000011111110010",
    "0000011001111010",
    "0000010010110011",
    "0000001011011000",
    "0000000100100000",
    "1111111110111001",
    "1111111010111111",
    "1111111000111011",
    "1111111000100101",
    "1111111001100101",
    "1111111011011001",
    "1111111101011111",
    "1111111111011010",
    "0000000000110011",
    "0000000001100001",
    "0000000101011111"
  );

begin

  -- Registers: delay line, input sample and output sample.
  -- Reset is synchronous and active when rst = '0'.
  sync_proc : process (clk)
  begin
    if rising_edge(clk) then
      if rst = '0' then
        reg         <= (others => (others => '0'));
        data_in_reg <= (others => '0');
        data_out    <= (others => '0');
      else
        reg         <= next_reg;
        data_in_reg <= data_in;
        data_out    <= next_data_out;
      end if;
    end if;
  end process;

  -- Combinational next-state logic and type adaptation for the component ports.
  async_proc : process (reg, data_in_reg)
  begin
    -- Shift the delay line by one position; the newest sample enters at index 0.
    for i in 0 to FIR_ORDER - 2 loop
      next_reg(i+1) <= reg(i);
    end loop;
    next_reg(0) <= data_in_reg;

    -- Expose the constant coefficients and the delay line as std_logic_vector.
    for i in 0 to FIR_ORDER - 1 loop
      coeff(i) <= std_logic_vector(coeff_array(i));
      reg1(i)  <= reg(i);
    end loop;
    coeff(FIR_ORDER) <= std_logic_vector(coeff_array(FIR_ORDER));
  end process;

  -- One multiplier per delayed sample: tempmult(i) = reg(i) * coeff(i+1).
  nextreg : for i in 0 to FIR_ORDER - 1 generate
    accumumlator : signed_mult
      port map ( a => reg1(i), b => coeff(i+1), prod => tempmult(i) );
  end generate nextreg;

  -- Linear chain of adders accumulating the products of the delayed samples.
  -- The carry-outs are left open: tying all of them to one std_logic signal
  -- (as was done previously) creates multiple drivers on that signal, which
  -- resolves to 'X' in simulation and is a multi-driven net for synthesis.
  --
  -- NOTE(review): there is no register anywhere between reg/data_in_reg and
  -- data_out, so a multiplier followed by FIR_ORDER+1 chained adders forms a
  -- single combinational path; this is the path to break up when pipelining.
  sum1(0) <= (others => '0');
  addition : for i in 0 to FIR_ORDER - 1 generate
    addition01 : ripple_ca
      port map ( a => sum1(i), b => tempmult(i), cin => c, sum => sum1(i+1), cout => open );
  end generate addition;

  -- Tap 0 uses the registered input sample directly.
  dataout : signed_mult
    port map (
      a    => data_in_reg,
      b    => coeff(0),
      prod => tempmult(FIR_ORDER)
    );

  -- Final addition: all delayed-sample products plus the tap-0 product.
  addition_2 : ripple_ca
    port map ( a => sum1(FIR_ORDER), b => tempmult(FIR_ORDER), cin => c, sum => data_out_temp, cout => open );

  -- Keep the upper DATA_WIDTH bits of the full-precision sum (drops the
  -- COEFF_WIDTH least significant bits).
  next_data_out <= data_out_temp(DATA_WIDTH+COEFF_WIDTH-1 downto COEFF_WIDTH);

end Behavioral;






vhdl pipeline fpga vlsi






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 24 '18 at 23:36







Zohaib Ramzan

















asked Nov 24 '18 at 16:37









Zohaib RamzanZohaib Ramzan

43




43








  • 3





    Is this an academic exercise? Usually the fastest results come from using the dedicated multipliers on the chip. In your code, how do you expect to get any FMax without a clock?

    – Tricky
    Nov 24 '18 at 19:50













  • Your current method ("I have created half adder, full adder....") does not match your target of good timing. For fast results you should not build your own adders/multipliers but let the synthesis tool handle it.

    – Oldfart
    Nov 24 '18 at 19:56











  • @Tricky Yes, it is an academic exercise. I have not uploaded the timing constraint file (.xdc), but it generates a 10 ns clock, which means 100 MHz.

    – Zohaib Ramzan
    Nov 24 '18 at 20:56











  • @Oldfart My course subject is advanced VLSI, whose requirement is that the fast components be built by the students, not by synthesis.

    – Zohaib Ramzan
    Nov 24 '18 at 20:59











  • But your design has no clock - so it won't matter what's in the XDC file, as you have nothing to time.

    – Tricky
    Nov 24 '18 at 23:05














  • 3





    Is this an academic exercise? Usually the fastest results come from using the dedicated multipliers on the chip. In your code, how do you expect to get any FMax without a clock?

    – Tricky
    Nov 24 '18 at 19:50













  • Your current method ("I have created half adder, full adder....") does not match your target of good timing. For fast results you should not build your own adders/multipliers but let the synthesis tool handle it.

    – Oldfart
    Nov 24 '18 at 19:56











  • @Tricky Yes, it is an academic exercise. I have not uploaded the timing constraint file (.xdc), but it generates a 10 ns clock, which means 100 MHz.

    – Zohaib Ramzan
    Nov 24 '18 at 20:56











  • @Oldfart My course subject is advanced VLSI, whose requirement is that the fast components be built by the students, not by synthesis.

    – Zohaib Ramzan
    Nov 24 '18 at 20:59











  • But your design has no clock - so it won't matter what's in the XDC file, as you have nothing to time.

    – Tricky
    Nov 24 '18 at 23:05








3




3





Is this an academic exercise? Usually the fastest results come from using the dedicated multipliers on the chip. In your code, how do you expect to get any FMax without a clock?

– Tricky
Nov 24 '18 at 19:50







Is this an academic exercise? Usually the fastest results come from using the dedicated multipliers on the chip. In your code, how do you expect to get any FMax without a clock?

– Tricky
Nov 24 '18 at 19:50















Your current method ("I have created half adder, full adder....") does not match your target of good timing. For fast results you should not build your own adders/multipliers but let the synthesis tool handle it.

– Oldfart
Nov 24 '18 at 19:56





Your current method ("I have created half adder, full adder....") does not match your target of good timing. For fast results you should not build your own adders/multipliers but let the synthesis tool handle it.

– Oldfart
Nov 24 '18 at 19:56













@Tricky Yes, it is an academic exercise. I have not uploaded the timing constraint file (.xdc), but it generates a 10 ns clock, which means 100 MHz.

– Zohaib Ramzan
Nov 24 '18 at 20:56





@Tricky Yes, it is an academic exercise. I have not uploaded the timing constraint file (.xdc), but it generates a 10 ns clock, which means 100 MHz.

– Zohaib Ramzan
Nov 24 '18 at 20:56













@Oldfart My course subject is advanced VLSI, whose requirement is that the fast components be built by the students, not by synthesis.

– Zohaib Ramzan
Nov 24 '18 at 20:59





@Oldfart My course subject is advanced VLSI, whose requirement is that the fast components be built by the students, not by synthesis.

– Zohaib Ramzan
Nov 24 '18 at 20:59













But your design has no clock - so it won't matter what's in the XDC file, as you have nothing to time.

– Tricky
Nov 24 '18 at 23:05





But your design has no clock - so it won't matter what's in the XDC file, as you have nothing to time.

– Tricky
Nov 24 '18 at 23:05












0






active

oldest

votes











Your Answer






// Registers a callback with StackExchange.ifUsing for the "editor" key
// (third argument "code-snippets"). When invoked, the callback requests
// "externalEditor", then "snippets", and finally calls
// StackExchange.snippets.init().
StackExchange.ifUsing("editor", function () {
    function initSnippets() {
        StackExchange.snippets.init();
    }

    function loadSnippets() {
        StackExchange.using("snippets", initSnippets);
    }

    StackExchange.using("externalEditor", loadSnippets);
}, "code-snippets");

// Page-ready hook that initialises the tag renderer and then the answer
// editor. The editor is created only after the "externalEditor" module has
// been requested, and after "snippets" when snippets are enabled.
StackExchange.ready(function() {
    var channelOptions = {
        tags: "".split(" "),
        id: "1"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
        // Have to fire editor after snippets, if snippets enabled
        if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
                createEditor();
            });
        }
        else {
            createEditor();
        }
    });

    // Passes the answer-editor configuration to StackExchange.prepareEditor.
    // Declared as a function statement, so it is hoisted and available to
    // the callbacks registered above.
    function createEditor() {
        StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: true,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: 10,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
                // The HTML fragments use \u003c / \u003e for angle brackets and
                // escaped double quotes; without those escapes the string
                // literals terminate early and the script does not parse.
                brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer",
            immediatelyShowMarkdownHelp: true
        });
    }
});














draft saved

draft discarded


















// Page-ready hook: calls StackExchange.openid.initPostLogin with the
// login selector, the percent-encoded URL of this question's #new-answer
// anchor, and the 'question_page' tag.
StackExchange.ready(function () {
    var loginSelector = '.new-post-login';
    var encodedReturnUrl = 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53460221%2fefficient-signed-multiplier-with-good-timing%23new-answer';
    var pageTag = 'question_page';

    StackExchange.openid.initPostLogin(loginSelector, encodedReturnUrl, pageTag);
});

Post as a guest















Required, but never shown

























0






active

oldest

votes








0






active

oldest

votes









active

oldest

votes






active

oldest

votes
















draft saved

draft discarded




















































Thanks for contributing an answer to Stack Overflow!


  • Please be sure to answer the question. Provide details and share your research!

But avoid



  • Asking for help, clarification, or responding to other answers.

  • Making statements based on opinion; back them up with references or personal experience.


To learn more, see our tips on writing great answers.




draft saved


draft discarded














// Page-ready hook: calls StackExchange.openid.initPostLogin with the
// login selector, the percent-encoded URL of this question's #new-answer
// anchor, and the 'question_page' tag.
StackExchange.ready(function () {
    var loginSelector = '.new-post-login';
    var encodedReturnUrl = 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53460221%2fefficient-signed-multiplier-with-good-timing%23new-answer';
    var pageTag = 'question_page';

    StackExchange.openid.initPostLogin(loginSelector, encodedReturnUrl, pageTag);
});

Post as a guest















Required, but never shown





















































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown

































Required, but never shown














Required, but never shown












Required, but never shown







Required, but never shown







Popular posts from this blog

Costa Masnaga

Fotorealismo

Sidney Franklin