Chapter 5 Problems

Chapter 5: Problem Solutions

P5.1.1	P5.2.1	P5.3.1	P5.4.1	P5.5.1
P5.1.2	P5.2.2	P5.3.2	P5.4.2	P5.5.2
P5.1.3	P5.2.3	P5.3.3	P5.4.3
P5.1.4	P5.2.4		P5.4.4
P5.1.5	P5.2.5
	P5.2.6
	P5.2.7
	P5.2.8
	P5.2.9
	P5.2.10
	P5.2.11
	P5.2.12
	P5.2.13
	P5.2.14
	P5.2.15
	P5.2.16

P5.1.1

% Script  P5_1_1
%
% Compare two ways of setting up the Pascal matrix

clc
disp('   n     PascalTri   PascalTri1')
disp('           Flops        Flops ')
disp('------------------------------------')
for n=[5 10 20 40 80]
   flops(0);
   P = PascalTri(n);
   f = flops;
   flops(0);
   P = PascalTri1(n);
   f1 = flops;
   disp(sprintf(' %3.0f      %6.0f      %6.0f',n, f,f1))
end


  function P = PascalTri1(n)
% n is a positive integer
% T is nxn lower triangular matrix with T(i,j) =
%       the binomial coefficient (i-1)-choose-(j-1)

  P = tril(ones(n,n));
  for i=3:n
     for j=2:i-1
	    P(i,j) = P(i-1,j-1) + P(i-1,j);
	 end
  end

P5.1.2

% Script P5_1_2
% 
% Illustrate Circulant matrix set-up via Toeplitz

clc
v = (1:8)';
C = Circulant(v)


  function C = Circulant(v)
% v a column vector
% C is a circulant matrix with first row = v.

C = toeplitz([1; v(length(v):-1:2)],v);

P5.1.3

% Script P5_1_3
%
% Embeddig a Toeplitz matrix into a circulant matrix.

clc
row = [10 20 30 40 50];
col = [10; 200; 300; 400; 500];
T = toeplitz(col,row)
C = EmbedToep(col,row)


  function C = EmbedToep(col,row)
% col is a column n-vector
% row is a row n-vector with row(1) = col(1)
%
% C is a circulant matrix of size 2n-1 with the property
% that C(1:n,1:n) = T where T is an n-by-n Toeplitz
% matrix with T(:,1) = col and T(1,:) = row.

n = length(col);
m = 2*n-1;
C = zeros(m,m);
C(1,:) = [row col(n:-1:2)'];
for k=2:m
   C(k,:) = [C(k-1,m) C(k-1,1:m-1)];
end

P5.1.4

% Script P5_1_4
%
% Illustrate RandBand

clc
disp('p = 1 and q = 1')
A = RandBand(5,5,1,1)
disp('p = 3 and q = 2')
A = RandBand(9,5,3,2)
disp('p = 1 and q = 3')
A = RandBand(5,6,1,3)


  function A = RandBand(m,n,p,q)
% m, n, p, and q are  positive integers
% random m-by-n matrix with lower bandwidth p and upper bandwidth q.

A = triu(tril(rand(m,n),q),-p);

P5.1.5

 Not Available

P5.2.1

% Script P5_2_1
%
% Checks the Hessenberg matrix-vector product.

clc
n = 10;
A = triu(randn(n,n),-1);
z = randn(n,1);
y = HessMatVec(A,z);
err = y - A*z


  function y = HessMatVec(A,z)
% A is m-by-n with A(i,j) = 0, i>j+1, z is n-by-1.
% y = A*z
[n,n] = size(A);
y = z(n)*A(:,n);
for k=1:n-1
   y(1:k+1) = y(1:k+1) + A(1:k+1,k)*z(k);
end

P5.2.2

% Script P5_2_2
%
% Checks TriMatVec

clc
m = 5;
n = 3;

A = triu(randn(m,n));
x = randn(n,1);
y = TriMatVec(A,x);
err = y - A*x
A = triu(randn(n,m));
x = randn(m,1);
y = TriMatVec(A,x);
err = y - A*x


  function y = TriMatVec(A,x)
% A is an m-by-n upper triangular matrix and x n-by-1.
% y = A*x

[m,n] = size(A);
y = zeros(m,1);
for k=1:n
   s = min([k m]);
   y(1:s) = y(1:s) + A(1:s,k)*x(k);
end

P5.2.3

% Script P5_2_3
%
% Saxpy matrix multiply with one factor triangular.

clc 
disp('Example where B has more columns than rows:')
A = rand(4,4)
B = triu(rand(4,6))
C = MatMatSax1(A,B)
Cexact = A*B

disp(' ')
disp('Example where B has more rows than columns:')
A = rand(6,6)
B = triu(rand(6,4))
C = MatMatSax1(A,B)
Cexact = A*B


  function C = MatMatSax1(A,B)
% A is an m-by-p matrix.
% B is a p-by-n matrix (upper triangular).
% C     A*B (Saxpy Version)

[m,p] = size(A);
[p,n] = size(B);
C = zeros(m,n);
for j=1:n
   % Compute j-th column of C.
   for k=1:min([j p])
      C(:,j) = C(:,j) + A(:,k)*B(k,j);
   end
end

P5.2.4

% Script P5_2_4
%
% Saxpy matrix multiply with both factors triangular.

clc
A = triu(rand(5,5))
B = triu(rand(5,5))
C = MatMatSax2(A,B)
Cexact = A*B


  function C = MatMatSax2(A,B)
% A is an n-by-n matrix.
% B is a p-by-n matrix (upper triangular)
% C     A*B (Saxpy Version)
%
[n,n] = size(A);
C = zeros(n,n);
for j=1:n
   % Compute j-th column of C.
   for k=1:j
      C(1:j,j) = C(1:j,j) + A(1:j,k)*B(k,j);
	end
end

P5.2.5

% Script 5_2_5
%
% Illustrate (Lower Triangular)*(Upper Triangular) products

A = tril(rand(6,6))
B = triu(rand(6,6))
C = MatMatDot1(A,B)
Error = C - A*B


  function C = MatMatDot1(A,B)
% A and B are n-by-n lower triangular matrices.
% C = A*B (Dot Product Version)

[n,n] = size(A);
C = zeros(n,n);
for j=1:n
   % Compute j-th column of C.
   for i=1:n
      s = 1:min([i j]);
      C(i,j) = A(i,s)*B(s,j);
   end
end

P5.2.6

% Script P5_2_6
%
% Illustrate A*(Banded B)

p = 2;
A = rand(6,6)
B = triu(tril(rand(6,6),p),-p)
C = BandMatMatSax(A,B,p)
Error = C - A*B


  function C = bandMatMatSax(A,B,p)
% A is an n-by-n matrix.
% B is an n-by-n matrix, bandwidth p
% C = A*B (Saxpy Version)

[n,n] = size(A);
C = zeros(n,n);
for j=1:n
   % Compute j-th column of C.
   for k=max([1 j-p]):min([j+p n])
      C(:,j) = C(:,j) + A(:,k)*B(k,j);
   end
end

P5.2.7

% Script P5_2_7
%
% Matrix powers via repeated squaring

clc
k = input('Compute A^k where k = ');
A = randn(5,5);
A = A/norm(A,1);
B = MatPower(A,k);
Error = B - A^k


  function B = MatPower(A,k)
% A is an n-by-n matrix and k is a positive integer.
% B = A^k
s = '';
x = k;
while x>=1
   if rem(x,2)==0
      s = ['0' s];
      x = x/2;
   else
      s = ['1' s];
      x = (x-1)/2;
   end
end
C = A;
First = 0;
for j=length(s):-1:1
   if s(j)=='1'
      if First==0
         B = C;
         First =1;
      else
         B = B*C;
      end
   end
   C = C*C;
end

P5.2.8

% Script P5_2_8
%
% y = (polynomial in matrix A)*(vector)

clc
v = randn(8,1);
A = randn(8,8);
c = randn(4,1);
y = HornerM(c,A,v);
Error = y - (c(1)*v + c(2)*A*v + c(3)*A*A*v + c(4)*A*A*A*v)

The function HornerM is not provided.

P5.2.9

% Script P5_2_9
%
% Illustrate Krylov matrix set-up

clc
A = magic(4);
B = A(:,3:4);
p = 3
C = Krylov(A,B,p)


  function C = Krylov(A,B,p)
% A is n-by-n, B is n-by-t, and p is a positive integer.
% C = [ B AB (A^2)B ... (A^p-1))B]

C = B; 
C1 = B;
for k=1:p-1
   C1 = A*C1;
   C = [ C C1];
end

P5.2.10

% Script P5_2_10
%
% Illustrate row scaling

clc
A = magic(5); A = A(:,1:3)
d = 1:5
B = ScaleRows(A,d)


  function B = ScaleRows(A,d)
% A is an m-by-n and d an m-vector
% B = diag(d)*A 
[m,n] = size(A);
B = zeros(m,n);
for i=1:m
   B(i,:) = d(i)*A(i,:);
end

P5.2.11

% Script P5_2_11
%
% Structured Matrix-vector multiplication

clc
close all

n = 20;
k = 30;
P = SetUp(n-1);
v0 = rand(n,1);
v0 = (v0 + v0(n:-1:1))/2;
V = Forward(P,v0,k);
for j=1:k
   plot(V(:,j))
   title(sprintf('j = %1d',j))
   pause(1)
end
hold off

[The function Forward is not provided.]

% Script P5_3_1
%
% Integral of SampleF2 over [0,2]x[0,2] for various 2D composite
% QNC(7) rule.

clc
m = 7;
disp(' Subintervals    Integral       Relative Time')
disp('------------------------------------------------')
for n = [4 8 16 32]
   tic;
   numI2D = MyCompQNC2D('SampleF2',0,2,0,2,m,n,m,n);
   t = toc;
   if n==4;
      base = t;
      time = 1;
   else
      time = t/base;
   end
   disp(sprintf(' %7.0f  %17.15f  %11.1f',n,numI2D,time))
end

The function MyCompQNC2D is not provided.

P5.3.2

% Script P5_3_2
%
% Solving integral equations.

close all
a = 0;
b = 5;
clc 
disp('m   n     Set-up Time ')
disp('-----------------------')
for sigma = [.01 .1]
   j = 1;
   figure
   for m = [3 5]
      for n = [5 10]
         tic;
         Kmat = Kernel(a,b,m,n,sigma);
         tfinish = toc;
         T = etime(tfinish,tstart);
         disp(sprintf('%2.0f  %2.0f    %5.3f',m,n,T))
         x = linspace(a,b,n*(m-1)+1)';
         rhs = 1./((x-2).^2 + .1) + 1./((x-4).^2 + .2);  
         fvals = Kmat\rhs;
         z = linspace(a,b,200)';
         sz = spline(x,fvals,z);
         subplot(2,2,j)
         plot(z,sz)
         title(sprintf('m=%2.0f, n=%2.0f   sigma = %5.2f',m,n,sigma))
         j= j+1;
      end
   end
end


  function Kmat = Kernel(a,b,m,n,sigma)
%
%  Kmat(i,j) =
[omega,x] = wCompNC(a,b,m,n);
N = length(omega);
v = exp(-((x(1)-x).^2)/sigma);
Kmat = Toeplitz(v);
omega = (b-a)*omega;
for j=1:N
   Kmat(:,j) = Kmat(:,j)*omega(j);
end

P5.3.3

P5.4.1

% Script P5_4_1
%
% Compares the ordinary DFT with the FFT.

clc
global w
x = randn(1024,1)+sqrt(-1)*randn(1024,1);
disp('   n       DFT Flops    FFTRecur Flops    FFT Flops');
disp('------------------------------------------------------')
for n = [2 4 8 16 32 64 128 256 ]
   flops(0);
   y = DFT(x(1:n));
   dftFlops = flops;
   
   flops(0);
   % Precompute the weight vector.
   w = exp(-2*pi*sqrt(-1)/n).^(0:((n/2)-1))'; 
   % Note: there are better ways to do this.
   y2 = MyFFTRecur(x(1:n));   
   recurFlops = flops;
   flops(0);
   y = fft(x(1:n));
   fftFlops = flops;  
   disp(sprintf(' %4.0f   %10.0f    %10.0f      %10.0f', n,dftFlops,recurFlops,fftFlops))
end

  function y = MyFFTRecur(x)
% x is a column vector with power-of-two length.
% y is the DFT of x.

global w         % Precomputed weight vector
n = length(x);
if n ==1
   y = x;
else
   m = n/2;
   yT = MyFFTRecur(x(1:2:n));
   yB = MyFFTRecur(x(2:2:n));
   
   % The required weight vector is a subvector of the precomputed weight vector.
   n_orig = 2*length(w);
   s = n_orig/n;
   d = w(1:s:length(w));
   
   z = d.*yB;
   y = [ yT+z ; yT-z ];
end

P5.4.2

P5.4.3

% Problem P5_4_3
%
% Test generalized strassen multiply.

clc
n = input('Enter n:');
nmin = input('Enter nmin:');
A = randn(n,n);
B = randn(n,n);
C = MyStrass(A,B,nmin);
err = norm(C-A*B)



 function C = MyStrass(A,B,nmin)
% C = MyStrass(A,B)
%
% A,B are n-by-n matrices, n a power of 2.
% nmin is the regukar matrix multiply threshold, a positive integer.
% C is an n-by-n matrix equal to A*B. 
    
[n,n] = size(A);
if n <= nmin
   C = A*B;
else
   m  = floor(n/2); u = 1:m; v = m+1:2*m;     
   P1 = MyStrass(A(u,u)+A(v,v),B(u,u)+B(v,v),nmin);
   P2 = MyStrass(A(v,u)+A(v,v),B(u,u),nmin);
   P3 = MyStrass(A(u,u),B(u,v)-B(v,v),nmin);
   P4 = MyStrass(A(v,v),B(v,u)-B(u,u),nmin);
   P5 = MyStrass(A(u,u)+A(u,v),B(v,v),nmin);
   P6 = MyStrass(A(v,u)-A(u,u),B(u,u) + B(u,v),nmin);
   P7 = MyStrass(A(u,v)-A(v,v),B(v,u)+B(v,v),nmin);
   C = [ P1+P4-P5+P7   P3+P5; P2+P4 P1+P3-P2+P6];
   
   if 2*m < n
      % The odd n case.
      % Partition A = [A(1:n-1:n-1) u; v alfa] 
      % Partition B = [B(1:n-1:n-1) w; y alfa] 
      u = A(1:n-1,n); v = A(n,1:n-1); alfa = A(n,n);
      w = B(1:n-1,n); y = B(n,1:n-1); beta = B(n,n);
      % Perform the 2-by-2 block multiply using Strassen multiply in the 
      % (1,1) block.
      C = [C+u*y   A(1:2*m,1:2*m)*w+beta*u  ; v*B(1:2*m,1:2*m)+alfa*y  v*w+alfa*beta];
   end
end









% Script P5_4_3
%
% Compares compares general-n Strassen method flops with ordinary A*B flops

clc
A0 = rand(128,128);
B0 = rand(128,128);
disp('  n   Strass Flops   Ordinary Flops')
disp('-------------------------------------')
for n = [15:33 63 64 127 128]
   A = A0(1:n,1:n);
   B = B0(1:n,1:n);
   flops(0);
   C = MyStrass(A,B);
   f = flops;
   disp(sprintf('%3.0f   %10.0f   %10.0f', n,f,2*n^3))
end

Function MyStrass not provided.

P5.4.4

% Script 5_4_4
%
% Compare Strass and conventional matrix multiply

clc 
disp('  q     r   (Strass Flops)/(Conventional Flops)')
disp('-------------------------------------------------')
for q = 1:20
   n = 2^q;
   flop_min = StrassCount(n,1);
   rmin = 0;
   for r=1:q
      f = StrassCount(n,2^r);
      if f<flop_min
         flop_min = f;
         rmin = r;
      end
   end
   disp(sprintf(' %2.0f    %2.0f   %15.2f ',q,rmin,flop_min/(2^(3*q+1))))
end


  function f = StrassCount(n,nmin)
% n and nmin are powers of 2 and nmin<=n
% f is the number of flops required to compute an n-by-n matrix multiply 
% using Strassen if n>nmin and conventional multiplication otherwise.

if n==nmin
   % flops for conventional n-by-n multiply.
   f = 2*n^3;
else
   m = n/2;
   f = 18*m^2 + 7*StrassCount(m,nmin);
end

P5.5.1

Solution not provided.

P5.5.2

% Script P5_5_2
%
% Explores when it pays to do a 2-processor matrix-vector
% multiply.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Time for single processor execution:
% T_seq = (2*n^2)/R;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Let n = 2m and set top = 1:m and bot = (m+1):n.
% Proc(1) sends A(bot,:) to Proc(2)
% T_send_mat = alpha + (n^2/2)*beta;
% Proc(1) computes y(top) = A(top,:)*x and Proc(2) computes
% y(bot) = A(bot,:)*x. 
% T_work = (n^2)/R;
% Proc(2) sends y(bot) to Proc(1)
% T_send_vec = alpha + (n/2)*beta;
% Total execution time:
% T_par = 2*alpha + beta*n*(n+1)/2 + (n^2)/R;
% Comparison quotient T_par/T_seq:
% T_ratio = .5*(1+beta*R/2) + (alpha*R/n^2) + (beta*R)/(4*n);
% Thus, if beta*R&lt2 then for sufficiently large n, it pays
% to distribute the computation.

clc
disp('R = 1/beta     alpha    Minimum n')
disp('-----------------------------------')  
for R = [10^5  10^6  10^7  10^8  10^9]
   beta = 1/R; %safe by a factor of two
   for alpha = [10^(-3) 10^(-4) 10^(-5) 10^(-6)]
      %compute the smallest n so that it pays to distribute.
      n = ceil((1 + sqrt(1+16*alpha*R))/2);
      
      disp(sprintf('  %6.1e    %6.1e  %8.0f',R,alpha,n))
   end
   disp(' ')
end