Chapter 5: Problem Solutions
% Script P5_1_1 % % Compare two ways of setting up the Pascal matrix clc disp(' n PascalTri PascalTri1') disp(' Flops Flops ') disp('------------------------------------') for n=[5 10 20 40 80] flops(0); P = PascalTri(n); f = flops; flops(0); P = PascalTri1(n); f1 = flops; disp(sprintf(' %3.0f %6.0f %6.0f',n, f,f1)) end function P = PascalTri1(n) % n is a positive integer % T is nxn lower triangular matrix with T(i,j) = % the binomial coefficient (i-1)-choose-(j-1) P = tril(ones(n,n)); for i=3:n for j=2:i-1 P(i,j) = P(i-1,j-1) + P(i-1,j); end end
% Script P5_1_2 % % Illustrate Circulant matrix set-up via Toeplitz clc v = (1:8)'; C = Circulant(v) function C = Circulant(v) % v a column vector % C is a circulant matrix with first row = v. C = toeplitz([1; v(length(v):-1:2)],v);
% Script P5_1_3 % % Embeddig a Toeplitz matrix into a circulant matrix. clc row = [10 20 30 40 50]; col = [10; 200; 300; 400; 500]; T = toeplitz(col,row) C = EmbedToep(col,row) function C = EmbedToep(col,row) % col is a column n-vector % row is a row n-vector with row(1) = col(1) % % C is a circulant matrix of size 2n-1 with the property % that C(1:n,1:n) = T where T is an n-by-n Toeplitz % matrix with T(:,1) = col and T(1,:) = row. n = length(col); m = 2*n-1; C = zeros(m,m); C(1,:) = [row col(n:-1:2)']; for k=2:m C(k,:) = [C(k-1,m) C(k-1,1:m-1)]; end
% Script P5_1_4 % % Illustrate RandBand clc disp('p = 1 and q = 1') A = RandBand(5,5,1,1) disp('p = 3 and q = 2') A = RandBand(9,5,3,2) disp('p = 1 and q = 3') A = RandBand(5,6,1,3) function A = RandBand(m,n,p,q) % m, n, p, and q are positive integers % random m-by-n matrix with lower bandwidth p and upper bandwidth q. A = triu(tril(rand(m,n),q),-p);
Not Available
% Script P5_2_1 % % Checks the Hessenberg matrix-vector product. clc n = 10; A = triu(randn(n,n),-1); z = randn(n,1); y = HessMatVec(A,z); err = y - A*z function y = HessMatVec(A,z) % A is m-by-n with A(i,j) = 0, i>j+1, z is n-by-1. % y = A*z [n,n] = size(A); y = z(n)*A(:,n); for k=1:n-1 y(1:k+1) = y(1:k+1) + A(1:k+1,k)*z(k); end
% Script P5_2_2 % % Checks TriMatVec clc m = 5; n = 3; A = triu(randn(m,n)); x = randn(n,1); y = TriMatVec(A,x); err = y - A*x A = triu(randn(n,m)); x = randn(m,1); y = TriMatVec(A,x); err = y - A*x function y = TriMatVec(A,x) % A is an m-by-n upper triangular matrix and x n-by-1. % y = A*x [m,n] = size(A); y = zeros(m,1); for k=1:n s = min([k m]); y(1:s) = y(1:s) + A(1:s,k)*x(k); end
% Script P5_2_3 % % Saxpy matrix multiply with one factor triangular. clc disp('Example where B has more columns than rows:') A = rand(4,4) B = triu(rand(4,6)) C = MatMatSax1(A,B) Cexact = A*B disp(' ') disp('Example where B has more rows than columns:') A = rand(6,6) B = triu(rand(6,4)) C = MatMatSax1(A,B) Cexact = A*B function C = MatMatSax1(A,B) % A is an m-by-p matrix. % B is a p-by-n matrix (upper triangular). % C A*B (Saxpy Version) [m,p] = size(A); [p,n] = size(B); C = zeros(m,n); for j=1:n % Compute j-th column of C. for k=1:min([j p]) C(:,j) = C(:,j) + A(:,k)*B(k,j); end end
% Script P5_2_4 % % Saxpy matrix multiply with both factors triangular. clc A = triu(rand(5,5)) B = triu(rand(5,5)) C = MatMatSax2(A,B) Cexact = A*B function C = MatMatSax2(A,B) % A is an n-by-n matrix. % B is a p-by-n matrix (upper triangular) % C A*B (Saxpy Version) % [n,n] = size(A); C = zeros(n,n); for j=1:n % Compute j-th column of C. for k=1:j C(1:j,j) = C(1:j,j) + A(1:j,k)*B(k,j); end end
% Script 5_2_5 % % Illustrate (Lower Triangular)*(Upper Triangular) products A = tril(rand(6,6)) B = triu(rand(6,6)) C = MatMatDot1(A,B) Error = C - A*B function C = MatMatDot1(A,B) % A and B are n-by-n lower triangular matrices. % C = A*B (Dot Product Version) [n,n] = size(A); C = zeros(n,n); for j=1:n % Compute j-th column of C. for i=1:n s = 1:min([i j]); C(i,j) = A(i,s)*B(s,j); end end
% Script P5_2_6 % % Illustrate A*(Banded B) p = 2; A = rand(6,6) B = triu(tril(rand(6,6),p),-p) C = BandMatMatSax(A,B,p) Error = C - A*B function C = bandMatMatSax(A,B,p) % A is an n-by-n matrix. % B is an n-by-n matrix, bandwidth p % C = A*B (Saxpy Version) [n,n] = size(A); C = zeros(n,n); for j=1:n % Compute j-th column of C. for k=max([1 j-p]):min([j+p n]) C(:,j) = C(:,j) + A(:,k)*B(k,j); end end
% Script P5_2_7 % % Matrix powers via repeated squaring clc k = input('Compute A^k where k = '); A = randn(5,5); A = A/norm(A,1); B = MatPower(A,k); Error = B - A^k function B = MatPower(A,k) % A is an n-by-n matrix and k is a positive integer. % B = A^k s = ''; x = k; while x>=1 if rem(x,2)==0 s = ['0' s]; x = x/2; else s = ['1' s]; x = (x-1)/2; end end C = A; First = 0; for j=length(s):-1:1 if s(j)=='1' if First==0 B = C; First =1; else B = B*C; end end C = C*C; end
% Script P5_2_8 % % y = (polynomial in matrix A)*(vector) clc v = randn(8,1); A = randn(8,8); c = randn(4,1); y = HornerM(c,A,v); Error = y - (c(1)*v + c(2)*A*v + c(3)*A*A*v + c(4)*A*A*A*v)
The function HornerM is not provided.
% Script P5_2_9 % % Illustrate Krylov matrix set-up clc A = magic(4); B = A(:,3:4); p = 3 C = Krylov(A,B,p) function C = Krylov(A,B,p) % A is n-by-n, B is n-by-t, and p is a positive integer. % C = [ B AB (A^2)B ... (A^p-1))B] C = B; C1 = B; for k=1:p-1 C1 = A*C1; C = [ C C1]; end
% Script P5_2_10 % % Illustrate row scaling clc A = magic(5); A = A(:,1:3) d = 1:5 B = ScaleRows(A,d) function B = ScaleRows(A,d) % A is an m-by-n and d an m-vector % B = diag(d)*A [m,n] = size(A); B = zeros(m,n); for i=1:m B(i,:) = d(i)*A(i,:); end
% Script P5_2_11 % % Structured Matrix-vector multiplication clc close all n = 20; k = 30; P = SetUp(n-1); v0 = rand(n,1); v0 = (v0 + v0(n:-1:1))/2; V = Forward(P,v0,k); for j=1:k plot(V(:,j)) title(sprintf('j = %1d',j)) pause(1) end hold off
[The function Forward is not provided.]
z
z
z
z
z
% Script P5_3_1 % % Integral of SampleF2 over [0,2]x[0,2] for various 2D composite % QNC(7) rule. clc m = 7; disp(' Subintervals Integral Relative Time') disp('------------------------------------------------') for n = [4 8 16 32] tic; numI2D = MyCompQNC2D('SampleF2',0,2,0,2,m,n,m,n); t = toc; if n==4; base = t; time = 1; else time = t/base; end disp(sprintf(' %7.0f %17.15f %11.1f',n,numI2D,time)) end
The function MyCompQNC2D is not provided.
% Script P5_3_2 % % Solving integral equations. close all a = 0; b = 5; clc disp('m n Set-up Time ') disp('-----------------------') for sigma = [.01 .1] j = 1; figure for m = [3 5] for n = [5 10] tic; Kmat = Kernel(a,b,m,n,sigma); tfinish = toc; T = etime(tfinish,tstart); disp(sprintf('%2.0f %2.0f %5.3f',m,n,T)) x = linspace(a,b,n*(m-1)+1)'; rhs = 1./((x-2).^2 + .1) + 1./((x-4).^2 + .2); fvals = Kmat\rhs; z = linspace(a,b,200)'; sz = spline(x,fvals,z); subplot(2,2,j) plot(z,sz) title(sprintf('m=%2.0f, n=%2.0f sigma = %5.2f',m,n,sigma)) j= j+1; end end end function Kmat = Kernel(a,b,m,n,sigma) % % Kmat(i,j) = [omega,x] = wCompNC(a,b,m,n); N = length(omega); v = exp(-((x(1)-x).^2)/sigma); Kmat = Toeplitz(v); omega = (b-a)*omega; for j=1:N Kmat(:,j) = Kmat(:,j)*omega(j); end
z
% Script P5_4_1 % % Compares the ordinary DFT with the FFT. clc global w x = randn(1024,1)+sqrt(-1)*randn(1024,1); disp(' n DFT Flops FFTRecur Flops FFT Flops'); disp('------------------------------------------------------') for n = [2 4 8 16 32 64 128 256 ] flops(0); y = DFT(x(1:n)); dftFlops = flops; flops(0); % Precompute the weight vector. w = exp(-2*pi*sqrt(-1)/n).^(0:((n/2)-1))'; % Note: there are better ways to do this. y2 = MyFFTRecur(x(1:n)); recurFlops = flops; flops(0); y = fft(x(1:n)); fftFlops = flops; disp(sprintf(' %4.0f %10.0f %10.0f %10.0f', n,dftFlops,recurFlops,fftFlops)) end function y = MyFFTRecur(x) % x is a column vector with power-of-two length. % y is the DFT of x. global w % Precomputed weight vector n = length(x); if n ==1 y = x; else m = n/2; yT = MyFFTRecur(x(1:2:n)); yB = MyFFTRecur(x(2:2:n)); % The required weight vector is a subvector of the precomputed weight vector. n_orig = 2*length(w); s = n_orig/n; d = w(1:s:length(w)); z = d.*yB; y = [ yT+z ; yT-z ]; end
z
% Problem P5_4_3 % % Test generalized strassen multiply. clc n = input('Enter n:'); nmin = input('Enter nmin:'); A = randn(n,n); B = randn(n,n); C = MyStrass(A,B,nmin); err = norm(C-A*B) function C = MyStrass(A,B,nmin) % C = MyStrass(A,B) % % A,B are n-by-n matrices, n a power of 2. % nmin is the regukar matrix multiply threshold, a positive integer. % C is an n-by-n matrix equal to A*B. [n,n] = size(A); if n <= nmin C = A*B; else m = floor(n/2); u = 1:m; v = m+1:2*m; P1 = MyStrass(A(u,u)+A(v,v),B(u,u)+B(v,v),nmin); P2 = MyStrass(A(v,u)+A(v,v),B(u,u),nmin); P3 = MyStrass(A(u,u),B(u,v)-B(v,v),nmin); P4 = MyStrass(A(v,v),B(v,u)-B(u,u),nmin); P5 = MyStrass(A(u,u)+A(u,v),B(v,v),nmin); P6 = MyStrass(A(v,u)-A(u,u),B(u,u) + B(u,v),nmin); P7 = MyStrass(A(u,v)-A(v,v),B(v,u)+B(v,v),nmin); C = [ P1+P4-P5+P7 P3+P5; P2+P4 P1+P3-P2+P6]; if 2*m < n % The odd n case. % Partition A = [A(1:n-1:n-1) u; v alfa] % Partition B = [B(1:n-1:n-1) w; y alfa] u = A(1:n-1,n); v = A(n,1:n-1); alfa = A(n,n); w = B(1:n-1,n); y = B(n,1:n-1); beta = B(n,n); % Perform the 2-by-2 block multiply using Strassen multiply in the % (1,1) block. C = [C+u*y A(1:2*m,1:2*m)*w+beta*u ; v*B(1:2*m,1:2*m)+alfa*y v*w+alfa*beta]; end end % Script P5_4_3 % % Compares compares general-n Strassen method flops with ordinary A*B flops clc A0 = rand(128,128); B0 = rand(128,128); disp(' n Strass Flops Ordinary Flops') disp('-------------------------------------') for n = [15:33 63 64 127 128] A = A0(1:n,1:n); B = B0(1:n,1:n); flops(0); C = MyStrass(A,B); f = flops; disp(sprintf('%3.0f %10.0f %10.0f', n,f,2*n^3)) end
Function MyStrass not provided.
% Script 5_4_4 % % Compare Strass and conventional matrix multiply clc disp(' q r (Strass Flops)/(Conventional Flops)') disp('-------------------------------------------------') for q = 1:20 n = 2^q; flop_min = StrassCount(n,1); rmin = 0; for r=1:q f = StrassCount(n,2^r); if f<flop_min flop_min = f; rmin = r; end end disp(sprintf(' %2.0f %2.0f %15.2f ',q,rmin,flop_min/(2^(3*q+1)))) end function f = StrassCount(n,nmin) % n and nmin are powers of 2 and nmin<=n % f is the number of flops required to compute an n-by-n matrix multiply % using Strassen if n>nmin and conventional multiplication otherwise. if n==nmin % flops for conventional n-by-n multiply. f = 2*n^3; else m = n/2; f = 18*m^2 + 7*StrassCount(m,nmin); end
Solution not provided.
% Script P5_5_2 % % Explores when it pays to do a 2-processor matrix-vector % multiply. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Time for single processor execution: % T_seq = (2*n^2)/R; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Let n = 2m and set top = 1:m and bot = (m+1):n. % Proc(1) sends A(bot,:) to Proc(2) % T_send_mat = alpha + (n^2/2)*beta; % Proc(1) computes y(top) = A(top,:)*x and Proc(2) computes % y(bot) = A(bot,:)*x. % T_work = (n^2)/R; % Proc(2) sends y(bot) to Proc(1) % T_send_vec = alpha + (n/2)*beta; % Total execution time: % T_par = 2*alpha + beta*n*(n+1)/2 + (n^2)/R; % Comparison quotient T_par/T_seq: % T_ratio = .5*(1+beta*R/2) + (alpha*R/n^2) + (beta*R)/(4*n); % Thus, if beta*R<2 then for sufficiently large n, it pays % to distribute the computation. clc disp('R = 1/beta alpha Minimum n') disp('-----------------------------------') for R = [10^5 10^6 10^7 10^8 10^9] beta = 1/R; %safe by a factor of two for alpha = [10^(-3) 10^(-4) 10^(-5) 10^(-6)] %compute the smallest n so that it pays to distribute. n = ceil((1 + sqrt(1+16*alpha*R))/2); disp(sprintf(' %6.1e %6.1e %8.0f',R,alpha,n)) end disp(' ') end