Chapter 7: Problem Solutions
P7.1.1 | P7.2.1 | P7.3.1 | P7.4.1 |
P7.1.2 | P7.2.2 | P7.3.2 | P7.4.2 |
P7.1.3 | P7.2.3 | P7.3.3 | P7.4.3 |
P7.1.4 | P7.2.4 | P7.3.4 | |
P7.1.5 | P7.2.5 | P7.3.5 | |
P7.2.6 | P7.3.6 |
% Problem P7_1_1 % % Examine minimization in different norms. close all n = 100; b = sort(rand(3,1)); b = b(3:-1:1); A = [1;1;1]; x = linspace(-1,1,n); v1 = zeros(n,1); v2 = zeros(n,1); vinf = zeros(n,1); for k=1:n v1(k) = norm(A*x(k)-b,1); v2(k) = norm(A*x(k)-b,2); vinf(k) = norm(A*x(k)-b,'inf'); end subplot(2,2,1) plot(x,v1,[b(2),b(2)],[0,6]) title(sprintf('1-norm: min at %5.3f',b(2))) subplot(2,2,2) plot(x,v2,[sum(b)/3, sum(b)/3],[0,6]) title(sprintf('2-norm: min at %5.3f',sum(b)/3)) subplot(2,2,3) plot(x,vinf,[(b(1)+b(3))/2,(b(1)+b(3))/2],[0,6]) title(sprintf('inf-norm: min at %5.3f',(b(1)+b(3))/2))
% Problem P7_1_2 % % Quadratic LS fits to the sine function. close all for m=[3 6 12 24] [a,b,c] = LSFit(0,pi/3,'tan',m); x = linspace(0,pi/3)'; y = tan(x); qvals = (c*x + b).*x + a; figure plot(x,y,x,qvals) title(sprintf('m = %2.0f',m)) end function [a,b,c] = LSFit(L,R,fname,m) % [a,b,c] = LSFit(L,R,fname,m) % % L,R satisfy L= 3. % % a,b,c are scalars so that if q(x) =ax^2 + bx +c, then % sum from k=1 to m of [q(x(k)) -f(x(k))]^2 % is minimized where x = linspace(L.R,m). x = linspace(L,R,m)'; z = [ones(m,1) x x.^2]\feval(fname,x); a = z(1); b = z(2); c = z(3);
Not Available
Not Available
% Problem P7_1_5 % % Assume A (nxn) is given and b, c, and d are all given % column n-vectors. We want to choose scalars alfa and % beta so the solution to Ax = b + alfa*c + beta*d is % as small as possible. Note that the solution is % % x = u + alfa*v + beta*w = u + [v w]*[alfa;beta] % % where Au = b, Av = c, and Aw = d. % Solve for u, v, and w... [L,U,P] = lu(A); u = U\(L\(P*b)); v = U\(L\(P*c)); w = U\(L\(P*d)); % The act of minimizing norm(u + [v w]*[alfa;beta]) is the % act of solving the LS problem min norm(Cz - r) where % C is the n-by-2 matrix [v w], r = -u, and setting % alfa = z(1) and beta = z(2). z = -[v w]\u; alfa = z(1); beta = z(2);
% Problem P7_2_1 % % Solving Hessenberg systems via the QR factorization. clc n = 6; A = triu(randn(n,n),-1) b = randn(n,1) x = HessQRSolve(A,b) disp('A*x-b = ') disp(A*x - b) function x = HessQRSolve(H,b) % x = HessQRSolve(H,b) % % H is an n-by-n nonsingular upper Hessenberg. % b is a column n-vector. % % x column n-vector so Hx = b. [Q,R] = HessQRrot(H); % See P7.2.4 x = UTriSol(R,Q'*b); % Ax = QRx = b means Rx = Q'b.
% Script P7_2_2 % % Symmetrizing a 2-by-2 matrix through rotation. clc A = randn(2,2) % To make the (1,2) and (2,1) entries of [c s;-s c]'*A % the same, we require cA(1,2) - sA(2,2) = sA(1,1)+c(2,1), i.e., % % -s(A(1,1)+A(2,2)) + c(A(1,2)-A(2,1)) = 0 % % Note that this means that we want the bottom component % of [c s;-s c][(A(1,1)+A(2,2); A(1,2)-A(2,1)] to be zero. [c,s] = Rotate(A(1,1)+A(2,2),A(1,2)-A(2,1)); disp('[c s;-s c] =') disp(' ') disp([c s;-s c]) disp('[c s; -s c]''*A = ') disp(' ') disp([c s;-s c]'*A)
% Problem P7_2_3 % % Lower triangularizing a matrix using rotations clc n = 5; A = randn(n,n) [Q,L] = QLRot(A) disp('Q''*Q - I = ') disp(Q'*Q - eye(n,n)) disp('Q''*A - L =') disp(Q'*A - L) function [Q,L] = QLrot(A) % [Q,L] = QLrot(A) % % A is an n-by-n matrix. % % Q is an n-by-n orthogonal matrix and % L is an n-by-n lower triangular so A = QL. [n,n] = size(A); Q = eye(n,n); for j=n:-1:2 % Zero A(1:j-1,j) for i=1:j-1 % Zero A(i,j) % Note that if [c s;-s c][z;-y] = [r;0], then % [c s;-s c][y z] = [0;r]. So we can use Rotate to % zero the top of a 2-vector. [c,s] = Rotate(A(i+1,j),-A(i,j)); A(i:i+1,1:j) = [c s; -s c]*A(i:i+1,1:j); Q(:,i:i+1) = Q(:,i:i+1)*[c s; -s c]'; end end L = tril(A);
% Problem P7_2_4 % % hessenberg QR factorization. clc n = 6; A = triu(randn(n,n),-1) [Q,R] = HessQRrot(A) disp('Q''*Q - I = ') disp(Q'*Q - eye(n,n)) disp('Q''*A - R =') disp(Q'*A - R) function [Q,R] = HessQRrot(A) % [Q,R] = HessQRrot(A) % % A is an n-by-n upper Hessenberg matrix. % % Q is an n-by-n orthogonal matrix and % R is an n-by-n upper triangular such that A = QR. [n,n] = size(A); Q = eye(n,n); for j=1:n-1 %Zero A(j+1,j) [c,s] = Rotate(A(j,j),A(j+1,j)); A(j:j+1,j:n) = [c s; -s c]*A(j:j+1,j:n); Q(:,j:j+1) = Q(:,j:j+1)*[c s; -s c]'; end R = triu(A);
% Problem P7_2_5 % % Least squares fit of the square root function on [.25,1] clc disp(' m a b') disp('-------------------------------------------------') for m = [2:10 20 40 80 200] x = linspace(.25,1,m)'; z = [ones(m,1) x]\sqrt(x); disp(sprintf('%4.0f %20.16f %20.16f',m,z(1),z(2))) end
% Problem P7_2_6 % % Using a rotation to zero the top component of a 2-vector. clc x = randn(2,1) [c,s] = Rotate1(x(1),x(2)); disp('[c s;-s c]] =') disp(' ') disp([c s;-s c]) disp('[c s;-s c]*x =') disp(' ') disp([c s;-s c]*x) function [c,s] = Rotate1(x,y) % [c,s] = Rotate1(x,y) % % x,y are real scalars % % c,s satisfy c^2+s^=1 with the property that the top component % of the vector [c s;-s c][x;y] is zero. [c,s] = Rotate(-y,x);
% Problem P7_3_1 % % Test BVPsolver a = 0; b = 1; close all for n = [10 100 1000] uvals = TwoPtBVP(n,a,b,'MyF731p','MyF731q','MyF731r'); figure plot(linspace(a,b,n)',uvals) title(sprintf('n = %1.0f',n)) end function uvals = TwoPtBVP(n,a,b,pname,qname,rname) % uvals = TwoPtBVP(n,a,b,pname,qname,rname) % % a,b are reals with a<b. % n is an integer >= 3 % pname is a string that names a positive function defined on [a,b]. % qname is a string that names a positive function defined on [a,b]. % rname is a string that names a function defined on [a,b]. % % uvals is a column n-vector with the property that uvals(i) approximates % the solution to the 2-point boundary value problem % % -D[p(x)Du(x)] + q(x)u(x) = r(x) a<=x<=b % u(a) = u(b) = 0 % % at x = a+(i-1)h where h = (b-a)/(n-1) x = linspace(a,b,n)'; h = (b-a)/(n-1); p = feval(pname,x(1:n-1)+h/2); q = feval(qname,x); r = feval(rname,x); uvals = zeros(n,1); e = zeros(n-2,1); e(2:n-2) = -p(2:n-2); d = p(1:n-2)+p(2:n-1)+(h^2)*q(2:n-1); [d,e] = CholTrid(d,e); rhs = (h^2)*r(2:n-1); uvals(2:n-1) = CholTridSol(d,e,rhs);
% Problem P7_3_2 % % Recursive outer product Cholesky clc n = 5; Gexact = tril(rand(n,n)) A = Gexact*Gexact'; G = CholOuter1(A)
% Problem P7_3_3 % % pentadiagonal Cholesky % Produce plot of timings: % set up A and b clc time = []; for n = 400:200:2000 % Set up A as instructed: j = 1:n; d = 100 + j'; e = 10 + j'; f = j'; % Don't actually form A % A = diag(d) + diag(e(2:n),1) + diag(e(2:n),-1) + ... % diag(f(3:n),2) + diag(f(3:n),-2); % Right-hand side: b = 100*ones(n,1); % Record starting time. tic % Solve Ax = b % Factor A = G*G' % G = diag(g) + diag(h(2:n),-1) + diag(p(3:n),-2); [g,h,p] = Chol5(d,e,f); % Solve Gy = b for y y = LTriSol5(g,h,p,b); % Solve G'*x = y for x x = UTriSol5(g,h,p,y); % Compute elapsed time. dt = toc; % Add to table of timings. time = [time dt]; end % Make plot of timings. close all plot(400:200:2000,time) xlabel('n') ylabel('time') title('Time to solve n-by-n penta-diagonal system') %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Produce table of flops: clc disp('n flops'); disp('------------------'); for n = [10 20 40 80 160] % Set up A as instructed: j = 1:n; d = 100 + j'; e = 10 + j'; f = j'; % Set up right-hand side. b = 100*ones(n,1); % Reset number of flops. flops(0) % Factor A = G * G' [g,h,p] = Chol5(d,e,f); % Solve Gy = b for y y = LTriSol5(g,h,p,b); % Solve G'*x = y for x x = UTriSol5(g,h,p,y); % Print number of flops required. disp(sprintf('%4.0f %8.0f', n, flops)); end function x = LTriSol5(g,h,p,b) % x = LTriSol5(g,h,p,b) % % for recitation problem 7.3.3 % Pre: % g,h,p as in Chol5.m % b n-vector % % Post: % x such that G*x = b, % where G = diag(g) + diag(h(2:n),-1) + diag(p(3:n),-2) n = length(g); x = zeros(n,1); % initialize first 2 x values x(1) = b(1)/g(1); x(2) = (b(2) - x(1)*h(2)) / g(2); % get rest of x values (this is similar to 2nd part of % CholTriDiSol, p.241) for i = 3:n x(i) = (b(i) - x(i-2)*p(i) - x(i-1)*h(i)) / g(i); end
Not Availabe
% Problem 7_3_5 % % We need to solve Ay = b for y and Az = u for z: G = CholScalar(A); y1 = LTriSol(G,b); y = UTriSol(G',y1); z1 = LTriSol(G,u); z = UTriSol(G',z1); % It follows that x = y - ((u'*y)/(1+u'*z))*z;
Not Available.
% Problem P7_4_1 % % Illustrate a general block Cholesky p = [ 2 4 3 1 5]; n = sum(p); G0 = tril(rand(n,n)); A = G0*G0'; G = MyCholBlock(A,p); clc home disp('The block width vector:') disp(' ') s = sprintf('%2.0f ',p); disp(['[' s ']']) disp(' ' ) error = norm(G-G0,1)/norm(G0,1) function G = MyCholBlock(A,p) % G = MyCholBlock(A,p) % % Pre: A is an n-by-n symmetric and positive definite matrix % and p is a positive integer vector with sum(p) = n % Post: G is lower triangular and A = G*G'. [n,n] = size(A); m = length(p); finish = cumsum(p); start = [1 finish(1:m-1)+1]; G = zeros(n,n); for i=1:m ivec = start(i):finish(i); for j=1:i jvec = start(j):finish(j); %Find the (i,j) block of G, i.e., G(ivec,jvec). S = A(ivec,jvec); for k=1:j-1 kvec = start(k):finish(k); S = S - G(ivec,kvec)*G(jvec,kvec)'; end if j<i for q=1:(finish(i)-start(i)+1) G(start(i)+q-1,jvec)= (LTriSol(G(jvec,jvec),S(q,:)'))'; end else G(ivec,ivec) = CholScalar(S); end end end
% Problem P7_4_2 % % Explore load balancing in shared memory Cholesky with % "wrap mapping" of tasks. n = 100; clc disp(' p max/min flops') disp('---------------------') for p = [2 4 8 16 32 64] f = FlopBalance(n,p); maxf = max(f); minf = min(f); disp(sprintf(' %3.0f %7.5f',p,maxf/minf)) end function f = FlopBalance(n,p) % f = FlopBalance(n,p) % % n,p are positive integers with n>p. % % f is a column p-vector where f(mu) is the approximate number % of flops required by the mu-th processor when parallel % cholesky is implemented with wrap mapping. f = zeros(p,1); for mu=1:p MyCols = mu:p:n; for k=1:n if any(MyCols==k) % Flops associated with column generation. f(mu) = f(mu) + (n-k)+3; end % Flops associated with saxpy updates f(mu) = f(mu) + 2*(n-k+1)*ceil((n-k)/p); end end
% Script P7_4_3 % % Explore load balancing in shared memory Cholesky with % "contiguous mapping" of tasks. n = 100; clc disp(' p max/min flops') disp('---------------------') for p = [2 4 8 16 32 64] f = FlopBalance1(n,p); maxf = max(f); minf = min(f); disp(sprintf(' %3.0f %7.2f',p,maxf/minf)) end