% =========== % Description: % =========== % % This program is for aligning warped functional data. It uses % a pairwise warping method to obtain the desired alignment (registration) % of the random trajectories. The data % can be regular (recorded on a regular grid) or sparse. In the % latter case, PACE will be used as a preliminary step to % evaluate the data on a regular time grid. The algorithm gives % the aligned curves and the warping function. Users can also % choose to run PACE on the aligned curves. % % The model is Y(t)=X(h^{-1}(t))+error where Y(t) is the observed noisy % curve, X(t) is the aligned curve and h^{-1}(t) is the inverse warping % function. The warping function h(t) can be used to align curves, i.e. % Y*(t)=Y(h(t)) is output aligned (still with noise, as only the % measurement times are shifted according to the warping). % % The Eigen library (http://eigen.tuxfamily.org/) can be used to substantially % improve computing times for this procedure. In order to take advantage % of this, the user must unzip Eigen version 3.1.2 in the directory % containing the file WFPCA.m, i.e., in PACE/PACE-WARP. Once this is done, % argument EigLib indicates the use of the improved procedure. Credit for % this is due to Pantelis Hadjipantelis of the University of Warwick. % % References: Tang, R., M\"{u}ller, H.G. (2008). Pairwise curve % synchronization for functional data. Biometrika 95, 875-889. % Tang, R., M\"{u}ller, H.G. (2009). Time-synchronized % clustering of gene expression trajectories. Biostatistics 10, 32-45. % % ====== % Usage: % ====== % % function [X,aligned,h,hinv,y_reg,t_reg] = WFPCA(y,t,nknots,lambda,subset,choice,option,p,EigLib) % % ====== % Input: % ====== % y: n*m matrix or 1*n cell array where y(i,:) % or y{i} is a functional trajectory for subject i; PACE % will be used as a preliminary step to evaluate y on a % regular time grid (output t_reg) if y is a cell array. % t: 1*m vector (if y is a matrix) or 1*n cell array (if y is % a cell array) of time point grid. % nknots: number of knots used for estimating pairwise warping functions; % default value is 3. % lambda: penalty parameter used for estimating pairwise warping % functions (larger lambda will force the warping functions % closer to the identity and thus result in less warping); % use 0 as a first default value, which will set the penalty to V*10^-4, % where V is the average L2 norm of y-mean(y); % use [] for a second default value (slower option), in which case % lambda will be selected from the grid of potential values % V*10^-k (k=1,...,6) as the value which minimizes the variation of y % after warping (this optimization procedure is time-consuming) % subset: controls the use of subsets to estimate pairwise warping functions. % Then, Pairwise warping functions will only be determined based % on random subsets of pairs. The smaller the subsets, the faster % the algorithm (number of operations is quadratic in subset size), % while the accuracy of the algorithm declines. % 'Y': use subsets of size min(max(n^0.5,20),n); % the subset is randomly selected for each trajectory [default]. % 'N': do not use subsets (time-consuming option). % integer: user-specified size of subsets (00 y_normalized(i,:)=y_reg(i,:)/tempsca(i); else y_normalized(i,:)=y_reg(i,:); end end end clear tempsca; for i=1:n yfd(i,:)=spapi(2,t_reg,y_normalized(i,:)); % transfer normalized data into spline form end for i=1:n yfd_old(i,:)=spapi(2,t_reg,y_reg(i,:)); % transfer orignal data into spline form end % use subset to estimate warping functions if strcmp(subset,'Y') subN=floor(min(max(30,sqrt(n)),n)); elseif strcmp(subset,'N') subN=n; elseif isa(subset,'numeric') subN=subset; end % choose lambda if not specified if length(lambda)==0 Vy=sqrt(sum(trapz(t_reg',(y_normalized-repmat(mean(y_normalized),[n,1]))'.^2))/(n-1)); Slambda=[]; for i=1:6 [tmptX,tmptaligned,tmpth,tmpthinv] = WFPCA(y_reg,t_reg,nknots,Vy*10^-i,'Y',choice,0,[]); Slambda(i)=sum(trapz(t_reg',(tmptaligned-repmat(mean(tmptaligned),[n,1]))'.^2)); clear tmptX tmptaligned tmpth tmpthinv; end [tmptc,tmptk]=min(Slambda); lambda=Vy*10^-tmptk; clear tmptc tmptk; fprintf(1,['lambda is selected as ' num2str(lambda) '. \n']); elseif lambda==0 Vy=sqrt(sum(trapz(t_reg',(y_normalized-repmat(mean(y_normalized),[n,1]))'.^2))/(n-1)); lambda=Vy*10^-4; fprintf(1,['Default lambda value ' num2str(lambda) ' is used. \n']); end indd=1; hik=[]; for k=1:n tempind=randperm(n); tempind=tempind(1:subN); for i=1:subN; curvei=yfd(tempind(i),:); curvek=yfd(k,:); if tempind(i)~=k if EigLib == 0 % do not use Eigen library [hik(indd,:),ftemp]=rthik(t_reg,curvei,curvek,nknots,lambda); elseif EigLib == 1 % use Eigen library try % check for compiled rthik_E [hik(indd,:),ftemp]=rthik_E( curvei.coefs,curvek.coefs,t_reg,nknots,lambda); catch err try % if necessary, compile rthik_E, checking that Eigen library is present eval( horzcat('mex -I',fileparts(mfilename('fullpath')),... '/eigen-eigen-5097c01bcdc4/ ',... fileparts(mfilename('fullpath')),'/','rthik_E.cpp') ); catch err msg = sprintf('%s', ... 'To use option EigLib = 1, please download and unzip ', ... 'Eigen 3.1.2 (http://eigen.tuxfamily.org/) in folder PACE-WARP'); error('MATLAB:myCode:dimensions', msg); end [hik(indd,:),ftemp]=rthik_E( curvei.coefs,curvek.coefs,t_reg,nknots,lambda); end end afterdistance(k,i)=mean((rtYH(t_reg,curvei,hik(indd,:))-fnval(curvek,t_reg)).^2); lochat(indd,:)=[k,tempind(i)]; indd=indd+1; else hik(indd,:)=t_reg; afterdistance(k,i)=0; lochat(indd,:)=[k,k]; indd=indd+1; end end clear tempind; end % take truncated averages or weighted averages of the pairwise warping functions h=[]; hinv=[]; aligned=[]; for k=1:n weight=[]; if strcmp(choice,'weighted') tempw=afterdistance(k,:); for i=1:subN if tempw(i)==0; tempw(i)=inf; end end weight=1./tempw; clear tempw; elseif strcmp(choice,'truncated') weight=afterdistance(k,:)<=quantile(afterdistance(afterdistance>0),.90); end if sum(weight)==0 hinv(k,:)=t_reg; else weight=weight/sum(weight); hinv(k,:)=weight*hik(lochat(:,1)==k,:); hinv(k,m)=max(t_reg); hinv(k,1)=min(t_reg); end h(k,:)=fnval(spapi(2,hinv(k,:),t_reg),t_reg); h(k,1)=min(t_reg); h(k,m)=max(t_reg); aligned(k,:)=fnval(yfd_old(k,:),h(k,:)); end % tranfer aligned data into cells and run PACE if option==1 yy=num2cell(aligned,2)'; if size(t_reg,1)==m tt=repmat(t_reg',[n,1]); else tt=repmat(t_reg,[n,1]); end tt=num2cell(tt,2)'; X=FPCA(yy,tt,p); else X=[]; end end