Skip to Main Content

Sample SAS for Real Data Set

  /*================================================================*
 *This SAS program is to analyze twin data. It has three sections.*
 *The first section reads the data. You need to specify your own  *
 *path for the data set and the variable names to be input. This  *
 *is also where you create your new variables.                    *
 *The second section should not be changed. This section creates  *
 *random effects to accommodate different correlations of MZ and  *
 *DZ twins.                                                       *
 *The third section executes different models. You need to include*
 *appropriate variables for your analysis as well as try different*
 *mode of inheritance and initial parameter values.               *
 *Please email heping.zhang@yale.edu if you discovered any errors *
 *with this program. This program can be freely used and          *
 *distributed. However, the owners of this program is not         *
 *responsible for inaccuracy or errors as a result of using this  *
 *program. It is the responsibility of the users to verify the    *
 *the validity of this program and conduct their analysis         * 
 *appropriately.                                                  *
 *================================================================*/

/*Begin Section 1*/
data one;
*you need to enter your own file path in the next statement;
INFILE 'D:\shared\hz2\Students\Gongfu\biometrics-data.txt' missover delimiter='09'x  firstobs=2;
*you need to include appropriate variables the next statement;
input FAMID     Sex     GA      BW      RDS     BPD     INST zygo;
*you need to create appropriate additional variables if needed;
inst2=(INST=2);
inst3=(INST=3);
id=_n_;
run;
/*End Section 1*/
/*Begin Section 2: no changes are needed in this section*/
proc iml;
covA1=sqrsym({1,1,1});
gA1=t(root(covA1));
covA2=sqrsym({1,0.5,1});
gA2=t(root(covA2));
covD1=sqrsym({1,1,1});
gD1=t(root(covD1));
covD2=sqrsym({1,0.25,1});
gD2=t(root(covD2));

/* for additive genetic effect, the G design matrix determines the model through
the model covariance structure: VAR[A]=GZG`. The mixed model is then represented
as: logit(p{y=1}) = mu + ZG + e */

g1=INSERT(gA1,gD1,0,3);                 /*combine matrix for MZ twin*/
g2=INSERT(gA2,gD2,0,3);                 /*combine matrix for DZ twin*/
use one;
        read all var {FAMID zygo} into vzygo;
close one;
nsub=nrow(vzygo);
g=j(nsub,4,.);
do i=1 to nsub;
        ind=2-mod(i,2);
        if(vzygo[i,2]=1) then g[i,]=g1[ind,];                   /* MZ twin*/
        if(vzygo[i,2]=0) then g[i,]=g2[ind,];                   /* DZ twin*/
end;
cname={"aone" "atwo" "done" "dtwo"};
create rmatrix from g[colname=cname];
append from g;
close rmatrix;
quit;
data rmatrix;
set rmatrix;
id=_n_;
run;
data two;
merge one rmatrix;
by id;
run;
/*End Section 2*/
/*Begin Section 3*/
title1 'ADE model';
proc nlmixed data=two;
*you need to choose your own initial values;
*it may be useful to run a simple model such as PROC LOGISTIC;
*usually you may need to try-and-err;
parms beta1=-0.192 beta2=0.088 beta3=0.0011 beta4=0.889 beta5=0.230  beta6=-1.092 s1=4 s2=0.01;
*include the fixed covariates in the next statement;
fixed1 = beta0+beta1*SEX+beta2*GA+beta3*BW+beta4*inst2+beta5*inst3+beta6*RDS;
*the following statement specifies additive (a1 and a2) and;
*dominant (d1 and d2) random effects;
random1 = a1*aone + a2*atwo + d1*done + d2*dtwo;
eta = fixed1 + random1;
*specifies a probit model;
p = 1 - probnorm(eta);
model BPD ~ binary(p);
random a1 a2 d1 d2 ~ normal([0,0,0,0],[s1,0,s1,0,0,s2,0,0,0,s2]) subject=FAMID;
estimate 'icc' (s1+s2)/(1+s1+s2);
run;

title2 'ACE model';
proc nlmixed data=two;
parms beta0=2.793 beta1=0.133 beta2=-0.095 beta3=-0.0011 beta4=-0.929 beta5=-0.283  beta6=1.112 s1=3 s2=1;
fixed1 = beta0+beta1*SEX+beta2*GA+beta3*BW+beta4*inst2+beta5*inst3+beta6*RDS;
*the following statement specifies additive (a1 and a2) and;
*common environment (c) random effects;
random1 = a1*aone+a2*atwo+c;
eta = fixed1 + random1;
p = 1 - probnorm(eta);
model BPD ~ binary(p);
random a1 a2 c ~ normal([0,0,0],[s1,0,s1,0,0,s2]) subject=FAMID;
estimate 'icc' s1/(1+s1+s2);
run;
/*End Section 3*/