Computed theoretical power for N=100 and N=200 scenarios
This commit is contained in:
31
Modules/ado/plus/i/i2ci.ado
Normal file
31
Modules/ado/plus/i/i2ci.ado
Normal file
@ -0,0 +1,31 @@
|
||||
*! Author: Ross Harris
|
||||
*! Date: 1 August 2008
|
||||
|
||||
* Confidence interval for I-sq, to be run after metan
|
||||
|
||||
version 8.0
|
||||
program i2ci
	* Display I-squared with a 95% confidence interval, using the r() results
	* left behind by the previous command (intended to be run after metan).
	* Reads: r(df), r(het) and r(i_sq).  Nothing is returned; output only.
	* The interval is built on the ln(H) scale with H = sqrt(Q/(k-1)) and
	* mapped back through I-sq = 100*(H^2-1)/H^2, truncated below at zero.

	* Without r(df) and r(het) every quantity below would be missing and a
	* meaningless "0.0% to 0.0%" interval would be printed, so stop early.
	if missing(r(df)) | missing(r(het)) {
		di as err "r(df) and r(het) not found; i2ci must be run after metan"
		exit 301
	}

	local k = r(df)+1		/* presumably the number of studies */
	local q = r(het)		/* heterogeneity statistic Q */

	* Standard error of ln(H): one formula when Q exceeds k, another otherwise
	if `q' > `k' {
		local selogH = 0.5 * ( ln(`q') - ln(`k'-1) ) / ( sqrt(2*`q') - sqrt(2*`k'-3) )
	}
	else {
		local selogH = sqrt( (1/(2*(`k'-2))) * (1- (1/(3*(`k'-2)^2))) )
	}

	local I2 = string( r(i_sq) , "%5.1f")

	* The second formula divides by k-2; with k=2 the SE is missing and the
	* max(.,0) truncation below would silently turn both limits into 0.0.
	if missing(`selogH') {
		di in ye "I-sq= `I2'%"
		di as err "95% CI cannot be computed (standard error of ln(H) is undefined)"
		exit
	}

	local H = sqrt(`q'/(`k'-1))
	local Hlow = exp(ln(`H')-1.96*`selogH')
	local Hupp = exp(ln(`H')+1.96*`selogH')

	* Back-transform the limits of H to the I-squared scale (percent)
	local I2low = string( max( 100* ( (`Hlow'^2-1) / `Hlow'^2 ) , 0) , "%5.1f")
	local I2upp = string( max( 100* ( (`Hupp'^2-1) / `Hupp'^2 ) , 0) , "%5.1f")

	di in ye "I-sq= `I2'%, 95% CI: `I2low'% to `I2upp'%"
	di in whi "CI based on Higgins & Thompson, Statist. Med. 2002; 21:1539-1558,"
	di in whi "Appendix A2: Intervals based on the statistical significance of Q."

end
|
146
Modules/ado/plus/i/icc23.ado
Normal file
146
Modules/ado/plus/i/icc23.ado
Normal file
@ -0,0 +1,146 @@
|
||||
*! version 17feb09
|
||||
|
||||
program define icc23, rclass
	* Intraclass correlations ICC[2,1]/ICC[2,k] (model 2) or
	* ICC[3,1]/ICC[3,k] (model 3) from a repeated-measures ANOVA.
	*
	* Syntax:  icc23 dv rater id [if] [, MOdel(2|3) LEvel(#)]
	*   dv     - dependent variable
	*   rater  - repeated factor (first ANOVA term)
	*   id     - subject identifier (second ANOVA term)
	*   model  - 2 (default) or 3; anything else exits with r(198)
	*   level  - confidence level as a proportion in (0,1); default .95
	*
	* Returned in r():
	*   r(N_subj) r(k) r(level) r(p_rater)
	*   model 2: r(icc21) r(icc21_lb) r(icc21_ub) r(icc2k) r(icc2k_lb) r(icc2k_ub)
	*   model 3: r(icc31) r(icc31_lb) r(icc31_ub) r(icc3k) r(icc3k_lb) r(icc3k_ub)
	version 9
	syntax varlist(min=3 max=3) [if] [, MOdel(integer 2) LEvel(real .95)]
	tokenize "`varlist'"
	marksample touse

	local dv `1'
	local rater `2'
	local id `3'

	if !inlist(`model',2,3) {
		di
		di in re "The ICC model must be specified as either 2 or 3"
		exit 198
	}

	if !(`level'>0 & `level'<1.0) {
		di
		di in re "The CI level must be a value between 0 and 1.0"
		exit 198
	}

	qui anova `dv' `rater' `id' if `touse', repeated(`rater')

	* Capture the ANOVA results as numbers right away, so that they cannot
	* change if e() is overwritten later on.
	local f1  = e(F_1)
	local ss2 = e(ss_2)
	local df2 = e(df_2)
	local rss = e(rss)
	local dfr = e(df_r)
	local df1 = e(df_1)
	local ss1 = e(ss_1)
	local n   = e(N_bse)	/* the number of subjects tested */
	local k   = `df1'+1	/* the number of raters */

	*Compute F-test for rater
	local p_rater=Ftail(`df1',`dfr',`f1')

	*Compute components of ICC
	local bms = `ss2'/`df2'		/* mean square of the second term (id) */
	local ems = `rss'/`dfr'		/* residual mean square */
	local jms = `ss1'/`df1'		/* mean square of the first term (rater) */
	local Fj = `jms'/`ems'
	local alpha2 =1-((1-`level')/2)	/* upper-tail probability point for the CI */
	local cilevel = `level'*100

	return scalar N_subj  = `n'
	return scalar k       = `k'
	return scalar level   = `level'
	return scalar p_rater = `p_rater'

	if `model' == 2 {

		*Compute ICC Model 2 for single observations (ICC21) and for means (ICC2k)
		local num21 = `bms'-`ems'
		local dentmp = (`k'*(`jms'-`ems'))/`n'	/* the ratio within the denominator */
		local den21 = `bms'+(`k'-1)*`ems'+`dentmp'
		local icc21 = `num21'/`den21'

		*Compute ICC21 confidence intervals (values will be used for ICC2k confidence intervals)
		local nu_num21 = (`k'-1)*(`n'-1)*(`k'*`icc21'*`Fj'+`n'*(1+(`k'-1)*`icc21')-`k'*`icc21')^2
		local nu_den21 = (`n'-1)*(`k'^2)*((`icc21')^2)*((`Fj')^2)+(`n'*(1+(`k'-1)*`icc21')-`k'*`icc21')^2
		local nu21 = `nu_num21'/`nu_den21'
		local Fsuper = invF(`df2',`nu21',`alpha2')
		local Fsub = invF(`nu21',`df2',`alpha2')
		local cilower = (`n'*(`bms'-`Fsuper'*`ems'))/(`Fsuper'*(`k'*`jms'+(`k'*`n'-`k'-`n')*`ems')+`n'*`bms')
		local ciupper = (`n'*(`Fsub'*`bms'-`ems'))/(`k'*`jms'+(`k'*`n'-`k'-`n')*`ems'+`n'*`Fsub'*`bms')

		*Compute ICC2k and its confidence intervals
		local num2k = `bms'-`ems'
		local dentmp1 = (`jms'-`ems')/`n'	/* the ratio within the denominator */
		local den2k = `bms'+`dentmp1'
		local icc2k = `num2k'/`den2k'
		local cilowerK = (`k'*`cilower')/(1+(`k'-1)*`cilower')
		local ciupperK = (`k'*`ciupper')/(1+(`k'-1)*`ciupper')

		return scalar icc21    = `icc21'
		return scalar icc21_lb = `cilower'
		return scalar icc21_ub = `ciupper'
		return scalar icc2k    = `icc2k'
		return scalar icc2k_lb = `cilowerK'
		return scalar icc2k_ub = `ciupperK'

		di
		di in gr " **************************************************************************"
		di in ye " Two-Way Random Effects Models: ICC[2,1] and ICC[2,k]"
		di in gr " **************************************************************************"
		di
		di in gr " The total number of subjects is: " in ye %3.0f `n'
		di in gr " The total number of raters is: " in ye %3.0f `k'
		di
		di in gr " Reliability of observations: ICC[2,1] = " in ye %4.3f `icc21' ", (" `cilevel' "% CI: " %5.3f `cilower' ", " %5.3f `ciupper' ")"
		di
		di in gr " Reliability of the mean: ICC[2,`k'] = " in ye %4.3f `icc2k' ", (" `cilevel' "% CI: " %5.3f `cilowerK' ", " %5.3f `ciupperK' ")"
		di
		di in gr " **************************************************************************"
		di
	}

	if `model' == 3 {

		* Compute ICC31
		local num31 = `bms'-`ems'
		local den31 = `bms'+(`k'-1)*`ems'
		local icc31 = `num31'/`den31'

		*Compute ICC Model 3 Confidence Interval (Single Observations)
		local fzero = `bms'/`ems'
		local fdistL = invF(`n'-1,(`n'-1)*(`k'-1),`alpha2')
		local fdistU = invF((`n'-1)*(`k'-1),`n'-1,`alpha2')
		local FL = `fzero'/`fdistL'
		local FU = `fzero'*`fdistU'
		local cilower = (`FL'-1)/(`FL'+(`k'-1))
		local ciupper = (`FU'-1)/(`FU'+(`k'-1))

		*Compute ICC3k
		local num3k = `bms'-`ems'
		local den3k = `bms'
		local icc3k = `num3k'/`den3k'

		*Compute ICC3k confidence intervals
		local cilowerK = 1-(1/`FL')
		local ciupperK = 1-(1/`FU')

		return scalar icc31    = `icc31'
		return scalar icc31_lb = `cilower'
		return scalar icc31_ub = `ciupper'
		return scalar icc3k    = `icc3k'
		return scalar icc3k_lb = `cilowerK'
		return scalar icc3k_ub = `ciupperK'

		di
		di in gr " **************************************************************************"
		di in ye " Two-Way Mixed Effects Models: ICC[3,1] and ICC[3,k]"
		di in gr " **************************************************************************"
		di
		di in gr " The total number of subjects is: " in ye %3.0f `n'
		di in gr " The total number of raters is: " in ye %3.0f `k'
		di
		di in gr " Reliability of observations: ICC[3,1] = " in ye %4.3f `icc31' ", (" `cilevel' "% CI: " %5.3f `cilower' ", " %5.3f `ciupper' ")"
		di
		di in gr " Reliability of the mean: ICC[3,`k'] = " in ye %4.3f `icc3k' ", (" `cilevel' "% CI: " %5.3f `cilowerK' ", " %5.3f `ciupperK' ")"
		di
		di in gr " **************************************************************************"
		di
	}

	* Same note for both models: flag a rater effect significant at the 5% level
	if `p_rater' <= .05 {
		di in red " Note: There is a significant `rater' effect: p = " %5.4f `p_rater'
	}
end
|
110
Modules/ado/plus/i/icc23.hlp
Normal file
110
Modules/ado/plus/i/icc23.hlp
Normal file
@ -0,0 +1,110 @@
|
||||
|
||||
.-
|
||||
help for ^icc23^
|
||||
.-
|
||||
|
||||
^Calculation of ICC Models 2 and 3^
|
||||
^---------------------------------^
|
||||
|
||||
.^icc23^ <dv> <classvar> <within_var>, MOdel(#) LEvel(#)
|
||||
|
||||
|
||||
^Description^
|
||||
^-----------^
|
||||
|
||||
^icc23^ computes the intra-class correlation for random effects models based on repeated
|
||||
measures ANOVA. These models are ICC[2,1], ICC[2,k], ICC[3,1], and ICC[3,k], as described
|
||||
by Shrout and Fleiss, 1979 (see reference below). (For the ICC[1,1] and ICC[1,k] models
|
||||
based on a one-way ANOVA, see @loneway@). ^icc23^ runs a repeated measures ANOVA to derive
|
||||
the appropriate estimates and degrees of freedom. In the event there is a significant F-test
|
||||
for the ^classvar^ (e.g., a significant difference among raters), the program will provide
|
||||
the p-value from the ANOVA table.
|
||||
|
||||
Data must be in the "long" format. If not, use the @reshape@ command to reconfigure the data.
|
||||
|
||||
Four types of ICC models are considered:
|
||||
|
||||
ICC[2,1]: reflects the case where the same group of subjects is rated by k raters,
|
||||
interest is in the reliability of individual scores. In this model, raters
|
||||
are considered a representative sample of a population of similar raters. This
|
||||
model is a two-way random effects model.
|
||||
|
||||
ICC[2,k]: is the same approach as ICC[2,1] above, but interest is in the reliability
|
||||
of the MEAN score, rather than among single observations.
|
||||
|
||||
ICC[3,1]: reflects the case where the same group of subjects is rated by k raters,
|
||||
interest is in the reliability of individual scores. In this model, the only
|
||||
raters of interest are those participating in the study (e.g., there is no
|
||||
intention of generalizing the raters' scores to a larger population of raters).
|
||||
This model is considered a "mixed" model (subjects are random, raters are fixed).
|
||||
|
||||
ICC[3,k]: is the same approach as ICC[3,1] above, but interest is in the reliability
|
||||
of the MEAN score, rather than among single observations.
|
||||
|
||||
Three inputs are required:
|
||||
|
||||
dv is the dependent variable
|
||||
|
||||
classvar the class variable refers to the factor that is repeated within subjects, e.g., raters,
|
||||
devices, time points, etc., e.g., the variable which would be entered as the
|
||||
repeated() variable in the ANOVA option.
|
||||
|
||||
within_var refers to the "within subject" variable, e.g., subjects being assessed
|
||||
|
||||
|
||||
^NOTE: The order of entry of the variables is critical!^
|
||||
|
||||
|
||||
|
||||
^Options^
|
||||
^-------^
|
||||
^MO^del(#) refers to the type of model to be estimated. ICC model 2 is the default (
|
||||
producing ICC[2,1] and ICC[2,k] estimates).
|
||||
|
||||
^LE^vel(#) the degree of precision of the confidence interval, entered as a decimal.
|
||||
The default is 95%, i.e., level(.95)
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
For ICC[2,1] and ICC[2,k]: Two-way random effects (Subjects and raters are considered to be
|
||||
sampled from larger populations); 95% CIs are assumed.
|
||||
|
||||
^.icc23 score rater person_id^
|
||||
|
||||
|
||||
|
||||
For ICC[3,1] and ICC[3,k]: Two-way mixed model: Subjects are random, but raters are fixed
|
||||
(i.e., the raters are not considered a sample -- they are the
|
||||
only raters of interest); 90% CIs are requested.
|
||||
|
||||
^.icc23 score rater person_id, model(3) level(.90)^
|
||||
|
||||
|
||||
References
|
||||
----------
|
||||
Shrout PE, Fleiss JL. Intraclass correlation: uses in assessing rater reliability. Psychol
|
||||
Bull, 1979; 86: 420-428.
|
||||
|
||||
Portney LG, Watkins MP. Foundations of Clinical Research: Applications to Practice (2nd ed.).
|
||||
Prentice-Hall, Inc: Upper Saddle River, NJ., 2000.
|
||||
|
||||
|
||||
Author
|
||||
------
|
||||
Paul F. Visintainer, PhD
|
||||
Baystate Health System
|
||||
Springfield, MA 01089
|
||||
visint46@gmail.com
|
||||
|
||||
Luis C.Orozco, MD, MSc
|
||||
Facultad de Salud
|
||||
Universidad Industrial de Santander
|
||||
Colombia
|
||||
lcorovar@gmail.com
|
||||
|
||||
|
||||
Also see
|
||||
--------
|
||||
Manual or on-line help for: @loneway@, @reshape@, @iclassr@, @iclassr2@
|
55
Modules/ado/plus/i/iccconf.ado
Normal file
55
Modules/ado/plus/i/iccconf.ado
Normal file
@ -0,0 +1,55 @@
|
||||
*! November 5, 2008 by Paul F. Visintainer, PhD
|
||||
|
||||
program define iccconf
	* Immediate command: confidence interval for a single intraclass
	* correlation, based on the F distribution.
	*
	* Syntax:  iccconf icc k reps [, level(#)]
	*   icc   - the intraclass correlation, strictly between 0 and 1
	*   k     - number of subjects (integer, at least 2)
	*   reps  - number of repeated assessments per subject (integer, at least 2)
	*   level - confidence level as a proportion in (0,1); default .95
	* Invalid input exits with r(197).
	version 8.0
	syntax anything [, level(real .95)]
	tokenize "`anything'"

	local icc `1'
	local k `2'		/* number of observations */
	local reps `3'

	confirm number `icc'
	confirm integer number `k'
	confirm integer number `reps'

	if `icc'<=0 | `icc'>=1 {
		di
		di in red "RE-enter " in ye "ICC" in red " between 0 and 1"
		error 197
	}

	* Both ends of the range are checked: a level of 0 or below would pass a
	* probability outside (0,1) to invF() further down.
	if `level'<=0 | `level'>=1.0 {
		di
		di in red " Confidence level must be between 0 and 1"
		error 197
	}

	* With k<2 or reps<2 one of the degrees of freedom below is zero (or
	* negative) and the limits would be displayed as missing.
	if `k'<2 | `reps'<2 {
		di
		di in red " The number of subjects and of repeated assessments must each be at least 2"
		error 197
	}

	* Defining the components of the test
	local alpha = 1 - `level'
	local N = `k'*`reps'
	local df1 = `k' - 1
	local df2 = `N' - `k'
	* F ratio implied by the supplied ICC and number of replicates
	local F = ((`icc'*`reps') - `icc' + 1)/(1-`icc')
	local lFcrit = invF(`df1',`df2',(1-`alpha'/2))
	local uFcrit = invF(`df1',`df2',(`alpha'/2))

	*Compute confidence limits based on Rosner, "Fundamentals of Biostatistics, 6th ed.", pg. 615
	local ul = (`F'/`uFcrit'-1)/(`reps'+(`F'/`uFcrit')-1)
	local ll = (`F'/`lFcrit'-1)/(`reps'+(`F'/`lFcrit')-1)

	di
	di in gr " ******************************************************************************* "
	di in ye " Confidence Interval for the INTRACLASS COEFFICIENT "
	di in gr " ******************************************************************************* "
	di
	di in gr " The ICC with " in ye %3.0f `level'*100 "% " in gr "CI is: " %3.2f in ye `icc' " (" %3.2f in ye `ll' ", " %3.2f in ye `ul' ")"
	di
	di in gr " The number of subjects is: " %5.0f in ye `k'
	di
	di in gr " The number of repeated assessments is: " %2.0f in ye `reps'

end
|
62
Modules/ado/plus/i/iccconf.hlp
Normal file
62
Modules/ado/plus/i/iccconf.hlp
Normal file
@ -0,0 +1,62 @@
|
||||
|
||||
.-
|
||||
help for ^iccconf^
|
||||
.-
|
||||
|
||||
Confidence Interval for A Single Intraclass Correlation (ICC)
|
||||
-------------------------------------------------------------
|
||||
|
||||
.^iccconf <icc> <k> <reps>, level(#)^
|
||||
|
||||
|
||||
^Description^
|
||||
^-----------^
|
||||
|
||||
^iccconf^ is an immediate command that computes a confidence interval for a
|
||||
single intraclass correlation (ICC). The procedure is based on Rosner's approach
|
||||
using the F-test.
|
||||
|
||||
Three inputs are required:
|
||||
|
||||
^icc^ is the intraclass correlation. The input must be a value between 0 and 1
|
||||
|
||||
^k^ is the number of subjects
|
||||
|
||||
^reps^ is the number of repeated assessments or "within" pair
|
||||
measurements.
|
||||
|
||||
^Option^
|
||||
^------^
|
||||
level(#) The level option allows the user to modify the confidence limit.
|
||||
Use values between 0 and 1. The default is .95.
|
||||
|
||||
|
||||
Example
|
||||
--------
|
||||
To find a 95% confidence interval for an ICC of .914, based on 5 observations
|
||||
with 2 replications each:
|
||||
|
||||
^.iccconf .914 5 2^
|
||||
|
||||
|
||||
For a 90% confidence interval for the above example, type:
|
||||
|
||||
^.iccconf .914 5 2, level(.9)^
|
||||
|
||||
|
||||
References
|
||||
----------
|
||||
Rosner, B. "Fundamentals of Biostatistics, 6th ed" Duxbury Press (The Thomson
|
||||
Company): Belmont, CA, 2006, pgs 615-616.
|
||||
|
||||
|
||||
Author
|
||||
------
|
||||
Paul F. Visintainer, PhD
|
||||
Springfield, MA 01089
|
||||
visint46@gmail.com
|
||||
|
||||
|
||||
Also see
|
||||
--------
|
||||
Manual or on-line help for: @loneway@, @xtreg@, or ^bootcor^, ^xtrho^ if installed
|
905
Modules/ado/plus/i/ice.ado
Normal file
905
Modules/ado/plus/i/ice.ado
Normal file
@ -0,0 +1,905 @@
|
||||
*! version 1.1.1 PR 23sep2005.
|
||||
*
|
||||
* History of ice
|
||||
* 1.1.1 23sep2005 Better error trapping for passive() and substitute() options.
|
||||
* 1.1.0 23aug2005 Replace -draw- option with -match-. Default becomes draw.
|
||||
* Trace option documented, now has argument for filename.
|
||||
* Report number of rows with 0, 1, 2, ... missing values.
|
||||
* Arrange variables in increasing order of missingness when imputing.
|
||||
* Split ties at random when more than one observation satisfies the
|
||||
* prediction matching criterion
|
||||
* 1.0.4 21jul2005 Trap and report error when running uvis
|
||||
* 1.0.3 08jun2005 Tidy up display of equations when have multiple lines (long equations)
|
||||
* 1.0.3 03jun2005 Silence file load/save
|
||||
* 1.0.2 20may2005 Changed files containing imputations to tempfiles (standalone mode)
|
||||
* (Angela Wood reported problem).
|
||||
* 1.0.1 04may2005 Added a trace to a file (undocumented in help file).
|
||||
* 1.0.0 18apr2005 First release, based on mice.
|
||||
*
|
||||
* History of mice
|
||||
* 1.0.3 13apr2005 Minor tidying up, including recode of ChkIn and deletion of strdel.
|
||||
* Check if prediction equations have a variable on both sides.
|
||||
* 1.0.2 17feb2005 Added code to take care of inherited missingness of passive variables robustly.
|
||||
* 1.0.1 21jan2005 Added display of regression command in showing prediction equations.
|
||||
* 1.0.0 20jan2005 First release, based on mvis2/_mvis2.
|
||||
*
|
||||
* History of mvis
|
||||
* 1.1.0 18jan2005 categoric() option removed.
|
||||
* New options dryrun, passive(), substitute(), eq() added.
|
||||
* Improvements to output showing actual prediction equations.
|
||||
* 1.0.5 19nov2004 Delete dummy variables for categoric() variables with no missing data from output file
|
||||
* Found problem with bsample in Stata 7 with "if" clause and boot option.
|
||||
* Revert to Stata 8 for mvis, _mvis and uvis.
|
||||
* 1.0.4 18nov2004 Weights not working (syntax error), fixed.
|
||||
* 1.0.3 16nov2004 Categoric() option added to deal with unordered categoric
|
||||
* covariates, updated default handling of such variables
|
||||
* 1.0.2 16oct2004 Saving, using etc of file safest with compound quotes, fixed.
|
||||
*
|
||||
program define ice, rclass
	* Front end for imputation by chained equations.  Parses the user's
	* options, then calls _ice once per imputation and either
	*   - standalone: stacks the m imputed datasets (adding _i and _j) and
	*     saves them to the file named in -using-, or
	*   - under mi_impute (detected through s(MI_m)): writes the original
	*     data and each imputation to the tempfiles named in s(MI_tfile*).
	* Any option not recognised here is passed through to _ice unchanged.
	version 8
	* Check for _I* variables, could be created by xi:
	capture describe _I*
	if _rc==0 {
		di as err _n "Warning: _I* variables detected in the dataset - was xi: used?"
		di as inp "Use of xi: with ice is liable to give incorrect results."
		di as inp "If you wish to model categoric covariates with dummy"
		di as inp "variables, please recalculate the dummies via the passive() option"
		di as inp "and use the substitute() option to identify the dummies as predictors." _n
	}
	local m `s(MI_m)'
	if "`m'"!="" {
		* Called by mi_impute
		local mitools mitools
		local mopt
		local uopt
		local fn0 `s(MI_tfile)'
		local using `fn0'
		forvalues i=1/`m' {
			local fn`i' `s(MI_tfile`i')'
		}
	}
	else {
		* standalone
		local mitools
		local mopt "m(int 1)"
		local uopt [using/]
	}

	syntax varlist(min=2 numeric) [if] [in] [aweight fweight pweight iweight] `uopt', /*
	*/ [ `mopt' REPLACE Seed(int 0) BOot MAtch DRYrun * ]

	* Check if there are variables called boot and/or match.
	* A bare -boot- / -match- applies to the whole varlist unless a variable
	* of that name exists, in which case it is taken to name that variable.
	if "`boot'"=="boot" {
		cap confirm var boot
		if _rc local options `options' boot(`varlist')
		else local options `options' boot(boot)
	}
	if "`match'"=="match" {
		cap confirm var match
		if _rc local options `options' match(`varlist')
		else local options `options' match(match)
	}
	if `seed'>0 set seed `seed'
	* -first- is passed to _ice for the first call only
	local first first
	if "`dryrun'"!="" {
		if `"`using'"'=="" {
			* _ice requires -using-; supply a placeholder name
			tempname fn
			local using `fn'
		}
		_ice `varlist' `if' `in' [`weight' `exp'] using `using', `options' first dryrun
		di as text _n "End of dry run. No imputations were done, no files were created."
		exit
	}
	preserve
	if "`mitools'"=="" {
		if `m'<1 {
			di as err "number of imputations must be 1 or more"
			exit 198
		}
		* (dryrun has already exited above, so -using- is mandatory here)
		if `"`using'"'=="" {
			di as err "using required"
			exit 100
		}
		else {
			if substr(`"`using'"',-4,.)!=".dta" {
				local using `using'.dta
			}
			if "`replace'"=="" {
				confirm new file `"`using'"'
			}
		}
		forvalues i=1/`m' {
			tempfile fn`i'
			_ice `varlist' `if' `in' [`weight' `exp'] using `fn`i'', `options' `first'
			di as text `i' ".." _cont
			local first
		}
		* Join files of imputations vertically using code from mijoin.ado
		quietly {
			local J _j
			forvalues j=1/`m' {
				* could automate this part
				use `"`fn`j''"', clear
				chkrowid
				local I `s(I)'
				if "`I'"=="" {
					* create row number
					local I _i
					cap drop `I'
					gen long `I'=_n
					lab var `I' "obs. number"
				}
				cap drop `J'
				gen int `J'=`j'
				lab var `J' "imputation number"
				save `"`fn`j''"', replace
			}
			use `"`fn1'"', clear
			forvalues j=2/`m' {
				append using `"`fn`j''"'
			}
			char _dta[mi_id] `I'
		}
		save `"`using'"', `replace'
	}
	else {
		* Save original data and imputations to tempfiles for mi_impute to stack
		* fn0,...,fn`m' are local macros created by mi_impute and supplied as s() functions;
		* they contain the actual names of tempfiles, hence the need for compound quotes.
		local original original
		forvalues i=0/`m' {
			* Remove any existing file of that name.  This must be -erase-:
			* -drop- acts on variables, so applied to a file name it only
			* fails (silently, under capture) and leaves the file in place.
			if "`replace'"!="" cap erase `"`fn`i''"'
			_ice `varlist' `if' `in' [`weight' `exp'] using `"`fn`i''"', ///
				`options' `first' `original' mitools
			di as text `i' ".." _cont
			local original
			if `m'>0 local first
		}
	}
end
|
||||
|
||||
program define chkrowid, sclass
	* Report the dataset's row-identifier variable in s(I).
	* The name is taken from the dataset characteristic _dta[mi_id]; s(I) is
	* set only when that characteristic is non-empty and names an existing
	* variable.  Otherwise the program sets nothing.
	version 8
	local idvar: char _dta[mi_id]
	if "`idvar'"!="" {
		capture confirm variable `idvar'
		if _rc==0 {
			sreturn local I `idvar'
		}
	}
end
|
||||
|
||||
*! Based on _mvis2 version 1.0.2 PR 19jan2005.
|
||||
program define _ice, rclass
|
||||
version 8
|
||||
syntax varlist(min=2 numeric) [if] [in] [aw fw pw iw] using/, /*
|
||||
*/ [ BOot(varlist) CC(varlist) CMd(string) CYcles(int 10) noCONStant MAtch(varlist) /*
|
||||
*/ DRYrun EQ(string) first Genmiss(string) Id(string) mitools ON(varlist) original /*
|
||||
*/ PASsive(string) noSHoweq SUBstitute(string) TRace(string) ]
|
||||
|
||||
if "`original'"!="" {
|
||||
* Save original data
|
||||
quietly save `"`using'"', replace
|
||||
exit
|
||||
}
|
||||
|
||||
local nvar: word count `varlist'
|
||||
if "`id'"!="" {
|
||||
confirm new var `id'
|
||||
}
|
||||
else local id _i
|
||||
|
||||
preserve
|
||||
tempvar touse order
|
||||
quietly {
|
||||
marksample touse, novarlist
|
||||
if "`cc'`on'"!="" {
|
||||
markout `touse' `cc' `on'
|
||||
}
|
||||
|
||||
* Record sort order
|
||||
gen long `order'=_n
|
||||
lab var `order' "obs. number"
|
||||
|
||||
* For standard operation (no `on' list), disregard any completely missing rows in varlist, among marked obs
|
||||
if "`on'"=="" {
|
||||
tempvar rmis
|
||||
egen int `rmis'=rmiss(`varlist') if `touse'==1
|
||||
count if `rmis'==0
|
||||
replace `touse'=0 if `rmis'==`nvar'
|
||||
replace `rmis'=. if `rmis'==`nvar'
|
||||
lab var `rmis' "#missing values"
|
||||
if "`first'"!="" & "`showeq'"=="" noi tab `rmis', missing
|
||||
drop `rmis'
|
||||
}
|
||||
* Deal with weights
|
||||
frac_wgt `"`exp'"' `touse' `"`weight'"'
|
||||
local wgt `r(wgt)'
|
||||
|
||||
* Sort out cmds (not checking if each cmd is valid - any garbage may be entered)
|
||||
if "`cmd'"!="" {
|
||||
* local cmds "regress logistic logit ologit mlogit"
|
||||
detangle "`cmd'" cmd "`varlist'"
|
||||
forvalues i=1/`nvar' {
|
||||
if "${S_`i'}"!="" {
|
||||
local cmd`i' ${S_`i'}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
* Default for all uvis operations is nomatch, meaning draw
|
||||
if "`match'"!="" {
|
||||
tokenize `match'
|
||||
while "`1'"!="" {
|
||||
ChkIn `1' "`varlist'"
|
||||
if `s(k)'>0 {
|
||||
local match`s(k)' match
|
||||
}
|
||||
mac shift
|
||||
}
|
||||
}
|
||||
|
||||
if "`boot'"!="" {
|
||||
tokenize `boot'
|
||||
while "`1'"!="" {
|
||||
ChkIn `1' "`varlist'"
|
||||
if `s(k)'>0 {
|
||||
local boot`s(k)' boot
|
||||
}
|
||||
mac shift
|
||||
}
|
||||
}
|
||||
local anyerr 0
|
||||
if `"`passive'"'!="" {
|
||||
tempvar passmiss
|
||||
/*
|
||||
Defines vars that are functions or transformations of others in varlist.
|
||||
They are (may be) "passively imputed". "\" is an expression separator.
|
||||
Default is comma.
|
||||
Comma may not always be appropriate (i.e. may appear in an expression).
|
||||
*/
|
||||
detangle "`passive'" passive "`varlist'" \
|
||||
local haserr 0
|
||||
forvalues i=1/`nvar' {
|
||||
if "${S_`i'}"!="" {
|
||||
local exp`i' ${S_`i'}
|
||||
ParsExp `exp`i''
|
||||
local exclude `s(result)'
|
||||
if "`exclude'"!="" {
|
||||
* Count missingness of this passive variable
|
||||
egen int `passmiss'=rmiss(`exclude') if `touse'
|
||||
count if `passmiss'>0 & `touse'==1
|
||||
local nimp`i'=r(N)
|
||||
if `nimp`i''==0 {
|
||||
local v: word `i' of `varlist'
|
||||
noi di as err "passive definition `v' = (${S_`i'}) redundant: `exclude' has no missing data."
|
||||
local ++haserr
|
||||
}
|
||||
drop `passmiss'
|
||||
}
|
||||
}
|
||||
}
|
||||
if `haserr'>0 {
|
||||
di as err "`haserr' error(s) found in option " as inp "passive(`passive')"
|
||||
local anyerr 1
|
||||
}
|
||||
}
|
||||
if "`substitute'"!="" {
|
||||
* defines vars that are to be substituted in the recalc context
|
||||
detangle "`substitute'" substitute "`varlist'"
|
||||
local haserr 0
|
||||
forvalues i=1/`nvar' {
|
||||
if "${S_`i'}"!="" {
|
||||
local sub`i' ${S_`i'}
|
||||
local v: word `i' of `varlist'
|
||||
count if missing(`v') & `touse'==1
|
||||
if r(N)==0 {
|
||||
noi di as err "substitute for variable `v' redundant: `v' has no missing data."
|
||||
local ++haserr
|
||||
}
|
||||
}
|
||||
}
|
||||
if `haserr'>0 {
|
||||
noi di as err "`haserr' error(s) found in option " as inp "substitute(`substitute')"
|
||||
local anyerr 1
|
||||
}
|
||||
}
|
||||
if `"`eq'"'!="" {
|
||||
* defines equations specified vars.
|
||||
detangle "`eq'" equation "`varlist'"
|
||||
forvalues i=1/`nvar' {
|
||||
if "${S_`i'}"!="" {
|
||||
local Eq`i' ${S_`i'}
|
||||
* Check that eq vars are in mainvarlist
|
||||
tokenize `Eq`i''
|
||||
while "`1'"!="" {
|
||||
ChkIn `1' "`varlist'"
|
||||
mac shift
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if `anyerr' {
|
||||
di as err _n "specification error(s) found."
|
||||
exit 198
|
||||
}
|
||||
count if `touse'
|
||||
local n=r(N)
|
||||
/*
|
||||
Count potentially imputable missing values for each variable,
|
||||
and where necessary create an equation for each
|
||||
*/
|
||||
local to_imp 0 // actual number of vars with missing values to be imputed
|
||||
local recalc 0 // number of passively imputed vars to be recalculated
|
||||
tempvar xtmp // temporary holding area
|
||||
local nimp // list of number of missing values for each variable
|
||||
forvalues i=1/`nvar' {
|
||||
local xvar: word `i' of `varlist'
|
||||
if "`genmiss'"!="" {
|
||||
tempvar mvar`i'
|
||||
gen byte `mvar`i''=missing(`xvar') if `touse'==1
|
||||
lab var `mvar`i'' "1 if `xvar' missing, 0 otherwise"
|
||||
}
|
||||
local x`i' `xvar'
|
||||
count if missing(`xvar') & `touse'==1
|
||||
* Create prediction equation for each active variable
|
||||
if r(N)>0 & `"`exp`i''"'=="" {
|
||||
local nimp`i'=r(N)
|
||||
* active var: has missing obs, not passive
|
||||
local ++to_imp
|
||||
local main`i' 1
|
||||
* Keep missingness of the original variable
|
||||
tempvar miss`i'
|
||||
gen byte `miss`i''=missing(`xvar') if `touse'==1
|
||||
* Define equation for this variable - user definition from eq() takes precedence
|
||||
if "`Eq`i''"!="" {
|
||||
local eq`i' `Eq`i''
|
||||
}
|
||||
else {
|
||||
* Remove variable from mainvarlist
|
||||
local eq`i': list varlist - xvar
|
||||
}
|
||||
if "`cmd`i''"=="" {
|
||||
/*
|
||||
Assign default cmd for vars not so far accounted for.
|
||||
cmd is relevant only for vars requiring imputation, i.e. with >=1 missing values.
|
||||
Use logit if 2 distinct values, mlogit if 3-5, otherwise regress.
|
||||
*/
|
||||
inspect `xvar' if `touse'
|
||||
local nuniq=r(N_unique)
|
||||
if `nuniq'==1 {
|
||||
noi di as err "only 1 distinct value of `xvar' found"
|
||||
exit 2000
|
||||
}
|
||||
if `nuniq'==2 {
|
||||
count if `xvar'==0 & `touse'==1
|
||||
if r(N)==0 {
|
||||
noi di as err "variable `xvar' unsuitable for imputation,"
|
||||
noi di as err "binary variables must include at least one 0 and one non-missing value"
|
||||
exit 198
|
||||
}
|
||||
local cmd`i' logit
|
||||
}
|
||||
else if `nuniq'<=5 {
|
||||
local cmd`i' mlogit
|
||||
}
|
||||
else local cmd`i' regress
|
||||
}
|
||||
if "`cmd`i''"=="mlogit" {
|
||||
* With mlogit, if xvar carries a score label,
|
||||
* drop it since it causes prediction problems
|
||||
local xlab: value label `xvar'
|
||||
capture label drop `xlab'
|
||||
}
|
||||
if "`on'"=="" {
|
||||
* Initially fill missing obs cyclically with nonmissing obs
|
||||
sampmis `xtmp'=`xvar'
|
||||
replace `xvar'=cond(`touse'==0, ., `xtmp')
|
||||
drop `xtmp'
|
||||
}
|
||||
else replace `xvar'=. if `touse'==0
|
||||
local lab`i' `xvar' imput.`suffix' (`nimp`i'' values)
|
||||
}
|
||||
else {
|
||||
local main`i' 0
|
||||
if "`nimp`i''"=="" { // may have been set earlier by consideration of ParsExp
|
||||
local nimp`i'=r(N)
|
||||
}
|
||||
if `"`exp`i''"'!="" {
|
||||
if "`Eq`i''"!="" {
|
||||
noi di as err "equation" as input " `xvar':`Eq`i'' " ///
|
||||
as err "invalid, `xvar' is passively imputed"
|
||||
exit 198
|
||||
}
|
||||
local ++recalc
|
||||
}
|
||||
}
|
||||
local nimp `nimp' `nimp`i''
|
||||
}
|
||||
if `to_imp'==0 {
|
||||
noi di as err _n "All relevant cases are complete, no imputation required."
|
||||
return scalar N=`n'
|
||||
return scalar imputed=0
|
||||
exit 2000
|
||||
}
|
||||
* Remove passivevars from equations as necessary
|
||||
forvalues i=1/`nvar' {
|
||||
if `"`exp`i''"'!="" {
|
||||
ParsExp `exp`i''
|
||||
local exclude `s(result)'
|
||||
* remove current passivevar from each relevant equation
|
||||
local passive `x`i''
|
||||
tokenize `exclude'
|
||||
while "`1'"!="" {
|
||||
* identify which variable in mainvarlist we are looking at
|
||||
ChkIn `1' "`varlist'"
|
||||
local index `s(k)'
|
||||
* Remove `passive' from equation of variable
|
||||
* whose index in mainvarlist is `index'
|
||||
* (only allowed to be done if there is no
|
||||
* user equation Eq`' for var #`index')
|
||||
if "`eq`index''"!="" & "`Eq`index''"=="" {
|
||||
local eq`index': list eq`index' - passive
|
||||
}
|
||||
mac shift
|
||||
}
|
||||
}
|
||||
}
|
||||
if "`substitute'"!="" {
|
||||
forvalues i=1/`nvar' {
|
||||
if `main`i'' & "`sub`i''"!="" {
|
||||
* substitute for this variable in all equations where it is a covariate
|
||||
forvalues j=1/`nvar' {
|
||||
if `main`j'' & (`j'!=`i') & "`Eq`j''"=="" {
|
||||
local res: list eq`j' - x`i'
|
||||
* substitute sub`i' if necessary i.e. if not already there
|
||||
tokenize `sub`i''
|
||||
while "`1'"!="" {
|
||||
cap ChkIn `1' "`res'"
|
||||
if "`s(k)'"=="0" {
|
||||
local res `res' `1'
|
||||
}
|
||||
mac shift
|
||||
}
|
||||
local eq`j' `res'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
* Show prediction equations at first imputation
|
||||
if "`first'"!="" {
|
||||
local errs 0
|
||||
local longstring 55 // max display length of variables in equation
|
||||
local off 13 // blanks to col 13 on continuation lines
|
||||
if "`showeq'"=="" {
|
||||
noi di as text _n " Variable {c |} Command {c |} Prediction equation" _n ///
|
||||
"{hline 12}{c +}{hline 9}{c +}{hline `longstring'}"
|
||||
}
|
||||
forvalues i=1/`nvar' {
|
||||
if "`exp`i''"!="" & `nimp`i''>0 {
|
||||
local eq "[Passively imputed from `exp`i'']"
|
||||
local formatoutput 0
|
||||
}
|
||||
else if "`eq`i''"=="" {
|
||||
local eq "[No missing data in estimation sample]"
|
||||
local formatoutput 0
|
||||
}
|
||||
else {
|
||||
local eq `eq`i''
|
||||
local formatoutput 1
|
||||
}
|
||||
if "`showeq'"=="" {
|
||||
if `formatoutput' {
|
||||
formatline, n(`eq') maxlen(`longstring')
|
||||
local nlines=r(lines)
|
||||
forvalues j=1/`nlines' {
|
||||
if `j'==1 noi di as text %11s abbrev("`x`i''",11) ///
|
||||
" {c |} " %-8s "`cmd`i''" "{c |} `r(line`j')'"
|
||||
else noi di as text _col(`off') ///
|
||||
"{c |}" _col(23) "{c |} `r(line`j')'"
|
||||
}
|
||||
}
|
||||
else noi di as text %11s abbrev("`x`i''",11) ///
|
||||
" {c |} " %-8s "`cmd`i''" "{c |} `eq'"
|
||||
}
|
||||
// Check for invalid equation - xvar on both sides
|
||||
if "`eq`i''"!="" {
|
||||
if `: list x`i' in eq`i'' {
|
||||
noi di as err "Error!" as inp " `x`i''" ///
|
||||
as err " found on both sides of prediction equation"
|
||||
local ++errs
|
||||
}
|
||||
}
|
||||
}
|
||||
if `errs' {
|
||||
di as err _n `errs' " error(s) found. Consider using the passive() option to fix the problem"
|
||||
exit 198
|
||||
}
|
||||
if "`dryrun'"!="" {
|
||||
exit
|
||||
}
|
||||
noi di as text _n "Imputing " _cont
|
||||
}
|
||||
if `to_imp'==1 | "`on'"!="" {
|
||||
local cycles 1
|
||||
}
|
||||
* Update recalculated variables
|
||||
if `"`passive'"'!="" & `recalc'>0 {
|
||||
forvalues i=1/`nvar' {
|
||||
if "`exp`i''"!="" {
|
||||
replace `x`i''=`exp`i''
|
||||
}
|
||||
}
|
||||
}
|
||||
* Impute sequentially `cycles' times by regression switching (van Buuren et al)
|
||||
tempvar y imputed
|
||||
* Sort variables on number of missing values, from low to high numbers.
|
||||
* Of benefit to the mice algorithm since less missings get imputed first.
|
||||
listsort "`nimp'"
|
||||
forvalues i=1/`nvar' {
|
||||
local r`i' `s(index`i')'
|
||||
}
|
||||
if `"`trace'"'!="" {
|
||||
tempname tmp
|
||||
* create names
|
||||
local postvl cycle
|
||||
forvalues r=1/`nvar' {
|
||||
local i `r`r'' // antirank: vars with small #missing come first
|
||||
if `main`i'' local postvl `postvl' `x`i''_mean
|
||||
}
|
||||
postfile `tmp' `postvl' using `"`trace'"', replace
|
||||
}
|
||||
forvalues j=1/`cycles' {
|
||||
if `"`trace'"'!="" local posts (`j')
|
||||
forvalues r=1/`nvar' {
|
||||
local i `r`r'' // antirank, ensuring vars with small #missing come first
|
||||
if `main`i'' {
|
||||
* Each var is reimputed based on imputed values of other vars
|
||||
local type: type `x`i''
|
||||
gen `type' `y'=`x`i'' if `miss`i''==0 & `touse'==1
|
||||
if "`on'"=="" {
|
||||
local vars `eq`i''
|
||||
}
|
||||
else local vars `on'
|
||||
* uvis is derived from uvisamp4.ado
|
||||
cap uvis `cmd`i'' `y' `vars' `wgt' if `touse', ///
|
||||
gen(`imputed') `boot`i'' `match`i'' `constant'
|
||||
if _rc {
|
||||
noi di as err _n(2) "Error running -uvis-"
|
||||
noi di as err "I detected a problem with running uvis with command `cmd`i'' on response `x`i''"
|
||||
noi di as err "and covariates `vars'."
|
||||
if "`cmd`i''"=="mlogit" {
|
||||
noi di as inp "The troublesome regression command is mlogit."
|
||||
noi di as inp "Try reducing the number of categories of `x`i'' or using ologit if appropriate"
|
||||
}
|
||||
exit 198
|
||||
}
|
||||
if `"`trace'"'!="" {
|
||||
summarize `imputed' if missing(`y') & `touse'==1
|
||||
local mean=r(mean)
|
||||
local posts `posts' (`mean')
|
||||
/*
|
||||
noi di as text %11s abbrev("`x`i''",10) %7.0g `mean' _cont
|
||||
foreach v of var `vars' {
|
||||
if "`v'"=="`x`i''" {
|
||||
noi di as result " ." _cont
|
||||
}
|
||||
else noi di as result _skip(1) %7.0g _b[`v'] _cont
|
||||
}
|
||||
noi di
|
||||
*/
|
||||
}
|
||||
replace `x`i''=`imputed'
|
||||
drop `y' `imputed'
|
||||
}
|
||||
}
|
||||
if `"`trace'"'!="" post `tmp' `posts'
|
||||
if `recalc'>0 { // update covariates needing recalculation
|
||||
forvalues i=1/`nvar' {
|
||||
if "`exp`i''"!="" & `nimp`i''>0 {
|
||||
replace `x`i''=`exp`i''
|
||||
}
|
||||
}
|
||||
}
|
||||
if `to_imp'==1 & "`first'"!="" {
|
||||
noi di as text _n "[Only 1 variable to be imputed, therefore no cycling needed.]"
|
||||
}
|
||||
}
|
||||
}
|
||||
if `"`trace'"'!="" postclose `tmp'
|
||||
* Save to file with cases in original order
|
||||
quietly {
|
||||
local impvl /* list of newvars containing imputations */
|
||||
sort `order'
|
||||
forvalues i=1/`nvar' {
|
||||
return scalar ni`i'=`nimp`i''
|
||||
if "`genmiss'"!="" {
|
||||
cap drop `genmiss'`x`i''
|
||||
rename `mvar`i'' `genmiss'`x`i''
|
||||
}
|
||||
if `main`i'' {
|
||||
local impvl `impvl' `x`i''
|
||||
lab var `x`i'' "`lab`i''"
|
||||
cap drop `miss`i''
|
||||
}
|
||||
}
|
||||
drop `touse'
|
||||
if "`mitools'"=="" {
|
||||
* Save list of imputed variables with imputations to char _dta[mi_ivar]
|
||||
char _dta[mi_ivar] `impvl'
|
||||
char _dta[mi_id] `id'
|
||||
rename `order' `id'
|
||||
return local impvl `impvl'
|
||||
return scalar imputed=`to_imp'
|
||||
}
|
||||
else drop `order'
|
||||
save `"`using'"', replace
|
||||
}
|
||||
end
|
||||
|
||||
*! v 1.0.0 PR 01Jun2001.
|
||||
program define sampmis
	version 7
	* sampmis newvar = varname
	* Creates newvar as a copy of varname in which every missing observation
	* is filled with a nonmissing value of varname taken in random order.
	* The data are re-sorted: nonmissing observations come first (shuffled),
	* missing observations follow in their original relative order, so that
	* Stata cannot introduce an arbitrary order among the missings; this
	* keeps the sampling reproducible for a given random-number seed.
	syntax newvarname =/exp
	quietly {
		* Random sort key for nonmissing rows, observation number for missing rows
		tempvar key
		generate double `key' = cond(missing(`exp'), _n, uniform())
		sort `key'
		drop `key'
		count if !missing(`exp')
		local nobs = r(N)
		local vtype: type `exp'
		generate `vtype' `varlist' = `exp'
		* Copy the leading run of nobs nonmissing values down the dataset,
		* one run at a time; the last run is truncated at _N.
		local nblocks = int((_N-1)/`nobs')
		forvalues b = 1/`nblocks' {
			local lag = `b'*`nobs'
			local from = `lag'+1
			local to = min(`lag'+`nobs', _N)
			replace `varlist' = `exp'[_n-`lag'] in `from'/`to'
		}
	}
end
|
||||
|
||||
program define ChkIn, sclass
	version 7
	* ChkIn v "varlist"
	* Stores in s(k) the position of word v within varlist.
	* When v is not a member, s(k) is set to 0, an error message is shown and
	* the program exits with return code 198.
	args v varlist
	sreturn clear
	local pos: list posof "`v'" in varlist
	sreturn local k `pos'
	if `pos'==0 {
		display as error "`v' is not a valid covariate"
		exit 198
	}
end
|
||||
|
||||
*! version 1.0.0 PR 20dec2004.
|
||||
program define ParsExp, sclass
	version 8
	* ParsExp expression
	* Extracts the names of existing variables mentioned in an expression.
	* The expression is split on blanks and operator/punctuation characters;
	* every resulting token that -confirm var- accepts is collected once, in
	* order of first appearance, and the list is returned in s(result).
	tokenize `*', parse(" +-/^()[]{}.*=<>!$%&|~`'")
	local vl
	while "`1'"!="" {
		cap confirm var `1'
		if _rc==0 {
			* Whole-word membership test. A substring test would wrongly
			* treat x1 as already present once x12 had been collected.
			local pos: list posof "`1'" in vl
			if `pos'==0 {
				local vl `vl' `1'
			}
		}
		mac shift
	}
	sreturn local result `vl'
end
|
||||
|
||||
* detangle target tname rhs [separator]
* Splits target, a separator-delimited list of "varlist:string" clusters, and
* stores the string assigned to the j-th variable of rhs in global S_j
* (S_j is emptied for variables that no cluster mentions).
* tname is used only to name the offending option in error messages.
* NOTE(review): the remark in the comment block below about positional
* arguments 3 and 4 and "lowest and highest value checking" looks inherited
* from frac_dis.ado; no value checking is visible in this program.
program define detangle
|
||||
version 8
|
||||
/*
|
||||
Disentangle varlist:string clusters---e.g. for DF.
|
||||
Returns values in $S_*.
|
||||
If `4' is null, `3' is assumed to contain rhs
|
||||
and lowest and highest value checking is disabled.
|
||||
Heavily based on frac_dis.ado, but "=" disallowed as separator
|
||||
and "\" allowed (for use by passive()).
|
||||
*/
|
||||
args target tname rhs separator
|
||||
* Default separator is a comma.
if "`separator'"=="" {
|
||||
local separator ","
|
||||
}
|
||||
* Expand abbreviations in rhs and keep its variables as n1, n2, ... (nx of them).
unab rhs:`rhs'
|
||||
local nx: word count `rhs'
|
||||
forvalues j=1/`nx' {
|
||||
local n`j': word `j' of `rhs'
|
||||
}
|
||||
* Split target on the separator; each non-separator token becomes one cluster.
tokenize "`target'", parse("`separator'")
|
||||
local ncl 0 /* # of separator-delimited clusters */
|
||||
while "`1'"!="" {
|
||||
if "`1'"=="`separator'" {
|
||||
mac shift
|
||||
}
|
||||
local ncl=`ncl'+1
|
||||
local clust`ncl' "`1'"
|
||||
mac shift
|
||||
}
|
||||
* Drop an empty final cluster (left behind when target ends with a separator).
if "`clust`ncl''"=="" {
|
||||
local --ncl
|
||||
}
|
||||
* There cannot be more clusters than variables in rhs.
if `ncl'>`nx' {
|
||||
di as err "too many `tname'() values specified"
|
||||
exit 198
|
||||
}
|
||||
/*
|
||||
Disentangle each varlist:string cluster
|
||||
*/
|
||||
forvalues i=1/`ncl' {
|
||||
tokenize "`clust`i''", parse(":")
|
||||
* A cluster without a colon is allowed only in first position; it is then
* taken as the string and applied to every variable in rhs.
if "`2'"!=":" {
|
||||
if `i'>1 {
|
||||
noi di as err "invalid `clust`i'' in `tname'() (syntax error)"
|
||||
exit 198
|
||||
}
|
||||
local 2 ":"
|
||||
local 3 `1'
|
||||
local 1
|
||||
forvalues j=1/`nx' {
|
||||
local 1 `1' `n`j''
|
||||
}
|
||||
}
|
||||
* Save the string before positional macros are overwritten by the next tokenize.
local arg3 `3'
|
||||
unab arg1:`1'
|
||||
tokenize `arg1'
|
||||
* Record the string against the position of each listed variable in rhs.
* ChkIn is not captured here, so a variable absent from rhs stops with its error.
while "`1'"!="" {
|
||||
ChkIn `1' "`rhs'"
|
||||
local v`s(k)' `arg3'
|
||||
mac shift
|
||||
}
|
||||
}
|
||||
* Publish results: S_j holds the string for the j-th rhs variable, or is emptied.
forvalues j=1/`nx' {
|
||||
if "`v`j''"!="" {
|
||||
global S_`j' `v`j''
|
||||
}
|
||||
else global S_`j'
|
||||
}
|
||||
end
|
||||
|
||||
*! Based on artformatnos.ado v 1.0.0 PR 26Feb2004
|
||||
program define formatline, rclass
	version 8
	* formatline, n(string) maxlen(#) [format(fmt) leading(#) separator(str)]
	* Breaks the items of n() into display lines no longer than maxlen()
	* characters where possible.
	*   n()         items to lay out; split on separator() if given, else on blanks
	*   maxlen()    maximum length of a line
	*   format()    optional display format applied to each item
	*   leading()   if >0, one blank is placed before each item; <0 is an error
	*   separator() optional parse character(s) between items
	* Returns r(lines), the number of lines, and r(line1), r(line2), ...
	* An item that is longer than maxlen() on its own gets a line to itself.
	syntax, N(string) Maxlen(int) [ Format(string) Leading(int 1) Separator(string) ]

	if `leading'<0 {
		di as err "invalid leading()"
		exit 198
	}

	if "`separator'"!="" {
		tokenize "`n'", parse("`separator'")
	}
	else tokenize "`n'"

	* Collect the items as n1, n2, ..., skipping separator tokens
	local n 0
	while "`1'"!="" {
		if "`1'"!="`separator'" {
			local ++n
			local n`n' `1'
		}
		macro shift
	}

	local j 0
	local length 0
	forvalues i=1/`n' {
		if "`format'"!="" {
			capture local out: display `format' `n`i''
			if _rc {
				di as err "invalid format attempted for: " `"`n`i''"'
				exit 198
			}
		}
		else local out `n`i''
		if `leading'>0 {
			local out " `out'"
		}
		local l1=length("`out'")
		local l2=`length'+`l1'
		* Flush the current line only if it already holds something. Without
		* the length check, a first item longer than maxlen() would produce
		* an empty r(line1).
		if `l2'>`maxlen' & `length'>0 {
			local ++j
			return local line`j'="`line'"
			local line "`out'"
			local length `l1'
		}
		else {
			local length `l2'
			local line "`line'`out'"
		}
	}
	* Emit the line still being built
	local ++j
	return local line`j'="`line'"
	return scalar lines=`j'
end
|
||||
*! version 1.1.0 PR 02aug2005.
|
||||
* listsort "list" [, reverse lexicographic]
* Sorts the words of list with a shell sort and returns in s():
*   s(i#)      the #-th word after sorting
*   s(index#)  the original position of that word
*   s(list)    all words in sorted order
*   s(index)   the original positions in sorted order
* Words are compared as numbers, ascending, by default; reverse flips the
* comparison. If list is empty the program exits without touching s().
program define listsort, sclass
|
||||
version 6
|
||||
* First token of the command line is the (possibly quoted) list; the rest is options.
gettoken p 0 : 0, parse(" ,")
|
||||
if `"`p'"'=="" {
|
||||
exit
|
||||
}
|
||||
sret clear
|
||||
syntax , [ Reverse Lexicographic ]
|
||||
* NOTE(review): the option macro is read as lexicog, not lexicographic;
* presumably this relies on 7-character macro names under version 6 - confirm.
local lex="`lexicog'"!=""
|
||||
* comp is the comparison that triggers a swap: > sorts ascending, < descending.
if "`reverse'"!="" { local comp < }
|
||||
else local comp >
|
||||
local np: word count `p'
|
||||
* Load the words into p1..pnp and start with the identity permutation in index#.
* In numeric mode each word must be a number, otherwise confirm stops with an error.
local i 1
|
||||
while `i'<=`np' {
|
||||
local p`i': word `i' of `p'
|
||||
local index`i' `i'
|
||||
if !`lex' { confirm number `p`i'' }
|
||||
local i=`i'+1
|
||||
}
|
||||
* Apply shell sort (Kernighan & Ritchie p 58)
|
||||
local gap=int(`np'/2)
|
||||
while `gap'>0 {
|
||||
local i `gap'
|
||||
while `i'<`np' {
|
||||
local j=`i'-`gap'
|
||||
while `j'>=0 {
|
||||
* i and j are 0-based as in the C original; j1 and j2 are the 1-based
* positions of the pair being compared, gap apart.
local j1=`j'+1
|
||||
local j2=`j'+`gap'+1
|
||||
if `lex' { local swap=(`"`p`j1''"' `comp' `"`p`j2''"') }
|
||||
else local swap=(`p`j1'' `comp' `p`j2'')
|
||||
if `swap' {
|
||||
local temp `p`j1''
|
||||
local p`j1' `p`j2''
|
||||
local p`j2' `temp'
|
||||
* swap indexes
|
||||
local temp `index`j1''
|
||||
local index`j1' `index`j2''
|
||||
local index`j2' `temp'
|
||||
}
|
||||
local j=`j'-`gap'
|
||||
}
|
||||
local i=`i'+1
|
||||
}
|
||||
local gap=int(`gap'/2)
|
||||
}
|
||||
* Return each sorted word and its original position, and rebuild both full lists.
local p
|
||||
local index
|
||||
local i 1
|
||||
while `i'<=`np' {
|
||||
sret local i`i' `p`i''
|
||||
sret local index`i' `index`i''
|
||||
local p `p' `p`i''
|
||||
local index `index' `index`i''
|
||||
local i=`i'+1
|
||||
}
|
||||
/* Find antirank of each obs
|
||||
forvalues i=1/`np' {
|
||||
forvalues j=1/`np' {
|
||||
if
|
||||
*/
|
||||
sret local list `p'
|
||||
sret local index `index'
|
||||
end
|
||||
* End of the ado-file proper: -exit- stops the file from being read further.
* NOTE(review): the statements after it look like a leftover fragment of an
* older ranking routine (they use c, nx and n, none defined here) - confirm
* they are dead code before removing them.
exit
|
||||
||||
|
||||
sort `c'
|
||||
local i 0
|
||||
while `i'<`nx' {
|
||||
local i=`i'+1
|
||||
/*
|
||||
Store positions of sorted predictors in user's list
|
||||
*/
|
||||
local j 0
|
||||
while `j'<`nx' {
|
||||
local j=`j'+1
|
||||
if `i'==`n'[`j'] {
|
||||
local r`j' `i'
|
||||
local j `nx'
|
||||
}
|
||||
}
|
||||
}
|
562
Modules/ado/plus/i/ice.hlp
Normal file
562
Modules/ado/plus/i/ice.hlp
Normal file
@ -0,0 +1,562 @@
|
||||
{smcl}
|
||||
{* 30aug2005}{...}
|
||||
{hline}
|
||||
help for {hi:ice}, {hi:uvis}{right:(SJ5-4: st0067_2; SJ5-2: st0067_1; SJ4-3: st0067)}
|
||||
{hline}
|
||||
|
||||
{title:Multiple imputation by the MICE system of chained equations}
|
||||
|
||||
{p 8 17 2}
|
||||
{cmd:ice}
|
||||
{it:mainvarlist}
|
||||
{cmd:using} {it:filename}[{cmd:.dta}]
|
||||
{ifin}
|
||||
{weight}
|
||||
[{cmd:,}
|
||||
{cmdab:bo:ot}[{cmd:(}{it:varlist}{cmd:)}]
|
||||
{cmd:cc(}{it:varlist}{cmd:)}
|
||||
{cmdab:cm:d(}{it:cmdlist}{cmd:)}
|
||||
{cmdab:cy:cles(}{it:#}{cmd:)}
|
||||
{cmdab:dry:run}
|
||||
{cmd:eq(}{it:eqlist}{cmd:)}
|
||||
{cmdab:g:enmiss(}{it:string}{cmd:)}
|
||||
{cmdab:i:d(}{it:string}{cmd:)}
|
||||
{cmd:m(}{it:#}{cmd:)}
|
||||
{cmdab:ma:tch}[{cmd:(}{it:varlist}{cmd:)}]
|
||||
{cmdab:nocons:tant}
|
||||
{cmdab:nosh:oweq}
|
||||
{cmd:on(}{it:varlist}{cmd:)}
|
||||
{cmdab:pass:ive(}{it:passivelist}{cmd:)}
|
||||
{cmdab:sub:stitute(}{it:sublist}{cmd:)}
|
||||
{cmd:replace}
|
||||
{cmdab:se:ed(}{it:#}{cmd:)}
|
||||
{cmdab:tr:ace(}{it:filename}{cmd:)}]
|
||||
|
||||
{p 8 17 2}
|
||||
{cmd:uvis}
|
||||
{it:regression_cmd}
|
||||
{it:yvar}
|
||||
{it:xvarlist}
|
||||
{ifin}
|
||||
{weight}
|
||||
{cmd:,}
|
||||
{cmdab:g:en(}{it:newvarname}{cmd:)}
|
||||
[{cmdab:bo:ot}
|
||||
{cmdab:ma:tch}
|
||||
{cmdab:nocons:tant}
|
||||
{cmd:replace}
|
||||
{cmdab:se:ed(}{it:#}{cmd:)}]
|
||||
|
||||
{p 4 4 2}
|
||||
where
|
||||
|
||||
{p 8 8 2}
|
||||
{it:regression_cmd} may be
|
||||
{helpb logistic},
|
||||
{helpb logit},
|
||||
{helpb mlogit},
|
||||
{helpb ologit},
|
||||
or
|
||||
{helpb regress}.
|
||||
|
||||
{p 4 4 2}
|
||||
All weight types supported by {it:regression_cmd} are allowed; see {help weight}.
|
||||
|
||||
|
||||
{title:Description}
|
||||
|
||||
{p 4 4 2}
|
||||
{cmd:ice} imputes missing values
|
||||
in {it:mainvarlist} by using switching regression, an iterative multivariable
|
||||
regression technique. The abbreviation MICE means multiple imputation by
|
||||
chained equations and was apparently coined by Steff van Buuren. {cmd:ice}
|
||||
implements MICE for Stata. Sets of imputed and nonimputed variables are
|
||||
stored to a new file called {it:filename}. Any number of complete imputations
|
||||
may be created.
|
||||
|
||||
{p 4 4 2}
|
||||
{cmd:uvis} (univariate imputation sampling) imputes missing values in the
|
||||
single variable {it:yvar} based on multiple regression on {it:xvarlist}.
|
||||
{cmd:uvis} is called repeatedly by {cmd:ice} in a regression switching mode to
|
||||
perform multivariate imputation.
|
||||
|
||||
{p 4 4 2}
|
||||
The missing observations are assumed to be missing at random (MAR) or
|
||||
missing completely at random (MCAR), according to the jargon. See, for
|
||||
example, van Buuren et al. (1999) for an explanation of these concepts.
|
||||
|
||||
{p 4 4 2}
|
||||
Please note that {cmd:ice} and {cmd:uvis} require Stata 8 or later.
|
||||
There have been incompatibility issues with Stata 7 and earlier.
|
||||
|
||||
|
||||
{title:Options for ice}
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:boot}[{cmd:(}{it:varlist}{cmd:)}] instructs that each member of
|
||||
{it:varlist}, a subset of {it:mainvarlist}, be imputed with the {cmd:boot}
|
||||
option of {cmd:uvis} activated. If {cmd:(}{it:varlist}{cmd:)} is omitted,
|
||||
all members of {it:mainvarlist} with missing observations are imputed using
|
||||
the {cmd:boot} option of {cmd:uvis}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:cc(}{it:varlist}{cmd:)} prevents imputation of missing data in
|
||||
{it:mainvarlist} for cases in which any member of {it:varlist} has a missing
|
||||
value. "cc" signifies "complete case". Note that members of {it:varlist} are
|
||||
used for imputation if they appear in {it:mainvarlist}, but not otherwise. Use
|
||||
of this option is equivalent to entering {cmd:if}
|
||||
{cmd:~missing(}{it:var1}{cmd:) &} {cmd:~missing(}{it:var2}{cmd:)} ..., where
|
||||
{it:var1}, {it:var2}, ... denote the members of {it:varlist}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:cmd(}{it:cmdlist}{cmd:)} defines the regression commands to be used for
|
||||
each variable in {it:mainvarlist}, when it becomes the dependent variable in
|
||||
the switching regression procedure used by {cmd:uvis} (see {hi:Remarks}). The
|
||||
first item in {it:cmdlist} may be a command, such as {cmd:regress}, or may have
|
||||
the syntax {it:varlist}{cmd::}{it:cmd}, specifying that command {it:cmd}
|
||||
applies to all the variables in {it:varlist}. Subsequent items in
|
||||
{it:cmdlist} must follow the latter syntax, and each item should be followed
|
||||
by a comma.
|
||||
|
||||
{p 8 8 2}
|
||||
The default {it:cmd} for a variable is {cmd:logit} when there are two distinct
|
||||
values, {cmd:mlogit} when there are 3-5 and {cmd:regress} otherwise.
|
||||
|
||||
{p 8 18 2} Example: {cmd:cmd(regress)} specifies that all variables are
|
||||
to be imputed by {cmd:regress}, overriding the defaults.
|
||||
|
||||
{p 8 18 2} Example: {cmd:cmd(x1 x2:logit, x3:regress)} specifies that
|
||||
{cmd:x1} and {cmd:x2} are to be imputed by {cmd:logit}, {cmd:x3} by
|
||||
{cmd:regress} and all others by their default choices.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:cycles(}{it:#}{cmd:)} determines the number of cycles of regression
|
||||
switching to be carried out. The default is {cmd:cycles(10)}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:dryrun} does a "dry run"; that is, {cmd:ice}
|
||||
reports the prediction equations it has constructed from the various
|
||||
inputs. No imputation is done, and no files are created. It is not
|
||||
mandatory to specify an output file with {cmd:using} for a dry run.
|
||||
Sometimes the prediction equation set-up needs to be carefully
|
||||
checked before running what may be a lengthy imputation process.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:eq(}{it:eqlist}{cmd:)} allows one to define customized prediction
|
||||
equations for any subset of variables in {it:mainvarlist}. The option,
|
||||
particularly when used with {cmd:passive()}, allows
|
||||
great flexibility in the possible imputation schemes. The
|
||||
syntax of {it:eqlist} is {it:varname1}{cmd::}{it:varlist1}
|
||||
[{cmd:,}{it:varname2}{cmd::}{it:varlist2} ...], where each
|
||||
{it:varname#} (or {it:varlist#})
|
||||
is a member (or subset) of {it:mainvarlist}. It is your responsibility to ensure
|
||||
that each equation is sensible. {cmd:ice} places no restrictions
|
||||
except to check that all variables mentioned are indeed in
|
||||
{it:mainvarlist} and that an equation is not defined
|
||||
for a variable specified to be passively imputed
|
||||
(see the {cmd:passive()} option). Note that {cmd:eq()} takes
|
||||
precedence over all default definitions and assumptions about
|
||||
the way a given variable in {it:mainvarlist} will be imputed.
|
||||
The default, if the {cmd:passive()} and {cmd:substitute()}
|
||||
options are not invoked, is that each
|
||||
variable in {it:mainvarlist} with any missing data is imputed from all
|
||||
the other variables in {it:mainvarlist}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:genmiss(}{it:string}{cmd:)} creates an indicator variable for the
|
||||
missingness of data in any variable in {it:mainvarlist} for which at least one
|
||||
value has been imputed. The indicator variable is set to missing for
|
||||
observations excluded by {cmd:if}, {cmd:in}, etc. The indicator variable for
|
||||
{it:xvar} is named {it:string}{it:xvar}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:id(}{it:string}{cmd:)} creates a variable called {it:string} containing
|
||||
the original sort order of the data. The default {it:string} is {cmd:_i}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:m(}{it:#}{cmd:)} defines {it:#} as the number of imputations required
|
||||
(minimum 1, no upper limit). The default is {cmd:m(1)}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:match}[{cmd:(}{it:varlist}{cmd:)}] instructs that each member of
|
||||
{it:varlist} be imputed with the {cmd:match} option of {cmd:uvis}.
|
||||
This provides prediction matching for each member of {it:varlist}.
|
||||
If {cmd:(}{it:varlist}{cmd:)} is omitted then all relevant variables are
|
||||
imputed with the {cmd:match} option of {cmd:uvis}. The default, if
|
||||
{cmd:match()} is not specified, is to draw from the posterior
|
||||
predictive distribution of each variable requiring imputation.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:noconstant} suppresses the regression constant in all regressions.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:noshoweq} suppresses the presentation of the prediction equations.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:on(}{it:varlist}{cmd:)} changes the operation of {cmd:ice} in a major
|
||||
way. With this option, {cmd:uvis} imputes each member of {it:mainvarlist}
|
||||
univariately on {it:varlist}. This provides a convenient way of producing
|
||||
multiple imputations when imputation for each variable in {it:mainvarlist} is
|
||||
to be done univariately on a set of complete predictors.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:passive(}{it:passivelist}{cmd:)} allows the use of "passive" imputation
|
||||
of variables that depend on other variables, some of which are imputed.
|
||||
The syntax of {it:passivelist} is {it:varname}{cmd::}{it:exp}
|
||||
[{cmd:\}{it:varname}{cmd::}{it:exp} ...]. Notice the requirement to use
|
||||
"\" as a separator between items in {it:passivelist}, rather than the usual comma;
|
||||
the reason is that a comma may be a valid part of an expression.
|
||||
The option is most easily explained by example. Suppose x1 is a categorical variable
|
||||
with 3 levels, and that two dummy variables x1a, x1b have been created by the commands
|
||||
|
||||
{p 8 8 2}
|
||||
{cmd:. generate byte x1a=(x1==2)}{break}
|
||||
{cmd:. generate byte x1b=(x1==3)}
|
||||
|
||||
{p 8 8 2}
|
||||
Now suppose that x1 is to be imputed by the {cmd:mlogit} command and is
|
||||
to be treated as the two dummy variables x1a and x1b when predicting other
|
||||
variables. Use of {cmd:mlogit} is achieved by the option
|
||||
{cmd:cmd(x1:mlogit)}. When x1 is imputed, we want x1a and x1b to be updated
|
||||
with new values which depend on the imputed values of x1. This may be
|
||||
achieved by specifying {cmd:passive(x1a:x1==2 \ x1b:x1==3)}. It is necessary
|
||||
also to remove x1 from the list of predictors when variables other than x1 are
|
||||
being imputed, and this is done by using the {cmd:substitute()} option; in the
|
||||
present example, you would specify {cmd:substitute(x1:x1a x1b)}.
|
||||
|
||||
{p 8 8 2}
|
||||
Note that although in this example x1a will take the (possibly
|
||||
unintended) value of 0 when x1 is missing, {cmd:ice} is careful to
|
||||
ensure that x1a (and x1b) inherit the missingness of x1 and are
|
||||
passively imputed following active imputation of missing values
|
||||
of x1. If this were not done, incorrect results could occur. The
|
||||
responsibility of the user is to create x1a and x1b before running
|
||||
{cmd:ice} such that their missing values are identical
|
||||
to those of x1.
|
||||
|
||||
{p 8 8 2}
|
||||
A second example is multiplicative interactions between variables, for
|
||||
example, between x1 and x2 (e.g., x12=x1*x2); this could be entered as
|
||||
{cmd:passive(x12:x1*x2)}. It would cause the interaction term
|
||||
x12 to be omitted when either x1 or x2 was being imputed, since it would
|
||||
make no sense to impute x1 from its interaction with x2.
|
||||
{cmd:substitute()} is not needed here.
|
||||
|
||||
{p 8 8 2}
|
||||
It should be stressed that variables to be imputed passively must already
|
||||
exist and must be included in {it:mainvarlist}; otherwise, they will not be
|
||||
recognized.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:substitute(}{it:sublist}{cmd:)} is typically used with the
|
||||
{cmd:passive()} option to represent multilevel categorical variables
|
||||
as dummy variables in models for predicting other variables. See
|
||||
{cmd:passive()} for more details. The syntax of {it:sublist} is
|
||||
{it:varname}{cmd::}{it:dummyvarlist}
|
||||
[{cmd:,}{it:varname}{cmd::}{it:dummyvarlist} ...], where {it:varname} is the
|
||||
name of a variable to be substituted and {it:dummyvarlist} is the list of
|
||||
dummy variables representing it.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:replace} permits {it:filename} to be overwritten with new data.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:seed(}{it:#}{cmd:)} sets the random-number seed to {it:#}.
|
||||
To reproduce a set of imputations, the same random-number seed should be used.
|
||||
The default is {cmd:seed(0)}, meaning no seed is set by the program.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:trace(}{it:filename}{cmd:)} monitors the convergence of the imputation
|
||||
algorithm. For each original variable with missing values, the mean of the
|
||||
imputed values is stored as a variable in {it:filename}, together
|
||||
with the cycle number at which that
|
||||
mean was calculated. The results are stored only for the final imputation.
|
||||
For diagnostic purposes, it is sensible to run {cmd:trace()}
|
||||
with {cmd:m(1)} and many cycles, such as {cmd:cycles(100)}.
|
||||
When the run is complete, it is helpful to load {it:filename}
|
||||
into memory and plot the mean for each imputed
|
||||
variable against the cycle number. If necessary, smoothing may be applied
|
||||
to clarify any apparent pattern. Convergence is judged to have occurred
|
||||
when the pattern of the imputed means is random.
|
||||
The number of cycles needed for convergence is usually obvious from the appearance
|
||||
of the plot.
|
||||
|
||||
|
||||
{title:Options for uvis}
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:gen(}{it:newvar}{cmd:)} is not optional. {it:newvar} contains original
|
||||
(nonmissing) and imputed (originally missing) values of {it:yvar}.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:boot} invokes a bootstrap method for creating imputed values (see Remarks).
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:match} creates imputations by prediction matching. The default is to draw
|
||||
imputations at random from the posterior distribution of the missing values of
|
||||
{it:yvar}, conditional on the observed values and the members of
|
||||
{it:xvarlist}. See Remarks for further details.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:noconstant} suppresses the regression constant in all regressions.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:replace} permits {it:newvar} (see {cmd:gen(}{it:newvar}{cmd:)})
|
||||
to be overwritten with new data. {cmd:replace} may not be abbreviated.
|
||||
|
||||
{p 4 8 2}
|
||||
{cmd:seed(}{it:#}{cmd:)} sets the random-number seed to {it:#}.
|
||||
See {hi:Remarks} for comments on how to ensure reproducible imputations
|
||||
by using the {cmd:seed()} option.
|
||||
The default is {cmd:seed(0)}, meaning no seed is set by the program.
|
||||
|
||||
|
||||
{title:Remarks}
|
||||
|
||||
{p 4 4 2}
|
||||
{cmd:uvis} imputes {it:yvar} from {it:xvarlist} according to the following
|
||||
algorithm (see van Buuren et al. (1999, section 3.2) for further technical
|
||||
details):
|
||||
|
||||
{p 8 12 2}
|
||||
1. Estimate the vector of coefficients (beta) and the residual variance
|
||||
by regressing the nonmissing values of {it:yvar} on the current "completed"
|
||||
version of {it:xvarlist}. Predict the fitted values {it:etaobs} at the
|
||||
nonmissing observations of {it:yvar}.
|
||||
|
||||
{p 8 12 2}
|
||||
2. Draw at random a value (sigma_star) from the posterior distribution of the
|
||||
residual standard deviation.
|
||||
|
||||
{p 8 12 2}
|
||||
3. Draw at random a value (beta_star) from the posterior distribution of beta,
|
||||
allowing, through sigma_star, for uncertainty in beta.
|
||||
|
||||
{p 8 12 2}
|
||||
4. Use beta_star to predict the fitted values {it:etamis}
|
||||
at the missing observations of {it:yvar}.
|
||||
|
||||
{p 8 12 2}
|
||||
5. The imputed values are predicted directly from beta_star, sigma_star and
|
||||
the covariates. When imputation is by linear regression ({cmd:regress}
|
||||
command), this step assumes that {it:yvar} is Normally distributed, given the
|
||||
covariates. For other types of imputation, samples are drawn from the
|
||||
appropriate distribution.
|
||||
|
||||
{p 4 4 2}
|
||||
With the {cmd:match} option, step 5 is replaced by the following.
|
||||
For each missing observation of {it:yvar} with prediction {it:etamis},
|
||||
find the non-missing observation of {it:yvar} whose prediction
|
||||
({it:etaobs}) on observed data is closest to {it:etamis}. This closest
|
||||
non-missing observation is used to impute the missing value of {it:yvar}.
|
||||
|
||||
{p 4 4 2}
|
||||
The default draw method is not robust to departures from Normality and
|
||||
may produce implausible imputations. For example, if the original distribution
|
||||
is skew and positive-valued, the imputed distribution will not necessarily
|
||||
have the appropriate amount of skewness, nor will all the imputed values
|
||||
necessarily be positive. Log transformation of positive variables may greatly
|
||||
improve the appropriateness of the imputations.
|
||||
|
||||
{p 4 4 2}
|
||||
The alternative {cmd:match} method is recommended only for continuous variables
|
||||
when the Normality assumption is clearly untenable, even approximately.
|
||||
It is not necessary, nor is it recommended, for binary, ordered categorical or
|
||||
nominal variables. {cmd:match} may work well when the distribution of a
|
||||
continuous variable is very non-Normal, but it may sometimes result in biased
|
||||
imputations.
|
||||
|
||||
{p 4 4 2}
|
||||
With the {cmd:boot} option, steps 2-4 are replaced by a bootstrap estimation of
|
||||
beta_star; beta_star
|
||||
is estimated by regressing {it:yvar} on {it:xvarlist} after taking a bootstrap sample
|
||||
of the non-missing observations. This has the advantage of robustness since the
|
||||
distribution of beta is no longer assumed to be multivariate normal.
|
||||
|
||||
{p 4 4 2}
|
||||
Note that {cmd:uvis} will not impute observations for which a value
|
||||
of a variable in {it:xvarlist} is missing. However, all original
|
||||
(missing or nonmissing) observations of {it:yvar} will be copied into
|
||||
{it:newvarname} in such cases. This is a change from the first release of
|
||||
{cmd:uvis} (with {cmd:mvis}). Previously, {it:newvarname} would be set to
|
||||
missing whenever a value of a variable in {it:xvarlist} was missing,
|
||||
irrespective of the value of {it:yvar}.
|
||||
|
||||
{p 4 4 2}
|
||||
Missing data for ordered (or unordered) categorical covariates should
|
||||
be imputed by using the {cmd:ologit} (or {cmd:mlogit}) command. In these cases,
|
||||
prediction matching is done on the scale of the mean absolute difference
|
||||
in the predicted class probabilities, preceded by logit transformation.
|
||||
|
||||
{p 4 4 2}
|
||||
{cmd:ice} carries out multivariate imputation in {it:mainvarlist} using
|
||||
regression switching (van Buuren et al. 1999) as follows:
|
||||
|
||||
{p 8 12 2}
|
||||
1. Ignore any observations for which {it:mainvarlist} has only missing values,
|
||||
or if the {cmd:cc(}{it:varlist}{cmd:)} option has been specified, for
|
||||
which any member of {it:varlist} has a missing value.
|
||||
|
||||
{p 8 12 2}
|
||||
2. For each variable in {it:mainvarlist} with any missing data, randomly order
|
||||
that variable and replicate the observed values across the missing cases.
|
||||
This step initializes the iterative procedure by ensuring that no relevant
|
||||
values are missing.
|
||||
|
||||
{p 8 12 2}
|
||||
3. For each variable in {it:mainvarlist} in turn, impute missing values by
|
||||
applying {cmd:uvis} with the remaining variables as covariates.
|
||||
|
||||
{p 8 12 2}
|
||||
4. Repeat step 3 {cmd:cycles()} times, replacing the imputed values with updated
|
||||
values at the end of each cycle.
|
||||
|
||||
{p 4 4 2}
|
||||
A single imputation sample is created for each variable with any relevant
|
||||
missing values.
|
||||
|
||||
{p 4 4 2}
|
||||
Van Buuren recommends {cmd:cycles(20)} but goes on to say that 10 or even 5
|
||||
iterations are probably sufficient. We have chosen a compromise default of 10.
|
||||
|
||||
{p 4 4 2}
|
||||
"Multiple imputation" (MI) implies the creation and analysis of several
|
||||
imputed datasets. To do this, one would run {cmd:ice} with {it:m} set
|
||||
to a suitable number, for example 5. To obtain final estimates
|
||||
of the parameters of interest and their standard errors,
|
||||
one would fit a model in
|
||||
each imputation and carry out the appropriate post-MI averaging procedure
|
||||
on the results from the {it:m} separate imputations. A suitable
|
||||
estimation tool for this purpose is {helpb micombine}.
|
||||
|
||||
{title:Handling categorical variables}
|
||||
|
||||
{p 4 4 2}
|
||||
Binary variables present no difficulty: by default, in the MICE
|
||||
procedure, when such a variable is the response, it is
|
||||
predicted from other variables by using logistic regression;
|
||||
when it is a covariate, it is modeled in the only way possible,
|
||||
effectively as a single dummy variable. Categorical variables with 3 or
|
||||
more levels may in principle be treated in different ways.
|
||||
By default, in {cmd:ice} variables with 3-5 levels are modeled
|
||||
using multinomial logistic regression ({cmd:mlogit} command) when
|
||||
the response, and as a single linear term when a covariate. The
|
||||
same behavior occurs with the ordered logistic model ({cmd:ologit}
|
||||
command), requested via the {cmd:cmd()} option. The use of dummy variables
|
||||
instead of a single linear term may be imposed as described under
|
||||
the {cmd:passive()} option. The requisite dummy variables
|
||||
must be created before {cmd:ice} is invoked. Variables with 6 or
|
||||
more levels are treated as ordered and continuous, but again
|
||||
different choices may be imposed by use of the {cmd:cmd()},
|
||||
{cmd:passive()} and {cmd:substitute()} options.
|
||||
|
||||
{p 4 4 2}
|
||||
You should be aware that
|
||||
unless the dataset is large, use of the {cmd:mlogit} command may produce
|
||||
unstable estimates if the number of levels is too large, and
|
||||
may compromise the accuracy of the imputations. It is hard to
|
||||
predict when this will occur.
|
||||
|
||||
{p 4 4 2}
|
||||
Note that due to a peculiarity of the way the {cmd:mlogit} command works,
|
||||
variables with score labels cause problems to {cmd:ice}
|
||||
and {cmd:uvis} when missing data are imputed using {cmd:mlogit}.
|
||||
Score labels for such variables are removed in the file of imputed
|
||||
data. See also the related comment on {hi:Postestimation prediction} in
|
||||
{helpb micombine}.
|
||||
|
||||
|
||||
{title:Further notes}
|
||||
|
||||
{p 4 4 2}
|
||||
{cmd:ice} determines the order of imputing variables in the round
|
||||
of chained equations according to the amount of missing data.
|
||||
Variables with the least missingness are imputed first.
|
||||
|
||||
{p 4 4 2}
|
||||
An important application of MI is to investigate possible models, for example
|
||||
prognostic models, in which selection of influential variables is required
|
||||
(Clark and Altman 2003). For example, the stability of the final model across
|
||||
the imputation samples is of interest. This area of inquiry is in its infancy.
|
||||
|
||||
{p 4 4 2}
|
||||
In survival analysis, it is recommended to include the censoring indicator
|
||||
and the log of the survival time in the variables to be used for imputation.
|
||||
Van Buuren et al. (1999) give a detailed discussion of the different types
|
||||
of covariate that can be included in the imputation model and discuss the
|
||||
important issue of how to deal with variables which are missing completely at
|
||||
random (MCAR), missing at random (MAR), and missing not at random (MNAR).
|
||||
|
||||
{p 4 4 2}
|
||||
See also Van Buuren's web site http://www.multiple-imputation.com for further
|
||||
information and software sources.
|
||||
|
||||
|
||||
{title:Examples}
|
||||
|
||||
{p 4 10 2}
|
||||
{cmd:. uvis regress y x1 x2 x3, gen(ym)}
|
||||
|
||||
{p 4 10 2}
|
||||
{cmd:. ice x1 x2 x3 using imputed, m(5)}
|
||||
|
||||
{p 4 10 2}
|
||||
{cmd:. ice x1 x2 x3 using imputed, m(5) cycles(20) cc(x4 x5)}
|
||||
|
||||
{p 4 10 2}
|
||||
{cmd:. ice x1-x5 using imputed, m(10) boot match(x1 x2 x3) cmd(x1 x2:mlogit, x3:ologit) id(pid) seed(101) genmiss(m_)}
|
||||
|
||||
{p 4 10 2}
|
||||
{cmd:. ice x1 x1a x1b x2 x3 x23 using imputed, m(5) cmd(x1:ologit) passive(x1a:x1==2 \x1b:x1==3 \x23=x2*x3) substitute(x1:x1a x1b)}
|
||||
|
||||
{p 4 10 2}
|
||||
{cmd:. ice y1 y2 y3 x1 x2 x3 x4 using imputed, m(5) eq(y1:x1 x2 y2, y2:y1 x3 x4, y3:y1 y2) match(y3)}
|
||||
|
||||
|
||||
{title:Acknowledgement}
|
||||
|
||||
{p 4 4 2}
|
||||
I am grateful to Gillian Raab for pointing out certain issues with the prediction
|
||||
matching approach, particularly that it is only useful with continuous variables.
|
||||
As a result, the default imputation method has been
|
||||
changed from matching to drawing from the predictive distribution. Gillian also
|
||||
suggested imputing the variables in reverse order of the amount of missingness,
|
||||
and selecting the imputed value at random from the set determined by the available
|
||||
matching predictions. Both suggestions have been implemented in this software update.
|
||||
|
||||
|
||||
{title:Author}
|
||||
|
||||
{p 4 4 2}
|
||||
Patrick Royston, MRC Clinical Trials Unit, London.{break}
|
||||
patrick.royston@ctu.mrc.ac.uk
|
||||
|
||||
|
||||
{title:References}
|
||||
|
||||
{p 4 8 2}
|
||||
van Buuren S., H. C. Boshuizen and D. L. Knook. 1999. Multiple imputation of
|
||||
missing blood pressure covariates in survival analysis.
|
||||
{it:Statistics in Medicine} {cmd:18}:681-694.
|
||||
Also see http://www.multiple-imputation.com.
|
||||
|
||||
{p 4 8 2}
|
||||
Carlin J. B., N. Li, P. Greenwood, and C. Coffey. 2003. Tools for analyzing
|
||||
multiple imputed datasets. {it:Stata Journal} 3(3): 226-244.
|
||||
|
||||
{p 4 8 2}
|
||||
Clark T. G. and D. G. Altman. 2003. Developing a prognostic model
|
||||
in the presence of missing data: an ovarian cancer case-study.
|
||||
{it:Journal of Clinical Epidemiology} 56: 28-37.
|
||||
|
||||
{p 4 8 2}
|
||||
Royston P. 2004. Multiple imputation of missing values.
|
||||
{it:Stata Journal} 4(3): 227-241.
|
||||
|
||||
|
||||
{title:Also see}
|
||||
|
||||
{psee}
|
||||
Online: {helpb mijoin}, {helpb micombine}, {helpb mitools}, and related programs,
|
||||
if installed
|
||||
{p_end}
|
53
Modules/ado/plus/i/iclassr.ado
Normal file
53
Modules/ado/plus/i/iclassr.ado
Normal file
@ -0,0 +1,53 @@
|
||||
*! version 1.1.1 STB-35 sg65
|
||||
program define iclassr
	version 4.0
	* Intra-class correlation from a one-way ANOVA of response `1' on
	* grouping variable `2'.  Leaves the intra-class r in $S_1 and the
	* estimated reliability of a group mean in $S_2, and displays both.
	*
	* Old-style argument declaration consumed by -parse-: exactly two
	* existing variables, optional if/in, optional aweights, and the
	* options center(), ems and noisily.
	local varlist "req ex min(2) max(2)"
	local if "opt"
	local in "opt"
	local weight "aweight"
	local options "Center(string) Ems NOIsily"
	parse "`*'"
	parse "`varlist'", parse(" ")
	* Rebuild the weight clause passed on to -oneway-; the second word of
	* the weight expression (presumably the weight variable) is kept for
	* -markout- and for the running sum below.
	local weight "[`weight'`exp']"
	local wt : word 2 of `exp'

	* Estimation sample: if/in, minus missing values in the two variables
	* (and in the weight, when one was given).
	tempvar touse
	quietly {
		mark `touse' `if' `in'
		markout `touse' `varlist' `wt'
	}

	* nbar = group size used in the formulas, gm1 = (number of groups - 1),
	* ctr  = reference point of the F distribution, fex = max(F - ctr, 0)
	tempname nbar gm1 ctr fex
	if "`ems'" == "" {
		capture `noisily' oneway `1' `2' `weight' if `touse'
		* N / (between df + 1), i.e. the plain average group size
		* (assumes the old -oneway- _result() layout: 1 = N, 3 = between df)
		scalar `nbar' = _result(1)/ (_result(3) + 1)
	}
	else {
		* ems: derive the group size from the distribution of the
		* per-group weight totals instead of the plain average.
		preserve
		qui keep if `touse'
		sort `2'
		if "`wt'" == "" {
			* unweighted: every observation counts as 1
			tempvar one
			qui gen byte `one' = 1
			local wt "`one'"
		}
		tempvar cumw
		qui by `2': gen double `cumw' = sum(`wt')
		* the last observation of each group carries that group's total
		qui summ `cumw' if `2' < `2'[_n+1]
		scalar `gm1' = _result(1) - 1
		scalar `nbar' = _result(1) * _result(3)
		scalar `nbar' = (`nbar' - _result(3) - _result(4)*`gm1'/`nbar')/`gm1'
		capture `noisily' oneway `1' `2' `weight'
	}
	* only return code 134 from the captured -oneway- is re-raised
	if _rc == 134 {
		error(134)
	}

	* Reference point for F: 1 by default, or the mean / median of the
	* F distribution (assumes _result(3), _result(5), _result(6) are the
	* between df, within df and F left behind by -oneway-).
	scalar `ctr' = 1
	if "`center'" == "mean" {
		scalar `ctr' = _result(5)/(_result(5)-2)
	}
	else if "`center'" == "med" {
		scalar `ctr' = invfprob(_result(3), _result(5), 0.5)
	}
	scalar `fex' = max(_result(6) - `ctr', 0)
	global S_1 = `fex' / (`fex' + `ctr'* `nbar')
	global S_2 = `fex' / (`fex' + `ctr')
	di _new in gr "Intra-`2' r =" in ye %7.4f $S_1 _new in gr /*
	*/ "Estimated reliability of a `2' mean (n=" in ye %3.2f `nbar' in gr /*
	*/ ") =" in ye %7.4f $S_2
end
|
1
Modules/ado/plus/i/iclassr.hlp
Normal file
1
Modules/ado/plus/i/iclassr.hlp
Normal file
@ -0,0 +1 @@
|
||||
.h l1way
|
38
Modules/ado/plus/i/iclassr2.ado
Normal file
38
Modules/ado/plus/i/iclassr2.ado
Normal file
@ -0,0 +1,38 @@
|
||||
*! version 1.1.1 STB-35 sg65
|
||||
program define iclassr2
	version 4.0
	* Intra-class correlation for paired measures held in two variables
	* (wide format).  Leaves the intra-class r in $S_1 and the estimated
	* reliability of the mean of the pair in $S_2, and displays both.
	*
	* Old-style argument declaration consumed by -parse-: exactly two
	* existing variables, optional if/in, and the center() option.
	local varlist "req ex min(2) max(2)"
	local if "opt"
	local in "opt"
	local options "Center(string)"
	parse "`*'"
	parse "`varlist'", parse(" ")
	* weights are not declared above; this clause is only referenced in
	* the commented-out fragments after -summ- below
	local weight "[`weight'`exp']"

	tempvar touse
	quietly {
		* complete pairs satisfying if/in
		mark `touse' `if' `in'
		markout `touse' `varlist'

		* ctr = reference point of F, npair = number of pairs, frat = F ratio
		tempname ctr npair frat
		tempvar work

		* pair sums: their variance forms the numerator of F
		gen `work' = `1' + `2' if `touse'
		summ `work' /* `weight' */
		if _result(1) == 0 {
			error 2000
		}
		scalar `npair' = _result(1)
		scalar `frat' = _result(4)

		* pair differences: the denominator combines their variance and
		* squared mean (assumes the old -summarize- _result() layout:
		* 1 = N, 3 = mean, 4 = variance)
		replace `work' = `1' - `2' if `touse'
		summ `work' /* `weight' */
		scalar `frat' = `frat'/((`npair'-1)*_result(4)/`npair' + _result(3)*_result(3))

		* reference point: 1 by default, else mean or median of F
		scalar `ctr' = 1
		if "`center'" == "mean" {
			scalar `ctr' = `npair'/(`npair'-2)
		}
		else if "`center'" == "med" {
			scalar `ctr' = invfprob(`npair'-1, `npair', 0.5)
		}
		global S_1 = (`frat' - `ctr') / (`frat' + `ctr')
		global S_2 = (`frat' - `ctr') / `frat'
	}
	di _new in gr "Intra-class r =" in ye %7.4f $S_1 in gr /*
	*/ " Number of classes =", in ye `npair' _new in gr /*
	*/ "Estimated reliability of a class mean (n=2) =" in ye %7.4f $S_2
end
|
48
Modules/ado/plus/i/iclassr2.hlp
Normal file
48
Modules/ado/plus/i/iclassr2.hlp
Normal file
@ -0,0 +1,48 @@
|
||||
.-
|
||||
help for ^iclassr2^
|
||||
.-
|
||||
|
||||
Intra-class correlation for paired measures [STB-35 sg65]
|
||||
-------------------------------------------
|
||||
|
||||
^iclassr2^ response_var1 response_var2 [^if^ exp] [^in^ range] [^, c^enter^(^Fpos^)^]
|
||||
|
||||
|
||||
Description
|
||||
-----------
|
||||
|
||||
^iclassr2^ calculates the intra-class correlation for paired measurements or as-
|
||||
sessments stored in response_var1 and response_var2. ^iclassr2^ also reports the
|
||||
reliability of the mean of the two measurements as estimated from the Spearman-
|
||||
Brown prophecy formula. Missing values are handled by casewise deletion.
|
||||
|
||||
@loneway@ also calculates intra-class correlations, but requires data in long,
|
||||
rather than wide format. ^iclassr2^ is also much faster than ^loneway^, and offers
|
||||
a small sample adjustment to the intra-class r; see the option ^center^, below.
|
||||
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
^center^ chooses a reference point in the F distribution to center the observed
|
||||
F statistic in estimating the intra-class correlation. The argument Fpos
|
||||
may be 1 (the default), or "med" or "mean" to choose the median or the mean
|
||||
of the appropriate F distribution. This option has little effect unless the
|
||||
number of pairs is small.
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
. ^iclassr2 judge1 judge2^ (inter-judge reliability)
|
||||
. ^iclassr2 judge1 judge2, c(med)^ (inter-judge reliability, center the ob-
|
||||
served F on its median)
|
||||
|
||||
|
||||
Also see
|
||||
--------
|
||||
|
||||
STB: sg65 (STB-35)
|
||||
Manual: ^[R] loneway^
|
||||
On-line: help for @loneway@
|
||||
|
188
Modules/ado/plus/i/imputeitems.ado
Normal file
188
Modules/ado/plus/i/imputeitems.ado
Normal file
@ -0,0 +1,188 @@
|
||||
*! version 2.1 24 November 2008
|
||||
*! Jean-Benoit Hardouin
|
||||
************************************************************************************************************
|
||||
* imputeitems: Imputation of missing data of binary items
|
||||
*
|
||||
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
|
||||
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
|
||||
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
|
||||
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
|
||||
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
|
||||
*
|
||||
* Jean-benoit Hardouin, Faculty of Pharmaceutical Sciences - University of Nantes - France
|
||||
* jean-benoit.hardouin@univ-nantes.fr
|
||||
*
|
||||
* News about this program : http://www.anaqol.org
|
||||
* FreeIRT Project : http://www.freeirt.org
|
||||
*
|
||||
* Copyright 2006-2008 Jean-Benoit Hardouin
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
************************************************************************************************************/
|
||||
|
||||
|
||||
program define imputeitems
	* Impute missing responses to dichotomous (0/1) items.
	*
	*   varlist   two or more numeric items coded 0/1
	*   if        restricts the observations used and imputed
	*   prefix()  prefix of the new variables (default "imp"); one variable
	*             named <prefix><item> is created per item
	*   method()  pms (default), ims, cim, ics, log or worst; the older names
	*             bip, bii, bic and bil are the random versions of
	*             pms, ims, cim and log
	*   random    draw the imputed value instead of rounding the probability
	*   max()     persons with more than max() missing items are not imputed
	*             (0, the default, means no limit)
	*
	* Exits with return code 198 for non-dichotomous items or an unknown
	* method, and 2000 for an item without any observed response.
	version 9
	syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int 0)]

	* `if' holds the bare condition (1 = always true) so it can be appended
	* to other conditions with &; `ifif' is the complete qualifier
	if "`if'"=="" {
		local if=1
		local ifif
	}
	else {
		local ifif if `if'
	}

	local nbitems : word count `varlist'
	tokenize `varlist'

	if `max'==0 {
		local max=`nbitems'
	}
	if "`prefix'"=="" {
		local prefix imp
	}

	* Check every item and keep its mean (proportion of positive responses).
	* The values themselves are tested: comparing only the minimum with 0
	* and the maximum with 1 lets through items such as 0/1/2/3.
	forvalues i=1/`nbitems' {
		qui su ``i'' `ifif'
		if r(N)==0 {
			di in red "The item ``i'' has no observed response"
			exit 2000
		}
		local mean`i'=r(mean)
		capture assert inlist(``i'',0,1)|missing(``i'') `ifif'
		if _rc {
			di in red "The {hi:imputeitems} command runs only with dichotomous items"
			exit 198
		}
	}

	if "`method'"=="" {
		local method pms
	}
	if "`method'"!="pms"&"`method'"!="ims"&"`method'"!="cim"&"`method'"!="ics"&"`method'"!="bip"&"`method'"!="bil"&"`method'"!="bic"&"`method'"!="bii"&"`method'"!="log"&"`method'"!="worst" {
		di in red "The method option is unknown (choose among pms, ims, cim, ics, log and worst)"
		exit 198
	}

	* The random option switches each deterministic method to its random
	* counterpart; it has no meaning for ics and worst
	if "`method'"=="pms"&"`random'"!="" {
		local method bip
	}
	else if "`method'"=="ims"&"`random'"!="" {
		local method bii
	}
	else if "`method'"=="log"&"`random'"!="" {
		local method bil
	}
	else if "`method'"=="cim"&"`random'"!="" {
		local method bic
	}
	else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
		di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
		local random
	}

	* Build one candidate variable per item; observed responses are put
	* back in the final loop
	forvalues i=1/`nbitems' {
		tempvar imp`i' tmp`i'
		if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
			* person mean: proportion of positive responses among the
			* items the person answered
			qui egen `imp`i''=rowtotal(`varlist') `ifif'
			qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
			qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
			qui replace `imp`i''=``i'' if ``i''!=.&`if'
			if "`method'"=="pms" {
				qui replace `imp`i''=round(`imp`i'') `ifif'
			}
			else if "`method'"=="bip" {
				qui replace `imp`i''=uniform()<`imp`i'' `ifif'
			}
			else if "`method'"=="cim"|"`method'"=="bic"{
				* corrected item mean: the person's score times the item
				* mean, divided by the sum of the means of the items the
				* person answered, then truncated to [0,1]
				qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
				qui replace `tmp`i''=0 `ifif'
				forvalues j=1/`nbitems' {
					qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
				}
				qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
				qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
				qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
				if "`method'"=="cim" {
					qui replace `imp`i''=round(`imp`i'') `ifif'
				}
				else if "`method'"=="bic" {
					qui replace `imp`i''=uniform()<`imp`i'' `ifif'
				}
			}
		}
		else if "`method'"=="ims"|"`method'"=="bii" {
			* item mean: proportion of positive responses to the item
			qui gen `imp`i''=`mean`i'' `ifif'
			if "`method'"=="ims" {
				qui replace `imp`i''=round(`imp`i'') `ifif'
			}
			else if "`method'"=="bii" {
				qui replace `imp`i''=uniform()<`imp`i'' `ifif'
			}
		}
		else if "`method'"=="ics" {
			* donor item = the other item with the highest correlation
			local item=0
			local corrmax=-2
			forvalues j=1/`nbitems' {
				if `i'!=`j' {
					qui corr ``i'' ``j'' `ifif'
					if r(rho)>`corrmax'&r(rho)!=. {
						local item `j'
						local corrmax=r(rho)
					}
				}
			}
			di "A missing value for the item ``i'' is replaced by the value of the item `item'"
			qui gen `imp`i''=``i'' `ifif'
			qui replace `imp`i''=``item'' if ``i''==.&`if'
		}
		else if "`method'"=="log"|"`method'"=="bil" {
			* logistic model of the item on the other items, keeping the
			* terms retained by backward selection at the 5% level
			local liste`i'
			forvalues j=1/`nbitems' {
				if `i'!=`j' {
					local liste`i' `liste`i'' ``j''
				}
			}
			qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
			* drop the last five characters of the coefficient names
			* (presumably the trailing "_cons") to get the kept items
			local select=substr("`:colnames e(b)'",1,length("`:colnames e(b)'")-5)
			qui logit ``i'' `select' `ifif'
			qui predict `imp`i'' `ifif'
			if "`method'"=="log" {
				qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
			}
			else if "`method'"=="bil" {
				qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
			}
		}
		else if "`method'"=="worst" {
			qui gen `imp`i''=0 `ifif'
		}
	}

	* Observed responses always win over the candidate values
	forvalues i=1/`nbitems' {
		qui replace `imp`i''=``i'' if ``i''!=.&`if'
		qui gen `prefix'``i''=`imp`i'' `ifif'
	}

	* Undo the imputation for persons with more than max() missing items
	tempvar miss
	qui egen `miss'=rowmiss(`varlist')
	forvalues i=1/`nbitems' {
		qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
	}

end
|
68
Modules/ado/plus/i/imputeitems.hlp
Normal file
68
Modules/ado/plus/i/imputeitems.hlp
Normal file
@ -0,0 +1,68 @@
|
||||
{smcl}
|
||||
{* 30June2008}{...}
|
||||
{hline}
|
||||
help for {hi:imputeitems}{right:Jean-Benoit Hardouin}
|
||||
{hline}
|
||||
|
||||
{title:Imputation of missing item responses}
|
||||
|
||||
{p 8 14 2}{cmd:imputeitems} {it:varlist} [{it:if}] [,{cmdab:pref:ix}({it:string}) {cmdab:meth:od}({it:string}) {cmdab:rand:om} {cmdab:max}({it:#})]
|
||||
|
||||
|
||||
{title:Description}
|
||||
|
||||
{p 4 4 2}{cmd:imputeitems} imputes missing item responses in different ways: Item Mean Substitution (IMS), Person Mean Substitution (PMS), Corrected Item Mean Substitution (CIM), Interitem Correlation Substitution (ICS), logistic model (LOG) and Worst Case (WORST). A random process can be added to several methods.
|
||||
|
||||
{title:Options}
|
||||
|
||||
{p 4 8 2}{cmd:prefix} defines the prefix used to name the imputed variables (this prefix is followed by the name of the initial variable). By default, this prefix is "imp".
|
||||
|
||||
{p 4 8 2}{cmd:method} defines the method to impute missing data :
|
||||
|
||||
{p 8 8 2}{it:pms} computes the proportion of positive responses of each individual on the non-missing items, and imputes a deterministic result (if p<.5 then 0, else 1),
|
||||
|
||||
{p 8 8 2}{it:ims} computes the proportion of positive responses to each item, and imputes a deterministic result (if p<.5 then 0, else 1),
|
||||
|
||||
{p 8 8 2}{it:cim} computes the proportion of positive responses to each item, corrected by the ability of the individual, and imputes a deterministic result (if p<.5 then 0, else 1),
|
||||
|
||||
{p 8 8 2}{it:ics} searches, for each item, for the most correlated item and replaces a missing value by the response to this most correlated item (if the other response is missing too, there is no imputation),
|
||||
|
||||
{p 8 8 2}{it:log} explains the responses to each item by a logistic model where the independent variables are the responses to the other items. Only significant variables are retained (5%). This method imputes a deterministic result (if p<.5 then 0, else 1) [{it:log}] to missing responses (if the response to an independent variable is missing, there is no imputation),
|
||||
|
||||
{p 8 8 2}{it:worst} replaces the missing data by a 0.
|
||||
|
||||
{p 4 8 2}{cmd:random} adds a random effect to the imputation process (available only with {it:pms}, {it:ims}, {it:cim} or {it:log}). In these cases, the imputed value is randomly drawn from a binomial distribution using the parameter p.
|
||||
|
||||
{p 4 8 2}{cmd:max} allows imputing missing values only for individuals with a maximal number of missing values defined with this option.
|
||||
|
||||
{p 4 8 2}By default, the {it:pms} method is used.
|
||||
|
||||
{p 4 8 2}The old names of the methods ({it:bip}, {it:bii}, {it:bic} and {it:bil}) continue to work. They correspond to adding the {cmd:random} option to the {it:pms}, {it:ims}, {it:cim} and {it:log} methods.
|
||||
|
||||
|
||||
{title:Example}
|
||||
|
||||
{cmd:. imputeitems itemA*} /*PMS method, IMP prefix*/
|
||||
|
||||
{cmd:. imputeitems itemA*, prefix(cim) method(cim)}
|
||||
|
||||
{cmd:. imputeitems itemA*, method(log) random}
|
||||
|
||||
|
||||
{title:Reference}
|
||||
|
||||
{p 4 8 2}{cmd:Huisman M.} (2000), Imputation of missing item responses: some simple techniques. {it: Quality & Quantity}, {cmd:34}, 331-351.
|
||||
|
||||
|
||||
{title:Author}
|
||||
|
||||
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
|
||||
{p 4 8 2}EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
|
||||
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
|
||||
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
|
||||
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
|
||||
{p 4 8 2}Email:
|
||||
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
|
||||
{p 4 8 2}Websites {browse "http://www.anaqol.org":AnaQol}
|
||||
and {browse "http://www.freeirt.org":FreeIRT}
|
||||
|
78
Modules/ado/plus/i/inslist.ado
Normal file
78
Modules/ado/plus/i/inslist.ado
Normal file
@ -0,0 +1,78 @@
|
||||
program def inslist, rclass
*! NJC 1.1.0 14 December 2000
* NJC 1.0.0 7 November 2000
	* Insert words into a list at given positions.
	*
	*   inslist list , insert(words) pos(numlist) [ global(name) noisily ]
	*
	* pos() is sorted ascending and the k-th word of insert() goes with the
	* k-th sorted position.  Position p > 0 inserts after the p-th word of
	* the list, 0 inserts before the first word, and a negative position
	* counts from the end (-1 = after the last word).  If insert() has
	* fewer words than pos() has positions, insert() is recycled.
	*
	* The new list is returned in r(list), optionally displayed (noisily)
	* and optionally stored in the global macro named in global().
	version 6.0
	gettoken list 0 : 0, parse(",")
	if "`list'" == "" | "`list'" == "," {
		di in r "nothing in list"
		exit 198
	}

	local nlist : word count `list'

	syntax , Insert(string) Pos(numlist sort int >=-`nlist' <=`nlist') /*
	*/ [ Global(str) Noisily ]

	if length("`global'") > 8 {
		di in r "global name must be <=8 characters"
		exit 198
	}

	* positions -> locals p1, p2, ...
	tknz `pos', s(p)
	local np : word count `pos'

	* negative indexes to positive
	local j = 1
	while `j' <= `np' {
		if `p`j'' < 0 {
			local p`j' = `nlist' + 1 + `p`j''
		}
		local j = `j' + 1
	}

	* recycle the words to insert until there are at least `np' of them
	local nins : word count `insert'
	if `nins' < `np' {
		local rep = 1 + int( `np' / `nins')
		local insert : di _dup(`rep') "`insert' "
		local nins : word count `insert'
	}

	* words to insert -> locals i1, i2, ...
	tknz `insert', s(i)

	tokenize `list'

	* Walk the slots 0 (before the first word) to `nlist'.  Every position
	* is tested at every slot: once negative positions are converted the
	* positions need not be ascending any more (e.g. pos(-1 2)), and a
	* single forward pass over them would silently skip insertions.
	local i = 0
	while `i' <= `nlist' {
		if `i' > 0 {
			local newlist "`newlist'``i'' "
		}
		local j = 1
		while `j' <= `np' {
			if `p`j'' == `i' {
				local newlist "`newlist'`i`j'' "
			}
			local j = `j' + 1
		}
		local i = `i' + 1
	}

	if "`noisily'" != "" { di "`newlist'" }
	if "`global'" != "" { global `global' "`newlist'" }
	return local list `newlist'
end

program def tknz, rclass
* NJC 1.1.0 2 June 2000
	* Tokenize everything before the comma and copy the tokens into the
	* caller's local macros <stub>1, <stub>2, ... (via c_local).  Any other
	* options are passed through to -tokenize-.  Copying stops at the
	* first empty token.
	version 6.0
	gettoken list 0 : 0, parse(",")
	syntax , Stub(str) [ * ]
	tokenize `"`list'"' , `options'

	local i = 1
	while "``i''" != "" {
		c_local `stub'`i' `"``i''"'
		local i = `i' + 1
	}
end
|
||||
|
2
Modules/ado/plus/i/inslist.hlp
Normal file
2
Modules/ado/plus/i/inslist.hlp
Normal file
@ -0,0 +1,2 @@
|
||||
.h listutil
|
||||
|
27
Modules/ado/plus/i/isvar.ado
Normal file
27
Modules/ado/plus/i/isvar.ado
Normal file
@ -0,0 +1,27 @@
|
||||
*! NJC 1.0.0 20 Sept 2005
|
||||
program isvar, rclass
	* Split a list of names into those that -unab- accepts as variables of
	* the current dataset, returned unabbreviated in r(varlist), and the
	* others, returned as typed in r(badlist).  Both lists are displayed.
	version 8
	syntax anything

	foreach name of local anything {
		capture unab found : `name'
		if _rc {
			local badlist `badlist' `name'
		}
		else {
			local varlist `varlist' `found'
		}
	}

	di

	* one report line and one returned macro per non-empty list
	local lbl_varlist "variable"
	local lbl_badlist "not variable"
	foreach kind in varlist badlist {
		if "``kind''" != "" {
			local n : word count ``kind''
			local what = plural(`n', "`lbl_`kind''")
			di as txt "{p}`what': " as res "``kind''{p_end}"
			return local `kind' "``kind''"
		}
	}
end
|
56
Modules/ado/plus/i/isvar.hlp
Normal file
56
Modules/ado/plus/i/isvar.hlp
Normal file
@ -0,0 +1,56 @@
|
||||
{smcl}
|
||||
{* 20sep2005}{...}
|
||||
{hline}
|
||||
help for {hi:isvar}
|
||||
{hline}
|
||||
|
||||
{title:Filter names into variable names and others}
|
||||
|
||||
{p 8 17 2}
|
||||
{cmdab:isvar} {it:possiblevarlist}
|
||||
|
||||
|
||||
{title:Description}
|
||||
|
||||
{p 4 4 2}
|
||||
{cmd:isvar} takes a list of names that might name variables in
|
||||
your dataset and filters it into a list of those names that are indeed
|
||||
variable names and a list of the others.
|
||||
|
||||
{p 4 4 2}
|
||||
One application of {cmd:isvar} arises when you are moving between similar
|
||||
datasets, especially if they are large, but are not sure which variables are
|
||||
defined in which dataset. Commands such as {help describe} would fail at the
|
||||
first name not in fact a variable name. {cmd:isvar} offers a more direct way to
|
||||
establish existence or non-existence of several possible variables.
|
||||
|
||||
|
||||
{title:Saved results}
|
||||
|
||||
{p 4 8 2}r(varlist) names of variables in current dataset{p_end}
|
||||
{p 4 8 2}r(badlist) names that do not correspond to variables in current dataset
|
||||
|
||||
|
||||
{title:Examples}
|
||||
|
||||
{p 4 8 2}{cmd:. isvar mpg rep78 rep77}{p_end}
|
||||
{p 4 8 2}{cmd:. local OKlist "`r(varlist)'"}{p_end}
|
||||
{p 4 8 2}{cmd:. su `OKlist'}
|
||||
|
||||
|
||||
{title:Author}
|
||||
|
||||
{p 4 4 2}Nicholas J. Cox, Durham University, U.K.{break}
|
||||
n.j.cox@durham.ac.uk
|
||||
|
||||
|
||||
{title:Acknowledgements}
|
||||
|
||||
{p 4 4 2}This problem was suggested by Amadou Diallo.
|
||||
|
||||
|
||||
{title:Also see}
|
||||
|
||||
{p 4 13 2}
|
||||
Online: help for {help describe}; {help unab}
|
||||
|
Reference in New Issue
Block a user