Computed theoretical power for N=100 and N=200 scenarios

This commit is contained in:
2024-02-19 18:35:26 +01:00
parent ac9189d26a
commit 238852b08b
704 changed files with 261610 additions and 187 deletions

View File

@ -0,0 +1,31 @@
*! Author: Ross Harris
*! Date: 1 August 2008
* Confidence interval for I-sq, to be run after metan
version 8.0
program i2ci
    * Displays I-squared with a 95% confidence interval, using the interval
    * based on the statistical significance of Q (Higgins & Thompson 2002,
    * Appendix A2).
    * Takes no arguments: it reads r(df), r(het) and r(i_sq), so it must be
    * run while those results are still in r() (the file header says it is to
    * be run after metan).
    local k = r(df)+1
    local q = r(het)
    * Standard error of ln(H): one formula when Q exceeds k, another otherwise
    if `q' > `k'{
        local selogH = 0.5 * ( ln(`q') - ln(`k'-1) ) / ( sqrt(2*`q') - sqrt(2*`k'-3) )
    }
    else{
        local selogH = sqrt( (1/(2*(`k'-2))) * (1- (1/(3*(`k'-2)^2))) )
    }
    * A missing standard error (r() results absent, or k-2 equal to zero in the
    * second formula) would otherwise be turned by max(.,0) into a spurious
    * interval of 0.0% to 0.0%, so stop with an explicit message instead.
    if missing(`selogH') {
        di as err "confidence interval for I-sq cannot be computed from the current r() results"
        di as err "(run i2ci immediately after metan; with Q <= k at least 3 studies are needed)"
        exit 459
    }
    local H = sqrt(`q'/(`k'-1))
    local Hlow = exp(ln(`H')-1.96*`selogH')
    local Hupp = exp(ln(`H')+1.96*`selogH')
    * Convert the limits for H into limits for I-squared, truncated at zero
    local I2 = string( r(i_sq) , "%5.1f")
    local I2low = string( max( 100* ( (`Hlow'^2-1) / `Hlow'^2 ) , 0) , "%5.1f")
    local I2upp = string( max( 100* ( (`Hupp'^2-1) / `Hupp'^2 ) , 0) , "%5.1f")
    di in ye "I-sq= `I2'%, 95% CI: `I2low'% to `I2upp'%"
    di in whi "CI based on Higgins & Thompson, Statist. Med. 2002; 21:1539-1558,"
    di in whi "Appendix A2: Intervals based on the statistical significance of Q."
end

View File

@ -0,0 +1,146 @@
*! version 17feb09
program define icc23, rclass
    * Intraclass correlations for models 2 and 3, derived from a
    * repeated-measures ANOVA.
    *   varlist : dependent variable, rater (class) variable and subject id,
    *             in that order
    *   model() : 2 = two-way random effects (ICC[2,1] and ICC[2,k]);
    *             3 = two-way mixed effects (ICC[3,1] and ICC[3,k]); default 2
    *   level() : confidence level as a proportion strictly between 0 and 1;
    *             default .95
    * Exits with code 198 when model() or level() is out of range.
    * Stored results (scalars):
    *   r(model) r(level) r(N) r(k) r(p_rater)
    *   r(icc_1) r(lb_1) r(ub_1)   single-observation ICC and its limits
    *   r(icc_k) r(lb_k) r(ub_k)   mean-of-k ICC and its limits
    version 9
    syntax varlist(min=3 max=3) [if] [, MOdel(integer 2) LEvel(real .95)]
    tokenize "`varlist'"
    marksample touse
    local dv `1'
    local rater `2'
    local id `3'
    capture assert `model'==2 |`model'==3
    if _rc~=0 {
        di
        di in re "The ICC model must be specified as either 2 or 3"
        exit 198
    }
    capture assert `level'>0 & `level'<1.0
    if _rc~=0 {
        di
        di in re "The CI level must be a value between 0 and 1.0"
        exit 198
    }
    qui anova `dv' `rater' `id' `if', repeated(`rater')
    * These macros hold the e() expressions themselves; they are evaluated
    * wherever they are later substituted into an expression.
    local f1 e(F_1)
    local ss2 e(ss_2)
    local df2 e(df_2)
    local rss e(rss)
    local dfr e(df_r)
    local df1 e(df_1)
    local ss1 e(ss_1)
    local n e(N_bse) /* the number of subjects tested */
    local k = `df1'+1 /* the number of raters */
    *Compute F-test for rater
    local p_rater=Ftail(`df1',`dfr',`f1')
    *Compute components of ICC
    local bms = `ss2'/`df2'
    local ems = `rss'/`dfr'
    local jms = `ss1'/`df1'
    local Fj = `jms'/`ems'
    local alpha2 =1-((1-`level')/2)
    local cilevel = `level'*100
    * Results common to both models
    return scalar model = `model'
    return scalar level = `level'
    return scalar N = `n'
    return scalar k = `k'
    return scalar p_rater = `p_rater'
    if `model' == 2 {
        *Compute ICC Model 2 for single observations (ICC21) and for means (ICC2k)
        local num21 = `bms'-`ems'
        local dentmp = (`k'*(`jms'-`ems'))/`n' /* the ratio within the denominator */
        local den21 = `bms'+(`k'-1)*`ems'+`dentmp'
        local icc21 = `num21'/`den21'
        *Compute ICC21 confidence intervals (values will be used for ICC2k confidence intervals)
        local nu_num21 = (`k'-1)*(`n'-1)*(`k'*`icc21'*`Fj'+`n'*(1+(`k'-1)*`icc21')-`k'*`icc21')^2
        local nu_den21 = (`n'-1)*(`k'^2)*((`icc21')^2)*((`Fj')^2)+(`n'*(1+(`k'-1)*`icc21')-`k'*`icc21')^2
        local nu21 = `nu_num21'/`nu_den21'
        local Fsuper = invF(`df2',`nu21',`alpha2')
        local Fsub = invF(`nu21',`df2',`alpha2')
        local cilower = (`n'*(`bms'-`Fsuper'*`ems'))/(`Fsuper'*(`k'*`jms'+(`k'*`n'-`k'-`n')*`ems')+`n'*`bms')
        local ciupper = (`n'*(`Fsub'*`bms'-`ems'))/(`k'*`jms'+(`k'*`n'-`k'-`n')*`ems'+`n'*`Fsub'*`bms')
        *Compute ICC2k and its confidence intervals
        local num2k = `bms'-`ems'
        local dentmp1 = (`jms'-`ems')/`n' /* the ratio within the denominator */
        local den2k = `bms'+`dentmp1'
        local icc2k = `num2k'/`den2k'
        local cilowerK = (`k'*`cilower')/(1+(`k'-1)*`cilower')
        local ciupperK = (`k'*`ciupper')/(1+(`k'-1)*`ciupper')
        di
        di in gr " **************************************************************************"
        di in ye " Two-Way Random Effects Models: ICC[2,1] and ICC[2,k]"
        di in gr " **************************************************************************"
        di
        di in gr " The total number of subjects is: " in ye %3.0f `n'
        di in gr " The total number of raters is: " in ye %3.0f `k'
        di
        di in gr " Reliability of observations: ICC[2,1] = " in ye %4.3f `icc21' ", (" `cilevel' "% CI: " %5.3f `cilower' ", " %5.3f `ciupper' ")"
        di
        di in gr " Reliability of the mean: ICC[2,`k'] = " in ye %4.3f `icc2k' ", (" `cilevel' "% CI: " %5.3f `cilowerK' ", " %5.3f `ciupperK' ")"
        di
        di in gr " **************************************************************************"
        di
        if `p_rater' <= .05 {
            di in red " Note: There is a significant `rater' effect: p = " %5.4f `p_rater'
        }
        return scalar icc_1 = `icc21'
        return scalar lb_1 = `cilower'
        return scalar ub_1 = `ciupper'
        return scalar icc_k = `icc2k'
        return scalar lb_k = `cilowerK'
        return scalar ub_k = `ciupperK'
    }
    if `model' == 3 {
        * Compute ICC31
        local num31 = `bms'-`ems'
        local den31 = `bms'+(`k'-1)*`ems'
        local icc31 = `num31'/`den31'
        *Compute ICC Model 3 Confidence Interval (Single Observations)
        local fzero = `bms'/`ems'
        local fdistL = invF(`n'-1,(`n'-1)*(`k'-1),`alpha2')
        local fdistU = invF((`n'-1)*(`k'-1),`n'-1,`alpha2')
        local FL = `fzero'/`fdistL'
        local FU = `fzero'*`fdistU'
        local cilower = (`FL'-1)/(`FL'+(`k'-1))
        local ciupper = (`FU'-1)/(`FU'+(`k'-1))
        *Compute ICC3k
        local num3k = `bms'-`ems'
        local den3k = `bms'
        local icc3k = `num3k'/`den3k'
        *Compute ICC3k confidence intervals
        local cilowerK = 1-(1/`FL')
        local ciupperK = 1-(1/`FU')
        di
        di in gr " **************************************************************************"
        di in ye " Two-Way Mixed Effects Models: ICC[3,1] and ICC[3,k]"
        di in gr " **************************************************************************"
        di
        di in gr " The total number of subjects is: " in ye %3.0f `n'
        di in gr " The total number of raters is: " in ye %3.0f `k'
        di
        di in gr " Reliability of observations: ICC[3,1] = " in ye %4.3f `icc31' ", (" `cilevel' "% CI: " %5.3f `cilower' ", " %5.3f `ciupper' ")"
        di
        di in gr " Reliability of the mean: ICC[3,`k'] = " in ye %4.3f `icc3k' ", (" `cilevel' "% CI: " %5.3f `cilowerK' ", " %5.3f `ciupperK' ")"
        di
        di in gr " **************************************************************************"
        di
        if `p_rater' <= .05 {
            di in red " Note: There is a significant `rater' effect: p = " %5.4f `p_rater'
        }
        return scalar icc_1 = `icc31'
        return scalar lb_1 = `cilower'
        return scalar ub_1 = `ciupper'
        return scalar icc_k = `icc3k'
        return scalar lb_k = `cilowerK'
        return scalar ub_k = `ciupperK'
    }
end

View File

@ -0,0 +1,110 @@
.-
help for ^icc23^
.-
^Calculation of ICC Models 2 and 3^
^---------------------------------^
.^icc23^ <dv> <classvar> <within_var>, MOdel(#) LEvel(#)
^Description^
^-----------^
^icc23^ computes the intra-class correlation for random effects models based on repeated
measures ANOVA. These models are ICC[2,1], ICC[2,k], ICC[3,1], and ICC[3,k], as described
by Shrout and Fleiss, 1979 (see reference below). (For the ICC[1,1] and ICC[1,k] models
based on a one-way ANOVA, see @loneway@). ^icc23^ runs a repeated measures ANOVA to derive
the appropriate estimates and degrees of freedom. In the event there is a significant F-test
for the ^classvar^ (e.g., a significant difference among raters), the program will provide
the p-value from the ANOVA table.
Data must be in the "long" format. If not, use the @reshape@ command to reconfigure the data.
Four types of ICC models are considered:
ICC[2,1]: reflects the case where the same group of subjects is rated by k raters,
interest is in the reliability of individual scores. In this model, raters
are considered a representative sample of a population of similar raters. This
model is a two-way random effects model.
ICC[2,k]: is the same approach as ICC[2,1] above, but interest is in the reliability
of the MEAN score, rather than among single observations.
ICC[3,1]: reflects the case where the same group of subjects is rated by k raters,
interest is in the reliability of individual scores. In this model, the only
raters of interest are those participating in the study (e.g., there is no
intention of generalizing the raters' scores to a larger population of raters).
This model is considered a "mixed" model (subjects are random, raters are fixed).
ICC[3,k]: is the same approach as ICC[3,1] above, but interest is in the reliability
of the MEAN score, rather than among single observations.
Three inputs are required:
dv is the dependent variable
classvar the class variable refers to the factor that is repeated within subjects, e.g., raters,
devices, time points, etc., e.g., the variable which would be entered as the
repeated() variable in the ANOVA option.
within_var refers to the "within subject" variable, e.g., subjects being assessed
^NOTE: The order of entry of the variables is critical!^
^Options^
^-------^
^MO^del(#) refers to the type of model to be estimated. ICC model 2 is the default (
producing ICC[2,1] and ICC[2,k] estimates).
^LE^vel(#) the degree of precision of the confidence interval, entered as a decimal.
The default is 95%, i.e., level(.95)
Examples
--------
For ICC[2,1] and ICC[2,k]: Two-way random effects (Subjects and raters are considered to be
sampled from larger populations); 95% CIs are assumed.
^.icc23 score rater person_id^
For ICC[3,1] and ICC[3,k]: Two-way mixed model: Subjects are random, but raters are fixed
(i.e., the raters are not considered a sample -- they are the
only raters of interest); 90% CIs are requested.
^.icc23 score rater person_id, model(3) level(.90)^
References
----------
Shrout PE, Fleiss JL. Intraclass correlation: uses in assessing rater reliability. Psychol
Bull, 1979; 86: 420-428.
Portney LG, Watkins MP. Foundations of Clinical Research: Applications to Practice (2nd ed.).
Prentice-Hall, Inc: Upper Saddle River, NJ., 2000.
Author
------
Paul F. Visintainer, PhD
Baystate Health System
Springfield, MA 01089
visint46@gmail.com
Luis C.Orozco, MD, MSc
Facultad de Salud
Universidad Industrial de Santander
Colombia
lcorovar@gmail.com
Also see
--------
Manual or on-line help for: @loneway@, @reshape@, @iclassr@, @iclassr2@

View File

@ -0,0 +1,55 @@
*! November 5, 2008 by Paul F. Visintainer, PhD
program define iccconf
    * Immediate command: confidence interval for a single intraclass
    * correlation (ICC) from F-distribution critical values.
    *   syntax : iccconf icc k reps [, level(#)]
    *   icc    : the ICC estimate, strictly between 0 and 1
    *   k      : number of subjects (integer, 2 or more)
    *   reps   : number of repeated assessments per subject (integer, 2 or more)
    *   level(): confidence level as a proportion strictly between 0 and 1;
    *            default .95
    * Invalid input stops the program through -error 197-.
    version 8.0
    syntax anything [, level(real .95)]
    tokenize "`anything'"
    local icc `1'
    local k `2' /* number of subjects */
    local reps `3'
    confirm number `icc'
    confirm integer number `k'
    confirm integer number `reps'
    if `icc'<=0 | `icc'>=1 {
        di
        di in red "RE-enter " in ye "ICC" in red " between 0 and 1"
        error 197
    }
    * Both bounds are checked: a zero or negative level would otherwise be
    * accepted and produce a meaningless interval.
    if `level'<=0 | `level'>=1.0 {
        di
        di in red " Confidence level must be between 0 and 1"
        error 197
    }
    * With fewer than 2 subjects or 2 assessments one of the degrees of
    * freedom computed below is zero and no critical value exists.
    if `k'<2 | `reps'<2 {
        di
        di in red " The number of subjects and of repeated assessments must each be 2 or more"
        error 197
    }
    * Defining the components of the test
    local alpha = 1 - `level'
    local N = `k'*`reps'
    local df1 = `k' - 1
    local df2 = `N' - `k'
    local F = ((`icc'*`reps') - `icc' + 1)/(1-`icc')
    * lFcrit is the critical value used for the LOWER limit (upper-tail
    * quantile); uFcrit is the one used for the UPPER limit.
    local lFcrit = invF(`df1',`df2',(1-`alpha'/2))
    local uFcrit = invF(`df1',`df2',(`alpha'/2))
    *Compute confidence limits based on Rosner, "Fundamental of Bios, 6th", pg. 615
    local ul = (`F'/`uFcrit'-1)/(`reps'+(`F'/`uFcrit')-1)
    local ll = (`F'/`lFcrit'-1)/(`reps'+(`F'/`lFcrit')-1)
    di
    di in gr " ******************************************************************************* "
    di in ye " Confidence Interval for the INTRACLASS COEFFICIENT "
    di in gr " ******************************************************************************* "
    di
    di in gr " The ICC with " in ye %3.0f `level'*100 "% " in gr "CI is: " %3.2f in ye `icc' " (" %3.2f in ye `ll' ", " %3.2f in ye `ul' ")"
    di
    di in gr " The number of subjects is: " %5.0f in ye `k'
    di
    di in gr " The number of repeated assessments is: " %2.0f in ye `reps'
end

View File

@ -0,0 +1,62 @@
.-
help for ^iccconf^
.-
Confidence Interval for A Single Intraclass Correlation (ICC)
-------------------------------------------------------------
.^iccconf <icc> <k> <reps>, level(#)^
^Description^
^-----------^
^iccconf^ is an immediate command that computes a confidence interval for a
single intraclass correlation (ICC). The procedure is based on Rosner's approach
using the F-test.
Three inputs are required:
^icc^ is the intraclass correlation. The input must be a value between 0 and 1
^k^ is the number of subjects
^reps^ is the number of repeated assessments or "within" pair
measurements.
^Option^
^------^
level(#) The level option allows the user to modify the confidence limit.
Use values between 0 and 1. The default is .95.
Example
--------
To find a 95% confidence interval for an ICC of .914, based on 5 observations
with 2 replications each:
^.iccconf .914 5 2^
For a 90% confidence interval for the above example, type:
^.iccconf .914 5 2, level(.9)^
References
----------
Rosner, B. "Fundamentals of Biostatistics, 6th ed" Duxbury, Press (The Thompson
Company): Belmont, CA, 2006, pgs 615-616.
Author
------
Paul F. Visintainer, PhD
Springfield, MA 01089
visint46@gmail.com
Also see
--------
Manual or on-line help for: @loneway@, @xtreg@, or ^bootcor^, ^xtrho^ if installed

905
Modules/ado/plus/i/ice.ado Normal file
View File

@ -0,0 +1,905 @@
*! version 1.1.1 PR 23sep2005.
*
* History of ice
* 1.1.1 23sep2005 Better error trapping for passive() and substitute() options.
* 1.1.0 23aug2005 Replace -draw- option with -match-. Default becomes draw.
* Trace option documented, now has argument for filename.
* Report number of rows with 0, 1, 2, ... missing values.
* Arrange variables in increasing order of missingness when imputing.
* Split ties at random when more than one observation satisfies the
* prediction matching criterion
* 1.0.4 21jul2005 Trap and report error when running uvis
* 1.0.3 08jun2005 Tidy up display of equations when have multiple lines (long equations)
* 1.0.3 03jun2005 Silence file load/save
* 1.0.2 20may2005 Changed files containing imputations to tempfiles (standalone mode)
* (Angela Wood reported problem).
* 1.0.1 04may2005 Added a trace to a file (undocumented in help file).
* 1.0.0 18apr2005 First release, based on mice.
*
* History of mice
* 1.0.3 13apr2005 Minor tidying up, including recode of ChkIn and deletion of strdel.
* Check if prediction equations have a variable on both sides.
* 1.0.2 17feb2005 Added code to take care of inherited missingness of passive variables robustly.
* 1.0.1 21jan2005 Added display of regression command in showing prediction equations.
* 1.0.0 20jan2005 First release, based on mvis2/_mvis2.
*
* History of mvis
* 1.1.0 18jan2005 categoric() option removed.
* New options dryrun, passive(), substitute(), eq() added.
* Improvements to output showing actual prediction equations.
* 1.0.5 19nov2004 Delete dummy variables for categoric() variables with no missing data from output file
* Found problem with bsample in Stata 7 with "if" clause and boot option.
* Revert to Stata 8 for mvis, _mvis and uvis.
* 1.0.4 18nov2004 Weights not working (syntax error), fixed.
* 1.0.3 16nov2004 Categoric() option added to deal with unordered categoric
* covariates, updated default handling of such variables
* 1.0.2 16oct2004 Saving, using etc of file safest with compound quotes, fixed.
*
program define ice, rclass
    * Front end for multiple imputation by chained equations.
    * Parses the user's options, then calls _ice once per imputation.
    * Two modes, selected by whether s(MI_m) is set on entry:
    *   - standalone: m() imputations are written to tempfiles, stacked
    *     vertically with an imputation number _j (and a row id), and saved
    *     to the file named in using;
    *   - called by mi_impute: the original data and each imputation are
    *     saved to the files named in s(MI_tfile) and s(MI_tfile#).
    * With dryrun only the prediction equations are shown; nothing is saved.
    version 8
    * Check for _I* variables, could be created by xi:
    capture describe _I*
    if _rc==0 {
        di as err _n "Warning: _I* variables detected in the dataset - was xi: used?"
        di as inp "Use of xi: with mvis is liable to give incorrect results."
        di as inp "If you wish to model categoric covariates with dummy"
        di as inp "variables, please recalculate the dummies via the passive() option"
        di as inp "and use the substitute() option to identify the dummies as predictors." _n
    }
    local m `s(MI_m)'
    if "`m'"!="" {
        * Called by mi_impute
        local mitools mitools
        local mopt
        local uopt
        local fn0 `s(MI_tfile)'
        local using `fn0'
        forvalues i=1/`m' {
            local fn`i' `s(MI_tfile`i')'
        }
    }
    else {
        * standalone
        local mitools
        local mopt "m(int 1)"
        local uopt [using/]
    }
    syntax varlist(min=2 numeric) [if] [in] [aweight fweight pweight iweight] `uopt', /*
    */ [ `mopt' REPLACE Seed(int 0) BOot MAtch DRYrun * ]
    * Check if there are variables called boot and/or match
    if "`boot'"=="boot" {
        cap confirm var boot
        if _rc local options `options' boot(`varlist')
        else local options `options' boot(boot)
    }
    if "`match'"=="match" {
        cap confirm var match
        if _rc local options `options' match(`varlist')
        else local options `options' match(match)
    }
    if `seed'>0 set seed `seed'
    local first first
    if "`dryrun'"!="" {
        if `"`using'"'=="" {
            tempname fn
            local using `fn'
        }
        _ice `varlist' `if' `in' [`weight' `exp'] using `using', `options' first dryrun
        di as text _n "End of dry run. No imputations were done, no files were created."
        exit
    }
    preserve
    if "`mitools'"=="" {
        if `m'<1 {
            di as err "number of imputations must be 1 or more"
            exit 198
        }
        if `"`using'"'=="" {
            if "`dryrun'"=="" {
                di as err "using required"
                exit 100
            }
        }
        else {
            if substr(`"`using'"',-4,.)!=".dta" {
                local using `using'.dta
            }
            if "`replace'"=="" {
                confirm new file `"`using'"'
            }
        }
        * One tempfile per imputation; the equations are shown only on the first call
        forvalues i=1/`m' {
            tempfile fn`i'
            _ice `varlist' `if' `in' [`weight' `exp'] using `fn`i'', `options' `first'
            di as text `i' ".." _cont
            local first
        }
        * Join files of imputations vertically using code from mijoin.ado
        quietly {
            local J _j
            forvalues j=1/`m' {
                * could automate this part
                use `"`fn`j''"', clear
                chkrowid
                local I `s(I)'
                if "`I'"=="" {
                    * create row number
                    local I _i
                    cap drop `I'
                    gen long `I'=_n
                    lab var `I' "obs. number"
                }
                cap drop `J'
                gen int `J'=`j'
                lab var `J' "imputation number"
                save `"`fn`j''"', replace
            }
            use `"`fn1'"', clear
            forvalues j=2/`m' {
                append using `"`fn`j''"'
            }
            char _dta[mi_id] `I'
        }
        save `"`using'"', `replace'
    }
    else {
        * Save original data and imputations to tempfiles for mi_impute to stack
        * fn0,...,fn`m' are local macros created by mi_impute and supplied as s() functions;
        * they contain the actual names of tempfiles, hence the need for compound quotes.
        local original original
        forvalues i=0/`m' {
            * Remove any existing copy of the target file. The earlier code
            * applied -drop- (which acts on variables) to the filename, so it
            * never removed anything; -erase- is the file-level command.
            * capture keeps this silent when the file does not exist yet.
            if "`replace'"!="" capture erase `"`fn`i''"'
            _ice `varlist' `if' `in' [`weight' `exp'] using `"`fn`i''"', ///
                `options' `first' `original' mitools
            di as text `i' ".." _cont
            local original
            if `m'>0 local first
        }
    }
end
program define chkrowid, sclass
    * Looks up the row-identifier name recorded in the dataset characteristic
    * _dta[mi_id]. s(I) is set to that name only when the characteristic is
    * non-empty and names an existing variable; otherwise s() is left as is.
    version 8
    local rowid : char _dta[mi_id]
    if "`rowid'"!="" {
        capture confirm var `rowid'
        if _rc==0 {
            sreturn local I `rowid'
        }
    }
end
*! Based on _mvis2 version 1.0.2 PR 19jan2005.
program define _ice, rclass
version 8
* Performs one imputation run over the variables in varlist and saves the
* resulting dataset to the file named in using.
* When the original option is given it only saves the current data to using
* and exits.
* Otherwise, for each variable that has missing values and no passive()
* definition, a prediction equation and a regression command are set up and
* uvis is called to draw imputations, cycling cycles() times over the
* variables in order of increasing number of missing values.
* Variables defined in passive() are recalculated from their expressions.
* Stored results: r(ni#) for every variable, plus r(impvl) and r(imputed)
* when the mitools option is not specified.
syntax varlist(min=2 numeric) [if] [in] [aw fw pw iw] using/, /*
*/ [ BOot(varlist) CC(varlist) CMd(string) CYcles(int 10) noCONStant MAtch(varlist) /*
*/ DRYrun EQ(string) first Genmiss(string) Id(string) mitools ON(varlist) original /*
*/ PASsive(string) noSHoweq SUBstitute(string) TRace(string) ]
if "`original'"!="" {
* Save original data
quietly save `"`using'"', replace
exit
}
local nvar: word count `varlist'
if "`id'"!="" {
confirm new var `id'
}
else local id _i
* All changes below are made to a preserved copy of the data
preserve
tempvar touse order
quietly {
marksample touse, novarlist
if "`cc'`on'"!="" {
markout `touse' `cc' `on'
}
* Record sort order
gen long `order'=_n
lab var `order' "obs. number"
* For standard operation (no `on' list), disregard any completely missing rows in varlist, among marked obs
if "`on'"=="" {
tempvar rmis
egen int `rmis'=rmiss(`varlist') if `touse'==1
count if `rmis'==0
replace `touse'=0 if `rmis'==`nvar'
replace `rmis'=. if `rmis'==`nvar'
lab var `rmis' "#missing values"
if "`first'"!="" & "`showeq'"=="" noi tab `rmis', missing
drop `rmis'
}
* Deal with weights
frac_wgt `"`exp'"' `touse' `"`weight'"'
local wgt `r(wgt)'
* Sort out cmds (not checking if each cmd is valid - any garbage may be entered)
if "`cmd'"!="" {
* local cmds "regress logistic logit ologit mlogit"
detangle "`cmd'" cmd "`varlist'"
forvalues i=1/`nvar' {
if "${S_`i'}"!="" {
local cmd`i' ${S_`i'}
}
}
}
* Default for all uvis operations is nomatch, meaning draw
if "`match'"!="" {
tokenize `match'
while "`1'"!="" {
ChkIn `1' "`varlist'"
if `s(k)'>0 {
local match`s(k)' match
}
mac shift
}
}
if "`boot'"!="" {
tokenize `boot'
while "`1'"!="" {
ChkIn `1' "`varlist'"
if `s(k)'>0 {
local boot`s(k)' boot
}
mac shift
}
}
* anyerr is set to 1 when passive() or substitute() contain redundant entries
local anyerr 0
if `"`passive'"'!="" {
tempvar passmiss
/*
Defines vars that are functions or transformations of others in varlist.
They are (may be) "passively imputed". "\" is an expression separator.
Default is comma.
Comma may not always be appropriate (i.e. may appear in an expression).
*/
detangle "`passive'" passive "`varlist'" \
local haserr 0
forvalues i=1/`nvar' {
if "${S_`i'}"!="" {
local exp`i' ${S_`i'}
ParsExp `exp`i''
local exclude `s(result)'
if "`exclude'"!="" {
* Count missingness of this passive variable
egen int `passmiss'=rmiss(`exclude') if `touse'
count if `passmiss'>0 & `touse'==1
local nimp`i'=r(N)
if `nimp`i''==0 {
local v: word `i' of `varlist'
noi di as err "passive definition `v' = (${S_`i'}) redundant: `exclude' has no missing data."
local ++haserr
}
drop `passmiss'
}
}
}
if `haserr'>0 {
di as err "`haserr' error(s) found in option " as inp "passive(`passive')"
local anyerr 1
}
}
if "`substitute'"!="" {
* defines vars that are to be substituted in the recalc context
detangle "`substitute'" substitute "`varlist'"
local haserr 0
forvalues i=1/`nvar' {
if "${S_`i'}"!="" {
local sub`i' ${S_`i'}
local v: word `i' of `varlist'
count if missing(`v') & `touse'==1
if r(N)==0 {
noi di as err "substitute for variable `v' redundant: `v' has no missing data."
local ++haserr
}
}
}
if `haserr'>0 {
noi di as err "`haserr' error(s) found in option " as inp "substitute(`substitute')"
local anyerr 1
}
}
if `"`eq'"'!="" {
* defines equations specified vars.
detangle "`eq'" equation "`varlist'"
forvalues i=1/`nvar' {
if "${S_`i'}"!="" {
local Eq`i' ${S_`i'}
* Check that eq vars are in mainvarlist
tokenize `Eq`i''
while "`1'"!="" {
ChkIn `1' "`varlist'"
mac shift
}
}
}
}
if `anyerr' {
di as err _n "specification error(s) found."
exit 198
}
count if `touse'
local n=r(N)
/*
Count potentially imputable missing values for each variable,
and where necessary create an equation for each
*/
local to_imp 0 // actual number of vars with missing values to be imputed
local recalc 0 // number of passively imputed vars to be recalculated
tempvar xtmp // temporary holding area
local nimp // list of number of missing values for each variable
forvalues i=1/`nvar' {
local xvar: word `i' of `varlist'
if "`genmiss'"!="" {
tempvar mvar`i'
gen byte `mvar`i''=missing(`xvar') if `touse'==1
lab var `mvar`i'' "1 if `xvar' missing, 0 otherwise"
}
local x`i' `xvar'
count if missing(`xvar') & `touse'==1
* Create prediction equation for each active variable
if r(N)>0 & `"`exp`i''"'=="" {
local nimp`i'=r(N)
* active var: has missing obs, not passive
local ++to_imp
local main`i' 1
* Keep missingness of the original variable
tempvar miss`i'
gen byte `miss`i''=missing(`xvar') if `touse'==1
* Define equation for this variable - user definition from eq() takes precedence
if "`Eq`i''"!="" {
local eq`i' `Eq`i''
}
else {
* Remove variable from mainvarlist
local eq`i': list varlist - xvar
}
if "`cmd`i''"=="" {
/*
Assign default cmd for vars not so far accounted for.
cmd is relevant only for vars requiring imputation, i.e. with >=1 missing values.
Use logit if 2 distinct values, mlogit if 3-5, otherwise regress.
*/
inspect `xvar' if `touse'
local nuniq=r(N_unique)
if `nuniq'==1 {
noi di as err "only 1 distinct value of `xvar' found"
exit 2000
}
if `nuniq'==2 {
count if `xvar'==0 & `touse'==1
if r(N)==0 {
noi di as err "variable `xvar' unsuitable for imputation,"
noi di as err "binary variables must include at least one 0 and one non-missing value"
exit 198
}
local cmd`i' logit
}
else if `nuniq'<=5 {
local cmd`i' mlogit
}
else local cmd`i' regress
}
if "`cmd`i''"=="mlogit" {
* With mlogit, if xvar carries a score label,
* drop it since it causes prediction problems
local xlab: value label `xvar'
capture label drop `xlab'
}
if "`on'"=="" {
* Initially fill missing obs cyclically with nonmissing obs
sampmis `xtmp'=`xvar'
replace `xvar'=cond(`touse'==0, ., `xtmp')
drop `xtmp'
}
else replace `xvar'=. if `touse'==0
* NOTE(review): the macro suffix is not defined anywhere in this program,
* so it expands to nothing in the label below - confirm this is intended.
local lab`i' `xvar' imput.`suffix' (`nimp`i'' values)
}
else {
local main`i' 0
if "`nimp`i''"=="" { // may have been set earlier by consideration of ParsExp
local nimp`i'=r(N)
}
if `"`exp`i''"'!="" {
if "`Eq`i''"!="" {
noi di as err "equation" as input " `xvar':`Eq`i'' " ///
as err "invalid, `xvar' is passively imputed"
exit 198
}
local ++recalc
}
}
local nimp `nimp' `nimp`i''
}
if `to_imp'==0 {
noi di as err _n "All relevant cases are complete, no imputation required."
return scalar N=`n'
return scalar imputed=0
exit 2000
}
* Remove passivevars from equations as necessary
forvalues i=1/`nvar' {
if `"`exp`i''"'!="" {
ParsExp `exp`i''
local exclude `s(result)'
* remove current passivevar from each relevant equation
local passive `x`i''
tokenize `exclude'
while "`1'"!="" {
* identify which variable in mainvarlist we are looking at
ChkIn `1' "`varlist'"
local index `s(k)'
* Remove `passive' from equation of variable
* whose index in mainvarlist is `index'
* (only allowed to be done if there is no
* user equation Eq`' for var #`index')
if "`eq`index''"!="" & "`Eq`index''"=="" {
local eq`index': list eq`index' - passive
}
mac shift
}
}
}
if "`substitute'"!="" {
forvalues i=1/`nvar' {
if `main`i'' & "`sub`i''"!="" {
* substitute for this variable in all equations where it is a covariate
forvalues j=1/`nvar' {
if `main`j'' & (`j'!=`i') & "`Eq`j''"=="" {
local res: list eq`j' - x`i'
* substitute sub`i' if necessary i.e. if not already there
tokenize `sub`i''
while "`1'"!="" {
cap ChkIn `1' "`res'"
if "`s(k)'"=="0" {
local res `res' `1'
}
mac shift
}
local eq`j' `res'
}
}
}
}
}
* Show prediction equations at first imputation
if "`first'"!="" {
local errs 0
local longstring 55 // max display length of variables in equation
local off 13 // blanks to col 13 on continuation lines
if "`showeq'"=="" {
noi di as text _n " Variable {c |} Command {c |} Prediction equation" _n ///
"{hline 12}{c +}{hline 9}{c +}{hline `longstring'}"
}
forvalues i=1/`nvar' {
if "`exp`i''"!="" & `nimp`i''>0 {
local eq "[Passively imputed from `exp`i'']"
local formatoutput 0
}
else if "`eq`i''"=="" {
local eq "[No missing data in estimation sample]"
local formatoutput 0
}
else {
local eq `eq`i''
local formatoutput 1
}
if "`showeq'"=="" {
if `formatoutput' {
formatline, n(`eq') maxlen(`longstring')
local nlines=r(lines)
forvalues j=1/`nlines' {
if `j'==1 noi di as text %11s abbrev("`x`i''",11) ///
" {c |} " %-8s "`cmd`i''" "{c |} `r(line`j')'"
else noi di as text _col(`off') ///
"{c |}" _col(23) "{c |} `r(line`j')'"
}
}
else noi di as text %11s abbrev("`x`i''",11) ///
" {c |} " %-8s "`cmd`i''" "{c |} `eq'"
}
// Check for invalid equation - xvar on both sides
if "`eq`i''"!="" {
if `: list x`i' in eq`i'' {
noi di as err "Error!" as inp " `x`i''" ///
as err " found on both sides of prediction equation"
local ++errs
}
}
}
if `errs' {
di as err _n `errs' " error(s) found. Consider using the passive() option to fix the problem"
exit 198
}
if "`dryrun'"!="" {
exit
}
noi di as text _n "Imputing " _cont
}
* A single pass is used when only one variable is imputed or on() is given
if `to_imp'==1 | "`on'"!="" {
local cycles 1
}
* Update recalculated variables
if `"`passive'"'!="" & `recalc'>0 {
forvalues i=1/`nvar' {
if "`exp`i''"!="" {
replace `x`i''=`exp`i''
}
}
}
* Impute sequentially `cycles' times by regression switching (van Buuren et al)
tempvar y imputed
* Sort variables on number of missing values, from low to high numbers.
* Of benefit to the mice algorithm since less missings get imputed first.
listsort "`nimp'"
forvalues i=1/`nvar' {
local r`i' `s(index`i')'
}
if `"`trace'"'!="" {
tempname tmp
* create names
local postvl cycle
forvalues r=1/`nvar' {
local i `r`r'' // antirank: vars with small #missing come first
if `main`i'' local postvl `postvl' `x`i''_mean
}
postfile `tmp' `postvl' using `"`trace'"', replace
}
forvalues j=1/`cycles' {
if `"`trace'"'!="" local posts (`j')
forvalues r=1/`nvar' {
local i `r`r'' // antirank, ensuring vars with small #missing come first
if `main`i'' {
* Each var is reimputed based on imputed values of other vars
local type: type `x`i''
gen `type' `y'=`x`i'' if `miss`i''==0 & `touse'==1
if "`on'"=="" {
local vars `eq`i''
}
else local vars `on'
* uvis is derived from uvisamp4.ado
cap uvis `cmd`i'' `y' `vars' `wgt' if `touse', ///
gen(`imputed') `boot`i'' `match`i'' `constant'
if _rc {
noi di as err _n(2) "Error running -uvis-"
noi di as err "I detected a problem with running uvis with command `cmd`i'' on response `x`i''"
noi di as err "and covariates `vars'."
if "`cmd`i''"=="mlogit" {
noi di as inp "The troublesome regression command is mlogit."
noi di as inp "Try reducing the number of categories of `x`i'' or using ologit if appropriate"
}
exit 198
}
if `"`trace'"'!="" {
summarize `imputed' if missing(`y') & `touse'==1
local mean=r(mean)
local posts `posts' (`mean')
/*
noi di as text %11s abbrev("`x`i''",10) %7.0g `mean' _cont
foreach v of var `vars' {
if "`v'"=="`x`i''" {
noi di as result " ." _cont
}
else noi di as result _skip(1) %7.0g _b[`v'] _cont
}
noi di
*/
}
replace `x`i''=`imputed'
drop `y' `imputed'
}
}
if `"`trace'"'!="" post `tmp' `posts'
if `recalc'>0 { // update covariates needing recalculation
forvalues i=1/`nvar' {
if "`exp`i''"!="" & `nimp`i''>0 {
replace `x`i''=`exp`i''
}
}
}
if `to_imp'==1 & "`first'"!="" {
noi di as text _n "[Only 1 variable to be imputed, therefore no cycling needed.]"
}
}
}
if `"`trace'"'!="" postclose `tmp'
* Save to file with cases in original order
quietly {
local impvl /* list of newvars containing imputations */
sort `order'
forvalues i=1/`nvar' {
return scalar ni`i'=`nimp`i''
if "`genmiss'"!="" {
cap drop `genmiss'`x`i''
rename `mvar`i'' `genmiss'`x`i''
}
if `main`i'' {
local impvl `impvl' `x`i''
lab var `x`i'' "`lab`i''"
cap drop `miss`i''
}
}
drop `touse'
if "`mitools'"=="" {
* Save list of imputed variables with imputations to char _dta[mi_ivar]
char _dta[mi_ivar] `impvl'
char _dta[mi_id] `id'
rename `order' `id'
return local impvl `impvl'
return scalar imputed=`to_imp'
}
else drop `order'
save `"`using'"', replace
}
end
*! v 1.0.0 PR 01Jun2001.
program define sampmis
    version 7
    * Creates the new variable named on the left of the equals sign as a copy
    * of the variable on the right, with its missing observations filled by
    * repeating the nonmissing ones cyclically.
    * Side effect: the data are re-sorted. Nonmissing observations come first
    * in random order; missing observations follow in their existing order.
    * This routine always reproduces the same sort order among the missings.
    * Note technique to avoid Stata creating arbitrary sort order for missing
    * observations of the source variable; affects entire reproducibility of
    * mvi sampling.
    * Exits with code 2000 when the source has no nonmissing observations.
    syntax newvarname =/exp
    quietly {
        * With nothing to copy from, the block count below would be missing
        * and the loop would fail with an obscure syntax error after the data
        * had already been re-sorted; stop here with a clear message instead.
        count if !missing(`exp')
        if r(N)==0 {
            noi di as err "`exp' has no nonmissing observations; nothing to sample from"
            exit 2000
        }
        tempvar u
        * Sort non-missing data at random, sort missing data systematically
        gen double `u'=cond(missing(`exp'), _n, uniform())
        sort `u'
        count if !missing(`exp')
        local nonmis=r(N)
        drop `u'
        local type: type `exp'
        gen `type' `varlist'=`exp'
        * Fill the missing tail one block of nonmis observations at a time,
        * each block copying the values found nonmis*i rows earlier
        local blocks=int((_N-1)/`nonmis')
        forvalues i=1/`blocks' {
            local j=`nonmis'*`i'
            local j1=`j'+1
            local j2=min(`j'+`nonmis',_N)
            replace `varlist'=`exp'[_n-`j'] in `j1'/`j2'
        }
    }
end
program define ChkIn, sclass
    version 7
    * Looks up the first argument in the list given as the second argument.
    * s(k) receives its 1-based position, or 0 when it is not in the list;
    * in the latter case an error message is shown and the program exits
    * with code 198 (s(k) has already been set to 0 by then).
    args v pool
    sreturn clear
    local where: list posof "`v'" in pool
    sreturn local k `where'
    if `where'==0 {
        di as err "`v' is not a valid covariate"
        exit 198
    }
end
*! version 1.0.0 PR 20dec2004.
program define ParsExp, sclass
    version 8
    * Extracts the variable names that appear in an expression.
    * The expression is split at operators, brackets and blanks; every token
    * that -confirm var- accepts is collected once, in order of first
    * appearance, and the list is returned in s(result).
    tokenize `*', parse(" +-/^()[]{}.*=<>!$%&|~`'")
    local vl
    while "`1'"!="" {
        cap confirm var `1'
        if _rc==0 {
            * Compare whole words. A substring search would wrongly treat a
            * name as already listed when it is contained in a longer name
            * collected earlier (for example x after x1).
            local seen: list posof "`1'" in vl
            if `seen'==0 {
                local vl `vl' `1'
            }
        }
        mac shift
    }
    sreturn local result `vl'
end
program define detangle
version 8
/*
Disentangle varlist:string clusters---e.g. for DF.
Returns values in $S_*.
If `4' is null, `3' is assumed to contain rhs
and lowest and highest value checking is disabled.
Heavily based on frac_dis.ado, but "=" disallowed as separator
and "\" allowed (for use by passive()).
*/
* Arguments: target = the option text to parse, tname = option name used in
* error messages, rhs = list of variables the clusters may refer to,
* separator = cluster separator (comma when omitted).
* On return, global S_j holds the string assigned to the j-th variable of
* rhs, and is cleared for variables that received none.
args target tname rhs separator
if "`separator'"=="" {
local separator ","
}
unab rhs:`rhs'
local nx: word count `rhs'
forvalues j=1/`nx' {
local n`j': word `j' of `rhs'
}
* Split target into clusters at the separator
tokenize "`target'", parse("`separator'")
local ncl 0 /* # of separator-delimited clusters */
while "`1'"!="" {
if "`1'"=="`separator'" {
mac shift
}
local ncl=`ncl'+1
local clust`ncl' "`1'"
mac shift
}
* A trailing separator leaves an empty last cluster; discard it
if "`clust`ncl''"=="" {
local --ncl
}
if `ncl'>`nx' {
di as err "too many `tname'() values specified"
exit 198
}
/*
Disentangle each varlist:string cluster
*/
forvalues i=1/`ncl' {
tokenize "`clust`i''", parse(":")
if "`2'"!=":" {
* No colon: allowed only for the first cluster, whose string is then
* applied to every variable in rhs
if `i'>1 {
noi di as err "invalid `clust`i'' in `tname'() (syntax error)"
exit 198
}
local 2 ":"
local 3 `1'
local 1
forvalues j=1/`nx' {
local 1 `1' `n`j''
}
}
local arg3 `3'
unab arg1:`1'
tokenize `arg1'
* Record the string against the position of each named variable in rhs
while "`1'"!="" {
ChkIn `1' "`rhs'"
local v`s(k)' `arg3'
mac shift
}
}
forvalues j=1/`nx' {
if "`v`j''"!="" {
global S_`j' `v`j''
}
else global S_`j'
}
end
*! Based on artformatnos.ado v 1.0.0 PR 26Feb2004
program define formatline, rclass
    * Wraps the items of n() into display lines of at most maxlen() characters.
    *   n()         : items to lay out (split at blanks, or at separator())
    *   maxlen()    : maximum line length
    *   format()    : optional display format applied to every item
    *   leading()   : when positive, one blank is put in front of every item
    *   separator() : optional parse character(s) for n()
    * Returns r(lines), the number of lines, and r(line1), r(line2), ...
    version 8
    syntax, N(string) Maxlen(int) [ Format(string) Leading(int 1) Separator(string) ]
    if `leading'<0 {
        di as err "invalid leading()"
        exit 198
    }
    * Split n() into tokens
    if "`separator'"=="" tokenize "`n'"
    else {
        tokenize "`n'", parse("`separator'")
    }
    * Keep every token that is not the separator itself
    local count 0
    while "`1'"!="" {
        if "`1'"!="`separator'" {
            local ++count
            local item`count' `1'
        }
        macro shift
    }
    local nl 0
    local used 0
    forvalues i=1/`count' {
        if "`format'"=="" local piece `item`i''
        else {
            capture local piece: display `format' `item`i''
            if _rc {
                di as err "invalid format attempted for: " `"`item`i''"'
                exit 198
            }
        }
        if `leading'>0 {
            local piece " `piece'"
        }
        local w=length("`piece'")
        local need=`used'+`w'
        if `need'<=`maxlen' {
            * item still fits on the current line
            local used `need'
            local buf "`buf'`piece'"
        }
        else {
            * current line is full: store it and start a new one with this item
            local ++nl
            return local line`nl'="`buf'"
            local buf "`piece'"
            local used `w'
        }
    }
    * Store whatever is left as the last line
    local ++nl
    return local line`nl'="`buf'"
    return scalar lines=`nl'
end
*! version 1.1.0 PR 02aug2005.
program define listsort, sclass
version 6
* Sorts the list passed as the first token (ascending unless reverse is
* given; numerically unless lexicographic is given).
* Returns s(i1), s(i2), ... = the sorted items; s(index1), s(index2), ... =
* the original position of each sorted item; s(list) and s(index) = the same
* two sequences as space-separated lists.
* Exits without setting anything when no list is supplied.
gettoken p 0 : 0, parse(" ,")
if `"`p'"'=="" {
exit
}
sret clear
syntax , [ Reverse Lexicographic ]
* NOTE(review): the option is read back through the shortened macro name
* lexicog; presumably this depends on the version 6 setting above - confirm
* before changing either.
local lex="`lexicog'"!=""
if "`reverse'"!="" { local comp < }
else local comp >
local np: word count `p'
local i 1
* Load the items into p1, p2, ... and initialise their index numbers
while `i'<=`np' {
local p`i': word `i' of `p'
local index`i' `i'
if !`lex' { confirm number `p`i'' }
local i=`i'+1
}
* Apply shell sort (Kernighan & Ritchie p 58)
local gap=int(`np'/2)
while `gap'>0 {
local i `gap'
while `i'<`np' {
local j=`i'-`gap'
while `j'>=0 {
* i and j count from 0 here; j1 and j2 are the matching 1-based macro numbers
local j1=`j'+1
local j2=`j'+`gap'+1
if `lex' { local swap=(`"`p`j1''"' `comp' `"`p`j2''"') }
else local swap=(`p`j1'' `comp' `p`j2'')
if `swap' {
local temp `p`j1''
local p`j1' `p`j2''
local p`j2' `temp'
* swap indexes
local temp `index`j1''
local index`j1' `index`j2''
local index`j2' `temp'
}
local j=`j'-`gap'
}
local i=`i'+1
}
local gap=int(`gap'/2)
}
* Publish the sorted items and their original positions in s()
local p
local index
local i 1
while `i'<=`np' {
sret local i`i' `p`i''
sret local index`i' `index`i''
local p `p' `p`i''
local index `index' `index`i''
local i=`i'+1
}
/* Find antirank of each obs
forvalues i=1/`np' {
forvalues j=1/`np' {
if
*/
sret local list `p'
sret local index `index'
end
* Processing of this file stops at the exit command below, so the lines that
* follow it are never executed.
* NOTE(review): they look like a leftover fragment of an earlier ranking
* routine and refer to macros (c, nx, n) not defined at this level - confirm
* before removing.
exit
sort `c'
local i 0
while `i'<`nx' {
local i=`i'+1
/*
Store positions of sorted predictors in user's list
*/
local j 0
while `j'<`nx' {
local j=`j'+1
if `i'==`n'[`j'] {
local r`j' `i'
local j `nx'
}
}
}

562
Modules/ado/plus/i/ice.hlp Normal file
View File

@ -0,0 +1,562 @@
{smcl}
{* 30aug2005}{...}
{hline}
help for {hi:ice}, {hi:uvis}{right:(SJ5-4: st0067_2; SJ5-2: st0067_1; SJ4-3: st0067)}
{hline}
{title:Multiple imputation by the MICE system of chained equations}
{p 8 17 2}
{cmd:ice}
{it:mainvarlist}
{cmd:using} {it:filename}[{cmd:.dta}]
{ifin}
{weight}
[{cmd:,}
{cmdab:bo:ot}[{cmd:(}{it:varlist}{cmd:)}]
{cmd:cc(}{it:varlist}{cmd:)}
{cmdab:cm:d(}{it:cmdlist}{cmd:)}
{cmdab:cy:cles(}{it:#}{cmd:)}
{cmdab:dry:run}
{cmd:eq(}{it:eqlist}{cmd:)}
{cmdab:g:enmiss(}{it:string}{cmd:)}
{cmdab:i:d(}{it:string}{cmd:)}
{cmd:m(}{it:#}{cmd:)}
{cmdab:ma:tch}[{cmd:(}{it:varlist}{cmd:)}]
{cmdab:nocons:tant}
{cmdab:nosh:oweq}
{cmd:on(}{it:varlist}{cmd:)}
{cmdab:pass:ive(}{it:passivelist}{cmd:)}
{cmdab:sub:stitute(}{it:sublist}{cmd:)}
{cmd:replace}
{cmdab:se:ed(}{it:#}{cmd:)}
{cmdab:tr:ace(}{it:filename}{cmd:)}]
{p 8 17 2}
{cmd:uvis}
{it:regression_cmd}
{it:yvar}
{it:xvarlist}
{ifin}
{weight}
{cmd:,}
{cmdab:g:en(}{it:newvarname}{cmd:)}
[{cmdab:bo:ot}
{cmdab:ma:tch}
{cmdab:nocons:tant}
{cmd:replace}
{cmdab:se:ed(}{it:#}{cmd:)}]
{p 4 4 2}
where
{p 8 8 2}
{it:regression_cmd} may be
{helpb logistic},
{helpb logit},
{helpb mlogit},
{helpb ologit},
or
{helpb regress}.
{p 4 4 2}
All weight types supported by {it:regression_cmd} are allowed; see {help weight}.
{title:Description}
{p 4 4 2}
{cmd:ice} imputes missing values
in {it:mainvarlist} by using switching regression, an iterative multivariable
regression technique. The abbreviation MICE means multiple imputation by
chained equations and was apparently coined by Stef van Buuren. {cmd:ice}
implements MICE for Stata. Sets of imputed and nonimputed variables are
stored to a new file called {it:filename}. Any number of complete imputations
may be created.
{p 4 4 2}
{cmd:uvis} (univariate imputation sampling) imputes missing values in the
single variable {it:yvar} based on multiple regression on {it:xvarlist}.
{cmd:uvis} is called repeatedly by {cmd:ice} in a regression switching mode to
perform multivariate imputation.
{p 4 4 2}
The missing observations are assumed to be missing at random (MAR) or
missing completely at random (MCAR), according to the jargon. See, for
example, van Buuren et al. (1999) for an explanation of these concepts.
{p 4 4 2}
Please note that {cmd:ice} and {cmd:uvis} require Stata 8 or later.
There have been incompatibility issues with Stata 7 and earlier.
{title:Options for ice}
{p 4 8 2}
{cmd:boot}[{cmd:(}{it:varlist}{cmd:)}] instructs that each member of
{it:varlist}, a subset of {it:mainvarlist}, be imputed with the {cmd:boot}
option of {cmd:uvis} activated. If {cmd:(}{it:varlist}{cmd:)} is omitted,
all members of {it:mainvarlist} with missing observations are imputed using
the {cmd:boot} option of {cmd:uvis}.
{p 4 8 2}
{cmd:cc(}{it:varlist}{cmd:)} prevents imputation of missing data in
{it:mainvarlist} for cases in which any member of {it:varlist} has a missing
value. "cc" signifies "complete case". Note that members of {it:varlist} are
used for imputation if they appear in {it:mainvarlist}, but not otherwise. Use
of this option is equivalent to entering {cmd:if}
{cmd:~missing(}{it:var1}{cmd:) &} {cmd:~missing(}{it:var2}{cmd:)} ..., where
{it:var1}, {it:var2}, ... denote the members of {it:varlist}.
{p 4 8 2}
{cmd:cmd(}{it:cmdlist}{cmd:)} defines the regression commands to be used for
each variable in {it:mainvarlist}, when it becomes the dependent variable in
the switching regression procedure used by {cmd:uvis} (see {hi:Remarks}). The
first item in {it:cmdlist} may be a command, such as {cmd:regress}, or may have
the syntax {it:varlist}{cmd::}{it:cmd}, specifying that command {it:cmd}
applies to all the variables in {it:varlist}. Subsequent items in
{it:cmdlist} must follow the latter syntax, and each item should be followed
by a comma.
{p 8 8 2}
The default {it:cmd} for a variable is {cmd:logit} when there are two distinct
values, {cmd:mlogit} when there are 3-5 and {cmd:regress} otherwise.
{p 8 18 2} Example: {cmd:cmd(regress)} specifies that all variables are
to be imputed by {cmd:regress}, overriding the defaults.
{p 8 18 2} Example: {cmd:cmd(x1 x2:logit, x3:regress)} specifies that
{cmd:x1} and {cmd:x2} are to be imputed by {cmd:logit}, {cmd:x3} by
{cmd:regress} and all others by their default choices.
{p 4 8 2}
{cmd:cycles(}{it:#}{cmd:)} determines the number of cycles of regression
switching to be carried out. The default is {cmd:cycles(10)}.
{p 4 8 2}
{cmd:dryrun} does a "dry run"; that is, {cmd:ice}
reports the prediction equations it has constructed from the various
inputs. No imputation is done, and no files are created. It is not
mandatory to specify an output file with {cmd:using} for a dry run.
Sometimes the prediction equation set-up needs to be carefully
checked before running what may be a lengthy imputation process.
{p 4 8 2}
{cmd:eq(}{it:eqlist}{cmd:)} allows one to define customized prediction
equations for any subset of variables in {it:mainvarlist}. The option,
particularly when used with {cmd:passive()}, allows
great flexibility in the possible imputation schemes. The
syntax of {it:eqlist} is {it:varname1}{cmd::}{it:varlist1}
[{cmd:,}{it:varname2}{cmd::}{it:varlist2} ...], where each
{it:varname#} (or {it:varlist#})
is a member (or subset) of {it:mainvarlist}. It is your responsibility to ensure
that each equation is sensible. {cmd:ice} places no restrictions
except to check that all variables mentioned are indeed in
{it:mainvarlist} and that an equation is not defined
for a variable specified to be passively imputed
(see the {cmd:passive()} option). Note that {cmd:eq()} takes
precedence over all default definitions and assumptions about
the way a given variable in {it:mainvarlist} will be imputed.
The default, if the {cmd:passive()} and {cmd:substitute()}
options are not invoked, is that each
variable in {it:mainvarlist} with any missing data is imputed from all
the other variables in {it:mainvarlist}.
{p 4 8 2}
{cmd:genmiss(}{it:string}{cmd:)} creates an indicator variable for the
missingness of data in any variable in {it:mainvarlist} for which at least one
value has been imputed. The indicator variable is set to missing for
observations excluded by {cmd:if}, {cmd:in}, etc. The indicator variable for
{it:xvar} is named {it:string}{it:xvar}.
{p 4 8 2}
{cmd:id(}{it:string}{cmd:)} creates a variable called {it:string} containing
the original sort order of the data. The default {it:string} is {cmd:_i}.
{p 4 8 2}
{cmd:m(}{it:#}{cmd:)} defines {it:#} as the number of imputations required
(minimum 1, no upper limit). The default is {cmd:m(1)}.
{p 4 8 2}
{cmd:match}[{cmd:(}{it:varlist}{cmd:)}] instructs that each member of
{it:varlist} be imputed with the {cmd:match} option of {cmd:uvis}.
This provides prediction matching for each member of {it:varlist}.
If {cmd:(}{it:varlist}{cmd:)} is omitted then all relevant variables are
imputed with the {cmd:match} option of {cmd:uvis}. The default, if
{cmd:match()} is not specified, is to draw from the posterior
predictive distribution of each variable requiring imputation.
{p 4 8 2}
{cmd:noconstant} suppresses the regression constant in all regressions.
{p 4 8 2}
{cmd:noshoweq} suppresses the presentation of the prediction equations.
{p 4 8 2}
{cmd:on(}{it:varlist}{cmd:)} changes the operation of {cmd:ice} in a major
way. With this option, {cmd:uvis} imputes each member of {it:mainvarlist}
univariately on {it:varlist}. This provides a convenient way of producing
multiple imputations when imputation for each variable in {it:mainvarlist} is
to be done univariately on a set of complete predictors.
{p 4 8 2}
{cmd:passive(}{it:passivelist}{cmd:)} allows the use of "passive" imputation
of variables that depend on other variables, some of which are imputed.
The syntax of {it:passivelist} is {it:varname}{cmd::}{it:exp}
[{cmd:\}{it:varname}{cmd::}{it:exp} ...]. Notice the requirement to use
"\" as a separator between items in {it:passivelist}, rather than the usual comma;
the reason is that a comma may be a valid part of an expression.
The option is most easily explained by example. Suppose x1 is a categorical variable
with 3 levels, and that two dummy variables x1a, x1b have been created by the commands
{p 8 8 2}
{cmd:. generate byte x1a=(x1==2)}{break}
{cmd:. generate byte x1b=(x1==3)}
{p 8 8 2}
Now suppose that x1 is to be imputed by the {cmd:mlogit} command and is
to be treated as the two dummy variables x1a and x1b when predicting other
variables. Use of {cmd:mlogit} is achieved by the option
{cmd:cmd(x1:mlogit)}. When x1 is imputed, we want x1a and x1b to be updated
with new values which depend on the imputed values of x1. This may be
achieved by specifying {cmd:passive(x1a:x1==2 \ x1b:x1==3)}. It is necessary
also to remove x1 from the list of predictors when variables other than x1 are
being imputed, and this is done by using the {cmd:substitute()} option; in the
present example, you would specify {cmd:substitute(x1:x1a x1b)}.
{p 8 8 2}
Note that although in this example x1a will take the (possibly
unintended) value of 0 when x1 is missing, {cmd:ice} is careful to
ensure that x1a (and x1b) inherit the missingness of x1 and are
passively imputed following active imputation of missing values
of x1. If this were not done, incorrect results could occur. The
responsibility of the user is to create x1a and x1b before running
{cmd:ice} such that their missing values are identical
to those of x1.
{p 8 8 2}
A second example is multiplicative interactions between variables, for
example, between x1 and x2 (e.g., x12=x1*x2); this could be entered as
{cmd:passive(x12:x1*x2)}. It would cause the interaction term
x12 to be omitted when either x1 or x2 was being imputed, since it would
make no sense to impute x1 from its interaction with x2.
{cmd:substitute()} is not needed here.
{p 8 8 2}
It should be stressed that variables to be imputed passively must already
exist and must be included in {it:mainvarlist}; otherwise, they will not be
recognized.
{p 4 8 2}
{cmd:substitute(}{it:sublist}{cmd:)} is typically used with the
{cmd:passive()} option to represent multilevel categorical variables
as dummy variables in models for predicting other variables. See
{cmd:passive()} for more details. The syntax of {it:sublist} is
{it:varname}{cmd::}{it:dummyvarlist}
[{cmd:,}{it:varname}{cmd::}{it:dummyvarlist} ...], where {it:varname} is the
name of a variable to be substituted and {it:dummyvarlist} is the list of
dummy variables representing it.
{p 4 8 2}
{cmd:replace} permits {it:filename} to be overwritten with new data.
{p 4 8 2}
{cmd:seed(}{it:#}{cmd:)} sets the random-number seed to {it:#}.
To reproduce a set of imputations, the same random-number seed should be used.
The default is {cmd:seed(0)}, meaning no seed is set by the program.
{p 4 8 2}
{cmd:trace(}{it:filename}{cmd:)} monitors the convergence of the imputation
algorithm. For each original variable with missing values, the mean of the
imputed values is stored as a variable in {it:filename}, together
with the cycle number at which that
mean was calculated. The results are stored only for the final imputation.
For diagnostic purposes, it is sensible to run {cmd:trace()}
with {cmd:m(1)} and many cycles, such as {cmd:cycles(100)}.
When the run is complete, it is helpful to load {it:filename}
into memory and plot the mean for each imputed
variable against the cycle number. If necessary, smoothing may be applied
to clarify any apparent pattern. Convergence is judged to have occurred
when the pattern of the imputed means is random.
The number of cycles needed for convergence is usually obvious from the appearance
of the plot.
{title:Options for uvis}
{p 4 8 2}
{cmd:gen(}{it:newvar}{cmd:)} is not optional. {it:newvar} contains original
(nonmissing) and imputed (originally missing) values of {it:yvar}.
{p 4 8 2}
{cmd:boot} invokes a bootstrap method for creating imputed values (see Remarks).
{p 4 8 2}
{cmd:match} creates imputations by prediction matching. The default is to draw
imputations at random from the posterior distribution of the missing values of
{it:yvar}, conditional on the observed values and the members of
{it:xvarlist}. See Remarks for further details.
{p 4 8 2}
{cmd:noconstant} suppresses the regression constant in all regressions.
{p 4 8 2}
{cmd:replace} permits {it:newvar} (see {cmd:gen(}{it:newvar}{cmd:)})
to be overwritten with new data. {cmd:replace} may not be abbreviated.
{p 4 8 2}
{cmd:seed(}{it:#}{cmd:)} sets the random-number seed to {it:#}.
See {hi:Remarks} for comments on how to ensure reproducible imputations
by using the {cmd:seed()} option.
The default is {cmd:seed(0)}, meaning no seed is set by the program.
{title:Remarks}
{p 4 4 2}
{cmd:uvis} imputes {it:yvar} from {it:xvarlist} according to the following
algorithm (see van Buuren et al. (1999, section 3.2) for further technical
details):
{p 8 12 2}
1. Estimate the vector of coefficients (beta) and the residual variance
by regressing the nonmissing values of {it:yvar} on the current "completed"
version of {it:xvarlist}. Predict the fitted values {it:etaobs} at the
nonmissing observations of {it:yvar}.
{p 8 12 2}
2. Draw at random a value (sigma_star) from the posterior distribution of the
residual standard deviation.
{p 8 12 2}
3. Draw at random a value (beta_star) from the posterior distribution of beta,
allowing, through sigma_star, for uncertainty in beta.
{p 8 12 2}
4. Use beta_star to predict the fitted values {it:etamis}
at the missing observations of {it:yvar}.
{p 8 12 2}
5. The imputed values are predicted directly from beta_star, sigma_star and
the covariates. When imputation is by linear regression ({cmd:regress}
command), this step assumes that {it:yvar} is Normally distributed, given the
covariates. For other types of imputation, samples are drawn from the
appropriate distribution.
{p 4 4 2}
With the {cmd:match} option, step 5 is replaced by the following.
For each missing observation of {it:yvar} with prediction {it:etamis},
find the non-missing observation of {it:yvar} whose prediction
({it:etaobs}) on observed data is closest to {it:etamis}. This closest
non-missing observation is used to impute the missing value of {it:yvar}.
{p 4 4 2}
The default draw method is not robust to departures from Normality and
may produce implausible imputations. For example, if the original distribution
is skew and positive-valued, the imputed distribution will not necessarily
have the appropriate amount of skewness, nor will all the imputed values
necessarily be positive. Log transformation of positive variables may greatly
improve the appropriateness of the imputations.
{p 4 4 2}
The alternative {cmd:match} method is recommended only for continuous variables
when the Normality assumption is clearly untenable, even approximately.
It is not necessary, nor is it recommended, for binary, ordered categorical or
nominal variables. {cmd:match} may work well when the distribution of a
continuous variable is very non-Normal, but it may sometimes result in biased
imputations.
{p 4 4 2}
With the {cmd:boot} option, steps 2-4 are replaced by a bootstrap estimation of
beta_star; beta_star
is estimated by regressing {it:yvar} on {it:xvarlist} after taking a bootstrap sample
of the non-missing observations. This has the advantage of robustness since the
distribution of beta is no longer assumed to be multivariate normal.
{p 4 4 2}
Note that {cmd:uvis} will not impute observations for which a value
of a variable in {it:xvarlist} is missing. However, all original
(missing or nonmissing) observations of {it:yvar} will be copied into
{it:newvarname} in such cases. This is a change from the first release of
{cmd:uvis} (with {cmd:mvis}). Previously, {it:newvarname} would be set to
missing whenever a value of a variable in {it:xvarlist} was missing,
irrespective of the value of {it:yvar}.
{p 4 4 2}
Missing data for ordered (or unordered) categorical covariates should
be imputed by using the {cmd:ologit} (or {cmd:mlogit}) command. In these cases,
prediction matching is done on the scale of the mean absolute difference
in the predicted class probabilities, preceded by logit transformation.
{p 4 4 2}
{cmd:ice} carries out multivariate imputation in {it:mainvarlist} using
regression switching (van Buuren et al. 1999) as follows:
{p 8 12 2}
1. Ignore any observations for which {it:mainvarlist} has only missing values,
or if the {cmd:cc(}{it:varlist}{cmd:)} option has been specified, for
which any member of {it:varlist} has a missing value.
{p 8 12 2}
2. For each variable in {it:mainvarlist} with any missing data, randomly order
that variable and replicate the observed values across the missing cases.
This step initializes the iterative procedure by ensuring that no relevant
values are missing.
{p 8 12 2}
3. For each variable in {it:mainvarlist} in turn, impute missing values by
applying {cmd:uvis} with the remaining variables as covariates.
{p 8 12 2}
4. Repeat step 3 {cmd:cycles()} times, replacing the imputed values with updated
values at the end of each cycle.
{p 4 4 2}
A single imputation sample is created for each variable with any relevant
missing values.
{p 4 4 2}
Van Buuren recommends {cmd:cycles(20)} but goes on to say that 10 or even 5
iterations are probably sufficient. We have chosen a compromise default of 10.
{p 4 4 2}
"Multiple imputation" (MI) implies the creation and analysis of several
imputed datasets. To do this, one would run {cmd:ice} with {it:m} set
to a suitable number, for example 5. To obtain final estimates
of the parameters of interest and their standard errors,
one would fit a model in
each imputation and carry out the appropriate post-MI averaging procedure
on the results from the {it:m} separate imputations. A suitable
estimation tool for this purpose is {helpb micombine}.
{title:Handling categorical variables}
{p 4 4 2}
Binary variables present no difficulty: by default, in the MICE
procedure, when such a variable is the response, it is
predicted from other variables by using logistic regression;
when it is a covariate, it is modeled in the only way possible,
effectively as a single dummy variable. Categorical variables with 3 or
more levels may in principle be treated in different ways.
By default, in {cmd:ice} variables with 3-5 levels are modeled
using multinomial logistic regression ({cmd:mlogit} command) when
the response, and as a single linear term when a covariate. The
same behavior occurs with the ordered logistic model ({cmd:ologit}
command), requested via the {cmd:cmd()} option. The use of dummy variables
instead of a single linear term may be imposed as described under
the {cmd:passive()} option. The requisite dummy variables
must be created before {cmd:ice} is invoked. Variables with 6 or
more levels are treated as ordered and continuous, but again
different choices may be imposed by use of the {cmd:cmd()},
{cmd:passive()} and {cmd:substitute()} options.
{p 4 4 2}
You should be aware that
unless the dataset is large, use of the {cmd:mlogit} command may produce
unstable estimates if the number of levels is too large, and
may compromise the accuracy of the imputations. It is hard to
predict when this will occur.
{p 4 4 2}
Note that due to a peculiarity of the way the {cmd:mlogit} command works,
variables with score labels cause problems to {cmd:ice}
and {cmd:uvis} when missing data are imputed using {cmd:mlogit}.
Score labels for such variables are removed in the file of imputed
data. See also the related comment on {hi:Postestimation prediction} in
{helpb micombine}.
{title:Further notes}
{p 4 4 2}
{cmd:ice} determines the order of imputing variables in the round
of chained equations according to the amount of missing data.
Variables with the least missingness are imputed first.
{p 4 4 2}
An important application of MI is to investigate possible models, for example
prognostic models, in which selection of influential variables is required
(Clark and Altman 2003). For example, the stability of the final model across
the imputation samples is of interest. This area of inquiry is in its infancy.
{p 4 4 2}
In survival analysis, it is recommended to include the censoring indicator
and the log of the survival time in the variables to be used for imputation.
Van Buuren et al. (1999) give a detailed discussion of the different types
of covariate that can be included in the imputation model and discuss the
important issue of how to deal with variables which are missing completely at
random (MCAR), missing at random (MAR), and missing not at random (MNAR).
{p 4 4 2}
See also Van Buuren's web site http://www.multiple-imputation.com for further
information and software sources.
{title:Examples}
{p 4 10 2}
{cmd:. uvis regress y x1 x2 x3, gen(ym)}
{p 4 10 2}
{cmd:. ice x1 x2 x3 using imputed, m(5)}
{p 4 10 2}
{cmd:. ice x1 x2 x3 using imputed, m(5) cycles(20) cc(x4 x5)}
{p 4 10 2}
{cmd:. ice x1-x5 using imputed, m(10) boot match(x1 x2 x3) cmd(x1 x2:mlogit, x3:ologit) id(pid) seed(101) genmiss(m_)}
{p 4 10 2}
{cmd:. ice x1 x1a x1b x2 x3 x23 using imputed, m(5) cmd(x1:ologit) passive(x1a:x1==2 \x1b:x1==3 \x23:x2*x3) substitute(x1:x1a x1b)}
{p 4 10 2}
{cmd:. ice y1 y2 y3 x1 x2 x3 x4 using imputed, m(5) eq(y1:x1 x2 y2, y2:y1 x3 x4, y3:y1 y2) match(y3)}
{title:Acknowledgement}
{p 4 4 2}
I am grateful to Gillian Raab for pointing out certain issues with the prediction
matching approach, particularly that it is only useful with continuous variables.
As a result, the default imputation method has been
changed from matching to drawing from the predictive distribution. Gillian also
suggested imputing the variables in reverse order of the amount of missingness,
and selecting the imputed value at random from the set determined by the available
matching predictions. Both suggestions have been implemented in this software update.
{title:Author}
{p 4 4 2}
Patrick Royston, MRC Clinical Trials Unit, London.{break}
patrick.royston@ctu.mrc.ac.uk
{title:References}
{p 4 8 2}
van Buuren S., H. C. Boshuizen and D. L. Knook. 1999. Multiple imputation of
missing blood pressure covariates in survival analysis.
{it:Statistics in Medicine} {cmd:18}:681-694.
Also see http://www.multiple-imputation.com.
{p 4 8 2}
Carlin J. B., N. Li, P. Greenwood, and C. Coffey. 2003. Tools for analyzing
multiple imputed datasets. {it:Stata Journal} 3(3): 226-244.
{p 4 8 2}
Clark T. G. and D. G. Altman. 2003. Developing a prognostic model
in the presence of missing data: an ovarian cancer case-study.
{it:Journal of Clinical Epidemiology} 56: 28-37.
{p 4 8 2}
Royston P. 2004. Multiple imputation of missing values.
{it:Stata Journal} 4(3): 227-241.
{title:Also see}
{psee}
Online: {helpb mijoin}, {helpb micombine}, {helpb mitools}, and related programs,
if installed
{p_end}

View File

@ -0,0 +1,53 @@
*! version 1.1.1 STB-35 sg65
program define iclassr
version 4.0
* Intra-class correlation from a one-way analysis of variance.
* Call: iclassr response_var group_var [aweight] [if] [in] [, center(mean|med) ems noisily]
*   center()  reference value subtracted from the observed F: 1 by default,
*             or the mean or median of the F distribution
*   ems       derive the group size used in the formula from the per-group
*             sums of the weights instead of the simple average N/groups
*   noisily   show the oneway output
* Leaves the intra-class r in global S_1 and the estimated reliability of a
* group mean in global S_2, and displays both.
* Any failure of oneway now stops the program with that error code; earlier
* only code 134 was checked, so other failures were swallowed and results
* were computed from stale _result() values.
local varlist "req ex min(2) max(2)"
local if "opt"
local in "opt"
local weight "aweight"
local options "Center(string) Ems NOIsily"
parse "`*'"
parse "`varlist'", parse(" ")
local weight "[`weight'`exp']"
* exp has the form  = weightvar  so its second word is the weight variable
local wt : word 2 of `exp'
tempvar use
quietly {
    mark `use' `if' `in'
    markout `use' `varlist' `wt'
}
tempname gr df fm
if "`ems'" != "" {
    preserve
    qui keep if `use'
    sort `2'
    if "`wt'" == "" {
        * no weights given: every observation counts once
        tempvar Wt
        qui gen byte `Wt' = 1
        local wt "`Wt'"
    }
    tempvar sw
    * running sum of the weights within each group; the last observation of a
    * group therefore carries that group's total weight
    qui by `2': gen double `sw' = sum(`wt')
    qui summ `sw' if `2' < `2'[_n+1]
    * after summarize: _result(1) is the count, _result(3) the mean and
    * _result(4) the variance of the group totals
    scalar `df' = _result(1) - 1
    scalar `gr' = _result(1) * _result(3)
    scalar `gr' = (`gr' - _result(3) - _result(4)*`df'/`gr')/`df'
    capture `noisily' oneway `1' `2' `weight'
    if _rc { error _rc }
}
else {
    capture `noisily' oneway `1' `2' `weight' if `use'
    if _rc { error _rc }
    * presumably _result(1) is N and _result(3) the between-group df after
    * oneway, making this the average group size - TODO confirm
    scalar `gr' = _result(1)/ (_result(3) + 1)
}
* From here on df is reused to hold the centering value for the F statistic.
* NOTE(review): _result(3), _result(5) and _result(6) look like the numerator
* df, denominator df and F left behind by oneway - confirm.
scalar `df' = 1
if "`center'" == "mean" { scalar `df' = _result(5)/(_result(5)-2) }
else if "`center'" == "med" {
    scalar `df' = invfprob(_result(3), _result(5), 0.5)
}
scalar `fm' = max(_result(6) - `df', 0)
global S_1 = `fm' / (`fm' + `df'* `gr')
global S_2 = `fm' / (`fm' + `df')
di _new in gr "Intra-`2' r =" in ye %7.4f $S_1 _new in gr /*
*/ "Estimated reliability of a `2' mean (n=" in ye %3.2f `gr' in gr /*
*/ ") =" in ye %7.4f $S_2
end

View File

@ -0,0 +1 @@
.h l1way

View File

@ -0,0 +1,38 @@
*! version 1.1.1 STB-35 sg65
program define iclassr2
version 4.0
* Intra-class correlation for two paired measurements held in wide format.
* Call: iclassr2 response_var1 response_var2 [if] [in] [, center(mean|med)]
* Leaves the intra-class r in global S_1 and the estimated reliability of the
* mean of the two measurements in global S_2, and displays both together
* with the number of usable pairs.
local varlist "req ex min(2) max(2)"
local if "opt"
local in "opt"
local options "Center(string)"
parse "`*'"
parse "`varlist'", parse(" ")
local weight "[`weight'`exp']"
tempvar ok comb
tempname ctr npair fobs
quietly {
    * pairs with a missing value in either variable are left out
    mark `ok' `if' `in'
    markout `ok' `varlist'
    * variance of the pair sums
    gen `comb' = `1' + `2' if `ok'
    summ `comb'
    if _result(1) == 0 {
        error 2000
    }
    scalar `npair' = _result(1)
    scalar `fobs' = _result(4)
    * divide by a term built from the variance and the mean of the pair differences
    replace `comb' = `1' - `2' if `ok'
    summ `comb'
    scalar `fobs' = `fobs'/((`npair'-1)*_result(4)/`npair' + _result(3)*_result(3))
    * centering value: median or mean of the F distribution, 1 otherwise
    if "`center'" == "med" {
        scalar `ctr' = invfprob(`npair'-1, `npair', 0.5)
    }
    else if "`center'" == "mean" {
        scalar `ctr' = `npair'/(`npair'-2)
    }
    else {
        scalar `ctr' = 1
    }
    global S_1 = (`fobs' - `ctr') / (`fobs' + `ctr')
    global S_2 = (`fobs' - `ctr') / `fobs'
}
di _new in gr "Intra-class r =" in ye %7.4f $S_1 in gr /*
*/ " Number of classes =", in ye `npair' _new in gr /*
*/ "Estimated reliability of a class mean (n=2) =" in ye %7.4f $S_2
end

View File

@ -0,0 +1,48 @@
.-
help for ^iclassr2^
.-
Intra-class correlation for paired measures [STB-35 sg65]
-------------------------------------------
^iclassr2^ response_var1 response_var2 [^if^ exp] [^in^ range] [^, c^enter^(^Fpos^)^]
Description
-----------
^iclassr2^ calculates the intra-class correlation for paired measurements or as-
sessments stored in response_var1 and response_var2. ^iclassr2^ also reports the
reliability of the mean of the two measurements as estimated from the Spearman-
Brown prophecy formula. Missing values are handled by casewise deletion.
@loneway@ also calculates intra-class correlations, but requires data in long,
rather than wide format. ^iclassr2^ is also much faster than ^loneway^, and offers
a small sample adjustment to the intra-class r; see the option ^center^, below.
Options
-------
^center^ chooses a reference point in the F distribution to center the observed
F statistic in estimating the intra-class correlation. The argument Fpos
may be 1 (the default), or "med" or "mean" to choose the median or the mean
of the appropriate F distribution. This option has little effect unless the
number of pairs is small.
Examples
--------
. ^iclassr2 judge1 judge2^ (inter-judge reliability)
. ^iclassr2 judge1 judge2, c(med)^ (intra-judge reliability, center the ob-
served F on its median)
Also see
--------
STB: sg65 (STB-35)
Manual: ^[R] loneway^
On-line: help for @loneway@

View File

@ -0,0 +1,188 @@
*! version 2.1 24 November 2008
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputeitems: Imputation of missing data of binary items
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
*
* Jean-benoit Hardouin, Faculty of Pharmaceutical Sciences - University of Nantes - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://www.anaqol.org
* FreeIRT Project : http://www.freeirt.org
*
* Copyright 2006-2008 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
* imputeitems varlist [if] [, prefix(string) method(string) random max(#)]
*   varlist   at least two numeric items coded 0/1
*   prefix()  prefix of the new variables holding the completed items; imp by default
*   method()  pms (default), ims, cim, ics, log or worst; with the random option
*             pms, ims, cim and log are switched to their random-draw variants
*             bip, bii, bic and bil, which may also be requested directly
*   random    draw the imputed response at random instead of rounding
*   max()     imputed values are kept only for observations with at most this
*             many missing items; 0 (default) stands for the number of items
* Creates one new variable per item, named prefix followed by the item name.
program define imputeitems
version 9
syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int 0)]

* if holds the bare condition (1 when none was given) for use after an & ;
* ifif holds the complete qualifier, or nothing
if "`if'"=="" {
    local if=1
    local ifif
}
else {
    local ifif if `if'
}
local nbitems : word count `varlist'
tokenize `varlist'
if `max'==0 {
    local max=`nbitems'
}
if "`prefix'"=="" {
    local prefix imp
}

* Every item must be coded 0/1 on the selected observations and have at least
* one nonmissing response there. The item means are kept for ims and cim.
forvalues i=1/`nbitems' {
    qui su ``i'' `ifif'
    local nobs=r(N)
    local mean`i'=r(mean)
    capture assert inlist(``i'',0,1)|missing(``i'') `ifif'
    if _rc|`nobs'==0 {
        di in red "The {hi:imputeitems} command runs only with dichotomous items"
        exit 198
    }
}

if "`method'"=="" {
    local method pms
}
if "`method'"!="pms"&"`method'"!="ims"&"`method'"!="cim"&"`method'"!="ics"&"`method'"!="bip"&"`method'"!="bil"&"`method'"!="bic"&"`method'"!="bii"&"`method'"!="log"&"`method'"!="worst" {
    di in red "The method option is unknow (choose among pms, ims, cim, ics, log and worst)"
    exit 198
}

* The random option switches a deterministic method to its random-draw variant
if "`method'"=="pms"&"`random'"!="" {
    local method bip
}
else if "`method'"=="ims"&"`random'"!="" {
    local method bii
}
else if "`method'"=="log"&"`random'"!="" {
    local method bil
}
else if "`method'"=="cim"&"`random'"!="" {
    local method bic
}
else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
    di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
    local random
}

* Build, item by item, a temporary variable holding the value to impute
forvalues i=1/`nbitems' {
    tempvar imp`i' tmp`i'
    if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
        * proportion of positive responses of the person on the answered items
        qui egen `imp`i''=rowtotal(`varlist') `ifif'
        qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
        qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
        qui replace `imp`i''=``i'' if ``i''!=.&`if'
        if "`method'"=="pms" {
            qui replace `imp`i''=round(`imp`i'') `ifif'
        }
        else if "`method'"=="bip" {
            qui replace `imp`i''=uniform()<`imp`i'' `ifif'
        }
        else if "`method'"=="cim"|"`method'"=="bic"{
            * rescale by the mean of this item relative to the summed means of
            * the items the person answered, then clamp to the 0-1 range
            qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
            qui replace `tmp`i''=0 `ifif'
            forvalues j=1/`nbitems' {
                qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
            }
            qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
            qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
            qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
            if "`method'"=="cim" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bic" {
                qui replace `imp`i''=uniform()<`imp`i'' `ifif'
            }
        }
    }
    else if "`method'"=="ims"|"`method'"=="bii" {
        * proportion of positive responses to the item
        qui gen `imp`i''=`mean`i'' `ifif'
        if "`method'"=="ims" {
            qui replace `imp`i''=round(`imp`i'') `ifif'
        }
        else if "`method'"=="bii" {
            qui replace `imp`i''=uniform()<`imp`i'' `ifif'
        }
    }
    else if "`method'"=="ics" {
        * look for the item most correlated with the current one
        local item=0
        local corrmax=-2
        forvalues j=1/`nbitems' {
            if `i'!=`j' {
                qui corr ``i'' ``j'' `ifif'
                if r(rho)>`corrmax'&r(rho)!=. {
                    local item `j'
                    local corrmax=r(rho)
                }
            }
        }
        qui gen `imp`i''=``i'' `ifif'
        if `item'>0 {
            di "A missing value for the item ``i'' is replaced by the value of the item `item'"
            qui replace `imp`i''=``item'' if ``i''==.&`if'
        }
        else {
            * no correlation could be computed, so there is no donor item
            di in green "No item is correlated with the item ``i'': its missing values are not imputed"
        }
    }
    else if "`method'"=="log"|"`method'"=="bil" {
        * stepwise logistic model of the item on all the other items
        local liste`i'
        forvalues j=1/`nbitems' {
            if `i'!=`j' {
                local liste`i' `liste`i'' ``j''
            }
        }
        qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
        * drop the trailing _cons from the names of the retained coefficients
        local select=substr("`:colnames e(b)'",1,length("`:colnames e(b)'")-5)
        qui logit ``i'' `select' `ifif'
        qui predict `imp`i'' `ifif'
        if "`method'"=="log" {
            qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
        }
        else if "`method'"=="bil" {
            qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
        }
    }
    else if "`method'"=="worst" {
        qui gen `imp`i''=0 `ifif'
    }
}

* Observed responses always win over imputed ones; create the output variables
forvalues i=1/`nbitems' {
    qui replace `imp`i''=``i'' if ``i''!=.&`if'
    qui gen `prefix'``i''=`imp`i'' `ifif'
}

* Undo the imputation for observations with more than max missing items
tempvar miss
qui egen `miss'=rowmiss(`varlist')
forvalues i=1/`nbitems' {
    qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
}
end

View File

@ -0,0 +1,68 @@
{smcl}
{* 30June2008}{...}
{hline}
help for {hi:imputeitems}{right:Jean-Benoit Hardouin}
{hline}
{title:Imputation of missing item responses}
{p 8 14 2}{cmd:imputeitems} {it:varlist} [{it:if}] [,{cmdab:pref:ix}({it:string}) {cmdab:meth:od}({it:string}) {cmdab:rand:om} {cmdab:max}({it:#})]
{title:Description}
{p 4 4 2}{cmd:imputeitems} imputes missing item responses in different ways: Item Mean Substitution (IMS), Person Mean Substitution (PMS), Corrected Item Mean Substitution (CIM), Interitem Correlation Substitution (ICS), logistic model (LOG) and Worst Case (WORST). A random process can be added to several methods.
{title:Options}
{p 4 8 2}{cmd:prefix} defines the prefix used to name the imputed variables (this prefix is followed by the name of the initial variable). By default, this prefix is "imp".
{p 4 8 2}{cmd:method} defines the method to impute missing data :
{p 8 8 2}{it:pms} computes the proportion of positive responses of each individual on non-missing items, and imputes a deterministic result (if p<.5 then 0, else 1),
{p 8 8 2}{it:ims} computes the proportion of positive responses to each item, and imputes a deterministic result (if p<.5 then 0, else 1),
{p 8 8 2}{it:cim} computes the proportion of positive responses to each item, corrected by the ability of the individual, and imputes a deterministic result (if p<.5 then 0, else 1),
{p 8 8 2}{it:ics} searches, for each item, for the most correlated other item and replaces a missing response by the response to this most correlated item (if that other response is missing too, there is no imputation),
{p 8 8 2}{it:log} explains the responses to each item by a logistic model where the independent variables are the responses to the other items. Only significant variables are retained (5%). These methods impute a deterministic result (if p<.5 then 0, else 1) [{it:log}] to missing responses (if the response to an independent variable is missing, there is no imputation),
{p 8 8 2}{it:worst} replaces the missing data by a 0.
{p 4 8 2}{cmd:random} adds a random effect to the imputation process (available only with {it:pms}, {it:ims}, {it:cim} or {it:log}). In these cases, the imputed value is randomly drawn from a binomial distribution using the parameter p.
{p 4 8 2}{cmd:max} allows imputing missing values only for individuals with a maximal number of missing values defined with this option.
{p 4 8 2}By default, the {it:pms} method is used.
{p 4 8 2}The old method names ({it:bip}, {it:bii}, {it:bic} and {it:bil}) continue to work. They correspond to adding the {cmd:random} option to the {it:pms}, {it:ims}, {it:cim} and {it:log} methods, respectively.
{title:Example}
{cmd:. imputeitems itemA*} /*PMS method, IMP prefix*/
{cmd:. imputeitems itemA*, prefix(cim) method(cim)}
{cmd:. imputeitems itemA*, method(log) random}
{title:Reference}
{p 4 8 2}{cmd:Huisman M.} (2000), Imputation of missing item responses: some simple techniques. {it: Quality & Quantity}, {cmd:34}, 331-351.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Websites {browse "http://www.anaqol.org":AnaQol}
and {browse "http://www.freeirt.org":FreeIRT}

View File

@ -0,0 +1,78 @@
program def inslist, rclass
*! NJC 1.1.0 14 December 2000
* NJC 1.0.0 7 November 2000
*
* Insert words into a list at given positions.
*
* Syntax:  inslist list , Insert(string) Pos(numlist) [ Global(str) Noisily ]
*   list      the words to insert into (everything before the comma)
*   insert()  the words to insert; recycled when there are fewer words
*             than positions
*   pos()     integer positions between -nlist and nlist, where nlist is
*             the number of words in list: 0 inserts before the first word,
*             k > 0 inserts after word k, and k < 0 is mapped to
*             nlist + 1 + k (so -1 is after the last word)
*   global()  name (at most 8 characters) of a global macro that also
*             receives the result
*   noisily   display the resulting list
* The new list is returned in r(list). Words of insert() are assigned to
* positions in ascending order of final (non-negative) position.
    version 6.0

    gettoken list 0 : 0, parse(",")
    if "`list'" == "" | "`list'" == "," {
        di in r "nothing in list"
        exit 198
    }
    local nlist : word count `list'

    syntax , Insert(string) Pos(numlist sort int >=-`nlist' <=`nlist') /*
    */ [ Global(str) Noisily ]

    if length("`global'") > 8 {
        di in r "global name must be <=8 characters"
        exit 198
    }

    local np : word count `pos'
    tknz `pos', s(p)

    * Negative indexes to positive. The signed sort done by -syntax- puts
    * negatives first, but they map to positions near the END of the list,
    * so the converted positions must be sorted again: the merge below is
    * a single forward pass and would silently skip out-of-order positions
    * (for example pos(-1 2) used to lose the insertion after word 2).
    local i = 1
    while `i' <= `np' {
        if `p`i'' < 0 {
            local p`i' = `nlist' + 1 + `p`i''
        }
        local abspos "`abspos' `p`i''"
        local i = `i' + 1
    }
    numlist "`abspos'", sort
    local pos "`r(numlist)'"

    * Append nlist + 1 as a sentinel so that p`j' is always defined
    * when j runs one past the last real position.
    local np1 = `nlist' + 1
    tknz `pos' `np1', s(p)

    * Recycle the insert words until there is at least one per position.
    local nins : word count `insert'
    if `nins' < `np' {
        local rep = 1 + int( `np' / `nins')
        local insert : di _dup(`rep') "`insert' "
        local nins : word count `insert'
    }
    tknz `insert', s(i)

    * Position 0: insertions before the first word of the list.
    local j = 1
    while `p`j'' == 0 {
        local newlist "`newlist'`i`j'' "
        local j = `j' + 1
    }

    * Copy each word of the list, followed by every insertion due after it.
    tokenize `list'
    local i = 1
    while `i' <= `nlist' {
        local newlist "`newlist'``i'' "
        while `i' == `p`j'' & `j' <= `np' {
            local newlist "`newlist'`i`j'' "
            local j = `j' + 1
        }
        local i = `i' + 1
    }

    if "`noisily'" != "" { di "`newlist'" }
    if "`global'" != "" { global `global' "`newlist'" }
    return local list `newlist'
end
program def tknz, rclass
* NJC 1.1.0 2 June 2000
*
* Tokenize a list into the CALLER's local macros.
*
* Syntax:  tknz list , Stub(str) [ tokenize_options ]
* Everything before the comma is split by -tokenize- (any extra options are
* passed straight through to it) and token number k is stored in the
* calling program's local macro named stub followed by k, via c_local.
* Storing stops at the first empty token.
    version 6.0

    gettoken words 0 : 0, parse(",")
    syntax , Stub(str) [ * ]
    tokenize `"`words'"' , `options'

    * Consume the positional macros one at a time, numbering as we go.
    local k = 0
    while "`1'" != "" {
        local k = `k' + 1
        c_local `stub'`k' `"`1'"'
        mac shift
    }
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,27 @@
*! NJC 1.0.0 20 Sept 2005
program isvar, rclass
* Split a list of names into those that are variables in the current
* dataset and those that are not.
*
* Each name is tried with -unab-: if it unabbreviates without error, the
* expanded name(s) go to r(varlist); otherwise the name as typed goes to
* r(badlist). Both lists are also displayed when non-empty.
    version 8
    syntax anything

    foreach name of local anything {
        capture unab expanded : `name'
        if _rc {
            * -unab- failed: not a variable name or abbreviation
            local badlist `badlist' `name'
        }
        else {
            local varlist `varlist' `expanded'
        }
    }

    di

    if "`varlist'" != "" {
        local nfound : word count `varlist'
        local what = plural(`nfound', "variable")
        di as txt "{p}`what': " as res "`varlist'{p_end}"
        return local varlist "`varlist'"
    }

    if "`badlist'" != "" {
        local nbad : word count `badlist'
        local what = plural(`nbad', "not variable")
        di as txt "{p}`what': " as res "`badlist'{p_end}"
        return local badlist "`badlist'"
    }
end

View File

@ -0,0 +1,56 @@
{smcl}
{* 20sep2005}{...}
{hline}
help for {hi:isvar}
{hline}
{title:Filter names into variable names and others}
{p 8 17 2}
{cmdab:isvar} {it:possiblevarlist}
{title:Description}
{p 4 4 2}
{cmd:isvar} takes a list of names that might name variables in
your dataset and filters it into a list of those names that are indeed
variable names and a list of the others.
{p 4 4 2}
One application of {cmd:isvar} arises when you are moving between similar
datasets, especially if they are large, but are not sure which variables are
defined in which dataset. Commands such as {help describe} would fail at the
first name not in fact a variable name. {cmd:isvar} offers a more direct way to
establish existence or non-existence of several possible variables.
{title:Saved results}
{p 4 8 2}r(varlist) names of variables in current dataset{p_end}
{p 4 8 2}r(badlist) names that do not correspond to variables in current dataset
{title:Examples}
{p 4 8 2}{cmd:. isvar mpg rep78 rep77}{p_end}
{p 4 8 2}{cmd:. local OKlist "`r(varlist)'"}{p_end}
{p 4 8 2}{cmd:. su `OKlist'}
{title:Author}
{p 4 4 2}Nicholas J. Cox, Durham University, U.K.{break}
n.j.cox@durham.ac.uk
{title:Acknowledgements}
{p 4 4 2}This problem was suggested by Amadou Diallo.
{title:Also see}
{p 4 13 2}
Online: help for {help describe}; {help unab}