File restructure #2

This commit is contained in:
2024-04-19 16:48:36 +02:00
parent ecac05b9c4
commit 0e9e01eca8
702 changed files with 272561 additions and 1 deletions

View File

@ -0,0 +1,235 @@
*! version 1.0.2 13oct2009 caplog by roywada@hotmail.com
*! captures a log file, possibly for use with logout or dataout
program define caplog
version 6
local logfile : log
version 7
* invisible to Stata 7
local Version7 ""
cap local Version7 `c(stata_version)'
if "`Version7'"=="" {
* it is version 7
local bind ""
*noi di in yel "limited functions under Stata 7"
}
else if `Version7'>=8.2 {
version 8.2
local bind "bind"
}
*qui log query
*if `"`r(status)'"'=="on" {
* qui log close
* local filename `"`r(filename)'"'
*}
if `"`logfile'"'~="" {
di ""
qui log close
local filename `"`logfile'"'
}
* embbed to avoid log being open
_caplog `0'
cap log close
*** c_locals coming back
* clickables
if "`tempfile'"~="tempfile" {
if "`smcl'"=="" {
local cl_text `"{browse `"`using1'"'}"'
noi di as txt `"`cl_text'"'
}
else {
local cl_text `"{stata `"view `using1'"':`using1'}"'
noi di as txt `"`cl_text'"'
}
}
cap log close
*if `"`filename'"'~="" {
* log using `"`filename'"', append
*}
end
********************************************************************************************
program define _caplog
version 7
local Version7 ""
cap local Version7 `c(stata_version)'
if "`Version7'"=="" {
* it is version 7
local bind ""
*noi di in yel "limited functions under Stata 7"
}
else if `Version7'>=8.2 {
version 8.2
local bind "bind"
}
* encase the colon in file name in quotes, avoiding string function length limits
local behind `"`0'"'
local 0 ""
gettoken front behind: behind, parse(" ,")
local 0 ""
local done 0
while `"`front'"'~="" & `done'==0 {
if `"`front'"'=="using" {
gettoken rest behind: behind, parse(" ,")
* strip off quotes
gettoken first second: rest, parse(" ")
cap local rest: list clean local(rest)
* take off colon at the end
local goldfish ""
if index(`"`rest'"',":")~=0 {
local end=substr(`"`rest'"',length(`"`rest'"'),length(`"`rest'"'))
if "`end'"==":" {
local rest=substr(`"`rest'"',1,`=length(`"`rest'"')-1')
local goldfish " : "
}
}
* colon reattached with a space at the end
* .txt attached here, SMCL TO BE FIXED LATER
local rabbit `"""'
if index(`"`rest'"', ".")==0 {
local using `"`rabbit'`rest'.txt`rabbit'`goldfish'"'
}
else {
local using `"`rabbit'`rest'`rabbit'`goldfish'"'
}
local 0 `"`0' using `using' `behind'"'
local done 1
}
else {
local 0 `"`0' `front'"'
gettoken front behind: behind, parse(" ,")
}
}
gettoken first second : 0, parse(":") `bind' match(par) quotes
local 0 `"`first'"'
while `"`first'"'~=":" & `"`first'"'~="" {
gettoken first second : second, parse(":") `bind' match(par) quotes
}
if `"`0'"'==":" {
* colon only when shorthand combined with prefix
local 0
}
else {
local _0 `"`0'"'
}
*** shorthand syntax if [using] is missing
syntax using/ [, replace append tempfile text smcl subspace]
if "`smcl'"=="smcl" {
if index(`"`using'"', ".txt")~=0 {
local temp=substr(`"`using'"',1,length(`"`using'"')-4)
local using `"`temp'.smcl"'
}
}
if "`text'"~="" & "`smcl'"~="" {
di "cannot choose both {opt text} and {opt smcl}"
exit 198
}
if "`text'"=="" & "`smcl'"=="" {
local text "text"
}
cap confirm file `"`using'"'
if !_rc & "`replace'"~="replace" & "`append'"~="append" {
* it exists
noi di in red `"`using' exists; specify {opt replace} or {opt append}"'
exit 198
}
* goes with `second'
if `"`second'"'~="" {
local _colon ":"
}
qui {
if "`subspace'"=="subspace" {
* fix the gaps in the value labels
ds8
foreach var in `r(varlist)'{
local temp : var label `var'
local temp = subinstr(`"`temp'"'," ","_",.)
label var `var' `"`temp'"'
}
}
}
* regular stuff
if `"`using'"'~="" {
* prefix use using file
qui log using `"`using'"', `replace' `append' `text' `smcl'
`second'
}
else {
* prefix use temp file
qui log using `"`using'"', `replace' `append' `text' `smcl'
`second'
}
* clickables
c_local smcl "`smcl'"
c_local using1 `"`using'"'
c_local tempfile `"`tempfile'"'
end
********************************************************************************************
*** ripped from outreg2 Mar 2009
program define ds8
* get you the list of variable like -ds- does for version 8
version 7.0
qui ds
if "`r(varlist)'"=="" {
local dsVarlist ""
foreach var of varlist _all {
local dsVarlist "`dsVarlist' `var'"
}
c_local dsVarlist `dsVarlist'
}
else {
c_local dsVarlist `r(varlist)'
}
end
/*
* version 1.0.1 May2009 caplog by roywada@hotmail.com
smcl accepted
version control fixed
1.0.2 close the log file at the end to avoid the possibility of it being left open

View File

@ -0,0 +1,64 @@
{smcl}
{* 13oct2009}{...}
{cmd:help caplog}
{hline}
{title:Title}
{p2colset 5 16 22 2}{...}
{p2col :{hi: caplog} {hline 2}}captures a ASCII log file (for use with {cmd:{help logout}}){p_end}
{marker s_Syntax}
{title:Syntax}
{p 4 4 6}
{cmdab:caplog} using filename [, {it:options}] : command
{marker s_Description}
{title:Description}
{p 4 4 6}
{cmd:caplog} provides a fast and easy way to capture text-based log file, possibly for use with
{cmd:{help logout}}. The default is text log files.
{title:Command}
{p 4 12 6}Any command accepted, i.e. { help estimation commands}, tabulation, summary, descrition, etc.
{marker s_Options}
{title:Options}
{dlgtab:Main}
{p 4 12 6}{opt replace} Replace pre-exiting log files.{p_end}
{p 4 12 6}{opt append} Append. {p_end}
{p 4 12 6}{opt smcl} Save smcl file instead of text file. {p_end}
{marker s_0}
{title:Examples}
{p 4 4 6}{cmd:* table}{p_end}
{p 4 4 6}{stata sysuse auto, clear}{p_end}
{p 4 4 6}{stata `"caplog using mystuff.txt, replace: table trunk rep78 , c(n mpg mean mpg sd mpg median mpg)"'}{p_end}
{p 4 4 6}{stata `"caplog using mystuff.txt, append: table trunk rep78 , c(n mpg mean mpg sd mpg median mpg)"'}{p_end}
{title:Remarks}
{p 4 12 6}PREVIOUSLY OPENED LOG FILES WILL BE CLOSED BY CAPLOG.{p_end}
{p 4 12 6}version 7: do not include colon characters (:) in the file path. Use -cd- instead.{p_end}
{title:Author}
{p 4 4 6}Roy Wada{p_end}
{p 4 4 6}roywada@hotmail.com{p_end}

View File

@ -0,0 +1,167 @@
*! 1.0.1 - allow either y() or choice(); yrank() or rank()
* 1.0.0 - revise option names: case->casevar; id->case
* 0.2.4 - small text formatting error
* 0.2.3 - change _optnum to altnum and add as option
* 0.2.2 - allow id() to specify new variable instead of using _id - 15Jul2005
* 0.2.1 - change specification of csv from varlist to case() - 11Jul2005
program define case2alt
version 9
syntax , [y(varname numeric) YRank(name) case(name) ///
Generate(name) REplace CASEVars(namelist) Alt(namelist) noNames altnum(name) ///
Choice(name) Rank(name)] // allow choice() and rank() to sub for y() and yrank()
** CHANGE 9/8/05: allow choice() instead of y() and allow rank() instead of yrank()
if "`choice'" != "" {
local y "`choice'"
local choice ""
}
if "`rank'" != "" {
local yrank "`rank'"
local rank ""
}
* either y() or yrank() must be specified
if "`y'" == "" & "`yrank'" == "" {
di as err "option y() or yrank() must be specified"
exit 198
}
* figure out if rankdata is being specified
local rankdata "yes"
if "`y'" != "" {
local rankdata "no"
}
if "`rankdata'" == "no" & "`generate'" == "" & "`replace'" == "" {
di as txt "(note: " as res "choice " as txt "used for outcome since no variable specified by gen())"
confirm new variable choice
local generate "choice"
}
local choice "`generate'"
if "`case'" == "" {
di as txt "(note: variable " as res "_id" as txt " used since case() not specified)"
confirm new variable _id
gen _id = _n
local case "_id"
}
if "`altnum'" == "" {
di as txt "(note: variable " as res "_altnum" as txt " used since altnum() not specified)"
confirm new variable _altnum
local altnum "_altnum"
}
capture confirm variable `case'
if _rc != 0 {
gen `case' = _n
}
if "`rankdata'" == "no" {
_pecats `y'
local catval = r(catvals)
local catnms = r(catnms)
local numcats = r(numcats)
forvalues i = 1(1)`numcats' {
local y`i' : word `i' of `catval'
local ynm`i' : word `i' of `catnms'
confirm integer number `y`i''
gen _ytemp`y`i'' = 1
}
local ytemp "_ytemp"
}
if "`rankdata'" == "yes" {
capture label drop `altnum'
foreach rvar of varlist `yrank'* {
local rvarnum = subinstr("`rvar'", "`yrank'", "", 1)
local rvarnumchk = real("`rvarnum'")
if "`rvarnum'" == "`rvarnumchk'" {
local rvarlabel : variable label `rvar'
label define `altnum' `rvarnum' "`rvarlabel'", modify
}
}
}
qui reshape long `ytemp' `yrank' `alt', i(`case') j(`altnum')
capture drop _ytemp
if "`rankdata'" == "no" {
tempvar ychosen
gen `ychosen' = `y'
if "`replace'" == "replace" {
drop `y'
local choice "`y'"
}
gen `choice' = (`altnum' == `ychosen') if `altnum' < .
}
if "`rankdata'" == "yes" {
label define `altnum', modify
label values `altnum' `altnum'
_pecats `altnum'
local catval = r(catvals)
local catnms = r(catnms)
local numcats = r(numcats)
forvalues i = 1(1)`numcats' {
local y`i' : word `i' of `catval'
local ynm`i' : word `i' of `catnms'
confirm integer number `y`i''
}
* if generate option specified, rename stub to that
if "`generate'" != "" {
rename `yrank' `generate'
local yrank "`generate'"
}
}
forvalues i = 1(1)`numcats' {
local name = "y`y`i''"
if "`names'" != "nonames" & "`ynm`i''" != "`y`i''" {
local name "`ynm`i''"
}
qui gen ytemp`y`i'' = (`altnum' == `y`i'') if `altnum' < .
label variable ytemp`y`i'' "`ynm`i''"
if "`casevars'" != "" {
foreach var of varlist `casevars' {
qui gen `name'X`var' = `var' * ytemp`y`i''
}
}
local ylist "`ylist' `name'*"
clonevar `name' = ytemp`y`i''
drop ytemp`y`i''
}
* output
if "`rankdata'" == "no" {
di _n as txt "choice indicated by:" as res " `choice'"
}
if "`rankdata'" == "yes" {
di _n as txt "ranks indicated by:" as res " `yrank'"
}
di as txt "case identifier:" as res " `case'"
di as txt "case-specific interactions:" as res "`ylist'"
if "`alt'" != "" {
di as txt "alternative-specific variables:" as res " `alt'"
}
end

View File

@ -0,0 +1,121 @@
{smcl}
{* 05Dec2005jf}
{hline}
help for {hi:case2alt}{right:03Nov2005}
{hline}
{p 4 4 2}
{title:Convert data from one observation per case to one observation per alternative per case}
{p 4 12 2}{cmd:case2alt} {cmd:,}
{{cmdab:choice(}{it:varname}{cmd:)} | {cmdab:rank(}{it:stubname}{cmd:)}}
[{cmdab:a:lt(}{it:stubnames}{cmd:)}
{cmdab:casev:ars(}{it:varlist}{cmd:)}
{cmdab:case(}{it:varname}{cmd:)}
{cmdab:g:enerate(}{it:newvar}{cmd:)}
{cmdab:rep:lace}
{cmdab:altnum(}{it:varname}{cmd:)}
{cmdab:non:ames}]
{title:Description}
{p 4 4 2}
{cmd:case2alt} is intended to be used to configure data for the estimation of estimation
of regression models for alternative-specific data, such as {helpb clogit},
{helpb rologit} or {helpb asmprobit}. {cmd:case2alt} presumes that you have data
where each observation corresponds to an individual case and that you want to convert
the data to the form in which each observation corresponds to an alternative for a specific case.
{p 4 4 2}
Imagine that you have data with an outcome that has four alternatives, with values 1, 2, 3 and 8.
{cmd:case2alt} will reshape the data so that there are n*4 observations. If you specify an
identifying variable with the {cmd:casevars()} option, this variable will continue to identify
unique cases; otherwise, new variable _id will identify cases.
{p 4 4 2}
A new variable, called either _altnum or the name specified in {cmd:altnum()},
will identify the alternatives within a case. Additionally, however, {cmd:case2alt}
will create dummy variables y{it:value} that also identify alternatives.
In our example the new dummy variables y1, y2, y3 and y8 will be created.
Interactions will also be created with these dummies and any variables
specified in {cmd:case()}. For the variable educ, {cmd:case2alt} will create
variables y1_educ, y2_educ, y3_educ, and y8_educ.
{p 4 4 2}
If we have simple choice variable, then {cmd:case2alt} will create an outcome
variable based on {cmd:y()} that contains 1 in the observation corresponding to
the selected alternative and 0 for other alternatives. We can specify the name of
this new outcome variable using the {cmd:gen()} option, or we can have it be the
same as {opt y()} using the {cmd:replace} option, or (by default) we
can have it be named choice.
{p 4 4 2}
After using {cmd:case2alt}, we would be able to estimate models like {helpb clogit} by typing, e.g.,:
{p 4 4 2}
{cmd:. clogit choice y1* y2* y3*, group(_id)}
{p 4 4 2}
Alternative-specific variables are specified using the {cmd:alt()} option.
The contents of {cmd:alt()} should be {it:stubnames}, corresponding to a series
of variables that contain the alternative-specific values.
Specifying {cmd:alt(time)}, in our example, would imply that there are
variables time1, time2, time3 and time8 in our case-level data.
{p 4 4 2}
Case-specific variables are specified using the {cmd:casevars()} option,
where the contents should be a {varlist}. Neither the outcome nor id variable
should be included in {cmd:casevars()}.
{p 4 4 2}
If we have ranked data, we can specify the ranked outcome with the
{cmd:rank()} outcome. The content of rank should again be a stubname.
Specifying {cmd:rank(}rank{cmd:)} in our example would assume there are
variables rank1, rank2, rank3, rank8 that contain the relevant information
on the ranks of each alternative.
{title:Options}
{p 4 8 2}{opth choice(varname)} or {opth rank(stubname)} is required. {varname} is the
variable that indicates the value of the selected alternative.
In the case of ranked outcome, {it:stubname} with {opt rank()} will contain
the stub for the names of variables that contain information about ranks of alternatives.
{p 4 8 2}{opth case(varname)} indicates the variable, either existing or to be
created, that identifies individual cases.
If {varname} is unspecified, a new variable named _id will be created.
{p 4 8 2}{cmd:alt(}{it:stubnames}{cmd:)} contains the {it:stubnames} for
alternative-specific variables. This requires that variables {it:stubname}# exist
for each value of an alternative.
{p 4 8 2}{opth casevars(varlist)} contains the names of the case-specific variables
(not including the id or outcome variable).
{p 4 8 2}{opth gen:erate(newvar)} and {opt replace} are used to name the variable that
contain 1 for the selected alternative and 0 for non-selected alternatives.
The variable will be named {newvar} if {newvar} is specified; the name of
the variable specified in {cmd: y()} if {opt replace} is specified; and will be named {it:choice} otherwise.
In the case of ranked data, the ranks will be contained in variable specified as the
stub in {opt yrank()} and {opt gen:erate()} or {opt replace} will be ignored.
{p 4 8 2}{opth altnum(varname)} indicates the name of the new variable used to indicate
the alternatives. _altnum will be used if altnum() is not specified.
{p 4 8 2}{opt non:ames} indicates that the case-specific interactions should be named
y# instead of using the value labels of the outcome variable.
{title:Example}
{p 4 4 2}{cmd:. use "http://www.stata-press.com/data/lfr/nomocc2.dta", clear}{break}
{cmd:. mlogit occ white ed exper}{break}
{cmd:. case2alt, choice(ed) casevars(white ed exper) replace nonames}{break}
{cmd:. clogit occ y1* y2* y3* y4*, group(_id)}{break}
{p 4 4 2}{cmd:. case2alt, rank(rank92) casevars(hntest) alt(rank04) case(id)}
{title:Authors}
Jeremy Freese and J. Scott Long
www.indiana.edu/~jslsoc/spost.htm
spostsup@indiana.edu

679
Modules/ado/plus/c/cfa1.ado Normal file
View File

@ -0,0 +1,679 @@
*! Confirmatory factor analysis with a single factor: v.2.2
*! Stas Kolenikov, skolenik-gmail-com
program define cfa1, eclass
version 9.1
if replay() {
if ("`e(cmd)'" != "cfa1") error 301
Replay `0'
}
else Estimate `0'
end
program Estimate `0', eclass
syntax varlist(numeric min=3) [if] [in] [aw pw / ] ///
, [unitvar FREE POSvar FROM(str) CONSTRaint(numlist) LEVel(int $S_level) ///
ROBust VCE(string) CLUster(passthru) SVY SEArch(passthru) * ]
* syntax: cfa1 <list of the effect indicators>
* untivar is for the identification condition of the unit variance of the latent variable
unab varlist : `varlist'
tokenize `varlist'
local q: word count `varlist'
marksample touse
preserve
qui keep if `touse'
* weights!
global CFA1N = _N
/*
* we'll estimate the means instead
qui foreach x of varlist `varlist' {
sum `x', meanonly
replace `x' = `x'-r(mean)
* deviations from the mean
}
*/
if "`weight'" != "" {
local mywgt [`weight'=`exp']
}
if "`robust'`cluster'`svy'`weight'"~="" {
local needed 1
}
else {
local needed 0
}
Parse `varlist' , `unitvar'
local toml `r(toml)'
if "`from'" == "" {
local from `r(tostart)', copy
}
* identification
constraint free
global CFA1constr `r(free)'
if "`unitvar'" ~= "" {
* identification by unit variance
constraint $CFA1constr [phi]_cons = 1
}
else if "`free'"=="" {
* identification by the first variable
constraint $CFA1constr [`1']_cons = 1
}
else {
* identification imposed by user
global CFA1constr
}
local nconstr : word count `constraint'
global CFA1PV = ("`posvar'" != "")
if "`posvar'" ~= "" {
di as text _n "Fitting the model without restrictions on error variances..."
}
* variance estimation
local vce = trim("`vce'")
if "`vce'" == "boot" local vce bootstrap
if "`vce'" == "sbentler" {
global CFA1SBV = 1
local vce
}
else {
if index("robustoimopg","`vce'") {
local vce vce(`vce')
}
else {
di as err "`vce' estimation is not supported"
if "`vce'" == "bootstrap" | "`vce'" == "boot" {
di as err "try {help bootstrap} command directly"
}
exit 198
}
}
tempname ilog1 ilog2
Tryit ml model lf cfa1_lf `toml' `mywgt', constraint($CFA1constr `constraint') ///
init (`from') maximize nooutput `options' `search' ///
`svy' `cluster' `robust' `vce'
/*
ml model lf cfa1_lf `toml' `mywgt', ///
constraint($CFA1constr `constraint') `svy' `robust' `cluster' ///
maximize
* ml check
* ml search, rep(5)
ml init `from', copy
cap noi ml maximize , `options'
*/
mat `ilog1' = e(ilog)
local nz = 0
if $CFA1PV {
* determine if refitting the model is needed
tempname ll_unr
scalar `ll_unr' = e(ll)
forvalues i=1/`q' {
if [``i''_v]_cons <0 {
constraint free
local ccc = `r(free)'
const define `ccc' [``i''_v]_cons = 0
local zerolist `zerolist' `ccc'
local ++nz
}
}
local zerolist = trim("`zerolist'")
global CFA1constr $CFA1constr `zerolist'
if "`zerolist'" ~= "" {
di as text _n "Fitting the model with some error variances set to zero..."
Tryit ml model lf cfa1_lf `toml' `mywgt' , constraint($CFA1constr `constraint') ///
init (`from') maximize nooutput `options' `search' ///
`svy' `robust' `cluster' `vce'
mat `ilog2' = e(ilog)
}
* adjust degrees of freedom!
}
* we better have this before Satorra-Bentler
if "`unitvar'" ~= "" {
ereturn local normalized Latent Variance
}
else if "`free'"=="" {
ereturn local normalized `1'
}
* work out Satorra-Bentler estimates
if "$CFA1SBV"!="" {
* repost Satorra-Bentler covariance matrix
tempname SBVar SBV Delta Gamma
cap SatorraBentler
if _rc {
di as err "Satorra-Bentler standard errors are not supported for this circumstance; revert to vce(oim)"
global CFA1SBV
}
else {
mat `SBVar' = r(SBVar)
mat `Delta' = r(Delta)
mat `Gamma' = r(Gamma)
mat `SBV' = r(SBV)
ereturn repost V = `SBVar'
ereturn matrix SBGamma = `Gamma', copy
ereturn matrix SBDelta = `Delta', copy
ereturn matrix SBV = `SBV', copy
ereturn local vce SatorraBentler
}
}
* get the covariance matrix and the number of observations!
***********************************************************
tempname lambda vars phi S Sindep Sigma trind eb
qui mat accum `S' = `varlist', dev nocons
mat `S' = `S' / $CFA1N
* implied matrix
mat `eb' = e(b)
mat `lambda' = `eb'[1,1..`q']
mat `vars' = `eb'[1,`q'+1..2*`q']
scalar `phi' = `eb'[1,3*`q'+1]
mat `Sigma' = `lambda''*`phi'*`lambda' + diag(`vars')
mat `Sindep' = diag(vecdiag(`S'))
* test against independence
mat `trind' = trace( syminv(`Sindep') * `S' )
local trind = `trind'[1,1]
ereturn scalar ll_indep = -0.5 * `q' * $CFA1N * ln(2*_pi) - 0.5 * $CFA1N * ln(det(`Sindep')) - 0.5 * $CFA1N * `trind'
ereturn scalar lr_indep = 2*(e(ll)-e(ll_indep))
ereturn scalar df_indep = `q'-`nz'-`nconstr'
ereturn scalar p_indep = chi2tail(e(df_indep),e(lr_indep))
* goodness of fit test
ereturn scalar ll_u = -0.5 * `q' * $CFA1N * ln(2*_pi) - 0.5 * $CFA1N * ln(det(`S')) - 0.5 * `q' * $CFA1N
ereturn scalar lr_u = -2*(e(ll)-e(ll_u))
ereturn scalar df_u = `q'*(`q'+1)*.5 - (2*`q' - `nz' - `nconstr')
* wrong if there are any extra constraints in -constraint- command!!!
ereturn scalar p_u = chi2tail(e(df_u),e(lr_u))
ereturn matrix ilog1 `ilog1'
cap ereturn matrix ilog2 `ilog2'
* Satorra-Bentler corrections
if "$CFA1SBV"!="" {
* compute the corrected tests, too
* Satorra-Bentler 1994
tempname U trUG2 Tdf
mat `U' = `SBV' - `SBV'*`Delta'*syminv(`Delta''*`SBV'*`Delta')*`Delta''*`SBV'
ereturn matrix SBU = `U'
mat `U' = trace( e(SBU)*`Gamma' )
ereturn scalar SBc = `U'[1,1]/e(df_u)
ereturn scalar Tscaled = e(lr_u)/e(SBc)
ereturn scalar p_Tscaled = chi2tail( e(df_u), e(Tscaled) )
mat `trUG2' = trace( e(SBU)*`Gamma'*e(SBU)*`Gamma')
ereturn scalar SBd = `U'[1,1]*`U'[1,1]/`trUG2'[1,1]
ereturn scalar Tadj = ( e(SBd)/`U'[1,1]) * e(lr_u)
ereturn scalar p_Tadj = chi2tail( e(SBd), e(Tadj) )
* Yuan-Bentler 1997
* weights!
ereturn scalar T2 = e(lr_u)/(1+e(lr_u)/e(N) )
ereturn scalar p_T2 = chi2tail( e(df_u), e(T2) )
}
if "`posvar'" ~= "" {
ereturn scalar lr_zerov = 2*(`ll_unr' - e(ll))
ereturn scalar df_zerov = `nz'
local replay_opt posvar llu(`ll_unr')
}
ereturn local cmd cfa1
Replay , `replay_opt' level(`level')
Finish
restore
ereturn repost, esample(`touse')
end
program define Tryit
cap noi `0'
local rc=_rc
if `rc' {
Finish
exit `rc'
}
end
program define Finish
* finishing off
constraint drop $CFA1constr
global CFA1S
global CFA1N
global CFA1PV
global CFA1theta
global CFA1arg
global CFA1data
global CFA1constr
global CFA1vars
global CFA1SBV
end
program define Replay
syntax, [posvar llu(str) level(passthru)]
di _n as text "Log likelihood = " as res e(ll) _col(59) as text "Number of obs = " as res e(N)
di as text "{hline 13}{c TT}{hline 64}"
di as text " {c |} Coef. Std. Err. z P>|z| [$S_level% Conf. Interval]"
di as text "{hline 13}{c +}{hline 64}"
tempname vce
mat `vce' = e(V)
local q = colsof(`vce')
local q = (`q'-1)/3
local a : colfullnames(`vce')
tokenize `a'
di as text "Lambda{col 14}{c |}"
forvalues i = 1/`q' {
gettoken v`i' : `i' , parse(":")
_diparm `v`i'' , label("`v`i''") prob `level'
}
di as text "Var[error]{col 14}{c |}"
forvalues i = 1/`q' {
_diparm `v`i''_v , label("`v`i''") prob `level'
}
di as text "Means{col 14}{c |}"
forvalues i = 1/`q' {
_diparm `v`i''_m , label("`v`i''") prob `level'
}
di as text "Var[latent]{col 14}{c |}"
_diparm phi , label("phi1") prob
di as text "{hline 13}{c +}{hline 64}"
di as text "R2{col 14}{c |}"
forvalues i = 1/`q' {
di as text %12s "`v`i''" "{col 14}{c |}{col 20}" ///
as res %6.4f (_b[`v`i'':_cons]^2*_b[phi:_cons]) / ///
(_b[`v`i'':_cons]^2*_b[phi:_cons] + _b[`v`i''_v:_cons])
}
di as text "{hline 13}{c BT}{hline 64}"
if e(df_u)>0 {
di as text _n "Goodness of fit test: LR = " as res %6.3f e(lr_u) ///
as text _col(40) "; Prob[chi2(" as res %2.0f e(df_u) as text ") > LR] = " as res %6.4f e(p_u)
}
else {
di as text "No degrees of freedom to perform the goodness of fit test"
}
di as text "Test vs independence: LR = " as res %6.3f e(lr_indep) ///
as text _col(40) "; Prob[chi2(" as res %2.0f e(df_indep) as text ") > LR] = " as res %6.4f e(p_indep)
if "`e(vce)'" == "SatorraBentler" & e(df_u)>0 {
* need to report all those corrected statistics
di as text _n "Satorra-Bentler Tbar" _col(26) "= " as res %6.3f e(Tscaled) ///
as text _col(40) "; Prob[chi2(" as res %2.0f e(df_u) as text ") > Tbar] = " as res %6.4f e(p_Tscaled)
di as text "Satorra-Bentler Tbarbar" _col(26) "= " as res %6.3f e(Tadj) ///
as text _col(40) "; Prob[chi2(" as res %4.1f e(SBd) as text ") > Tbarbar] = " as res %6.4f e(p_Tadj)
di as text "Yuan-Bentler T2" _col(26) "= " as res %6.3f e(T2) ///
as text _col(40) "; Prob[chi2(" as res %2.0f e(df_u) as text ") > T2] = " as res %6.4f e(p_T2)
}
if "`posvar'" ~= "" {
* just estimated?
if "`llu'" == "" {
di as err "cannot specify -posvar- option, need to refit the whole model"
}
else {
if e(df_zerov)>0 {
di as text "Likelihood ratio against negative variances: LR = " as res %6.3f e(lr_zerov)
di as text "Conservative Prob[chi2(" as res %2.0f e(df_zerov) as text ") > LR] = " ///
as res %6.4f chi2tail(e(df_zerov),e(lr_zerov))
}
else {
di as text "All variances are non-negative, no need to test against zero variances"
}
}
}
end
program define Parse , rclass
* takes the list of variables and returns the appropriate ml model statement
syntax varlist , [unitvar]
global CFA1arg
global CFA1theta
global CFA1vars
local q : word count `varlist'
* lambdas
forvalues i = 1/`q' {
local toml `toml' (``i'': ``i'' = )
local tostart `tostart' 1
global CFA1arg $CFA1arg g_``i''
global CFA1theta $CFA1theta l_`i'
global CFA1vars $CFA1vars ``i''
}
* variances
forvalues i = 1/`q' {
local toml `toml' (``i''_v: )
local tostart `tostart' 1
global CFA1arg $CFA1arg g_``i''_v
global CFA1theta $CFA1theta v_`i'
}
* means
forvalues i = 1/`q' {
local toml `toml' (``i''_m: )
qui sum ``i'', mean
local mean = r(mean)
local tostart `tostart' `mean'
global CFA1arg $CFA1arg g_``i''_m
global CFA1theta $CFA1theta m_`i'
}
* variance of the factor
local toml `toml' (phi: )
local tostart `tostart' 1
global CFA1arg $CFA1arg g_Phi
global CFA1theta $CFA1theta phi
* done!
return local toml `toml'
return local tostart `tostart'
end
**************************** Satorra-Bentler covariance matrix code
program SatorraBentler, rclass
version 9.1
syntax [, noisily]
* assume the maximization completed, the results are in memory as -ereturn data-
* we shall just return the resulting matrix
if "`e(normalized)'" == "" {
di as err "cannot compute Satorra-Bentler variance estimator with arbitrary identification... yet"
exit 198
}
* assume sample is restricted to e(sample)
* preserve
* keep if e(sample)
* get the variable names
tempname VV bb
mat `VV' = e(V)
local q = rowsof(`VV')
local p = (`q'-1)/3
local eqlist : coleq `VV'
tokenize `eqlist'
forvalues k=1/`p' {
local varlist `varlist' ``k''
}
* compute the implied covariance matrix
tempname Lambda Theta phi Sigma
mat `bb' = e(b)
mat `Lambda' = `bb'[1,1..`p']
mat `Theta' = `bb'[1,`p'+1..2*`p']
scalar `phi' = `bb'[1,`q']
mat `Sigma' = `Lambda''*`phi'*`Lambda' + diag(`Theta')
* compute the empirical cov matrix
tempname SampleCov
qui mat accum `SampleCov' = `varlist' , nocons dev
* weights!!!
mat `SampleCov' = `SampleCov' / (r(N)-1)
* compute the matrix Gamma
`noisily' di as text "Computing the Gamma matrix of fourth moments..."
tempname Gamma
SBGamma `varlist'
mat `Gamma' = r(Gamma)
return add
* compute the duplication matrix
* Dupl `p'
* let's call it from within SBV!
* compute the V matrix
`noisily' di as text "Computing the V matrix..."
SBV `SampleCov' `noisily'
tempname V
mat `V' = r(SBV)
return add
* compute the Delta matrix
`noisily' di as text "Computing the Delta matrix..."
tempname Delta
mata : SBDelta("`bb'","`Delta'")
*** put the pieces together now
tempname DeltaId
* enact the constraints!
SBconstr `bb'
mat `DeltaId' = `Delta' * diag( r(Fixed) )
* those should be in there, but it never hurts to fix!
if "`e(normalized)'" == "Latent Variance" {
* make the last column null
mat `DeltaId' = ( `DeltaId'[1...,1...3*`p'] , J(rowsof(`Delta'), 1, 0) )
}
else if "`e(normalized)'" ~= "" {
* normalization by first variable
local idvar `e(normalized)'
if "`idvar'" ~= "`1'" {
di as err "cannot figure out the identification variable"
exit 198
}
mat `DeltaId' = ( J(rowsof(`Delta'), 1, 0) , `DeltaId'[1...,2...] )
}
local dcnames : colfullnames `bb'
local drnames : rownames `Gamma'
mat colnames `DeltaId' = `dcnames'
mat rownames `DeltaId' = `drnames'
return matrix Delta = `DeltaId', copy
tempname VVV
mat `VVV' = ( `DeltaId'' * `V' * `DeltaId' )
mat `VVV' = syminv(`VVV')
mat `VVV' = `VVV' * ( `DeltaId'' * `V' * `Gamma' * `V' * `DeltaId' ) * `VVV'
* add the covariance matrix for the means, which is just Sigma/_N
* weights!
tempname CovM
mat `CovM' = ( J(2*`p',colsof(`bb'),0) \ J(`p',2*`p',0) , `Sigma', J(`p',1,0) \ J(1, colsof(`bb'), 0) )
mat `VVV' = (`VVV' + `CovM')/_N
return matrix SBVar = `VVV'
end
* of satorrabentler
program define SBGamma, rclass
syntax varlist
unab varlist : `varlist'
tokenize `varlist'
local p: word count `varlist'
forvalues k=1/`p' {
* make up the deviations
* weights!!!
qui sum ``k'', meanonly
tempvar d`k'
qui g double `d`k'' = ``k'' - r(mean)
local dlist `dlist' `d`k''
}
local pstar = `p'*(`p'+1)/2
forvalues k=1/`pstar' {
tempvar b`k'
qui g double `b`k'' = .
local blist `blist' `b`k''
}
* convert into vech (z_i-bar z)(z_i-bar z)'
mata : SBvechZZtoB("`dlist'","`blist'")
* blist now should contain the moments around the sample means
* we need to get their covariance matrix
tempname Gamma
qui mat accum `Gamma' = `blist', dev nocons
* weights!
mat `Gamma' = `Gamma'/(_N-1)
mata : Gamma = st_matrix( "`Gamma'" )
* make nice row and column names
forvalues i=1/`p' {
forvalues j=`i'/`p' {
local namelist `namelist' ``i''_X_``j''
}
}
mat colnames `Gamma' = `namelist'
mat rownames `Gamma' = `namelist'
return matrix Gamma = `Gamma'
end
* of computing Gamma
program define SBV, rclass
args A noisily
tempname D Ainv V
local p = rowsof(`A')
`noisily' di as text "Computing the duplication matrix..."
mata : Dupl(`p',"`D'")
mat `Ainv' = syminv(`A')
mat `V' = .5*`D''* (`Ainv' # `Ainv') * `D'
return matrix SBV = `V'
end
* of computing V
* need to figure out whether a constraint has the form parameter = value,
* and to nullify the corresponding column
program define SBconstr, rclass
args bb
tempname Iq
mat `Iq' = J(1,colsof(`bb'),1)
tokenize $CFA1constr
while "`1'" ~= "" {
constraint get `1'
local constr `r(contents)'
gettoken param value : constr, parse("=")
* is the RHS indeed a number?
local value = substr("`value'",2,.)
confirm number `value'
* parse the square brackets and turn them into colon
* replace the opening brackets with nothing, and closing brackets, with :
local param = subinstr("`param'","["," ",1)
local param = subinstr("`param'","]",":",1)
local param = trim("`param'")
local coln = colnumb(`bb',"`param'" )
mat `Iq'[1,`coln']=0
mac shift
}
return matrix Fixed = `Iq'
end
cap mata : mata drop SBvechZZtoB()
cap mata : mata drop Dupl()
cap mata : mata drop SBDelta()
mata:
void SBvechZZtoB(string dlist, string blist) {
// view the deviation variables
st_view(data=.,.,tokens(dlist))
// view the moment variables
// blist=st_local("blist")
st_view(moments=.,.,tokens(blist))
// vectorize!
for(i=1; i<=rows(data); i++) {
B = data[i,.]'*data[i,.]
moments[i,.] = vech(B)'
}
}
void Dupl(scalar p, string Dname) {
pstar = p*(p+1)/2
Ipstar = I(pstar)
D = J(p*p,0,.)
for(k=1;k<=pstar;k++) {
D = (D, vec(invvech(Ipstar[.,k])))
}
st_matrix(Dname,D)
}
void SBDelta(string bbname, string DeltaName) {
bb = st_matrix(bbname)
p = (cols(bb)-1)/3
Lambda = bb[1,1..p]
Theta = bb[1,p+1..2*p]
phi = bb[1,cols(bb)]
Delta = J(0,cols(bb),.)
for(i=1;i<=p;i++) {
for(j=i;j<=p;j++) {
DeltaRow = J(1,cols(Delta),0)
for(k=1;k<=p;k++) {
// derivative wrt lambda_k
DeltaRow[k] = (k==i)*Lambda[j]*phi + (j==k)*Lambda[i]*phi
// derivative wrt sigma^2_k
DeltaRow[p+k] = (i==k)*(j==k)
}
DeltaRow[cols(Delta)] = Lambda[i]*Lambda[j]
Delta = Delta \ DeltaRow
}
}
st_matrix(DeltaName,Delta)
}
end
* of mata piece
***************************** end of Satorra-Bentler covariance matrix code
exit
History:
v.1.0 -- May 19, 2004: basic operation, method d0
v.1.1 -- May 19, 2004: identification by -constraint-
common -cfa1_ll-
from()
v.1.2 -- May 21, 2004: method -lf-, robust
constraint free
v.1.3 -- unknown
v.1.4 -- Feb 15, 2005: pweights, arbitrary constraints
v.2.0 -- Feb 28, 2006: update to version 9 using Mata
v.2.1 -- Apr 11, 2006: whatever
v.2.2 -- Apr 13, 2006: Satorra-Bentler standard errors and test corrections
-vce- option
Apr 14, 2006: degrees of freedom corrected for # constraints
July 5, 2006: minor issue with -from(, copy)-

159
Modules/ado/plus/c/cfa1.hlp Normal file
View File

@ -0,0 +1,159 @@
{smcl}
{.-}
help for {cmd:cfa1} {right:author: {browse "http://stas.kolenikov.name/":Stas Kolenikov}}
{.-}
{title:Confirmatory factor analysis with a single factor}
{p 8 27}
{cmd:cfa1}
{it:varlist}
[{cmd:if} {it:exp}] [{cmd:in} {it:range}]
[{cmd:aw|pw =} {it:weight}]
[{cmd:,}
{cmd:unitvar}
{cmd:free}
{cmdab:pos:var}
{cmdab:constr:aint(}{it:numlist}{cmd:)}
{cmdab:lev:el(}{it:#}{cmd:)}
{cmdab:rob:ust}
{cmd:vce(robust|oim|opg|sbentler}{cmd:)}
{cmd:cluster(}{it:varname}{cmd:)}
{cmd:svy}
{cmdab:sea:rch(}{it:searchspec}{cmd:)}
{cmd:from(}{it:initspecs}{cmd:)}
{it:ml options}
]
{title:Description}
{p}{cmd:cfa1} estimates simple confirmatory factor
analysis model with a single factor. In this model,
each of the variables is assumed to be an indicator
of an underlying unobserved factor with a linear
dependence between them:
{center:{it:y_i = m_i + l_i xi + delta_i}}
{p}where {it:y_i} is the {it:i}-th variable
in the {it:varlist}, {it:m_i} is its mean,
{it:l_i} is the latent variable loading,
{it:xi} is the latent variable/factor,
and {it:delta_i} is the measurement error.
{p}The model is estimated by the maximum likelihood
procedure.
{p}As with all latent variable models, a number
of identifying assumptions need to be made about
the latent variable {it:xi}. It is assumed
to have mean zero, and its scale is determined
by the first variable in the {it:varlist}
(i.e., l_1 is set to equal 1). Alternatively,
identification can be achieved by setting the
variance of the latent variable to 1 (with option
{it:unitvar}). More sophisticated identification
conditions can be achieved by specifying option
{it:free} and then providing the necessary
{it:constraint}.
{title:Options}
{ul:Identification:}
{p 0 4}{cmd:unitvar} specifies identification by setting
the variance of the latent variable to 1.
{p 0 4}{cmd:free} requests to relax all identifying constraints.
In this case, the user is responsible for provision
of such constraints; otherwise, the estimation process
won't converge.
{p 0 4}{cmdab:pos:var} specifies that if one or more of the
measurement error variances were estimated to be
negative (known as Heywood cases), the model
needs to be automatically re-estimated by setting
those variances to zero. The likelihood ratio test
is then reported comparing the models with and without
constraints. If there is only one offending estimate,
the proper distribution to refer this likelihood
ratio to is a mixture of chi-squares; see
{help j_chibar:chi-bar test}. A conservative
test is provided by a reference to the chi-square
distribution with the largest degrees of freedom.
The p-value is then overstated.
{p 0 4}{cmdab:constr:aint(}{it:numlist}{cmd:)} can be used
to supply additional constraints. The degrees of freedom
of the model may be wrong, then.
{p 0 4}{cmdab:lev:el(}{it:#}{cmd:)} -- see
{help estimation_options##level():estimation options}
{ul:Standard error estimation:}
{p 0 4}{cmd:vce(oim|opg|robust|sbentler}
specifies the way to estimate the standard errors.
See {help vce_option}. {cmd:vce(sbentler)} is an
additional Satorra-Bentler estimator popular in
structural equation modeling literature that relaxes
the assumption of multivariate normality while
keeping the assumption of proper structural specification.
{p 0 4}{cmd:robust} is a synonum for {cmd:vce(robust)}.
{p 0 4}{cmd:cluster(}{it:varname}{cmd:)}
{p 0 4}{cmd:svy} instructs {cmd:cfa1} to respect the complex
survey design, if one is specified.
{ul:Maximization options: see {help maximize}}
{title:Returned values}
{p}Beside the standard {help estcom:estimation results}, {cmd:cfa1}
also performs the overall goodness of fit test with results
saved in {cmd:e(lr_u)}, {cmd:e(df_u)} and {cmd:e(p_u)}
for the test statistic, its goodness of fit, and the resulting
p-value. A test vs. the model with the independent data
is provided with the {help ereturn} results with {cmd:indep}
suffix. Here, under the null hypothesis,
the covariance matrix is assumed diagonal.
{p}When {cmd:sbentler} is specified, Satorra-Bentler
standard errors are computed and posted as {cmd:e(V)},
with intermediate matrices saved in {cmd:e(SBU)},
{cmd:e(SBV)}, {cmd:e(SBGamma)} and {cmd:e(SBDelta)}.
Also, a number of corrected overall fit test statistics
is reported and saved: T-scaled ({cmd:ereturn} results
with {cmd:Tscaled} suffix) and T-adjusted
({cmd:ereturn} resuls with {cmd:Tadj} suffix;
also, {cmd:e(SBc)} and {cmd:e(SBd)} are the
scaling constants, with the latter also
being the approximate degrees of freedom
of the chi-square test)
from Satorra and Bentler (1994), and T-double
bar from Yuan and Bentler (1997)
(with {cmd:T2} suffix).
{title:References}
{p 0 4}{bind:}Satorra, A. and Bentler, P. M. (1994)
Corrections to test statistics and standard errors in covariance structure analysis,
in: {it:Latent variables analysis}, SAGE.
{p 0 4}{bind:}
Yuan, K. H., and Bentler, P. M. (1997)
Mean and Covariance Structure Analysis: Theoretical and Practical Improvements.
{it:JASA}, {bf:92} (438), pp. 767--774.
{title:Also see}
{p 0 21}{bind:}Online: help for {help factor}
{title:Contact}
Stas Kolenikov, kolenikovs {it:at} missouri.edu

View File

@ -0,0 +1,69 @@
*! Log likelihood for cfa1: linear form; v.2.1
program define cfa1_lf
args lnf $CFA1theta
* $CFA1theta contains all the names needed:
* $CFA1theta == l_1 ... l_q v_1 ... v_q m_1 ... m_q phi
gettoken lnf allthenames : 0
tempvar lnl
qui g double `lnl' = .
nobreak mata: CFA1_NormalLKHDr( "`allthenames'", "$CFA1vars", "`lnl'")
qui replace `lnf' = `lnl'
end
*! NormalLKHDr: normal likelihood with normal deviates in variables
*! v.2.1 Stas Kolenikov skolenik@gmail.com
cap mata: mata drop CFA1_NormalLKHDr()
mata:
void CFA1_NormalLKHDr(
string parnames, // the parameter names
string varnames, // the variables
string loglkhd // where the stuff is to be returned
) {
// declarations
real matrix data, lnl, parms // views of the data
real matrix lambda, means, vars, phi // parameters
real matrix Sigma, WorkSigma, InvWorkSigma, SS // the covariance matrices and temp matrix
real scalar p, n // dimension, no. obs
// get the data in
st_view(data=., ., tokens(varnames) )
st_view(lnl=., ., tokens(loglkhd) )
st_view(parms=., 1, tokens(parnames) )
n=rows(data)
p=cols(data)
// get the parameters in
lambda= parms[1,1..p]
vars = parms[1,p+1..2*p]
means = parms[1,2*p+1..3*p]
phi = parms[1,3*p+1]
Sigma = lambda'*lambda*phi + diag(vars)
SS = cholesky(Sigma)
InvWorkSigma = solvelower(SS,I(rows(SS)))
InvWorkSigma = solveupper(SS',InvWorkSigma)
ldetWS = 2*ln(dettriangular(SS))
for( i=1; i<=n; i++ ) {
lnl[i,1] = -.5*(data[i,.]-means)*InvWorkSigma*(data[i,.]-means)' - .5*ldetWS - .5*p*ln(2*pi())
}
}
end
exit
History:
v.2.0 March 10, 2006 -- re-written for Stata 9 and Mata
v.2.1 March 10, 2006 -- everything is moved to Mata

View File

@ -0,0 +1,169 @@
*!Data management utility: check for existence of variables in a dataset.
*!Version 1.1 by Amadou B. DIALLO.
*!This version 1.2 . Updated by Amadou B. DIALLO and Jean-Benoit HARDOUIN.
*!Authors: Amadou Bassirou DIALLO (Poverty Division, World Bank) and Jean-Benoit HARDOUIN (Regional Health Observatory of Orl<72>ans).
program checkfor2 , rclass
version 8
syntax anything [if] [in] [, noList Tolerance(real 0) TAble noSUm GENMiss(namelist min=1 max=1) MISsing(string)]
marksample touse
tempname rat
local av
local unav
local manymissings
local avnum
quietly count if `touse'
local tot = r(N)
qui isvar `anything'
local badlist `r(badlist)'
local varlist `r(varlist)'
di _n
if "`table'"!="" {
if "`badlist'"!="" {
di _col(4) in green "{hline 39}"
di _col(4)in green "Unavailable variables: "
foreach i of local badlist {
di _col(4) in ye "`i'"
}
di _col(4) in green "{hline 39}"
di
}
di _col(4) in green "{hline 39}"
display _col(4) in gr "Existing" _col(15) in gr "Rate of"
display _col(4) in gr "Variable" _col(14) "missings" _col(26) "Type" _col(34) "Available"
di _col(4) in green "{hline 39}"
}
tokenize `varlist'
local nbvar : word count `varlist'
forvalues i=1/`nbvar' {
capture assert missing(``i'') if `touse'
local ty: type ``i''
local tty = substr("`ty'", 1, 3)
if !_rc {
if "`table'"=="" {
display in ye "``i''" in gr " is empty in the database." in ye " ``i''" in gr " is not added to the available list."
}
else {
display _col(4) in gr "`=abbrev("``i''",8)'" _col(15) in ye "100.00%" _col(26) "`ty'"
}
local manymissings `manymissings' ``i''
}
else {
if "`table'"=="" {
display in ye "``i''" in gr " exists and is not empty."
}
*Consider type
if "`tty'" == "str" {
qui count if (``i'' == ""|``i''=="`missing'") & `touse'
local num = r(N)
scalar `rat' = (`num'/`tot')*100
}
else {
local avnum `avnum' ``i''
capture confirm number `missing'
if _rc!=0 {
quietly count if ``i'' >= . & `touse'
}
else {
quietly count if (``i'' >= .|``i''==`missing') & `touse'
}
local num = r(N)
scalar `rat' = (`num'/`tot')*100
}
if "`table'"=="" {
display in ye "``i''" in gr " has " in ye r(N) in gr " missings."
display in gr "Ratio number of missings of" in ye " ``i''" in gr " to total number of observations: " in ye %6.2f `rat' "%"
}
if `rat' <= `tolerance' {
local av `av' ``i''
if "`table'"=="" {
display in ye "``i''" in gr " is added to the available list."
}
else {
display _col(4) in gr "`=abbrev("``i''",8)'" in ye _col(15) %6.2f `rat' "%" _col(26) "`ty'" _col(34) "X"
}
}
else {
local manymissings `manymissings' ``i''
if "`table'"=="" {
display in ye "``i''" in gr " has too many missings, compared to the tolerance level."
display in ye "``i''" in gr " is not added to the available list."
}
else {
display _col(4) in gr "`=abbrev("``i''",8)'" _col(15) in ye %6.2f `rat' "%" _col(26) "`ty'"
}
}
}
if "`table'"=="" {
di
}
}
if "`table'"!="" {
di _col(4) in green "{hline 39}"
}
return local available `av'
return local unavailable `badlist'
return local manymissings `manymissings'
if "`avnum'" ~= ""&"`sum'"=="" {
display _newline
display in ye _col(14) "Unweighted summary statistics for available variables:" _n
capture confirm number `missing'
if _rc!=0 {
summarize `avnum' if `touse'
}
else {
foreach i of local avnum {
summarize `i' if `touse'&`i'!=`missing'
}
}
}
if "`list'"== "" {
display _newline
display in ye _d(97) "_"
display _newline
if "`badlist'"~="" {
display in gr "Unavailable variables: " in ye _col(45) "`badlist'" _n
}
if "`av'"~="" {
display in gr "Available variables: " in ye _col(45) "`av'" _n
}
if "`manymissings'"~="" {
display in gr "Available variables but with too missings: " in ye _col(45) "`manymissings'" _n
}
display in ye _d(97) "_"
}
if "`genmiss'" !="" {
capture confirm variable `genmiss'
if _rc!=0 {
qui gen `genmiss' = 0
local nbav : word count `av'
tokenize `av'
forvalues i=1/`nbav' {
local ty: type ``i''
local tty = substr("`ty'", 1, 3)
if "`tty'" == "str" {
qui replace `genmiss'=`genmiss'+1 if ``i''=="."
}
else {
qui replace `genmiss'=`genmiss'+1 if ``i''>=.
}
}
}
else {
di in green "The variable" in ye " `genmiss' " in green "already exists".
}
}
end

View File

@ -0,0 +1,88 @@
{smcl}
{hline}
help for {cmd:checkfor2} {right:Amadou B. DIALLO}
{right:Jean-Benoit HARDOUIN}
{hline}
{title:Module to check whether a variable exists or not in a dataset.}
{p 4 8 2}{cmd:checkfor2} {it:anything} [{cmd:,}
{cmdab:t:olerance}({it:#}) {cmdab:ta:ble} {cmdab:nol:ist} {cmdab:nosu:m}
{cmdab:genm:iss}({it:newvarname}) {cmdab:mis:sing}({it:string})]
{title:Description}
{p 4 4 2}{cmd:checkfor2} is a data management routine to check for existence of variables
within a (usually big) data set.
{p 4 4 2}{cmd:checkfor2} searchs through the data whether each variable exists.
The variables are clustered between unavailable variables, available variables with
a little amount of missing values and available variables with too many missing values.
{p 4 4 2}{cmd:isvar} must be installed ({stata ssc install isvar:ssc install isvar}).
{title:Options}
{p 4 4 2}{it:anything} is composed of variable names or lists of variables,
{p 4 4 2}{cmd:tolerance} is the tolerance level (in percentage) to consider a variable as available, with default 0,
{p 4 4 2}{cmd:nolist} avoids displaying availability status at the end of the process,
{p 4 4 2}{cmd:nosum} avoids displaying summary statistics of available variables,
{p 4 4 2}{cmd:table} displays the results in a table (instead as text),
{p 4 4 2}{cmd:genmiss} creates a new variable containing the number of missing values among the available variables,
{p 4 4 2}{cmd:missing} defines a specific value or string considered as a missing value.
{title:Saved results}
{p 4 4 2} {cmd:r(unavailable)} names of unavailable variables.{p_end}
{p 4 4 2} {cmd:r(available)} names of available variables with a small amount of missing values.{p_end}
{p 4 4 2} {cmd:r(manymissings)} names of variables present but with too missings.{p_end}
{title:Examples}
{p 4 4 2}{cmd:. use mydata, clear }{p_end}
{p 4 4 2}{cmd:. checkfor2 x y z , mis(99) genmiss(countmiss) }{p_end}
{p 4 4 2}{cmd:. su `r(available)' }{p_end}
{p 4 4 2}{cmd:. tab countmiss }{p_end}
{p 4 4 2}{cmd:. u bigdataset in 1/100, clear // Big data set}{p_end}
{p 4 4 2}{cmd:. checkfor2 v1 v2 v3 xx yy , nosum tol(5) tab}{p_end}
{p 4 4 2}{cmd:. use `r(available)' using bigdataset, clear }{p_end}
{title:Remarks}
{p 4 4 2}{cmd:checkfor2} and its primary version ({cmd:checkfor}) have been primarily written for comparable surveys such as the Demography and
Health Surveys (DHS) or the Multiple Indicator Cluster Surveys (MICS). But this could easily applied
to any other survey.
{title:Authors}
{p 4 4 2}Amadou Bassirou DIALLO.
Poverty and Health Division, PREM, The World Bank.{p_end}
{p 4 4 2}Email: {browse "mailto:adiallo5@worldbank.org":adiallo5@worldbank.org}
{p 4 4 2}Jean-Benoit HARDOUIN.
Regional Health Observatory of Orl<72>ans, France.{p_end}
{p 4 4 2}Email: {browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
{title:Aknowledgements}
{p 4 4 2}We would like to thank Christophe Rockmore and also Nick Cox
and Kit Baum for their comments.
{title:Also see}
{p 4 13 2}Online: help for {help checkfor}, {help isvar}, {help nmissing}, {help npresent}, {help missing} and {help dropmiss} if installed.{p_end}

View File

@ -0,0 +1,140 @@
program def choplist, rclass
*! NJC 1.4.0 13 December 2000
* NJC 1.3.0 29 June 2000
* NJC 1.2.0 7 June 2000
* NJC 1.1.0 22 Dec 1999
* NJC 1.0.0 20 Dec 1999 after discussion with Kit Baum
version 6.0
gettoken list 0 : 0, parse(",")
if "`list'" == "" | "`list'" == "," {
di in r "nothing in list"
exit 198
}
syntax , [ Pos(str) Value(str asis) Length(str) Char(int 0) /*
*/ Noisily Global(str) ]
if "`global'" != "" {
tokenize `global'
args global1 global2 global3
if "`global3'" != "" {
di in r "global( ) must contain at most 2 names"
exit 198
}
if (length("`global1'") > 8) | (length("`global2'") > 8) {
di in r "global name must be <=8 characters"
exit 198
}
}
local nopts = /*
*/ ("`pos'" != "") + (`"`value'"' != "") + /*
*/ ("`length'" != "") + (`char' != 0)
if `nopts' != 1 {
di in r "must specify pos( ), value( ), length( ) or char( )"
exit 198
}
* as string <= contains quote
local asstr = index(`"`value'"', `"""')
tokenize `list'
local n : word count `list'
if "`length'" != "" | `char' != 0 {
local i = 1
while `i' <= `n' {
local len = length("``i''")
if `len' > 80 {
di in r "cannot handle word length > 80"
exit 498
}
local i = `i' + 1
}
}
local i = 1
if "`pos'" != "" {
local negsign = index("`pos'", "-")
if `negsign' {
local pos1 = substr("`pos'",1,`negsign' - 1)
local pos2 = substr("`pos'",`negsign', .)
local pos2 = `n' + 1 + `pos2'
local pos "`pos1'`pos2'"
capture confirm integer number `pos'
if _rc == 0 { local pos ">= `pos'" }
}
else {
capture confirm integer number `pos'
if _rc == 0 { local pos "<= `pos'" }
}
while `i' <= `n' {
if `i' `pos' { local list1 "`list1' ``i''" }
else local list2 "`list2' ``i''"
local i = `i' + 1
}
}
else if "`value'" != "" {
capture confirm number `value'
if _rc == 0 { local value "<= `value'" }
if `asstr' {
while `i' <= `n' {
if "``i''" `value' {
local list1 `"`list1' ``i''"'
}
else local list2 `"`list2' ``i''"'
local i = `i' + 1
}
}
else {
while `i' <= `n' {
if ``i'' `value' {
local list1 "`list1' ``i''"
}
else local list2 "`list2' ``i''"
local i = `i' + 1
}
}
}
else if "`length'" != "" {
capture confirm number `length'
if _rc == 0 { local length "<= `length'" }
while `i' <= `n' {
if length("``i''") `length' {
local list1 "`list1' ``i''"
}
else local list2 "`list2' ``i''"
local i = `i' + 1
}
}
else {
if `char' >= 0 {
while `i' <= `n' {
local one = substr("``i''",1,`char')
local two = substr("``i''",`char'+1,.)
local list1 "`list1' `one'"
local list2 "`list2' `two'"
local i = `i' + 1
}
}
else if `char' < 0 {
while `i' <= `n' {
local one = substr("``i''",`char',.)
local ltwo = length("``i''") + `char'
local two = substr("``i''",1,`ltwo')
local list1 "`list1' `one'"
local list2 "`list2' `two'"
local i = `i' + 1
}
}
}
if "`noisily'" != "" {
di in g "list 1: " in y `"`list1'"'
di in g "list 2: " in y `"`list2'"'
}
if "`global1'" != "" { global `global1' `"`list1'"' }
if "`global2'" != "" { global `global2' `"`list2'"' }
return local list1 `"`list1'"'
return local list2 `"`list2'"'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,66 @@
*! version 1.0.4 PR 30sep2005
* Based on private version 6.1.4 of frac_chk, PR 25aug2004
program define cmdchk, sclass
version 7
local cmd `1'
mac shift
local cmds `*'
sret clear
if substr("`cmd'",1,3)=="reg" {
local cmd regress
}
if "`cmds'"=="" {
tokenize clogit cnreg cox ereg fit glm logistic logit poisson probit /*
*/ qreg regress rreg weibull xtgee streg stcox stpm stpmrs /*
*/ ologit oprobit mlogit nbreg
}
else tokenize `cmds'
sret local bad 0
local done 0
while "`1'"!="" & !`done' {
if "`1'"=="`cmd'" {
local done 1
}
mac shift
}
if !`done' {
sret local bad 1
*exit
}
/*
dist=0 (normal), 1 (binomial), 2 (poisson), 3 (cox), 4 (glm),
5 (xtgee), 6 (ereg/weibull), 7 (stcox, streg, stpm, stpmrs).
*/
if "`cmd'"=="logit" | "`cmd'"=="probit" /*
*/ |"`cmd'"=="clogit"| "`cmd'"=="logistic" /*
*/ |"`cmd'"=="mlogit"| "`cmd'"=="ologit" | "`cmd'"=="oprobit" {
sret local dist 1
}
else if "`cmd'"=="poisson" {
sret local dist 2
}
else if "`cmd'"=="cox" {
sret local dist 3
}
else if "`cmd'"=="glm" {
sret local dist 4
}
else if "`cmd'"=="xtgee" {
sret local dist 5
}
else if "`cmd'"=="cnreg" | "`cmd'"=="ereg" | "`cmd'"=="weibull" | "`cmd'"=="nbreg" {
sret local dist 6
}
else if "`cmd'"=="stcox" | "`cmd'"=="streg" | "`cmd'"=="stpm" | "`cmd'"=="stpmrs" {
sret local dist 7
}
else if substr("`cmd'",1,2)=="st" {
sret local dist 7
}
else sret local dist 0
sret local isglm = (`s(dist)'==4)
sret local isqreg = ("`cmd'"=="qreg")
sret local isxtgee= (`s(dist)'==5)
sret local isnorm = ("`cmd'"=="regress"|"`cmd'"=="fit"|"`cmd'"=="rreg")
end

View File

@ -0,0 +1,33 @@
program def collist
*! NJC 1.0.0 22 Dec 1999
version 6
gettoken list 0 : 0, parse(",")
if "`list'" == "" | "`list'" == "," {
di in r "nothing in list"
exit 198
}
syntax [ , Format(str) NUmber ]
tokenize `list'
if "`number'" != "" {
local n : word count `list'
local ndigits = length("`n'")
local number = 0
}
* asstr >0 string format
* asstr 0 numeric format
* asstr -1 default
local asstr = cond("`format'" != "",index("`format'", "s"), -1)
while "`1'" != "" {
if "`number'" != "" {
local number = `number' + 1
local num : di %`ndigits'.0f `number' ". "
}
if `asstr' { di in g "`num'" `format' in y `"`1'"' }
else di in g "`num'" `format' in y `1'
mac shift
}
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,433 @@
*! 3.0.7 9aug2007 TJS & NJC fixed Stata 10 bug by adding missing -version-
* 3.0.6 14may2006 TJS & NJC fixed Stata 9 bug with -by()-
// 3.0.5 22aug2005 Stata 8 - fixed legend(off)
// 3.0.4 27jan2005 TJS & NJC rewrite for Stata 8 + more for graphs
// 3.0.3 6oct2004 TJS & NJC rewrite for Stata 8
// 3.0.2 6oct2004 TJS & NJC rewrite for Stata 8
// 3.0.1 4oct2004 TJS & NJC rewrite for Stata 8 + fixes + noref
// 3.0.0 27jul2004 TJS & NJC rewrite for Stata 8
// 3.0.0 19jul2004 TJS & NJC rewrite for Stata 8
// 2.2.9 14jan2003 TJS & NJC handling of [ ] within connect()
// 2.2.8 2jan2003 TJS & NJC handling of [ ] within symbol()
// 2.2.7 30jan2002 TJS & NJC rho_se corrected (SJ2-2: st0015)
// 2.2.6 10dec2001 TJS & NJC bug fixes (labels, diag line)
// 2.2.5 23apr2001 TJS & NJC sortpreserve
// 2.2.4 24jan2001 TJS & NJC l1title for loa
// 2.2.3 8sep2000 TJS & NJC bug fixes & mods (STB-58: sg84.3)
// 2.2.0 16dec1999 TJS & NJC version 6 changes (STB-54: sg84.2)
// 2.1.6 18jun1998 TJS & NJC STB-45 sg84.1
// 2.0.2 6mar1998 TJS & NJC STB-43 sg84
//
// syntax: concord vary varx [fw] [if] [in] [,
// BY(varname) Summary LEvel(real `c(level)')
// CCC(str) LOA(str) QNORMD(str) ]
program concord, rclass sortpreserve
// Syntax help
if "`1'" == "" {
di "{p}{txt}Syntax is: {inp:concord} " ///
"{it:vary varx} [{it:weight}] " ///
"[{inp:if} {it:exp}] [{inp:in} {it:range}] " ///
"[, {inp:by(}{it:byvar}{inp:)} " ///
"{inp:summary level(}{it:#}{inp:)} " ///
"{inp:ccc}[{inp:(noref} {it:ccc_options}{inp:)}] " ///
"{inp:loa}[{inp:(noref regline} {it:loa_options}{inp:)}] " ///
"{inp:qnormd}[{inp:(}{it:qnormd_options}{inp:)}] {p_end}"
exit 0
}
// Setup
version 8
syntax varlist(numeric min=2 max=2) ///
[fw] ///
[if] [in] ///
[ , BY(varname) ///
Summary ///
LEvel(real `c(level)') ///
ccc(str asis) ///
CCC2 ///
loa(str asis) ///
LOA2 ///
qnormd(str asis) ///
QNORMD2 * ]
marksample touse
qui count if `touse'
if r(N) == 0 error 2000
tokenize `varlist'
// Set temporary names
tempvar d d2 db dll dul m byg kk bylabel
tempname dsd zv k xb yb sx2 sy2 r rp sxy p u sep z zp
tempname t set ll ul llt ult rdm Fdm zt ztp sd1 sd2 sl
// Set up wgt
if "`weight'" != "" local wgt "[`weight'`exp']"
// Generate CI z-value and label from Level()
if `level' < 1 local level = `level' * 100
scalar `zv' = -1 * invnorm((1 - `level' / 100) / 2)
local rl = `level'
local level : di %7.0g `level'
local level = ltrim("`level'")
// Generate BY groups
qui {
bysort `touse' `by' : gen byte `byg' = _n == 1 if `touse'
if "`by'" != "" gen `kk' = _n if `byg' == 1
replace `byg' = sum(`byg')
local byn = `byg'[_N]
// Generate `by' labels -- if required
if "`by'" != "" {
capture decode `by', gen(`bylabel')
if _rc != 0 {
local type : type `by'
gen `type' `bylabel' = `by'
}
}
}
// Print title
di
di as txt "Concordance correlation coefficient (Lin, 1989, 2000):"
// Do calculations
forval j = 1/`byn' { /* start of loop for each `by' group */
di
if "`by'" != "" {
sort `kk'
di as txt "{hline}"
di as txt "-> `by' = " `bylabel'[`j'] _n
local byl : di "`by' = " `bylabel'[`j']
}
// LOA (Bland & Altman) calculations
qui {
gen `d' = `1' - `2'
gen `d2' = `d'^2
su `d' if `byg' == `j' `wgt'
gen `db' = r(mean)
scalar `dsd' = r(sd)
gen `dll' = `db' - `zv' * `dsd'
gen `dul' = `db' + `zv' * `dsd'
gen `m' = (`1' + `2') / 2
}
// Concordance calculations
qui su `1' if `byg' == `j' `wgt'
scalar `k' = r(N)
scalar `yb' = r(mean)
scalar `sy2' = r(Var) * (`k' - 1) / `k'
scalar `sd1' = r(sd)
qui su `2' if `byg' == `j' `wgt'
scalar `xb' = r(mean)
scalar `sx2' = r(Var) * (`k' - 1) / `k'
scalar `sd2' = r(sd)
qui corr `1' `2' if `byg' == `j' `wgt'
scalar `r' = r(rho)
scalar `sl' = sign(`r') * `sd1' / `sd2'
scalar `rp' = min(tprob(r(N) - 2, r(rho) * sqrt(r(N) - 2) ///
/ sqrt(1 - r(rho)^2)) ,1)
scalar `sxy' = `r' * sqrt(`sx2' * `sy2')
scalar `p' = 2 * `sxy' / (`sx2' + `sy2' + (`yb' - `xb')^2)
scalar `u' = (`yb' - `xb') / (`sx2' * `sy2')^.25
// --- variance, test, and CI for asymptotic normal approximation
// scalar `sep' = sqrt(((1 - ((`r')^2)) * (`p')^2 * (1 -
// ((`p')^2)) / (`r')^2 + (4 * (`p')^3 * (1 - `p') * (`u')^2
// / `r') - 2 * (`p')^4 * (`u')^4 / (`r')^2 ) / (`k' - 2))
// Corrected se: per Lin (March 2000) Biometrics 56:325-5.
#delimit ;
scalar `sep' = sqrt(((1 - ((`r')^2)) * (`p')^2 * (1 -
((`p')^2)) / (`r')^2 + (2 * (`p')^3 * (1 - `p') * (`u')^2
/ `r') - .5 * (`p')^4 * (`u')^4 / (`r')^2 ) / (`k' - 2));
#delimit cr
scalar `z' = `p' / `sep'
scalar `zp' = 2 * (1 - normprob(abs(`z')))
scalar `ll' = `p' - `zv' * `sep'
scalar `ul' = `p' + `zv' * `sep'
// --- statistic, variance, test, and CI for inverse hyperbolic
// tangent transform to improve asymptotic normality
scalar `t' = ln((1 + `p') / (1 - `p')) / 2
scalar `set' = `sep' / (1 - ((`p')^2))
scalar `zt' = `t' / `set'
scalar `ztp' = 2 * (1 - normprob(abs(`zt')))
scalar `llt' = `t' - `zv' * `set'
scalar `ult' = `t' + `zv' * `set'
scalar `llt' = (exp(2 * `llt') - 1) / (exp(2 * `llt') + 1)
scalar `ult' = (exp(2 * `ult') - 1) / (exp(2 * `ult') + 1)
// Print output
di as txt " rho_c SE(rho_c) Obs [" _c
if index("`level'",".") {
di as txt %6.1f `level' "% CI ] P CI type"
}
else di as txt " `level'% CI ] P CI type"
di as txt "{hline 63}"
di as res %6.3f `p' %10.3f `sep' %8.0f `k' %10.3f `ll' _c
di as res %7.3f `ul' %9.3f `zp' as txt " asymptotic"
di as res _dup(24) " " %10.3f `llt' %7.3f `ult' %9.3f `ztp' _c
di as txt " z-transform"
di _n as txt "Pearson's r =" as res %7.3f `r' _c
di as txt " Pr(r = 0) =" as res %6.3f `rp' _c
di as txt " C_b = rho_c/r =" as res %7.3f `p' / `r'
di as txt "Reduced major axis: Slope = " as res %9.3f `sl' _c
di as txt " Intercept = " as res %9.3f `yb'-`xb'*`sl'
di _n as txt "Difference = `1' - `2'"
di _n as txt " Difference" _c
if index("`level'", ".") {
di _col(33) as txt %6.1f `level' "% Limits Of Agreement"
}
else di _col(33) as txt " `level'% Limits Of Agreement"
di as txt " Average Std Dev. (Bland & Altman, 1986)"
di as txt "{hline 63}"
di as res %10.3f `db' %12.3f `dsd' _c
di as res " " %11.3f `dll' %11.3f `dul'
qui corr `d' `m' if `byg' == `j' `wgt'
scalar `rdm' = r(rho)
di _n as txt "Correlation between difference and mean =" _c
local fmt = cond(r(rho) < 0, "%7.3f", "%6.3f")
di as res `fmt' r(rho)
su `d2' if `byg' == `j' `wgt', meanonly
local sumd2 = r(sum)
qui reg `d' `m' if `byg' == `j' `wgt'
scalar `Fdm' = ((`sumd2' - e(rss)) / 2) / (e(rss) / e(df_r))
di _n as txt "Bradley-Blackwood F = " ///
as res %4.3f `Fdm' ///
as txt " (P = " %6.5f ///
as res 1 - F(2, e(df_r), `Fdm') ///
as txt ")"
if "`summary'" != "" su `1' `2' if `byg' == `j' `wgt'
// setup local options for passing to graph routines
if "`byl'" != "" local byls byl("`byl'")
if "`level'" != "" local levs level(`level')
// set more if needed
if (`"`loa'`loa2'"' != "" & `"`qnormd'`qnormd2'"' != "") | ///
(`"`loa'`loa2'"' != "" & `"`ccc'`ccc2'"' != "") | ///
(`"`ccc'`ccc2'"' != "" & `"`qnormd'`qnormd2'"' != "") {
local moreflag "more"
}
// loa graph
if `"`loa'`loa2'"' != "" {
gphloa `2' `1' `dll' `db' `dul' `d' `m' `byg' ///
`wgt', j(`j') byn(`byn') `byls' `levs' `loa' `options'
`moreflag'
}
// qnormd graph
if `"`qnormd'`qnormd2'"' != "" {
gphqnormd `2' `1' `d' `byg' ///
`wgt', j(`j') byn(`byn') `byls' `levs' `qnormd' `options'
`moreflag'
}
// ccc graph
if `"`ccc'`ccc2'"' != "" {
local sll = `sl'
local xbl = `xb'
local ybl = `yb'
gphccc `1' `2' `byg' `wgt', j(`j') ///
xb(`xbl') yb(`ybl') sl(`sll') byn(`byn') `byls' `ccc' `options'
}
if `byn' > 1 {
capture drop `d'
capture drop `d2'
capture drop `db'
capture drop `dll'
capture drop `dul'
capture drop `m'
}
} /* end of loop for each `by' group */
// save globals
if `byn' == 1 {
return scalar N = `k'
return scalar rho_c = `p'
return scalar se_rho_c = `sep'
return scalar asym_ll = `ll'
return scalar asym_ul = `ul'
return scalar z_tr_ll = `llt'
return scalar z_tr_ul = `ult'
return scalar C_b = `p' / `r'
return scalar diff = `db'
return scalar sd_diff = `dsd'
return scalar LOA_ll = `dll'
return scalar LOA_ul = `dul'
return scalar rdm = `rdm'
return scalar Fdm = `Fdm'
// double save globals
// now undocumented as of 3.0.0
global S_1 = `k'
global S_2 = `p'
global S_3 = `sep'
global S_4 = `ll'
global S_5 = `ul'
global S_6 = `llt'
global S_7 = `ult'
global S_8 = `p' / `r'
global S_9 = `db'
global S_10 = `dsd'
global S_11 = `dll'
global S_12 = `dul'
}
end
program gphloa
// loa graph
version 8
syntax varlist(numeric min=2) [fw] ///
[ , J(int 1) BYN(int 1) BYL(str) REGline LEvel(real `c(level)') ///
plot(str asis) noREF * ]
tokenize `varlist'
args two one dll db dul d m byg
if "`weight'" != "" local wgt "[`weight'`exp']"
if `"`byl'"' != "" local t2title `"t2title(`byl')"'
local name2 : variable label `2'
local name1 : variable label `1'
local lnth = length(`"`name2'"') + length(`"`name1'"')
if `"`name2'"' == `""' | `lnth' > 50 local name2 "`2'"
if `"`name1'"' == `""' | `lnth' > 50 local name1 "`1'"
qui if "`regline'" != "" {
tempvar fit
regress `d' `m' if `byg' == `j' `wgt'
predict `fit'
}
if "`ref'" == "" {
local ord 2 3
if "`regline'" != "" local ord 2 3 4
local zero yli(0, lstyle(refline)) yscale(range(0)) ylabel(0, add) ///
legend(on order(`ord') label(2 observed average agreement) ///
label(3 `"`level'% limits of agreement"') label(4 regression line)) ///
caption("y=0 is line of perfect average agreement")
}
graph twoway line `dll' `db' `dul' `fit' `m' if `byg' == `j', ///
clcolor(red purple red green) sort ///
|| scatter `d' `m' if `byg' == `j' ///
, ms(oh) `t2title' ///
yti(`"Difference of `name2' and `name1'"') ///
xti(`"Mean of `name2' and `name1'"') ///
caption("`level'% Limits Of Agreement") legend(off) `zero' ///
`options' ///
|| `plot'
if `byn' > 1 more
end
program gphqnormd, sort
// normal prob plot
// note: logic pilfered from qnorm
version 8
syntax varlist(numeric min=2) [fw] ///
[ , J(int 1) BYN(int 1) BYL(str) LEvel(real `c(level)') ///
plot(str asis) * ]
args two one d byg
if "`weight'" != "" local wgt "[`weight'`exp']"
else local exp 1
local name2 : variable label `2'
local name1 : variable label `1'
local lnth = length(`"`name2'"') + length(`"`name1'"')
if `"`name2'"' == `""' | `lnth' > 50 local name2 "`2'"
if `"`name1'"' == `""' | `lnth' > 50 local name1 "`1'"
tempvar Z Psubi touse2
mark `touse2' if `byg' == `j'
qui {
gsort -`touse2' `d'
gen `Psubi' = sum(`touse2' * `exp')
replace `Psubi' = cond(`touse2' == 0, ., `Psubi'/(`Psubi'[_N] + 1))
su `d' if `touse2' == 1 `wgt'
gen float `Z' = invnorm(`Psubi') * r(sd) + r(mean)
label var `Z' "Inverse Normal"
local xttl : var label `Z'
local yttl `"Difference of `name2' and `name1'"'
}
if `"`byl'"' != "" local t2title `"t2title(`byl')"'
graph twoway ///
(scatter `d' `Z', ///
sort ///
ytitle(`"`yttl'"') ///
xtitle(`"`xttl'"') ///
`t2title' ///
`options' ///
) ///
(function y=x, ///
range(`Z') ///
n(2) ///
clstyle(refline) ///
yvarlabel("Reference") ///
yvarformat(`fmt') ///
) ///
, legend(off) ///
|| `plot'
if `byn' > 1 more
end
program gphccc
version 8
//-----------------------------------------------------
// ccc graph
// ----------------------------------------------------
syntax varlist(numeric min=2) [fw] [ , J(int 1) XB(real 0) noREF ///
YB(real 0) SL(real 0) BYN(int 1) BYL(str) plot(str asis) LEGEND(str) * ]
tokenize `varlist'
tempvar byg rmaxis
local byg `3'
if "`weight'" != "" local wgt "[`weight'`exp']"
local yttl : variable label `1'
if `"`yttl'"' == "" local yttl "`1'"
local xttl : variable label `2'
if `"`xttl'"' == "" local xttl "`2'"
if "`ref'" == "" local lopc || function y = x, ra(`2') clstyle(refline) ///
legend(on order(2 "reduced major axis" 3 "line of perfect concordance"))
if "`legend'" != "" {
local legnd "legend(`legend')"
}
// Graph concordance plot
qui gen `rmaxis' = `sl' * (`2' - `xb') + `yb'
graph twoway scatter `1' `rmaxis' `2' ///
if `byg' == `j' `wgt', ///
sort connect(none line) ms(oh none) ///
yti(`"`yttl'"') xti(`"`xttl'"') legend(off) `lopc' ///
`options' ///
|| `plot'
if `byn' > 1 more
end

View File

@ -0,0 +1,183 @@
/* Dialog (Version 3.0.1) by: T. J. Steichen, steichen@triad.rr.com
concord VERSION 3.0.3 6oct2004
syntax: concord vary varx [fw] [if] [in] [,
BY(varname) Summary LEvel(real 95) ccc[(noREF ccc_graph_options)]
loa[(noREF REGline loa_graph_options)] qnormd[(qnormd_graph_options}] ]
Install in User Statistics menu via:
. window menu append item "stUserStatistics" "&concord (Concordance correlation coefficient)" "db concord3"
. window menu refresh
To permanently install, place the commands in your -profile.do- file.
*/
VERSION 8.0
INCLUDE _std_small
INCLUDE header
HELP hlp1, view("help concord")
RESET res1
DIALOG main, label("concord - Concordance correlation coefficient") tabtitle("Main")
BEGIN
TEXT tx_yvar _lft _top 150 ., ///
label("Y Variable:")
VARNAME vn_yvar @ _ss @ ., ///
label("Y Variable")
TEXT tx_xvar 180 _top 150 ., ///
label("X Variable:")
VARNAME vn_xvar @ _ss @ ., ///
label("X Variable")
CHECKBOX cb_by 10 70 50 ., ///
label("By:") ///
onclickon(main.vn_by.enable) ///
onclickoff(main.vn_by.disable)
VARNAME vn_by 60 70 270 ., ///
label("By Variable") ///
option("by")
CHECKBOX cb_summary 10 100 300 ., ///
label("Show Summary") ///
option("summary")
CHECKBOX cb_level 10 130 50 ., ///
label("Level:") ///
onclickon(main.ed_level.enable) ///
onclickoff(main.ed_level.disable)
EDIT ed_level 60 @ 40 ., ///
label("Level") ///
numonly default(global S_level) ///
option("level")
END
INCLUDE ifin
INCLUDE weights_f
DIALOG graph, tabtitle("Graph")
BEGIN
CHECKBOX cb_ccc 20 10 150 ., ///
label("Concordance") ///
onclickon(script ccc_on) ///
onclickoff(script ccc_off) ///
option("ccc")
CHECKBOX cb_ccc_ref 40 30 140 ., ///
label("No CCC Reference Line")
CHECKBOX cb_ccc_opt 40 50 90 ., ///
label("CCC Options:") ///
onclickon(graph.ed_ccc_opt.enable) ///
onclickoff(graph.ed_ccc_opt.disable)
EDIT ed_ccc_opt 130 50 200 ., ///
label("CCC Opts")
CHECKBOX cb_loa 20 70 150 ., ///
label("Limits of Agreement") ///
onclickon(script loa_on) ///
onclickoff(script loa_off) ///
option("loa")
CHECKBOX cb_loa_ref 40 90 140 ., ///
label("No LOA Reference Line")
CHECKBOX cb_loa_reg 40 110 140 ., ///
label("Regression Line")
CHECKBOX cb_loa_opt 40 130 90 ., ///
label("LOA Options:") ///
onclickon(graph.ed_loa_opt.enable) ///
onclickoff(graph.ed_loa_opt.disable)
EDIT ed_loa_opt 130 130 200 ., ///
label("LOA Opts")
CHECKBOX cb_qnormd 20 150 150 ., ///
label("Differences Normal plot") ///
onclickon(graph.cb_qnormd_opt.enable) ///
onclickoff(graph.cb_qnormd_opt.disable) ///
option("qnormd")
CHECKBOX cb_qnormd_opt 40 170 90 ., ///
label("QND Options:") ///
onclickon(graph.ed_qnormd_opt.enable) ///
onclickoff(graph.ed_qnormd_opt.disable)
EDIT ed_qnormd_opt 130 170 200 ., ///
label("QND Opts")
END
SCRIPT ccc_on
BEGIN
graph.cb_ccc_ref.enable
graph.cb_ccc_opt.enable
END
SCRIPT ccc_off
BEGIN
graph.cb_ccc_ref.disable
graph.cb_ccc_opt.disable
END
SCRIPT loa_on
BEGIN
graph.cb_loa_ref.enable
graph.cb_loa_reg.enable
graph.cb_loa_opt.enable
END
SCRIPT loa_off
BEGIN
graph.cb_loa_ref.disable
graph.cb_loa_reg.disable
graph.cb_loa_opt.disable
END
PROGRAM command
BEGIN
put "concord "
varlist main.vn_yvar main.vn_xvar
INCLUDE _weights_pr
INCLUDE _ifin_pr
beginoptions
optionarg main.vn_by
option main.cb_summary
optionarg main.ed_level
if graph.cb_ccc {
put "ccc"
if graph.cb_ccc_ref | graph.cb_ccc_opt {
put "("
}
if graph.cb_ccc_ref {
put "noref "
}
if graph.cb_ccc_opt {
put graph.ed_ccc_opt
}
if graph.cb_ccc_ref | graph.cb_ccc_opt {
put ") "
}
}
if graph.cb_loa {
put "loa"
if graph.cb_loa_ref | graph.cb_loa_opt | graph.cb_loa_reg {
put "("
}
if graph.cb_loa_ref {
put "noref "
}
if graph.cb_loa_reg {
put "regline "
}
if graph.cb_loa_opt {
put graph.ed_loa_opt
}
if graph.cb_loa_ref | graph.cb_loa_opt | graph.cb_loa_reg {
put ") "
}
}
if graph.cb_qnormd {
put "qnormd"
if graph.cb_qnormd_opt {
put "("
put graph.ed_qnormd_opt
put ") "
}
}
endoptions
END

View File

@ -0,0 +1,275 @@
{smcl}
{* 6oct2004/23aug2005/14may2006/9aug2007}
{hline}
{hi:help concord} {right:(SJ7-3: st0015_4)}
{hline}
{title:Title}
{p2colset 5 16 18 2}{...}
{p2col:{hi:concord} {hline 2}}Concordance correlation coefficient and associated measures, tests, and graphs{p_end}
{p2colreset}{...}
{title:Syntax}
{p 8 17 2}
{cmd:concord}
{it:vary} {it:varx}
{ifin}
{weight}
[{cmd:,}
{cmd:by(}{it:byvar}{cmd:)}
{cmdab:s:ummary}
{cmdab:le:vel(}{it:#}{cmd:)}
{cmd:ccc}[{cmd:(noref} {it:ccc_options}{cmd:)}]
{cmd:loa}[{cmd:(noref} {cmdab:reg:line} {it:loa_options}{cmd:)}]
{cmd:qnormd}[{cmd:(}{it:qnormd_options}{cmd:)}]]
{title:Description}
{p 4 4 2}
{cmd:concord} computes Lin's (1989, 2000) concordance correlation coefficient
for agreement on a continuous measure obtained by two persons or methods. (The
measure was introduced earlier by Krippendorff (1970).) The concordance
correlation coefficient combines measures of both precision and accuracy to
determine how far the observed data deviate from the line of perfect
concordance (i.e., the line at 45 degrees on a square scatter plot). Lin's
coefficient increases in value as a function of the nearness of the data's
reduced major axis to the line of perfect concordance (the accuracy of the
data) and of the tightness of the data about its reduced major axis (the
precision of the data). The Pearson correlation coefficient, r, the
bias-correction factor, C_b, and the equation of the reduced major axis are
reported to show these components. Note that the concordance correlation
coefficient, rho_c, can be expressed as the product of r, the measure of
precision, and C_b, the measure of accuracy.
{p 4 4 2}
{cmd:concord} also provides results for Bland and Altman's
limits-of-agreement, "loa", procedure (1986). The loa, a data-scale assessment
of the degree of agreement, is a complementary approach to the
relationship-scale approach of Lin.
{p 4 4 2}
Finally, two other results are reported:
{p 8 8 2}
1. The correlation between difference and mean. In one interpretation
this is a test statistic for a null hypothesis of equal variances given
bivariate normality (Pitman 1939; also see Snedecor and Cochran 1989, 192-193).
Alternatively, it is an exploratory diagnostic.
A value near zero implies concordance.
{p 8 8 2}
2. An F test of equality of means and variances. Note that this too
assumes bivariate normality (Bradley and Blackwood 1989).
See also Hsu (1940) and Reynolds and Gregoire (1991).
Nonsignificance implies concordance.
{p 4 4 2}
The user provides the pairs of measurements for a single property as
observations in variables {it:vary} and {it:varx}. Frequency weights may be
specified and used. Missing values (if any) are deleted in a casewise manner.
{p 4 4 2}
Various associated graphs may be obtained through options.
See below for explanations of the options {cmd:ccc}, {cmd:loa}, and
{cmd:qnormd}.
{title:Options}
{p 4 8 2}
{cmd:by(}{it:byvar}{cmd:)} produces separate results for groups of
observations defined by {it:byvar}.
{p 4 8 2}
{cmd:summary} requests summary statistics.
{p 4 8 2}
{cmd:level(}{it:#}{cmd:)} sets the confidence level % for the CI; default is
{cmd:c(level)}.
{p 4 8 2}
{cmd:ccc} requests a graphical display of the data and the reduced major axis
of the data. The reduced major axis or SD line goes through the intersection of
the means and has slope given by the sign of Pearson's r and the ratio of the
standard deviations. The SD line serves as a summary of the center of the data.
{p 8 8 2}
{cmd:ccc()} suboption {cmd:noref} suppresses the reference line of
perfect concordance, y=x.
{p 8 8 2}
{cmd:ccc()} may also be specified with other options, which should
be options of {helpb scatter}. For example, the scheme may be changed by a
call such as {cmd:ccc(scheme(lean1)}.
{p 4 8 2}
{cmd:loa} requests a graphical display of the loa, the mean difference, and
the data presented as paired differences plotted against pair-wise means.
{p 8 8 2}
{cmd:loa()} suboption {cmd:noref} suppresses the reference line of perfect
average agreement, y=0.
{p 8 8 2}
{cmd:loa()} suboption {cmd:regline} adds a regression line to the loa plot
fitting the paired differences to the pair-wise means.
{p 8 8 2}
{cmd:loa()} may also be specified with other options, which should normally
be options of {helpb scatter}. For example,
the reference line of perfect average agreement was generated as
{cmd:loa(yline(0, lstyle(refline)) yscale(range(0)) ylabel(0, add))}.
{p 4 8 2}
{cmd:qnormd} requests a normal plot of differences.
{p 8 8 2}
{cmd:qnormd()} may also be specified with options, which should
be options of {helpb scatter}. For example,
{cmd:qnormd(title(Normal plot of differences))} adds a title to the graph.
{title:Comments}
{p 4 4 2}
Lin (2000) reported typographical errors in his original 1989 paper
that affected calculation of the standard error of rho_c. These corrections
were included in {cmd:concord} in version 2.2.7 (January 2002) when the erratum
was brought to the attention of the program authors by Dr. Benjamin Littenberg.
We thank Dr. Littenberg.
{p 4 4 2}Kevan Polkinghorne pointed out a problem with {cmd:loa()}.
Mark Marshall pointed out a problem with {cmd:by()} under Stata 9.
{p 4 4 2}
Dunn (2004) contains a bibliography on related work.
Cox (2004) discusses other graphical approaches to this and related
problems. Cox (2006) discusses concordance correlation and
other numerical and graphical methods for assessing agreement
with various scientific examples.
{title:Saved results}
{p 4 4 2}
The following items are returned in {cmd:r()}, if the {cmd:by()} option was
not used:
{p 8 18}{cmd:r(N)}{space 9}number of observations compared{p_end}
{p 8 18}{cmd:r(rho_c)}{space 5}concordance correlation coefficient rho_c{p_end}
{p 8 18}{cmd:r(se_rho_c)}{space 2}standard error of rho_c{p_end}
{p 8 18}{cmd:r(asym_ll)}{space 3}lower CI limit (asymptotic){p_end}
{p 8 18}{cmd:r(asym_ul)}{space 3}upper CI limit (asymptotic){p_end}
{p 8 18}{cmd:r(z_tr_ll)}{space 3}lower CI limit (z-transform){p_end}
{p 8 18}{cmd:r(z_tr_ul)}{space 3}upper CI limit (z-transform){p_end}
{p 8 18}{cmd:r(C_b)}{space 7}bias-correction factor C_b{p_end}
{p 8 18}{cmd:r(diff)}{space 6}mean difference{p_end}
{p 8 18}{cmd:r(sd_diff)}{space 3}standard deviation of mean difference{p_end}
{p 8 18}{cmd:r(LOA_ll)}{space 4}lower loa CI limit{p_end}
{p 8 18}{cmd:r(LOA_ul)}{space 4}upper loa CI limit{p_end}
{p 8 18}{cmd:r(rdm)}{space 7}correlation between difference and mean{p_end}
{p 8 18}{cmd:r(Fdm)}{space 7}F from Bradley-Blackwood test{p_end}
{title:Examples}
{p 4 8 2}
{cmd:. concord rater1 rater2}
{p 4 8 2}
{cmd:. concord rater1 rater2 [fw=freq]}
{p 4 8 2}
{cmd:. concord rater1 rater2, summary ccc}
{p 4 8 2}
{cmd:. concord rater1 rater2, summary ccc(noref)}
{p 4 8 2}
{cmd:. concord rater1 rater2, level(90) by(grp)}
{p 4 8 2}
{cmd:. concord rater1 rater2, loa(regline noref)}
{p 4 8 2}
{cmd:. concord rater1 rater2, qnormd(title(Normal plot))}
{title:Authors}
{p 4 4 2}
Thomas J. Steichen, Winston-Salem, NC, USA, steichen@triad.rr.com
{p 4 4 2}
Nicholas J. Cox, Durham University, UK, n.j.cox@durham.ac.uk
{title:References}
{p 4 8 2}
Bland, J. M., and D. G. Altman. 1986. Statistical methods for assessing
agreement between two methods of clinical measurement. {it:Lancet} I:
307{c -}310.
{p 4 8 2}
Bradley, E. L., and L. G. Blackwood. 1989. Comparing paired data:
a simultaneous test for means and variances. {it:American Statistician}
43: 234{c -}235.
{p 4 8 2}
Cox, N. J. 2004. Graphing agreement and disagreement.
{it:Stata Journal} 4: 329{c -}349.
{p 4 8 2}
------. 2006.
Assessing agreement of measurements and predictions in geomorphology.
{it:Geomorphology} 76: 332{c -}346.
{p 4 8 2}
Dunn, G. 2004.
{it: Statistical Evaluation of Measurement Errors: Design and Analysis of Reliability Studies.}
London: Arnold.
{p 4 8 2}
Hsu, C. T. 1940. On samples from a normal bivariate population.
{it:Annals of Mathematical Statistics} 11: 410{c -}426.
{p 4 8 2}
Krippendorff, K. 1970. Bivariate agreement coefficients for reliability of data.
In Borgatta, E.F. and G.W. Bohrnstedt (eds)
{it:Sociological Methodology}. San Francisco: Jossey-Bass, 139{c -}150.
[a.k.a. {it:Sociological Methodology} 2: 139{c -}150]
{p 4 8 2}
Lin, L. I-K. 1989. A concordance correlation coefficient to evaluate
reproducibility. {it:Biometrics} 45: 255{c -}268.
{p 4 8 2}
------. 2000. A note on the concordance correlation coefficient.
{it:Biometrics} 56: 324{c -}325.
{p 4 8 2}
Pitman, E. J. G. 1939. A note on normal correlation.
{it:Biometrika} 31: 9{c -}12.
{p 4 8 2}
Reynolds, M., and T. G. Gregoire. 1991. Comment on Bradley and Blackwood.
{it:American Statistician} 45: 163{c -}164.
{p 4 8 2}
Snedecor, G. W., and W. G. Cochran. 1989. {it:Statistical Methods.}
Ames, IA: Iowa State University Press.
{title:Also see}
{p 4 13 2}
STB: STB-43 sg84; STB-45 sg84.1; STB-54 sg84.2; STB-58 sg84.3
{psee}
SJ:{space 3}SJ2-2 st0015; SJ4-4 st0015_1; SJ5-3: st0015_2; SJ6-2: st0015_3
{p_end}

1090
Modules/ado/plus/c/confa.ado Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,503 @@
*! Mata definitions for confa package; 3 March 2009; v.2.0
set matalnum on
mata:
mata set matastrict on
mata clear
//////////////////////////////////////////////
// needed by confa.ado
real CONFA_NF( string input ) {
real scalar nopenpar, nclosepar, ncolon;
// opening and closing parentheses
nopenpar = length(tokens(input, ")" ))
nclosepar = length(tokens(input, "(" ))
// n*par will be 2*nf
ncolon = length(tokens(input, ":" ))
// ncolon will be 2*nf + 1
if ( (nopenpar == nclosepar) & (nopenpar == ncolon-1 ) ) {
if (mod(nopenpar,2) == 0) {
return( nopenpar/2 )
}
}
// if everything was OK, should've exited by now
// if something's wrong, return zero
return(0)
}
matrix CONFA_StrucToSigma(real vector parms) {
real scalar CONFA_loglevel, nobsvar, nfactors, eqno;
real matrix Lambda, Phi, Theta, Sigma, CONFA_Struc;
// loglevel
CONFA_loglevel = strtoreal( st_global("CONFA_loglevel"))
CONFA_Struc = st_matrix("CONFA_Struc")
if (CONFA_loglevel>4) CONFA_Struc
if (CONFA_loglevel>4) {
printf("{txt}Current parameter values:\n")
parms
}
// the length should coinicde with the # pars from CONFA_Struc
if ( cols(parms) ~= rows(CONFA_Struc) ) {
// something's wrong, let's just drop out with an empty matrix
if (CONFA_loglevel>4) {
printf("{txt}Expected parameters: {res}%3.0f{txt}; received parameters: {res}%3.0f\n",
rows(CONFA_Struc),cols(parms))
}
return(J(0,0,0))
}
// # observed variables: max entry in the number of means
nobsvar = colmax( select(CONFA_Struc[,3], !(CONFA_Struc[,1]-J(rows(CONFA_Struc),1,1)) ) )
if (CONFA_loglevel>4) printf("{txt}No. of observed variables: {res}%3.0f\n",nobsvar)
// # observed factors: max entry in the phi indices
nfactors = colmax( select(CONFA_Struc[,3], !(CONFA_Struc[,1]-J(rows(CONFA_Struc),1,3)) ) )
if (CONFA_loglevel>4) printf("{txt}No. of latent factors: {res}%3.0f\n",nfactors)
// set up the matrices
Lambda = J(nobsvar,nfactors,0)
Phi = J(nfactors,nfactors,0)
Theta = J(nobsvar,nobsvar,0)
// fill the stuff in
for(eqno=nobsvar+1;eqno<=rows(CONFA_Struc);eqno++) {
if (CONFA_Struc[eqno,1] == 2) {
// a lambda-type entry
Lambda[ CONFA_Struc[eqno,3], CONFA_Struc[eqno,4] ] = parms[eqno]
}
if (CONFA_Struc[eqno,1] == 3) {
// a phi-type entry
Phi[ CONFA_Struc[eqno,3], CONFA_Struc[eqno,4] ] = parms[eqno]
Phi[ CONFA_Struc[eqno,4], CONFA_Struc[eqno,3] ] = parms[eqno]
}
if (CONFA_Struc[eqno,1] == 4) {
// a theta-type entry
Theta[ CONFA_Struc[eqno,3], CONFA_Struc[eqno,3] ] = parms[eqno]
}
if (CONFA_Struc[eqno,1] == 5) {
// a theta-type correlated errors entry
Theta[ CONFA_Struc[eqno,3], CONFA_Struc[eqno, 4] ] = parms[eqno]
Theta[ CONFA_Struc[eqno,4], CONFA_Struc[eqno, 3] ] = parms[eqno]
}
}
if (CONFA_loglevel > 4) {
printf("{txt}Loadings:\n")
Lambda
printf("{txt}Factor covariances:\n")
Phi
printf("{txt}Residual variances:\n")
Theta
}
Sigma = Lambda*Phi*Lambda' + Theta
if (CONFA_loglevel > 4) {
printf("{txt}Implied moments:\n")
Sigma
}
if (CONFA_loglevel == -1) {
// post matrices to Stata
st_matrix("CONFA_Lambda",Lambda)
st_matrix("CONFA_Phi",Phi)
st_matrix("CONFA_Theta",Theta)
st_matrix("CONFA_Sigma",Sigma)
}
// done with model structure, compute and return implied matrix
return( Sigma )
}
// vech covariance matrix, for Satorra-Bentler
void SBvechZZtoB(string dlist, string blist) {
real matrix data, moments, B;
real scalar i;
// view the deviation variables
st_view(data=.,.,tokens(dlist))
// view the moment variables
// blist=st_local("blist")
st_view(moments=.,.,tokens(blist))
// vectorize!
for(i=1; i<=rows(data); i++) {
B = data[i,.]'*data[i,.]
moments[i,.] = vech(B)'
}
}
// duplication matrix, for Satorra-Bentler
void Dupl(scalar p, string Dname) {
real scalar pstar, k;
real matrix Ipstar, D;
pstar = p*(p+1)/2
Ipstar = I(pstar)
D = J(p*p,0,.)
for(k=1;k<=pstar;k++) {
D = (D, vec(invvech(Ipstar[.,k])))
}
st_matrix(Dname,D)
}
// Satorra-Bentler Delta matrix
// Delta = \frac \partial{\partial \theta} vech \Sigma(\theta)
void SBStrucToDelta(string DeltaName) {
real scalar CONFA_loglevel, p, t, varno, facno, i, j, k, fac1, fac2, k1, k2;
// log level, # obs vars, # parameters, current var, current factor, cycle indices, temp indices
real matrix Lambda, Phi, Theta, Sigma, CONFA_Struc, Delta, DeltaRow;
// must be self-explanatory
real matrix U, E;
// identity matrices of the size #factors and #obs vars
// loglevel
CONFA_loglevel = strtoreal( st_global("CONFA_loglevel"))
// need the CONFA matrices
CONFA_Struc = st_matrix("CONFA_Struc")
Sigma = st_matrix("CONFA_Sigma")
Lambda = st_matrix("CONFA_Lambda")
Phi = st_matrix("CONFA_Phi")
// Theta = st_matrix("CONFA_Theta")
if (CONFA_loglevel>4) CONFA_Struc
// # parameters in the model
t = rows(CONFA_Struc)
// cols(Delta) = t = # pars
// rows(Delta) = pstar = p*(p+1)/2 = length( vech( Sigma ) )
// but that should be accumulated one by one...
Delta = J(0,t,.)
// sources of u and e vectors
p = rows( Sigma )
U = I( p )
E = I( rows(Phi) )
for(i=1;i<=p;i++) {
for(j=i;j<=p;j++) {
if (CONFA_loglevel > 4) printf("{txt}Working with pair ({res}%2.0f{txt},{res}%2.0f{txt})\n",i,j)
DeltaRow = J(1,t,0)
// parse Struc matrix and see how each parameter affects Cov(X_i,X_j)
for(k=1;k<=t;k++) {
if (CONFA_Struc[k,1] == 1) {
// a mean-type entry
// for the moment, assume it does not affect anything
}
if (CONFA_Struc[k,1] == 2) {
// a lambda-type entry
// CONFA_Struc[k,.] = (2, equation #, variable #, factor #)
varno = CONFA_Struc[k,3]
facno = CONFA_Struc[k,4]
DeltaRow[1,k] = U[i,.] * U[.,varno] * E[facno,.] * Phi * Lambda' * U[.,j] +
U[i,.] * Lambda * Phi * E[.,facno] * U[varno,.] * U[.,j]
}
if (CONFA_Struc[k,1] == 3) {
// a phi-type entry
// CONFA_Struc[k,.] = (3, equation #, `factor`kk'', `factor`k'')
fac1 = CONFA_Struc[k,3]
fac2 = CONFA_Struc[k,4]
DeltaRow[1,k] = U[i,.] * Lambda * E[.,fac1] * E[fac2,.] * Lambda' * U[.,j]
}
if (CONFA_Struc[k,1] == 4) {
// a theta-type entry
// CONFA_Struc[k,.] = (4, equation #, variable #, 0)
varno = CONFA_Struc[k,3]
DeltaRow[1,k] = (i==j) & (i==varno)
}
if (CONFA_Struc[k,1] == 5) {
// a theta_{jk}-type entry
// CONFA_Struc[k,.] = (5, equation #, variable k1, variable k2)
k1 = CONFA_Struc[k,3]
k2 = CONFA_Struc[k,4]
DeltaRow[1,k] = ((i==k1) & (j==k2) ) | ((i==k2) & (j==k1))
}
}
Delta = Delta \ DeltaRow
}
}
st_matrix(DeltaName,Delta)
}
///////////////////////////////////////////
// needed by confa_p.ado
void CONFA_P_EB(string Fnames, string ObsVarNames, string ToUseName) {
real matrix ff, xx;
// views
real matrix bb, Sigma, Lambda, Theta, Phi;
// substantive matrices
real scalar p
// view on the newly generated factors
st_view(ff=.,.,tokens(Fnames),ToUseName)
// view on the observed variables
st_view(xx=.,.,tokens(ObsVarNames),ToUseName)
// get the estimated matrices
bb = st_matrix("e(b)")
Sigma = st_matrix("e(Sigma)")
Theta = st_matrix("e(Theta)")
Lambda = st_matrix("e(Lambda)")
Phi = st_matrix("e(Phi)")
// # observed vars
p = rows(Sigma)
// prediction
ff[,] = (xx-J(rows(xx),1,1)*bb[1..p]) * invsym(Sigma) * Lambda * Phi
}
void CONFA_P_MLE(string Fnames, string ObsVarNames, string ToUseName) {
real matrix ff, xx;
// views
real matrix bb, Sigma, Lambda, Theta, Phi, ThetaInv;
// substantive matrices
real scalar p
// view on the newly generated factors
st_view(ff=.,.,tokens(Fnames),ToUseName)
// view on the observed variables
st_view(xx=.,.,tokens(ObsVarNames),ToUseName)
// get the estimated matrices
bb = st_matrix("e(b)")
Sigma = st_matrix("e(Sigma)")
Theta = st_matrix("e(Theta)")
Lambda = st_matrix("e(Lambda)")
Phi = st_matrix("e(Phi)")
// # observed vars
p = rows(Sigma)
// Theta is the vector of diagonal elements,
// so the inverse is easy!
ThetaInv = diag( 1:/Theta )
// prediction
ff[,] = (xx-J(rows(xx),1,1)*bb[1..p]) * ThetaInv * Lambda * invsym(Lambda' * ThetaInv * Lambda)
}
//////////////////////////////////
// needed by confa_lf.ado
void CONFA_NormalLKHDr( string ParsName, string lnfname) {
// ParsName are the parameters
// lnfname is the name of the likelihood variable
// the observed variables are in $CONFA_obsvar
real scalar CONFA_loglevel, nobsvar, ldetWS, i;
// log level, # obs vars, log determinant, cycle index
real matrix Sigma, means, SS, InvWorkSigma;
// intermediate computations
string scalar obsvar, touse;
// list of observed variables
real matrix data, lnl, parms;
// views
CONFA_loglevel = strtoreal( st_global("CONFA_loglevel"))
obsvar = st_global("CONFA_obsvar")
nobsvar = length(tokens(obsvar))
touse = st_global("CONFA_touse")
st_view(data=., ., tokens(obsvar), touse )
st_view(lnl=., ., tokens(lnfname), touse)
st_view(parms=., ., tokens(ParsName), touse)
// using the set up where the means are the first nobsvar entries of the parameter vector,
means = parms[1,1..nobsvar]
Sigma = CONFA_StrucToSigma(parms[1,.])
if (CONFA_loglevel > 2) {
parms[1,.]
means
Sigma
}
// do some equilibration??
SS = cholesky(Sigma)
InvWorkSigma = solvelower(SS,I(rows(SS)))
InvWorkSigma = solveupper(SS',InvWorkSigma)
ldetWS = 2*ln(dettriangular(SS))
for( i=1; i<=rows(data); i++ ) {
lnl[i,1] = -.5*(data[i,.]-means)*InvWorkSigma*(data[i,.]-means)' - .5*ldetWS - .5*nobsvar*ln(2*pi())
}
if (CONFA_loglevel>2) {
sum(lnl)
}
}
// normal likelihood with missing data
void CONFA_NormalLKHDrMiss( string ParsName, string lnfname) {
// ParsName are the parameters
// lnfname is the name of the likelihood variable
// the observed variables are in $CONFA_obsvar
real scalar CONFA_loglevel, nobsvar, thisldetWS, i, j;
// log level, # obs vars, log determinant, cycle index
real matrix Sigma, means, thisSigma, thisSS, thisInvSigma, thispattern, parms;
// intermediate computations
string scalar obsvar, misspat, touse;
// list of observed variables; the names of the missing patterns and touse tempvars
real matrix data, lnl, parmview, pattern, mdata, mlnl, info;
// views
CONFA_loglevel = strtoreal( st_global("CONFA_loglevel"))
obsvar = st_global("CONFA_obsvar")
nobsvar = length(tokens(obsvar))
misspat = st_global("CONFA_miss")
touse = st_global("CONFA_touse")
st_view(pattern=., ., misspat, touse )
st_view(data=., ., tokens(obsvar), touse )
st_view(lnl=., ., lnfname, touse )
// STILL USING THE FIRST OBSERVATIONS TO GET THE PARAMETERS!!!
st_view(parmview=., ., tokens(ParsName), touse )
parms = parmview[1,1..cols(parmview)]
if (CONFA_loglevel>2) {
obsvar
parms
}
// using the set up where the means are the first nobsvar entries of the parameter vector,
means = parms[1..nobsvar]
Sigma = CONFA_StrucToSigma(parms)
// utilize an existing set up of the missing data patterns
// data assumed to be sorted by the patterns of missing data
info = panelsetup( pattern, 1 )
for (i=1; i<=rows(info); i++) {
panelsubview(mdata=., data, i, info)
panelsubview(mlnl=., lnl, i, info)
// mdata should contain the portion of the data with the same missing data pattern
// mlnl will be conforming to mdata
// OK, now need to figure out that pattern
thispattern = J(1, cols(data), 1) - colmissing( mdata[1,] )
if (CONFA_loglevel > 2) {
printf("{txt}Pattern #{res}%5.0f{txt} :", i)
thispattern
};
// modify the matrices
thisSigma = select( select( Sigma, thispattern), thispattern' )
thisSS = cholesky(thisSigma)
thisInvSigma = solvelower(thisSS,I(rows(thisSS)))
thisInvSigma = solveupper(thisSS',thisInvSigma)
thisldetWS = 2*ln(dettriangular(thisSS))
if (CONFA_loglevel > 3) {
thisSigma
thisInvSigma
};
for( j=1; j<=rows(mdata); j++ ) {
// this is actually a single line broken by arithmetic operator signs
// that's bad style but it works
mlnl[j,1] = -.5*(select(data[j,.],thispattern)-select(means,thispattern)) *
thisInvSigma *
(select(data[j,.],thispattern)-select(means,thispattern))' -
.5*thisldetWS - .5*sum(thispattern)*ln(2*pi())
}
if (CONFA_loglevel>3) {
mlnl
};
}
}
// Bollen-Stine bootstrap rotation
void CONFA_BSrotate(
string SigmaName, // the parameter matrix name
string varnames // the variable names
) {
// declarations
real matrix data // views of the data
real matrix Sigma, SS, S2, SS2 // the covariance matrices and temp matrices
real matrix means // the means -- need modifications for weighted data!!!
real scalar n // dimension, no. obs
// get the data in
st_view(data=., ., tokens(varnames) )
n=rows(data)
Sigma = st_matrix(SigmaName)
// probability weights!!!
means = colsum(data)/n
SS = (cross(data,data)-n*means'*means)/(n-1)
S2 = cholesky(Sigma)
SS2 = cholesky(SS)
SS2 = solveupper(SS2',I(rows(SS)))
data[,] = data*SS2*S2'
}
// build a library
mata mlib create lconfa, replace
mata mlib add lconfa *()
mata mlib index
end
// of mata
exit
// don't need this:
string scalar CONFA_UL( string input ) {
string rowvector s;
real scalar i,j,n;
// tokenize input into a string vector
s = tokens( input )
n = cols( s )
for(i=1;i<=n;i++) {
// as I go over the elements, compare to the previous ones
for(j=1;j<i;j++) {
if ( s[i] == s[j] ) {
s[i] = ""
continue
}
}
}
// assemble back into a string scalar
return( stritrim(invtokens( s ) ) )
}

View File

@ -0,0 +1,299 @@
{smcl}
{* *! version 1.2.10 15may2007}{...}
{cmd:help confa} {right: ({browse "http://www.stata-journal.com/article.html?article=st0169":SJ9-3: st0169})}
{hline}
{title:Title}
{p2colset 5 14 16 2}{...}
{p2col :{hi:confa} {hline 2}}Confirmatory factor analysis{p_end}
{p2colreset}{...}
{title:Syntax}
{p 8 11 2}
{cmd:confa}
{it:factorspec} [{it:factorspec ...}]
{ifin}
{weight}
[{cmd:,} {it:options}]
{pstd}
{it:factorspec} is{p_end}
{p 8 27}
{cmd:(}{it:factorname}{cmd::} {it:varlist}{cmd:)}{p_end}
{synoptset 28 tabbed}{...}
{synopthdr}
{synoptline}
{syntab:Model}
{synopt :{cmdab:corr:elated(}{it:{help confa##corr:corrspec}} [...]{cmd:)}}correlated measurement errors{p_end}
{synopt :{cmd:unitvar(}{it:factorlist}|{cmd:_all}{cmd:)}}set variance of the factor(s) to 1{p_end}
{synopt :{opt free}}do not impose any constraints by default; seldom used{p_end}
{synopt :{opt constr:aint(numlist)}}user-supplied constraints;
must be used with {cmd:free}{p_end}
{synopt: {cmdab:miss:ing}}full-information maximum-likelihood
estimation with missing data{p_end}
{synopt: {cmdab:usen:ames}}alternative coefficient labeling{p_end}
{syntab:Variance estimation}
{synopt :{opth vce(vcetype)}}{it:vcetype} may be
{opt r:obust}, {opt cl:uster} {it:clustvar}, {cmd:oim}, {cmd:opg}, or
{opt sb:entler}{p_end}
{syntab:Reporting}
{synopt :{opt l:evel(#)}}set confidence level; default is {cmd:level(95)}{p_end}
{syntab:Other}
{synopt :{opt svy}}respect survey settings{p_end}
{synopt :{opth "from(confa##init_specs:init_specs)"}}control the starting values{p_end}
{synopt :{opt loglevel(#)}}specify the details of output; programmers only{p_end}
{synopt :{it:{help confa##maximize:ml_options}}}maximization options{p_end}
{synoptline}
{p2colreset}{...}
{title:Description}
{pstd}{cmd:confa} fits single-level confirmatory factor analysis (CFA) models.
In a CFA model, each of the variables is assumed to
be an indicator of underlying unobserved factor(s) with a linear dependence
between the factors and observed variables:
{center:{it:y_i} = {it:m_i} + {it:l_i1 f_1} + ... + {it:l_iK f_K} + {it:e_i}}
{pstd}where {it:y_i} is the {it:i}th variable
in the {it:varlist}, {it:m_i} is its mean,
{it:l_ik} are the latent variable loading(s),
{it:f_k} are the {it:k}th latent factor(s)
({it:k} = 1,...,{it:K}),
and {it:e_i} is the measurement error.
Thus the specification
{cmd:(}{it:factorname}{cmd::} {it:varlist}{cmd:)}
is interpreted as follows: the latent factor
{it:f_k} is given {it:factorname}
(for display purposes only);
the variables specified in
the {it:varlist} have their loadings, {it:l_ik}, estimated;
and all other observed variables in the model
have fixed loadings, {it:l_ik} = 0.
{pstd}The model is fitted by the maximum likelihood
procedure; see {helpb ml}.
{pstd}As with all latent variable models, a number
of identifying assumptions need to be made about
the latent variables {it:f_k}. They are assumed
to have mean zero, and their scales are determined
by the first variable in the {it:varlist}
(i.e., {it:l_1k} is set to equal 1 for all {it:k}).
Alternatively, identification can be achieved by setting the
variance of the latent variable to 1 (with option
{cmd:unitvar()}). More sophisticated identification
conditions can be achieved by specifying the
{cmd:free} option and then providing the necessary
constraints in the {cmd:constraint()} option.
{title:Options}
{dlgtab:Model}
{marker corr}{...}
{phang}{cmd:correlated(}{it:corrspec} [{it:corrspec} ...]{cmd:)}
specifies the correlated measurement errors {it:e_i} and {it:e_j}.
Here {it:corrspec} is of the form{p_end}
{pmore} [{cmd:(}]{it:varname_k}{cmd::}{it:varname_j}[{cmd:)}]{p_end}
{pmore}where {it:varname_k} and {it:varname_j} are some of
the observed variables in the model; that is, they must appear in at
least one
{it:factorspec} statement. If there
is only one correlation specified, the optional parentheses
shown above may be omitted. There should be no space between the colon and
{it:varname_j}.
{phang}{cmd:unitvar(}{it:factorlist}|{cmd:_all)} specifies the factors
(from those named in {it:factorspec}) that will be identified by setting
their variances to 1. The keyword {cmd:_all} can be used to specify that all
the factors have their variances set to 1 (and hence the matrix Phi can be
interpreted as a correlation matrix).
{phang}{cmd:free} frees up all the parameters in the model (making it
underidentified). It is then the user's responsibility to provide
identification constraints and adjust the degrees of freedom of the tests.
This option is seldom used.
{phang}{cmd:constraint(}{it:numlist}{cmd:)} can be used to supply additional
constraints. There are no checks implemented for
redundant or conflicting constraints, so in some rare cases, the degrees of
freedom may be incorrect. It might be wise to run the model with the
{cmd:free} and {cmd:iterate(0)} options and then look at the names in the output of
{cmd:matrix list e(b)} to find out the specific names of the parameters.
{phang}{cmd:missing} requests full-information maximum-likelihood estimation
with missing data. By default, estimation proceeds by listwise deletion.
{phang}{cmd:usenames} requests that the parameters be labeled with the names
of the variables and factors rather than with numeric values (indices of the
corresponding matrices). It is a technical detail that does not affect the
estimation procedure in any way, but it is helpful when working with several
models simultaneously, tabulating the estimation results, and transferring the
starting values between models.
{dlgtab:Variance estimation}
{phang}{cmd:vce(}{it:vcetype}{cmd:)} specifies different estimators of the
variance-covariance matrix. Common estimators ({cmd:vce(oim)},
observed information matrix, the default; {cmd:vce(robust)}, sandwich
information matrix; {cmd:vce(cluster }{it:clustvar}{cmd:)}, clustered
sandwich estimator with clustering on {it:clustvar}) are supported, along with
their aliases (the {cmd:robust} and {cmd:cluster(}{it:clustvar}{cmd:)}
options). See {manhelpi vce_option R}.
{pmore}An additional estimator specific to structural equation modeling is the Satorra-Bentler
estimator (Satorra and Bentler 1994). It is requested by
{cmd:vce(}{cmdab:sben:tler}{cmd:)} or {cmd:vce(}{cmdab:sat:orrabentler}{cmd:)}. When
this option is specified, additional Satorra-Bentler scaled and adjusted
goodness-of-fit statistics are computed and presented in the output.
{dlgtab:Reporting}
{phang}{cmd:level(}{it:#}{cmd:)} changes the confidence level for
confidence-interval reporting. See
{helpb estimation_options##level():[R] estimation options}.
{dlgtab:Other}
{phang}{cmd:svy} instructs {cmd:confa} to respect the complex
survey design, if one is specified. See {manhelp svyset SVY}.
{marker init_specs}{...}
{phang}{cmd:from(}{cmd:ones}|{cmd:2sls}|{cmd:ivreg}|{cmd:smart}|{it:ml_init_args}{cmd:)} provides the choice of starting values for the maximization
procedure. The {cmd:ml} command's internal default is to set all parameters to zero,
which leads to a noninvertible matrix, Sigma, and {cmd:ml} has to make many changes to those initial values to find anything feasible. Moreover, this
initial search procedure sometimes leads to a domain where the likelihood is
nonconcave, and optimization might fail there.
{phang2}{cmd:ones} sets all the parameters to values of one except
for covariance parameters (off-diagonal values of the Phi and Theta matrices),
which are set to 0.5. This might be a reasonable choice for data with
variances of observed variables close to 1 and positive covariances (no
inverted scales).
{phang2} {cmd:2sls} or {cmd:ivreg} requests that the initial parameters for the
freely estimated loadings be set to the two-stage least-squares
instrumental-variable estimates of Bollen (1996). This requires the model to
be identified by scaling indicators (i.e., setting one of the loadings to 1)
and to have at least three indicators for each latent variable. The instruments
used are all other indicators of the same factor. No checks for their validity
or search for other instruments is performed.
{phang2} {cmd:smart} provides an alternative set of starting values that
is often reasonable (e.g., assuming that the reliability of observed
variables is 0.5).
{pmore}Other specification of starting values, {it:ml_init_args}, should follow the format of
{cmd:ml init}. Those typically include the list of starting values of the form
{cmd:from(}{it:# #} ... {it:#}{cmd:, copy)} or a matrix of starting values
{cmd:from(}{it:matname}{cmd:,} [{cmd:copy}|{cmd:skip}]{cmd:)}. See {manhelp ml R}.
{phang}{cmd:loglevel(}{it:#}{cmd:)} specifies the details of output about
different stages of model setup and estimation,
and is likely of interest only to programmers. Higher numbers
imply more output.
{marker maximize}{...}
{phang}For other options, see {helpb maximize}.
{title:Remarks}
{pstd}{cmd:confa} relies on {cmd:listutil} for some parsing tasks. If
{cmd:listutil} is not installed in your Stata, {cmd:confa} will try to install
it from the SSC ({cmd:ssc install listutil}). If the installation is
unsuccessful, {cmd:confa} will issue an error message and stop.
{pstd}In large models, {cmd:confa} may be restricted by Stata's
{help limits:limit} of 244 characters in the string expression. You might
want to {helpb rename} your variables and give them shorter names.
{title:Examples}
{pstd}Holzinger-Swineford data{p_end}
{phang2}{cmd:. use hs-cfa}
{pstd}Basic model with different starting values{p_end}
{phang2}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(ones)}{p_end}
{phang2}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv)}{p_end}
{phang2}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(smart)}
{pstd}Robust and Satorra-Bentler standard errors{p_end}
{phang2}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) vce(sbentler)}{p_end}
{phang2}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) robust}
{pstd}Correlated measurement errors{p_end}
{phang2}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) corr( x7:x8 )}
{pstd}An alternative identification{p_end}
{phang2}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(ones) unitvar(_all) corr(x7:x8)}
{pstd}Missing data{p_end}
{phang2}{cmd:. forvalues k=1/9 {c -(}}{p_end}
{phang2}{cmd:. gen y`k' = cond( uniform()<0.0`k', ., x`k')}{p_end}
{phang2}{cmd:. {c )-}}{p_end}
{phang2}{cmd:. confa (vis: y1 y2 y3) (text: y4 y5 y6) (math: y7 y8 y9), from(iv)}{p_end}
{phang2}{cmd:. confa (vis: y1 y2 y3) (text: y4 y5 y6) (math: y7 y8 y9), from(iv) missing difficult}{p_end}
{title:Saved results}
{pstd}Aside from the standard {help estcom:estimation results}, {cmd:confa}
also performs the overall goodness-of-fit test with results
saved in {cmd:e(lr_u)}, {cmd:e(df_u)}, and {cmd:e(p_u)}
for the test statistic, its goodness of fit, and the resulting
p-value. A test versus the model with the independent data
is provided with the {helpb ereturn} results with the {cmd:indep}
suffix. Here, under the null hypothesis,
the covariance matrix is assumed diagonal.
{pstd}When {cmd:sbentler} is specified, Satorra-Bentler standard errors are
computed and posted as {cmd:e(V)}, with intermediate matrices saved in
{cmd:e(SBU)}, {cmd:e(SBV)}, {cmd:e(SBGamma)}, and {cmd:e(SBDelta)}. Also,
several corrected overall fit test statistics is reported and saved: T scaled
({cmd:ereturn} results with the {cmd:Tsc} suffix) and T adjusted ({cmd:ereturn}
results with the {cmd:Tadj} suffix). Scalars {cmd:e(SBc)} and {cmd:e(SBd)} are
the scaling constants, with the latter also being the approximate degrees of
freedom of the chi-squared test from Satorra and Bentler (1994), and T double
bar from Yuan and Bentler (1997) (with the {cmd:T2} suffix).
{title:References}
{phang}Bollen, K. A. 1996. An alternative two stage least squares (2SLS)
estimator for latent variable equations. {it:Psychometrika} 61: 109-121.
{phang}Satorra, A., and P. M. Bentler. 1994. Corrections to test statistics and standard errors in covariance structure analysis. In {it:Latent Variables Analysis}, ed. A. von Eye and C. C. Clogg, 399-419. Thousand Oaks, CA: Sage.
{phang} Yuan, K.-H., and P. M. Bentler. 1997. Mean and covariance structure analysis: Theoretical and practical improvements.
{it:Journal of the American Statistical Association} 92: 767-774.
{title:Author}
{pstd}Stanislav Kolenikov{p_end}
{pstd}Department of Statistics{p_end}
{pstd}University of Missouri{p_end}
{pstd}Columbia, MO{p_end}
{pstd}kolenikovs@missouri.edu{p_end}
{title:Also see}
{psee}
Article: {it:Stata Journal}, volume 9, number 3: {browse "http://www.stata-journal.com/article.html?article=st0169":st0169}
{psee}Online: {helpb factor}, {helpb bollenstine}, {helpb confa_estat:confa postestimation} (if installed)
{p_end}

View File

@ -0,0 +1,222 @@
*! version 2.0.2 08 Sep 2009; part of confa suite
program confa_estat, rclass
version 10
if "`e(cmd)'" != "confa" {
error 301
}
gettoken subcmd rest : 0, parse(" ,")
local lsubcmd= length("`subcmd'")
if `"`subcmd'"' == substr("fitindices", 1, max(3, `lsubcmd')) {
FitIndices `rest'
}
else if `"`subcmd'"' == substr("correlate", 1, max(4, `lsubcmd')) {
Correlate `rest'
}
else if `"`subcmd'"' == substr("aic",1, max(3, `lsubcmd')) {
FitIndices , aic
}
else if `"`subcmd'"' == substr("bic",1, max(3, `lsubcmd')) {
FitIndices , bic
}
else if `"`subcmd'"' == substr("ic",1,max(2, `lsubcmd')) {
FitIndices, aic bic
}
else if `"`subcmd'"' == substr("summarize", 1, max(2, `lsubcmd')) {
Summarize `rest'
}
else if `"`subcmd'"' == "vce" {
vce `rest'
}
else {
di as err "`subcmd' not allowed"
exit 198
}
return add
end
program Summarize
syntax , [noHEAder noWEIghts]
if "`e(wexp)'" ~= "" & "`weights'"~="noweights" local wgt [iw `e(wexp)']
sum `e(observed)' `wgt' if e(sample)
end
program Correlate, rclass
* correlation matrix of estimated factors
syntax, [level(passthru) bound]
di _n "{txt}Correlation equivalents of covariances"
if "`bound'" != "" local bound ci(atanh)
local q = rowsof( e(Phi) )
if `q'>1 {
* display the factor correlations
di as text "{hline 13}{c TT}{hline 64}"
if "`e(vcetype)'" ~= "" {
di as text " {c |} {center 15:`e(vcetype)'}"
}
di as text " {c |} Coef. Std. Err. z P>|z| [$S_level% Conf. Interval]"
di as text "{hline 13}{c +}{hline 64}"
* parse the factor names
local fnames : rownames e(Phi)
* parse the unitvar list to be used in -inlist-
local unitvarlist = `"""' + subinstr("`e(unitvar)'"," ",`"",""',.) + `"""'
_diparm __lab__ , label("Factors") eqlabel
forvalues i=1/`q' {
local i1 = `i'+1
forvalues j=`i1'/`q' {
if inlist("`: word `i' of `fnames''", `unitvarlist') & inlist("`: word `j' of `fnames''", `unitvarlist') {
* both factor variances are constrained at 1, display as is
_diparm phi_`i'_`j' , prob label("`: word `i' of `fnames''-`: word `j' of `fnames''") `level' `bound'
}
else if inlist("`: word `i' of `fnames''", `unitvarlist') & !inlist("`: word `j' of `fnames''", `unitvarlist') {
* `i' is restricted unit variance, `j' is not
_diparm phi_`i'_`j' phi_`j'_`j', ///
function( @1/sqrt(@2) ) d( 1/sqrt(@2) -0.5*@1/sqrt(@2*@2*@2) ) ///
prob label("`: word `i' of `fnames''-`: word `j' of `fnames''") `level' `bound'
}
else if !inlist("`: word `i' of `fnames''", `unitvarlist') & inlist("`: word `j' of `fnames''", `unitvarlist') {
* `j' is restricted unit variance, `i' is not
_diparm phi_`i'_`j' phi_`i'_`i', ///
function( @1/sqrt(@2) ) d( 1/sqrt(@2) -0.5*@1/sqrt(@2*@2*@2) ) ///
prob label("`: word `i' of `fnames''-`: word `j' of `fnames''") `level' `bound'
}
else {
* display correlation transform
_diparm phi_`i'_`j' phi_`i'_`i' phi_`j'_`j', ///
function( @1/sqrt(@2*@3) ) d( 1/sqrt(@2*@3) -0.5*@1/sqrt(@2*@2*@2*@3) -0.5*@1/sqrt(@2*@3*@3*@3) ) ///
prob label("`: word `i' of `fnames''-`: word `j' of `fnames''") `level' `bound'
}
}
}
}
if "`e(correlated)'" ~= "" {
if `q' < 2 {
* need to display the header
di as text "{hline 13}{c TT}{hline 64}"
if "`e(vcetype)'" ~= "" {
di as text " {c |} {center 15:`e(vcetype)'}"
}
di as text " {c |} Coef. Std. Err. z P>|z| [$S_level% Conf. Interval]"
di as text "{hline 13}{c +}{hline 64}"
}
* print out correlated measurement errors
_diparm __lab__ , label("Errors") eqlabel
local correlated `e(correlated)'
local obsvar `e(observed)'
while "`correlated'" != "" {
gettoken corrpair correlated : correlated , match(m)
gettoken corr1 corrpair : corrpair, parse(":")
unab corr1 : `corr1'
gettoken sc corr2 : corrpair, parse(":")
unab corr2 : `corr2'
poslist `obsvar' \ `corr1', global(CFA_temp)
local k1 = $CFA_temp
poslist `obsvar' \ `corr2', global(CFA_temp)
local k2 = $CFA_temp
_diparm theta_`k1'_`k2' theta_`k1' theta_`k2', ///
function( @1/sqrt(@2*@3) ) d( 1/sqrt(@2*@3) -0.5*@1/sqrt(@2*@2*@2*@3) -0.5*@1/sqrt(@2*@3*@3*@3) ) ///
prob label("`corr1'-`corr2'") `level' `bound'
}
}
else if `q'<2 {
di as text _n "Nothing to display" _n
}
di as text "{hline 13}{c BT}{hline 64}"
global CFA_temp
end
program FitIndices, rclass
syntax , [all tli rmsea rmsr aic bic]
di _n "{txt} Fit indices" _n
return add
* all, by default
if "`*'" == "" local all 1
* the fundamentals
local p = `: word count `e(obsvar)''
if "`rmsea'`all'"~="" {
return scalar RMSEA = sqrt( max( (e(lr_u)-e(df_u))/(e(N)-1), 0 )/e(df_u) )
tempname ll lu
scalar `ll' = cond(chi2(e(df_u),e(lr_u))>0.95,npnchi2(e(df_u),e(lr_u),0.95),0)
scalar `lu' = cond(chi2(e(df_u),e(lr_u))>0.05,npnchi2(e(df_u),e(lr_u),0.05),0)
return scalar RMSEA05 = sqrt( `ll'/( (e(N)-1)*e(df_u) ) )
return scalar RMSEA95 = sqrt( `lu'/( (e(N)-1)*e(df_u) ) )
di "{txt}RMSEA {col 8}= {res}" %6.4f return(RMSEA) _c
di "{txt}, 90% CI{col 8}= ({res}" %6.4f return(RMSEA05) "{txt}, {res}" %6.4f return(RMSEA95) "{txt})"
}
if "`rmsr'`all'"~="" {
cap mat li e(S)
if _rc {
* no matrix posted
return scalar RMSR = .
}
else {
tempname res
mata : st_numscalar("`res'",norm(vech(st_matrix("e(Sigma)") - st_matrix("e(S)"))) )
return scalar RMSR = `res'/sqrt(e(pstar) )
}
di "{txt}RMSR {col 8}= {res}" %6.4f return(RMSR)
}
if "`tli'`all'"~="" {
return scalar TLI = (e(lr_indep)/e(df_indep)-e(lr_u)/e(df_u))/(e(lr_indep)/e(df_indep)-1)
di "{txt}TLI {col 8}= {res}" %6.4f return(TLI)
}
if "`cfi'`all'"~="" {
return scalar CFI = 1 - max( e(lr_u)-e(df_u),0 )/max( e(lr_u)-e(df_u), e(lr_indep)-e(df_indep),0 )
di "{txt}CFI {col 8}= {res}" %6.4f return(CFI)
}
if "`aic'`all'"~="" {
if "`e(wexp)'" == "" & "`e(vcetype)'"~="Robust" return scalar AIC = -2*e(ll) + 2*e(df_m)
else return scalar AIC = .
di "{txt}AIC {col 8}= {res}" %8.3f return(AIC)
}
if "`bic'`all'"~="" {
if "`e(wexp)'" == "" & "`e(vcetype)'"~="Robust" return scalar BIC = -2*e(ll) + e(df_m)*ln( e(N) )
else return scalar BIC = .
di "{txt}BIC {col 8}= {res}" %8.3f return(BIC)
}
end
exit
Version history:
1.0.0 9 Jan 2008 -- Correlate
FitIndices
1.0.1 12 Sep 2008 -- AIC, BIC
1.0.2 Oct 2008 -- bound for Correlate CI
correlated measurement errors
CI for RMSEA

View File

@ -0,0 +1,205 @@
{smcl}
{* *! version 1.1.13 04jun2007}{...}
{cmd:help confa postestimation}{right: ({browse "http://www.stata-journal.com/article.html?article=st0169":SJ9-3: st0169})}
{hline}
{title:Title}
{p2colset 5 29 31 2}{...}
{p2col :{hi:confa postestimation} {hline 2}}Postestimation tools for confa{p_end}
{p2colreset}{...}
{title:Description}
{pstd}The following commands are available after {helpb confa}:{p_end}
{synoptset 17}{...}
{p2coldent :command}description{p_end}
{synoptline}
{synopt :{helpb confa_estat##fit:estat fitindices}}fit indices{p_end}
{synopt :{helpb confa_estat##ic:estat aic}}AIC{p_end}
{synopt :{helpb confa_estat##ic:estat bic}}BIC{p_end}
{synopt :{helpb confa_estat##corr:estat correlate}}correlations of factors and measurement errors{p_end}
{synopt :{helpb confa_estat##predict:predict}}factor scores{p_end}
{synopt :{helpb bollenstine}}Bollen-Stine bootstrap{p_end}
{synoptline}
{p2colreset}{...}
{marker fit}{...}
{title:The estat fitindices command}
{title:Syntax}
{p 8 15 2}
{cmd:estat} {cmdab:fit:indices}
[{cmd:,} {it:options}]
{p2colset 9 27 29 2}{...}
{p2col:{it:options}}fit index{p_end}
{p2line}
{p2col :{opt aic}}Akaike information criterion{p_end}
{p2col :{opt bic}}Schwarz Bayesian information criterion{p_end}
{p2col :{opt cfi}}comparative fit index{p_end}
{p2col :{opt rmsea}}root mean squared error of approximation{p_end}
{p2col :{opt rmsr}}root mean squared residual{p_end}
{p2col :{opt tli}}Tucker-Lewis index{p_end}
{p2col :{opt _all}}all the above indices, the default{p_end}
{p2line}
{p2colreset}{...}
{title:Description}
{pmore}{opt estat }{cmd:fitindices} computes, prints, and saves into
{cmd:r()} results several traditional fit indices.
{title:Options}
{phang2}
{opt aic} requests the Akaike information criterion (AIC).
{phang2}
{opt bic} requests the Schwarz Bayesian information criterion (BIC).
{phang2}
{opt cfi} requests the CFI (Bentler 1990b).
{phang2}
{opt rmsea} requests the RMSEA (Browne and Cudeck 1993).
{phang2}
{opt rmsr} requests the RMSR.
{phang2}
{opt tli} requests the TLI (Tucker and Lewis 1973).
{phang2}
{opt _all} requests all the above indices. This is the default
behavior if no option is specified.
{marker ic}{...}
{title:The estat aic and estat bic commands}
{title:Syntax}
{p 8 15 2}
{cmd:estat} {cmd:aic}
{p 8 15 2}
{cmd:estat} {cmd:aic}
{title:Description}
{pmore}{cmd:estat aic} and {cmd:estat bic} compute the Akaike and Schwarz
Bayesian information criteria, respectively.
{title:The estat correlate command}
{title:Syntax}
{p 8 15 2}
{cmd:estat} {cmdab:corr:elate}
[{cmd:,}
{opt level(#)}
{opt bound}]
{title:Description}
{marker corr}{...}
{pmore}{opt estat} {cmd:correlate} transforms the covariance parameters into
correlations for factor covariances and measurement-error covariances. The
delta method standard errors are given; for correlations close to plus or
minus 1, the confidence intervals may extend beyond the range of admissible
values.{p_end}
{title:Options}
{phang2}{opt level(#)} changes the confidence level for confidence-interval
reporting.{p_end}
{phang2}{cmd:bound} provides an alternative confidence interval based on
Fisher's z transform of the correlation coefficient. It guarantees
that the end points of the interval are in the (-1,1) range, provided the
estimate itself is in this range.
{marker predict}{...}
{title:The predict command}
{title:Syntax}
{p 8 19 2}
{cmd:predict} {dtype} {it:{help newvarlist}} {ifin} [{cmd:,} {it:scoring_method}]
{p2colset 9 27 29 2}{...}
{p2col:{it:scoring_method}}factor scoring method{p_end}
{p2line}
{p2col:{cmdab:reg:ression}}regression, or empirical Bayes, score{p_end}
{p2col:{cmdab:emp:iricalbayes}}alias for {cmd:regression}{p_end}
{p2col:{cmdab:eb:ayes}}alias for {cmd:regression}{p_end}
{p2col:{opt mle}}MLE, or Bartlett score{p_end}
{p2col:{cmdab:bart:lett}}alias for {cmd:mle}{p_end}
{p2line}
{p2colreset}{...}
{title:Description}
{pmore} {cmd:predict} can be used to create factor scores following {cmd:confa}.
The number of variables in {it:newvarlist} must be the same as the number of
factors in the model specification; all factors are predicted at once by the
relevant matrix formula.
{title:Options}
{phang2}
{opt regression}, {opt empiricalbayes}, or {opt ebayes}
requests regression, or empirical Bayes, factor scoring procedure.
{phang2}
{opt mle} or {opt bartlett} requests Bartlett scoring procedure.
{title:Example}
{phang}{cmd:. use hs-cfa}{p_end}
{phang}{cmd:. confa (vis: x1 x2 x3) (text: x4 x5 x6) (math: x7 x8 x9), from(iv) corr(x7:x8)}{p_end}
{phang}{cmd:. estat fit}{p_end}
{phang}{cmd:. estat corr}{p_end}
{phang}{cmd:. estat corr, bound}{p_end}
{phang}{cmd:. predict fa1-fa3, reg}{p_end}
{phang}{cmd:. predict fb1-fb3, bart}{p_end}
{title:References}
{phang}
Bentler, P. M. 1990. Comparative fit indexes in structural models.
{it:Psychological Bulletin} 107: 238-246.
{phang}
Browne, M. W., and R. Cudeck. 1993. Alternative ways of assessing model fit.
In {it:Testing Structural Equation Models}, ed. K. A. Bollen and J. S. Long,
136-162. Newbury Park, CA: Sage.
{phang}
Tucker, L. R., and C. Lewis. 1973. A reliability coefficient for maximum likelihood factor analysis. {it:Psychometrika} 38: 1-10.
{title:Author}
{pstd}Stanislav Kolenikov{p_end}
{pstd}Department of Statistics{p_end}
{pstd}University of Missouri{p_end}
{pstd}Columbia, MO{p_end}
{pstd}kolenikovs@missouri.edu{p_end}
{title:Also see}
{psee}
Article: {it:Stata Journal}, volume 9, number 3: {browse "http://www.stata-journal.com/article.html?article=st0169":st0169}
{psee}Online: {helpb confa}, {helpb bollenstine}{p_end}

View File

@ -0,0 +1,20 @@
*! Log likelihood for confa: linear form; part of confa suite; 16 Oct 2008
program define confa_lf
args lnf $CONFA_args
* $CONFA_args contains the names of the equations, but
* we need the variable names
gettoken lnf allthenames : 0
tempvar lnl
qui g double `lnl' = .
mata: CONFA_NormalLKHDr( "`allthenames'", "`lnl'")
qui replace `lnf' = `lnl'
if $CONFA_loglevel > 3 li `lnl'
end
exit

View File

@ -0,0 +1,20 @@
*! Log likelihood for confa: linear form; part of confa suite; 23 Apr 2009
program define confa_lfm
args lnf $CONFA_args
* $CONFA_args contains the names of the equations, but
* we need the variable names
gettoken lnf allthenames : 0
tempvar lnl
qui g double `lnl' = .
mata: CONFA_NormalLKHDrMiss( "`allthenames'", "`lnl'")
qui replace `lnf' = `lnl'
if $CONFA_loglevel > 3 li `lnl'
end
exit

View File

@ -0,0 +1,89 @@
*! v.1.0 -- prediction commands for confa suite; 16 Oct 2008
program define confa_p
version 10
* set trace on
* di as inp "`0'"
syntax anything [if] [in], [EBayes EMPiricalbayes REGression MLE BARTlett SCores]
* fork to equation scores or factor scores
if "`scores'"!="" EqScores `0'
else if "`ebayes'`empiricalbayes'`regression'`mle'`bartlett'" != "" FScores `0'
else {
di as err "cannot figure those options out"
exit 198
}
end
program define EqScores
* implicitly used by _robust and svy
* typical request: predict stub*, scores
* stub* is not parsed well by newvarlist, have to use anything
syntax anything [if] [in], scores
marksample touse, novarlist
ml score `anything' if `touse'
end
program define FScores
* user requested factor predictions
syntax newvarlist [if] [in], [EBayes EMPiricalbayes REGression MLE BARTlett]
marksample touse, novarlist
if "`ebayes'`empiricalbayes'`regression'`mle'`bartlett'" == "" | ///
( ("`ebayes'`empiricalbayes'`regression'"~="" ) & ("`mle'`bartlett'"~="" ) ) {
di as err "One and only one factor scoring option must be specified"
exit 198
}
else {
local nfactors = rowsof( e(Phi) )
if "`: word count `varlist''" ~= "`nfactors'" {
di as err "Must specify as many new variables as there were factors in confa model"
exit 198
}
* generate new variables
forvalues k=1/`nfactors' {
tempname f`k'
qui gen double `f`k'' = .
local flist `flist' `f`k''
}
if "`ebayes'`empiricalbayes'`regression`" ~= "" {
* Empirical Bayes:
mata : CONFA_P_EB("`flist'", "`e(observed)'", "`touse'")
}
if "`mle'`bartlett'" ~= "" {
* MLE/Bartlett scoring:
mata : CONFA_P_MLE("`flist'", "`e(observed)'", "`touse'")
}
nobreak {
forvalues k=1/`nfactors' {
local type : word `k' of `typlist'
local name : word `k' of `varlist'
qui gen `type' `name' = `f`k'' if `touse'
label var `name' `"`e(factor`k')', `ebayes'`empiricalbayes'`regression'`mle'`bartlett' method"'
}
}
}
end
exit
History:
v.1.0 -- June 12, 2008: Empirical Bayes and MLE scoring

View File

@ -0,0 +1,18 @@
*! comfirmdir Version 1.1 dan.blanchette@duke.edu 22Jan2009
*! Center of Entrepreneurship and Innovation Duke University's Fuqua School of Business
* confirmdir Version 1.1 dan_blanchette@unc.edu 17Jan2008
* research computing, unc-ch
* - made it handle long directory names
** confirmdir Version 1.0 dan_blanchette@unc.edu 05Oct2003
** the carolina population center, unc-ch
program define confirmdir, rclass
version 8
local cwd `"`c(pwd)'"'
quietly capture cd `"`1'"'
local confirmdir=_rc
quietly cd `"`cwd'"'
return local confirmdir `"`confirmdir'"'
end

View File

@ -0,0 +1,54 @@
{smcl}
{* 17Jan2008}{...}
{* 28Oct2004}{...}
{* 19Nov2003}{...}
{hline}
help for {hi:confirmdir} {right:manual: {hi:[R] none}}
{right:dialog: {hi: none} }
{hline}
{title:Confirms if directory exists}
{p 8 17 2}
{cmd:confirmdir} {it:full direcotory name}
{p_end}
{title:Description}
{p 4 4 2}{cmd:confirmdir} is designed for programmers who want to know if a directory exists.
This is just like {cmd:confirm} command when used to confirm a file. If Stata allowed their
{cmd:confirm} command to also have the "dir" option, this program would not have been
written. Stata's {cmd:confirm file} will confirm a directory in UNIX/Linux but not in Windows.
{cmd:confirmdir} works in all operating systems.{p_end}
{title:Examples}
{p 4 8 2}{cmd:. confirmdir "c:\My Favorite Directory\Where I Keep Stuff\"}{p_end}
{p 4 8 2}{cmd:. confirmdir /projects/ethiopia/survey2002/data/}{p_end}
{title:Saved Results}
{p 4 8 2}The {cmd:confirmdir} command saves in {cmd:r()}:{p_end}
{synoptset 20 tabbed}{...}
{p2col 5 20 24 2: Macros}{p_end}
{synopt:{cmd:r(confirmdir)}}return code returned by the {help cd:cd} command.{p_end}
{title:Author}
{p 4 4 2}
Dan Blanchette {break}
Center of Entrepreneurship and Innovation {break}
Duke University's Fuqua School of Business {break}
Dan.Blanchette@Duke.edu{p_end}
{title:Also see}
{p 4 13 2}On-line: {help confirm:confirm} {help cd:cd}, {help tmpdir:tmpdir} (if installed){p_end}

View File

@ -0,0 +1,62 @@
program def convlist, rclass
*! NJC 1.0.0 9 April 2001
version 6.0
gettoken lists 0 : 0, parse(",")
if "`lists'" == "" | "`lists'" == "," {
di in r "no lists specified"
exit 198
}
tokenize "`lists'", parse("\")
if "`4'" != "" | "`2'" != "\" {
di in r "incorrect syntax"
exit 198
}
numlist "`1'"
local list1 `r(numlist)'
local n1 : word count `list1'
numlist "`3'"
local list2 `r(numlist)'
local n2 : word count `list2'
syntax [ , Global(str) Noisily ]
if length("`global'") > 8 {
di in r "global name must be <=8 characters"
exit 198
}
local n3 = `n1' + `n2' - 1
local k = 1
while `k' <= `n3' {
tempname c`k'
scalar `c`k'' = 0
local k = `k' + 1
}
tokenize `list1'
local i = 1
while `i' <= `n1' {
local j = 1
while `j' <= `n2' {
local b`j' : word `j' of `list2'
local k = `i' + `j' - 1
scalar `c`k'' = `c`k'' + (``i'' * `b`j'')
local j = `j' + 1
}
local i = `i' + 1
}
local k = 1
while `k' <= `n3' {
local this = `c`k''
local newlist "`newlist'`this' "
local k = `k' + 1
}
if "`noisily'" != "" { di "`newlist'" }
if "`global'" != "" { global `global' "`newlist'" }
return local list `newlist'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,877 @@
*! version 0.8.1 - 2009-07-31
* work around S11 bug in estimates table
* To do:
* 1) add weights
* 2) use returns rather than scalars
// compare count models
capture program drop countfit
program define countfit, rclass
version 9.0
* 2009-07-31
local vwidth = 32 // causes error in Stata 11
local vwidth = 30
syntax varlist [if] [in] ///
, [Generate(string) replace ///
INFlate(string) /// inflation variables
MAXcount(integer 9) ///
NOGraph /// suppress graph of differences in predicted probabilities
NODifferences /// suppress table of differences in predicted probabilities
NOEstimates /// suppress table of estimated parameters
NOFit /// suppress table of fit statistics
Prm Nbreg ZIP ZINb ///
nodash ///
NOConstant ///
NOPRTable ///
note(string) /// note for graph
GRAPHexport(string) /// options passed to graph export
NOIsily ///
]
// trap weights
if "`e(wtype)'"!="" {
di in r "-countfix- does not work with weights."
exit
}
// define variable lists
* inflate is same as rhs if not specified
if "`inflate'"=="" {
** better way to do this?
local nvars : word count `varlist'
foreach i of numlist 2(1)`nvars' {
local var : word `i' of `varlist'
local inflate "`inflate' `var'"
}
}
// set up printing
local f83 "%8.3f"
local f93 "%9.3f"
local f93l "%-9.3f"
local f103 "%10.3f"
local dash ""
if "`nodash'"=="" {
local dash ///
"--------------------------------------------------------------------------"
}
// construct information on models being assessed
* defalult name is CF for variables created
if "`generate'"=="" local set "CF"
local set "`generate'" // label for sets of models
local modellist "" // list of types of models estimated
local mdlnmlist "" // like modellist but begin with generate prefix _
local pltsym "" // symbols used in the plot
local plty "" // y variables to plot
local pltx "" // x variables to plot
local mdlnum = 1 // number associated with each model
* set up information for all models specified
if "`prm'"=="prm" {
local mdlnmlist "`mdlnmlist' `set'PRM"
local modellist "PRM "
local pltx "`set'PRMval"
local plty "`set'PRMdif "
local pltsym "Th "
local pltleg1 "`set'PRM"
local mdlnum = `mdlnum' + 1
}
if "`nbreg'"=="nbreg" {
local mdlnmlist "`mdlnmlist' `set'NBRM"
local modellist "`modellist'NBRM "
local pltx "`set'NBRMval"
local plty "`plty' `set'NBRMdif "
local pltsym "`pltsym' Sh "
local pltleg`mdlnum' "`set'NBRM"
local mdlnum = `mdlnum' + 1
}
if "`zip'"=="zip" {
local mdlnmlist "`mdlnmlist' `set'ZIP"
local modellist "`modellist'ZIP "
local pltx "`set'ZIPval"
local plty "`plty' `set'ZIPdif "
local pltsym "`pltsym' T "
local pltleg`mdlnum' "`set'ZIP"
local mdlnum = `mdlnum' + 1
}
if "`zinb'"=="zinb" {
local mdlnmlist "`mdlnmlist' `set'ZINB"
local modellist "`modellist'ZINB "
local pltx "`set'ZINBval"
local plty "`plty' `set'ZINBdif "
local pltsym "`pltsym' S "
local pltleg`mdlnum' "`set'ZINB"
local mdlnum = `mdlnum' + 1
}
* if none, then all
if "`prm'"=="" & "`nbreg'"=="" & "`zip'"=="" & "`zinb'"=="" {
local pltx "`set'PRMval"
local plty "`set'PRMdif `set'NBRMdif `set'ZIPdif `set'ZINBdif"
local pltsym "Th Sh T S"
local pltleg1 "`set'PRM"
local pltleg2 "`set'NBRM"
local pltleg3 "`set'ZIP"
local pltleg4 "`set'ZINB"
local mdlnmlist "`set'PRM `set'NBRM `set'ZIP `set'ZINB"
local modellist "PRM NBRM ZIP ZINB"
local prm "prm"
local nbreg "nbreg"
local zip "zip"
local zinb "zinb"
* drop? local alphaopt "drop(lnalpha:_cons)"
}
// estimate models
* 2009-07-31
_count_estimate `varlist' `if' `in' [`fweight' `pweight' `iweight'], ///
max(`maxcount') gen(`generate') `replace' inflate(`inflate') ///
`prm' `nbreg' `zip' `zinb' `noconstant' `noisily'
// table of estimates
if "`noestimates'"=="" {
estimates table `mdlnmlist', eform ///
b(%9.3f) t(%7.2f) label varwidth(`vwidth') ///
stats(alpha N ll bic aic)
}
// summary table of mean observed - predicted probabilities
local c1 = 10
local c2 = 25
local c3 = 31
local c4 = 47
if "`nodifferences'"=="" {
di in g "Comparison of Mean Observed and Predicted Count"
di
di in g " " _col(`c1') " Maximum" ///
_col(`c2') " At" ///
_col(`c3') " Mean"
di in g "Model" _col(`c1') " Difference" ///
_col(`c2') "Value" ///
_col(`c3') " |Diff|"
di in g "---------------------------------------------"
}
* loop through models and compute differences
foreach m in `modellist' {
local modelnm "`set'`m'"
* stats on difference
qui sum `modelnm'dif
local toplot "`toplot' `modelnm'dif"
local `m'difsd = r(sd) // sd of obs-pred
local `m'difmin = r(min) // min
local `m'difmax = r(max) // max
* stats on mean absolute difference
capture drop `modelnm'difabs
qui gen `modelnm'absdif = abs(`modelnm'dif)
qui sum `modelnm'absdif
local `m'absdifmax = r(max) // max
scalar absdifsd`m' = r(sd) // sd
scalar absdifmn`m' = r(mean) // mean
* find values for largest difference
tempname difval
qui gen `difval' = (`modelnm'absdif>``m'absdifmax'-.00001)*_n ///
if `modelnm'absdif!=.
qui sum `difval'
local `m'absdifmaxval = r(max) - 1
* sign of deviation
tempname maxdif // 0.8.0
if ``m'absdifmax' == abs(``m'difmin') {
*local maxdif = ``m'difmin' // 0.8.0
scalar `maxdif' = ``m'difmin'
}
if ``m'absdifmax' == abs(``m'difmax') {
*local maxdif = ``m'difmax' // 0.8.0
scalar `maxdif' = ``m'difmax'
}
if "`nodifferences'"=="" {
* print summary of differences from predictions
di in y "`modelnm'" _col(`c1') in y `f83' `maxdif' ///
_col(`c2') `f83' " ``m'absdifmaxval'" ///
_col(`c3') `f83' absdifmn`m'
}
} // loop through models
// TABLES OF OBSERVED AND PREDICTED COUNTS
if "`noprtable'" == "" {
foreach t in `mdlnmlist' {
qui {
sum `t'absdif
local max = r(N) - 1 // largest count
tempname sumde sumpe sumob sumpr // 0.8.0
scalar `sumde' = r(sum) // sum of abs dif
sum `t'pearson
scalar `sumpe' = r(sum) // sum of pearson dif
sum `t'obeq
scalar `sumob' = r(sum) // sum of pr observed
sum `t'preq
scalar `sumpr' = r(sum) // sum of pr predicted
} // qui
local c1 = 7
local c2 = 19
local c3 = 30
local c4 = 40
local c5 = 50
di
di in y "`t'" in g ": Predicted and actual probabilities"
di
di in g "Count" _col(`c1') " Actual" ///
_col(`c2') "Predicted" ///
_col(`c3') " |Diff|" ///
_col(`c4') " Pearson"
local dash ""
if "`nodash'"=="" {
local dash ///
"------------------------------------------------"
}
di in g "`dash'"
foreach c of numlist 0(1)`max' {
local i = `c' + 1
tempname ob pr de pe // 0.8.0
scalar `ob' = `t'obeq[`i']
scalar `pr' = `t'preq[`i']
scalar `de' = abs(`t'dif[`i'])
scalar `pe' = `t'pearson[`i']
di in y "`c'" ///
_col(`c1') `f83' `ob' ///
_col(`c2') `f83' `pr' ///
_col(`c3') `f83' `de' ///
_col(`c4') `f83' `pe'
} // loop through counts
di in g "`dash'"
di in y "Sum" ///
_col(`c1') `f83' `sumob' ///
_col(`c2') `f83' `sumpr' ///
_col(`c3') `f83' `sumde' ///
_col(`c4') `f83' `sumpe'
} // loop through modellist
} // print table of predictions
// PLOT DIFFERENCES
if "`nograph'"=="" {
twoway (connected `plty' `pltx', ///
msymbol(`pltsym') ///
clpat(tight_dot tight_dot tight_dot tight_dot ) ///
ytitle("Observed-Predicted") ///
subtitle("Note: positive deviations show underpredictions.", ///
pos(11) size(small)) ///
ylabel(-.10(.05).10, grid gmax gmin) ///
xlabel(0(1)`maxcount') ///
note(`note') ///
ysize(3.5) xsize(4.5) ///
legend(order(1 "`pltleg1'" 2 "`pltleg2'" ///
3 "`pltleg3'" 4 "`pltleg4'")) ///
)
* export graph
if "`graphexport'" != "" {
qui graph export `graphexport'
}
}
// COMPARE FIT STATISTICS
if "`nofit'"=="" {
local dash ""
if "`nodash'"=="" {
local dash ///
"-------------------------------------------------------------------------"
}
local c1 = 16
local c2 = 32
local c3 = 48
local c4 = 56
local c5 = 62
di _n in g "Tests and Fit Statistics"
di
tempname aicd // 0.8.0
* base PRM
if "`prm'"!="" {
local mdl1 "`set'PRM"
local mdl1type "PRM"
di in y "`mdl1'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl1' ///
_col(`c2') in g "AIC=" in y `f103' aic`mdl1' ///
_col(`c3') in g "Prefer" ///
_col(`c4') in g "Over" ///
_col(`c5') in g "Evidence"
* prm vs nbreg
if "`nbreg'"!="" {
local mdl2 "`set'NBRM"
local mdl2type "NBRM"
global bic1 = bic`mdl1'
global bic2 = bic`mdl2'
_count_bic_dif
local pref = $bicpref
local nopref = $bicnopref
di "`dash'"
* bic
di in y " " in g " vs " in y "`mdl2'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' $bicdif ///
_col(`c3') in y "`mdl`pref'type'" ///
_col(`c4') in y "`mdl`nopref'type'" ///
_col(`c5') in y "$bicsup"
* aic
scalar `aicd' = aic`mdl1' - aic`mdl2'
local aicfav "`mdl2type'"
local aicnofav "`mdl1type'"
if aic`mdl1' < aic`mdl2' {
local aicfav "`mdl1type'"
local aicnofav "`mdl2type'"
}
di _col(`c1') in g "AIC=" in y `f103' aic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' `aicd' ///
_col(`c3') in y "`aicfav'" ///
_col(`c4') in y "`aicnofav'"
* lr test
local lrfav "`mdl1type'"
local lrnofav "`mdl2type'"
if lrnb_prmp < .05 {
local lrfav "`mdl2type'"
local lrnofav "`mdl1type'"
}
di _col(`c1') in g "LRX2=" in y `f93' lrnb_prm ///
_col(`c2') in g "prob=" in y `f93' lrnb_prmp in g ///
_col(`c3') in y "`lrfav'" ///
_col(`c4') in y "`lrnofav'" ///
_col(`c5') in y "p=" `f93l' lrnb_prmp
} // no nbreg vs prm
* prm vs zip
if "`zip'"!="" {
local mdl2 "`set'ZIP"
local mdl2type "ZIP"
* bic
global bic1 = bic`mdl1'
global bic2 = bic`mdl2'
_count_bic_dif
local pref = $bicpref
local nopref = $bicnopref
di in g "`dash'"
di in y " " in g " vs " in y "`mdl2'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' $bicdif ///
_col(`c3') in y "`mdl`pref'type'" ///
_col(`c4') in y "`mdl`nopref'type'" ///
_col(`c5') in y "$bicsup"
* aic
tempname aicd // 0.8.0
scalar `aicd' = aic`mdl1' - aic`mdl2'
local aicfav "`mdl2type'"
local aicnofav "`mdl1type'"
if aic`mdl1' < aic`mdl2' {
local aicfav "`mdl1type'"
local aicnofav "`mdl2type'"
}
di _col(`c1') in g "AIC=" in y `f103' aic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' `aicd' ///
_col(`c3') in y "`aicfav'" ///
_col(`c4') in y "`aicnofav'"
* vuong test
local vufav "`mdl1type'"
local vunofav "`mdl2type'"
if vu`mdl2'>0 {
local vufav "`mdl2type'"
local vunofav "`mdl1type'"
}
di _col(`c1') in g "Vuong=" in y `f83' vu`mdl2' ///
_col(`c2') in g "prob=" in y `f93' vu`mdl2'p in g ///
_col(`c3') in y "`vufav'" ///
_col(`c4') in y "`vunofav'" ///
_col(`c5') in y "p=" `f93l' vu`mdl2'p
} // no zip vs prm
* prm vs zinb
if "`zinb'"!="" {
local mdl2 "`set'ZINB"
local mdl2type "ZINB"
* bic
global bic1 = bic`mdl1'
global bic2 = bic`mdl2'
_count_bic_dif
local pref = $bicpref
local nopref = $bicnopref
di in g "`dash'"
di in y " " in g " vs " in y "`mdl2'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' $bicdif ///
_col(`c3') in y "`mdl`pref'type'" ///
_col(`c4') in y "`mdl`nopref'type'" ///
_col(`c5') in y "$bicsup"
* aic
scalar `aicd' = aic`mdl1' - aic`mdl2'
local aicfav "`mdl2type'"
local aicnofav "`mdl1type'"
if aic`mdl1' < aic`mdl2' {
local aicfav "`mdl1type'"
local aicnofav "`mdl2type'"
}
di _col(`c1') in g "AIC=" in y `f103' aic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' `aicd' ///
_col(`c3') in y "`aicfav'" ///
_col(`c4') in y "`aicnofav'"
} // no zinb vs prm
} // if no prm
* base nbreg
if "`nbreg'"!="" {
local mdl1 "`set'NBRM"
local mdl1type "NBRM"
di in g "`dash'"
di in y "`mdl1'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl1' ///
_col(`c2') in g "AIC=" in y `f103' aic`mdl1' ///
_col(`c3') in g "Prefer" ///
_col(`c4') in g "Over" ///
_col(`c5') in g "Evidence"
* nbreg vs zip
if "`zip'"!="" {
local mdl2 "`set'ZIP"
local mdl2type "ZIP"
* bic
global bic1 = bic`mdl1'
global bic2 = bic`mdl2'
_count_bic_dif
local pref = $bicpref
local nopref = $bicnopref
di in g "`dash'"
di in y " " in g " vs " in y "`mdl2'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' $bicdif ///
_col(`c3') in y "`mdl`pref'type'" ///
_col(`c4') in y "`mdl`nopref'type'" ///
_col(`c5') in y "$bicsup"
* aic
scalar `aicd' = aic`mdl1' - aic`mdl2'
local aicfav "`mdl2type'"
local aicnofav "`mdl1type'"
if aic`mdl1' < aic`mdl2' {
local aicfav "`mdl1type'"
local aicnofav "`mdl2type'"
}
di _col(`c1') in g "AIC=" in y `f103' aic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' `aicd' ///
_col(`c3') in y "`aicfav'" ///
_col(`c4') in y "`aicnofav'"
} // no zip vs nbreg
* nbreg vs zinb
if "`zinb'"!="" {
local mdl2 "`set'ZINB"
local mdl2type "ZINB"
* bic
global bic1 = bic`mdl1'
global bic2 = bic`mdl2'
_count_bic_dif
local pref = $bicpref
local nopref = $bicnopref
di in g "`dash'"
di in y " " in g " vs " in y "`mdl2'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' $bicdif ///
_col(`c3') in y "`mdl`pref'type'" ///
_col(`c4') in y "`mdl`nopref'type'" ///
_col(`c5') in y "$bicsup"
* aic
scalar `aicd' = aic`mdl1' - aic`mdl2'
local aicfav "`mdl2type'"
local aicnofav "`mdl1type'"
if aic`mdl1' < aic`mdl2' {
local aicfav "`mdl1type'"
local aicnofav "`mdl2type'"
}
di _col(`c1') in g "AIC=" in y `f103' aic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' `aicd' ///
_col(`c3') in y "`aicfav'" ///
_col(`c4') in y "`aicnofav'"
* vuong test
local vufav "`mdl1type'"
local vunofav "`mdl2type'"
if vu`mdl2'>0 {
local vufav "`mdl2type'"
local vunofav "`mdl1type'"
}
di _col(`c1') in g "Vuong=" in y `f83' vu`mdl2' ///
_col(`c2') in g "prob=" in y `f93' vu`mdl2'p in g ///
_col(`c3') in y "`vufav'" ///
_col(`c4') in y "`vunofav'" ///
_col(`c5') in y "p=" `f93l' vu`mdl2'p
} // no zinb vs nbreg
} // if no nbreg
if "`zip'"!="" {
local mdl1 "`set'ZIP"
local mdl1type "ZIP"
di in g "`dash'"
di in y "`mdl1'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl1' ///
_col(`c2') in g "AIC=" in y `f103' aic`mdl1' ///
_col(`c3') in g "Prefer" ///
_col(`c4') in g "Over" ///
_col(`c5') in g "Evidence"
* zip vs zinb
if "`zinb'"!="" {
local mdl2 "`set'ZINB"
local mdl2type "ZINB"
* bic
global bic1 = bic`mdl1'
global bic2 = bic`mdl2'
_count_bic_dif
local pref = $bicpref
local nopref = $bicnopref
di in g "`dash'"
di in y " " in g " vs " in y "`mdl2'" ///
_col(`c1') in g "BIC=" in y `f103' bic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' $bicdif ///
_col(`c3') in y "`mdl`pref'type'" ///
_col(`c4') in y "`mdl`nopref'type'" ///
_col(`c5') in y "$bicsup"
* aic
scalar `aicd' = aic`mdl1' - aic`mdl2'
local aicfav "`mdl2type'"
local aicnofav "`mdl1type'"
if aic`mdl1' < aic`mdl2' {
local aicfav "`mdl1type'"
local aicnofav "`mdl2type'"
}
di _col(`c1') in g "AIC=" in y `f103' aic`mdl2' ///
_col(`c2') in g "dif=" in y `f103' `aicd' ///
_col(`c3') in y "`aicfav'" ///
_col(`c4') in y "`aicnofav'"
* lr test
local lrfav "`mdl1type'"
local lrnofav "`mdl2type'"
if lrnb_prmp < .05 {
local lrfav "`mdl2type'"
local lrnofav "`mdl1type'"
}
di _col(`c1') in g "LRX2=" in y `f93' lrzip_zinb ///
_col(`c2') in g "prob=" in y `f93' lrzip_zinbp in g ///
_col(`c3') in y "`lrfav'" ///
_col(`c4') in y "`lrnofav'" ///
_col(`c5') in y "p=" `f93l' lrnb_prmp
di in g "`dash'"
} // no zinb vs zip
} // no zip
} // no fit
end
// compute strength of bic difference?
capture program drop _count_bic_dif
program define _count_bic_dif
* compute bin diff from model 1 and model 2
global bicdif = $bic1 - $bic2
tempname bicdabs // 0.8.0
scalar `bicdabs' = abs($bicdif)
* evaluate support based on bic difference
if `bicdabs'~=. {
global bicsup "Very strong"
if `bicdabs'<= .0000000001 {
global bicsup "no"
}
else if `bicdabs' <=2 {
global bicsup "Weak"
}
else if `bicdabs' <=6 {
global bicsup "Positive"
}
else if `bicdabs' <=10 {
global bicsup "Strong"
}
global bicpref = 2
global bicnopref = 1
if $bicdif < 0 {
global bicpref = 1
global bicnopref = 2
}
if `bicdabs'< .0000000001 & `bicdabs'>-.0000000001 {
global bicpref = 0
}
}
end
// ESTIMATE MODELS AND STORE RESULTS
capture program drop _count_estimate
program define _count_estimate, rclass
syntax varlist [if] [in] [, ///
inflate(varlist) ///
MAXcount(integer 9) Generate(string) replace ///
Prm Nbreg ZIP ZINb NOConstant ///
NOIsily ]
// estimate models & create globals with stats
local set "`generate'"
tempname n
local noise ""
if "`noisily'"=="noisily" {
local noise "noisily"
}
// prm
if "`prm'"!="" {
qui {
local modelnm "PRM"
local fullnm "`set'`modelnm'"
`noise' poisson `varlist' `if' `in', `noconstant'
estimates store `fullnm'
scalar ll`fullnm' = e(ll) // log lik
fitstat, bic
scalar bicp`fullnm' = r(bic_p) // bic'
return scalar bicp`fullnm' = r(bic_p) // 0.8.0
scalar aic`fullnm' = r(aic) // aic
scalar x2`fullnm' = r(lrx2) // lrx2 all b=0
scalar x2p`fullnm' = r(lrx2_p)
scalar bic`fullnm' = r(bic) // bic
if "`replace'"=="replace" {
capture drop `fullnm'rate
capture drop `fullnm'prgt
capture drop `fullnm'val
capture drop `fullnm'obeq
capture drop `fullnm'preq
capture drop `fullnm'prle
capture drop `fullnm'oble
capture drop `fullnm'absdif
capture drop `fullnm'dif
capture drop `fullnm'pearson
local i = 0
while `i'<=`maxcount' {
capture drop `fullnm'pr`i'
capture drop `fullnm'cu`i'
local i = `i' + 1
}
}
prcounts `fullnm', plot max(`maxcount') // predicted counts
label var `fullnm'preq "`modelnm' predicted" // predicted Pr(y)
gen `fullnm'dif = `fullnm'obeq - `fullnm'preq // obs - predicted
label var `fullnm'dif "`modelnm' obs - pred"
* 2004-10-29 add CT 5.34
scalar `n' = e(N) // sample size
gen `fullnm'pearson = ///
(`n' * `fullnm'dif * `fullnm'dif) / `fullnm'preq
label var `fullnm'pearson "`modelnm' contribution to Pearson X2"
sum `fullnm'pearson
} // qui
}
// nbreg
if "`nbreg'"!="" {
qui {
local modelnm "NBRM"
local fullnm "`set'`modelnm'"
`noise' nbreg `varlist' `if' `in', `noconstant'
estimates store `fullnm'
scalar ll`fullnm' = e(ll) // log lik
scalar lrnb_prm = e(chi2_c) // lrx2 of nb vs prm
scalar lrnb_prmp = chiprob(1, e(chi2_c))*0.5
fitstat, bic
scalar bicp`fullnm' = r(bic_p) // bic'
scalar aic`fullnm' = r(aic) // aic
scalar x2`fullnm' = r(lrx2) // lrx2 all b=0
scalar x2p`fullnm' = r(lrx2_p)
scalar bic`fullnm' = r(bic) // bic
if "`replace'"=="replace" {
capture drop `fullnm'rate
capture drop `fullnm'prgt
capture drop `fullnm'val
capture drop `fullnm'obeq
capture drop `fullnm'preq
capture drop `fullnm'prle
capture drop `fullnm'oble
capture drop `fullnm'dif
capture drop `fullnm'absdif
capture drop `fullnm'pearson
local i = 0
while `i'<=`maxcount' {
capture drop `fullnm'pr`i'
capture drop `fullnm'cu`i'
local i = `i' + 1
}
}
prcounts `fullnm', plot max(`maxcount') // predicted counts
label var `fullnm'preq "`modelnm' predicted"
gen `fullnm'dif = `fullnm'obeq - `fullnm'preq
label var `fullnm'dif "`modelnm' obs - pred"
* 2004-10-29 add CT 5.34
scalar `n' = e(N) // sample size
gen `fullnm'pearson = ///
(`n' * `fullnm'dif * `fullnm'dif) / `fullnm'preq
label var `fullnm'pearson "`modelnm' contribution to Pearson X2"
sum `fullnm'pearson
} // qui
}
// zip
if "`zip'"!="" {
qui {
local modelnm "ZIP"
local fullnm "`set'`modelnm'"
`noise' zip `varlist' `if' `in', ///
inf(`inflate') vuong `noconstant'
estimates store `fullnm'
scalar vu`fullnm' = e(vuong) // vuong vs prm
scalar vu`fullnm'p = 1-norm(abs(e(vuong)))
scalar ll`fullnm' = e(ll) // log lik
fitstat, bic
scalar bicp`fullnm' = r(bic_p) // bic'
scalar aic`fullnm' = r(aic) // aic
scalar x2`fullnm' = r(lrx2) // lrx2 all b=0
scalar x2p`fullnm' = r(lrx2_p)
scalar bic`fullnm' = r(bic) // bic
if "`replace'"=="replace" {
capture drop `fullnm'rate
capture drop `fullnm'prgt
capture drop `fullnm'val
capture drop `fullnm'obeq
capture drop `fullnm'preq
capture drop `fullnm'prle
capture drop `fullnm'oble
capture drop `fullnm'all0
capture drop `fullnm'dif
capture drop `fullnm'absdif
capture drop `fullnm'pearson
local i = 0
while `i'<=`maxcount' {
capture drop `fullnm'pr`i'
capture drop `fullnm'cu`i'
local i = `i' + 1
}
}
prcounts `fullnm', plot max(`maxcount') // predicted counts
label var `fullnm'preq "`modelnm' predicted"
gen `fullnm'dif = `fullnm'obeq - `fullnm'preq
label var `fullnm'dif "`modelnm' obs - pred"
* 2004-10-29 add CT 5.34
scalar `n' = e(N) // sample size
gen `fullnm'pearson = ///
(`n' * `fullnm'dif * `fullnm'dif) / `fullnm'preq
label var `fullnm'pearson "`modelnm' contribution to Pearson X2"
qui sum `fullnm'pearson
} // qui
}
// zinb
if "`zinb'"!="" {
qui {
local modelnm "ZINB"
local fullnm "`set'`modelnm'"
`noise' zinb `varlist' `if' `in', ///
inf(`inflate') vuong zip `noconstant'
estimates store `fullnm'
scalar vu`fullnm' = e(vuong) // vuong vs nbreg
scalar vu`fullnm'p = 1-norm(abs(e(vuong)))
scalar ll`fullnm' = e(ll) // loglik
scalar lrzip_zinb = e(chi2_cp) // lrx2 zinb vs zip
scalar lrzip_zinbp = chiprob(1, e(chi2_cp))*0.5
fitstat, bic
scalar bicp`fullnm' = r(bic_p) // bic'
scalar aic`fullnm' = r(aic) // aic
scalar x2`fullnm' = r(lrx2) // lrx2 all b=0
scalar x2p`fullnm' = r(lrx2_p)
scalar bic`fullnm' = r(bic) // bic
if "`replace'"=="replace" {
capture drop `fullnm'rate
capture drop `fullnm'prgt
capture drop `fullnm'val
capture drop `fullnm'obeq
capture drop `fullnm'preq
capture drop `fullnm'prle
capture drop `fullnm'oble
capture drop `fullnm'absdif
capture drop `fullnm'dif
capture drop `fullnm'all0
capture drop `fullnm'pearson
local i = 0
while `i'<=`maxcount' {
capture drop `fullnm'pr`i'
capture drop `fullnm'cu`i'
local i = `i' + 1
}
}
prcounts `fullnm', plot max(`maxcount') // predicted counts
label var `fullnm'preq "`modelnm' predicted"
gen `fullnm'dif = `fullnm'obeq - `fullnm'preq
label var `fullnm'dif "`modelnm' obs - pred"
* 2004-10-29 add CT 5.34
scalar `n' = e(N) // sample size
gen `fullnm'pearson = ///
(`n' * `fullnm'dif * `fullnm'dif) / `fullnm'preq
label var `fullnm'pearson "`modelnm' contribution to Pearson X2"
qui sum `fullnm'pearson
} // qui
}
end // _count_estimate
exit
* version 0.8.0 fix pr bug
* version 0.2.1 13Apr2005 add replace; trap weights
* version 0.2.0 27Feb2005 first documented version

View File

@ -0,0 +1,86 @@
.-
help for ^countfit^ - 03Nov2005
.-
Compare fit of alternive count models
-------------------------------------
^countfit^ varlist [if] [in]^,^ [^inf^late^(^varlist2^)^ ^noc^onstant ^p^rm ^n^breg
^zip^ ^zin^b ^g^enerate^(^prefix^)^ ^replace^ ^note(^string^)^
^graph^export^(^filename[^, replace^]^)^ ^nog^raph ^nod^ifferences ^noprt^able
^noe^stimates ^nof^it ^nodash^ ^max^count(#) ^noi^sily]
Description
-----------
^countfit^ compares the fit of the Poissin, negative binomial, zero-inflated
Poisson and zero-inflated negative binomial models, generateing a table of
estimates, a table of differences between observed and average estimated
probabilities for each count, a graph of these differences, and various tests
and measures used to compare the fit of count models.
Specifying the model
--------------------
Immediately afer the command name ^countfit^ you specify the dependent and
independent variables as you would with ^poisson^ or the other models. For
zero-inflated models, the ^inflate^ option is used in the same was as in the
^zip^ and ^zinb^ commands. ^noconstant^ can be used to exclude the constant
term.
Options to select the models to fit
-----------------------------------
By default, ^poisson^, ^nbreg^, ^zip^ and ^zinb^ are estimated. If you
only want some of these models, specify the modesl you want with:
^prm^ - include ^poisson^
^nbreg^ - include ^nbreg^
^zip^ - include ^poisson^
^zinb^ - include ^poisson^
Options to label results
------------------------
^generate()^ is up to five letters to name the variables that are created
and to label the models in the output. This name is placed in front
of the type of model (e.g., namePRM). This is optional but will help
keep track of results from multiple specifications of models.
^replace^ will replace variables created with the ^generate^ option if they
already exist.
^note()^ is a label added to the graph that is saved.
^graphexport()^ contains options to be sent to the ^graph export^ command to
export the graph that is create. For example, ^graph(mdl.emf, replace)^
would save the file mdl.emf and replace it if it exists. If this option
is not included, the graph is not saved.
Options controlling what is printed
-----------------------------------
^noisily^ shows the output from the estimation commands called by ^countfit^.
^nograph^ suppress graph of differences from observed counts.
^nodifferences^ suppress table of differences from observed counts.
^noprtable^ suppresses table of predictions for each model.
^noestimates^ suppress table of estimated coefficients.
^nofit^ suppress table of fit statistics and test of fit
^nodash^ suppress dashed lines between measures of fit
^maxcount()^ number of counts to evaluate.
^noisily^ includes output from Stata estimation commands; without this option
the results are only shown in the ^estimates table^ output.
Notes
-----
^countfit^ is based on the results from the Stata models described above,
the predictions computed by ^prcounts^, and the fit measures computed by
^fitstat^.
Examples
--------
^. use couart2^
^. countfit art fem mar kid5 phd ment, inf(ment fem) nbreg zinb nograph^
.-
Author: J. Scott Long and Jeremy Freese
www.indiana.edu/~jslsoc/spost.htm
spostsup@@indiana.edu

View File

@ -0,0 +1,29 @@
program def cseplist, rclass
*! NJC 1.0.0 31 August 2000
version 6.0
gettoken list 0 : 0, parse(",")
if "`list'" == "" | "`list'" == "," {
di in r "nothing in list"
exit 198
}
syntax , [ Global(str) Noisily ]
if length("`global'") > 8 {
di in r "global name must be <=8 characters"
exit 198
}
tokenize `list'
local n : word count `list'
local i = 1
while `i' < `n' {
local newlist "`newlist'``i'',"
local i = `i' + 1
}
local newlist "`newlist'``n''"
if "`noisily'" != "" { di "`newlist'" }
if "`global'" != "" { global `global' "`newlist'" }
return local list `newlist'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,54 @@
program def cvarlist, rclass
*! NJC 1.0.0 18 August 2000
version 6.0
gettoken list 0 : 0, parse(",")
if "`list'" == "" | "`list'" == "," {
di in r "nothing in list"
exit 198
}
syntax , [ new NUmeric String Noisily Global(str)]
local nopts : word count `new' `numeric' `string'
if `nopts' > 1 {
di in r "use just one of options new, numeric, string"
exit 198
}
if "`global'" != "" {
tokenize `global'
args global1 global2 global3
if "`global3'" != "" {
di in r "global( ) must contain at most 2 names"
exit 198
}
if (length("`global1'") > 8) | (length("`global2'") > 8) {
di in r "global name must be <=8 characters"
exit 198
}
}
tokenize `list'
local nwords : word count `list'
local i = 1
while `i' <= `nwords' {
capture confirm `new' `numeric' `string' variable ``i''
if _rc == 0 { local newlist "`newlist'``i'' " }
else local badlist "`badlist'``i'' "
local i = `i' + 1
}
if "`noisily'" != "" {
if "`newlist'" != "" {
di in g "list 1: " in y "`newlist'"
}
if "`badlist'" != "" {
di in g "list 2: " in y "`badlist'"
}
}
if "`global1'" != "" { global `global1' "`newlist'" }
if "`global2'" != "" { global `global2' "`badlist'" }
return local list1 `newlist'
return local list2 `badlist'
end

View File

@ -0,0 +1,2 @@
.h listutil