File restructure #1

This commit is contained in:
2024-04-19 16:46:33 +02:00
parent a8a94ddc10
commit ecac05b9c4
703 changed files with 10 additions and 272568 deletions

View File

@ -1,32 +0,0 @@
*! version 2.0, October 2002 (SJ4-1: st0057)
program define hermite
    version 7.0
    /*
      Usage: hermite n x w
        n  order used in the recurrence (obs 1 to n+2 of a work variable are filled)
        x  name of an existing scalar holding the starting value; overwritten
           with the refined value
        w  name of a scalar that receives 2/(w*w) on convergence
      The data in memory must hold at least n+2 observations because the
      recurrence is stored in a temporary variable.
      NOTE(review): the name and the _pi^(-0.25) start value suggest a root
      finder for normalised Hermite polynomials - confirm against the caller.
      Exits silently on convergence, or with error 499 after 10 passes.
    */
    args n x w
    local top = `n' + 2
    tempvar poly
    qui gen double `poly' = .
    forvalues pass = 1/10 {
        /* three-term recurrence evaluated at the current value of x */
        qui replace `poly' = 0 in 1
        qui replace `poly' = _pi^(-0.25) in 2
        qui replace `poly' = `x'*sqrt(2/(_n-2))*`poly'[_n-1] /*
            */ - sqrt((_n-3)/(_n-2))*`poly'[_n-2] in 3/`top'
        /* correction step: move x by poly[top]/w */
        scalar `w' = sqrt(2*`n')*`poly'[`top'-1]
        scalar `x' = `x' - `poly'[`top']/`w'
        if abs(`poly'[`top']/`w') < 3e-14 {
            /* converged: turn w into its final form and leave the program */
            scalar `w' = 2/(`w'*`w')
            exit
        }
    }
    di in red "hermite did not converge"
    exit 499
end
exit

View File

@ -1,682 +0,0 @@
*! Date : 3 Sep 2007
*! Version : 1.72
*! Authors : Adrian Mander/David Clayton
*! Email : adrian.mander@mrc-hnr.cam.ac.uk
*! Description : Hotdeck imputation
/*
25/07/06 version 1.67 - removed some = and investigated the set seed problem
16/3/07 version 1.68 - spruced up the displays
13/6/07 version 1.69 - Made sure set seed does what the masses want although it is truly the wrong thing to do
27/7/07 version 1.70 - The warnings were not strong enough about the strata information.. in fact I think when there is
no data to impute it still tries to combine results. Also a slight error in the output.
15/8/07 version 1.71 - Corrected the confidence intervals.. the tail probability was wrong
ALSO checked the calculation of T B Ubar Qbar by hand!
3/9/07 version 1.72 - Corrected the % lines missing to % lines complete
*/
program define hotdeck
    version 9.0
    /*
      hotdeck: driver for multiple hotdeck imputation.
      Steps, in the order implemented below:
        1. choose the random-number seed (clock derived when seed() is omitted);
        2. tabulate the missing-data patterns with _misspat (skipped when
           infiles() is given);
        3. validate the option combinations (all failures exit with 198);
        4. for each imputation either load a file named in infiles() or build
           an imputed dataset with _hotdeck (optionally saving it), run
           command() and collect the requested parameters with _parms into
           the matrices impb1, impb2, ... and impV1, impV2, ...;
        5. combine the per-imputation results into Qbar, Ubar, B and T and
           print the coefficient and confidence-interval tables.
      Side effects: clears stored estimates, drops ALL matrices, sets the
      global macro allpat and may write dataset files when store is given.
      The data in memory are preserved on entry and restored on exit.
    */
    syntax [varlist] [if] [in] [using/], [BY(varlist) IMPute(integer 1) STORE GENerate(string) COMmand(string) PARMS(string asis) REPlace NOISE KEEP(varlist) SEED(string) QUIET INFILES(string) ]
    tokenize "`varlist'"
    local z "`1'"
    preserve
    if "`if'"~="" qui keep `if'

    /* Check the seed option: the value 1 is reserved internally to mean
       that no seed was supplied, so a user supplied 1 is remapped to 2 */
    if "`seed'"=="1" local seed 2
    if "`seed'"=="" {
        local noseed "noseed"
        local seed 1
    }
    confirm number `seed'

    /* To generate a seed from the time note need to truncate the seed to be below 2^31-1 */
    if `seed'==1 {
        local time "$S_TIME"
        local date "$S_DATE"
        tokenize "`time'", parse(":")
        local seed1 "`1'`3'`5'"
        tokenize "`date'", parse(" ")
        local dat "`1'`2'`3'"
        local dat1 = date("`dat'","dmy")
        local seed1 "`seed1'`dat1'"
        di
        local l_seed "2^31-1"
        local seed1 = mod(`seed1',`l_seed')
        set seed `seed1'
        local seed "`seed1'" /* Added to sort out the seed */
    }
    di "{txt}Seed is set as {res} `seed'"

    estimates clear
    tempfile olddata
    tempvar touse
    mark `touse' `if' `in'
    markout `touse' `by', strok
    di in green "DELETING all matrices...."
    mat drop _all

    /* Display the patterns of missingness.. only on observed data not imputed */
    if "`infiles'"=="" {
        if "`by'"=="" _misspat `varlist' `if' `in'
        else _misspat `varlist' `if' `in', by(`by')
        local nfill=r(nmiss)
    }
    qui save "`olddata'"

    /* Count the missing data for displaying later */
    if "`infiles'"=="" {
        global allpat = r(allpat)
        qui count if `touse'
        local miss = (r(N)-`nfill')/r(N)
    }

    /* Make sure the users are using the right syntax.. lots of checks here to make sure*/
    if "`command'"=="" {
        di in red "WARNING: When the <command> option is not selected "
        di in red "then no analysis is performed on the imputed datasets"
        di
        if "`store'"=="" {
            di "ALSO STORE isnt selected so hotdeck will appear to do nothing"
            exit(198)
        }
    }
    if `impute'<1 {
        di in red "The number of imputations must be more than 0 not `impute'"
        exit(198)
    }
    if `impute'==1 & "`infiles'"=="" {
        if "`store'"=="" | "`command'"~="" {
            di in red "If one imputation is made then command option should NOT be used"
            di in red "AND the store option must be specified"
            exit(198)
        }
    }
    if "`using'"~="" {
        if "`store'"=="" {
            di in red "To save datasets you must specify the STORE option"
            exit(198)
        }
    }
    if "`keep'"~="" {
        if "`store'"=="" {
            di in red "If you use the KEEP option you must specify the STORE option"
            exit(198)
        }
    }
    if "`noise'"~="" & "`command'"=="" {
        di in red "When specifying noise you must also specify the command option"
        exit(198)
    }
    if "`command'"~="" {
        if `"`parms'"'==`""' {
            di in red "To obtain any output from the command option you must also specify "
            di in red "the parameters of interest using the parms() option"
            exit(198)
        }
    }

    /************************************************
    * Loop over the number of imputed data sets
    * required
    ************************************************/
    if "`seed'"~="1" set seed `seed'

    /* This is the if statement that allows the input of imputed datafiles */
    if "`infiles'"~="" {
        local i 1
        tokenize "`infiles'"
        while "`1'"~="" {
            use "`1'",replace
            mac shift 1
            if "`command'"=="" {
                di in red "You must use the command option when using INFILES"
                exit(198)
            }
            if "`noise'"~="" `command'
            else qui `command' /* Do the analysis */
            _parms, parms(`"`parms'"') command(`command') iter(`i') /* Select Parameters of interest*/
            local i=`i'+1
        }
        /* the number of imputations is the number of files actually read */
        local impute=`i'-1
    }
    /* If there are no INFILES .. then just have to create the imputed datasets and analyse them */
    else {
        forv i =1/`impute' {
            /* Use original dataset */
            use "`olddata'",replace
            qui keep if `touse'
            /* Impute values; the seed expression is evaluated inside _hotdeck */
            if "`by'"~="" _hotdeck `varlist', by(`by') i((`seed'+`i')) `noseed'
            else _hotdeck `varlist', i((`seed'+`i')) `noseed'
            /* Save imputed datasets */
            if "`store'"~="" {
                if "`using'"=="" local using "imp"
                if "`keep'"=="" {
                    qui keep `varlist' `by'
                    qui save `using'`i',replace
                }
                else {
                    mkvlist `varlist' `by', vlist(`keep')
                    qui keep `r(vlist)'
                    qui save `using'`i',replace
                }
            }
            if "`command'"~="" {
                /* Do the analysis */
                if "`noise'"~="" `command'
                else qui `command'
                /* Select Parameters of interest*/
                _parms, parms(`"`parms'"') command(`command') iter(`i')
            }
        }
    }

    /********************************************************
    * Loop to calculate the estimates needed
    *
    * First get the dimensions of the parameter matrices
    ********************************************************/
    if "`command'"~="" {
        /* The between-imputation variance divides by (impute-1); this can
           only be reached with impute==1 when a single file is given in
           infiles(), so stop with a clear message instead of producing a
           table of missing values */
        if `impute'<2 {
            di in red "At least 2 imputed datasets are needed to combine estimates, found `impute'"
            exit(198)
        }
        local dim= rowsof(impV1)
        mat Qbar = J(1,`dim',0)
        mat Ubar = J(`dim',`dim',0)
        /* calc the averaging factor */
        local inv = 1/`impute'
        /* calc the average coef and variance qbar and ubar */
        forv i=1/`impute' {
            mat Qbar= `inv'*impb`i'+ Qbar
            mat Ubar= `inv'*impV`i'+ Ubar
        }
        /* calc between variances */
        mat B=J(`dim',`dim',0)
        local inv1 = 1/(`impute'-1)
        forv i=1/`impute' {
            mat B= B + `inv1'*(impb`i' - Qbar)'*(impb`i' - Qbar)
        }
        /* Calc total variance */
        mat T = Ubar+(1+1/`impute')*B
        cap mat tempmt=B*inv(Ubar)
        if _rc==504 {
            di as error "WARNING: Trying to invert variance matrix with zero elements?"
            local ter = rowsof(Ubar)
            mat temp = J(`ter',1,1)
            mat temp2 = Ubar*temp
            local tei 1
            local names: colfullnames impb1
            matrix rownames temp2 = `names'
            while `tei'<=`ter' {
                if temp2[`tei',1]==0 {
                    /* names must NOT be quoted here, otherwise the whole
                       list is treated as a single word */
                    local var : word `tei' of `names'
                    di as txt "Variance for covariate `tei' (`var') is 0 !!"
                }
                local tei=`tei'+1
            }
            /* repeated uncaptured so that the original error is raised */
            mat tempmt=B*inv(Ubar)
        }
        local trace=trace(tempmt)
        /* Everything hunky dorey until now... a strange 1 appears.. */
        local r1 = 1-((1+1/`impute')*`trace'/`dim')

        /************************************************
        * Just sorting out the matrix names
        ************************************************/
        local names: rowfullnames impb1
        matrix rownames Qbar = `names'
        local names: colfullnames impb1
        matrix colnames Qbar = `names'
        local names: rowfullnames impV1
        matrix rownames T = `names'
        matrix rownames B = `names'
        matrix rownames Ubar = `names'
        local names: colfullnames impV1
        matrix colnames T = `names'
        matrix colnames B = `names'
        matrix colnames Ubar = `names'
        mat Tsurr= `r1'*T
        mat D = Qbar*inv(Tsurr)*Qbar'
        local D1 = D[1,1]/`dim'
        local t=`dim'*(`impute'-1)
        local v1= 4+(`t'-4)*(1+(1-2/`t')*1/`r1')^2
        /* numerator df is dim, denominator df is v1 */
        local ftest= fprob(`dim',`v1',`D1')

        /********************************************************
        * The next will output the main results in Stata style
        * if the normal approximation is good then you could
        * use the matrix post command
        ********************************************************/
        if "`quiet'"=="" {
            if `r1'<0 {
                di in red "WARNING: between se larger than within se in one or more "
                di in red "parameters invalidating the global F test"
            }
            if `t'<4 {
                di in red "WARNING: t less than 4 invalid global test "
                di in red "increase parameters OR imputations"
            }
        }
        di
        di in gr _col(1) "Number of Obs.", _col(45) "= ", as res %5.0f _N
        di in gr _col(1) "No. of Imputations", _col(45) "= ", as res %5.0f `impute'
        if "`infiles'"=="" di in gr _col(1) "% Lines of Complete Data", _col(45) "= ", as res %10.4f `miss'*100, as text "%"
        /* label shows (numerator df, denominator df) to match the fprob() call above */
        di in gr _col(1) "F(`dim',",%6.3f `v1',")", _col(45) "= ", as res %10.4f `D1'
        di in gr _col(1) "Prob > F " , _col(45) "= ", as res %10.4f `ftest'
        di "{text}{dup 14:{c -}}{c TT}{dup 68:{c -}}"
        local names: colfullnames impb1
        /* Transform the double quoted names to a macrolist */
        di in gr _continue "Variable" _col(15) "{c |}",_col(17) "Average", _col(28) "Between", _col(38) "Within", _col(48) "Total", _col(58) "df", _col(68) "t", _col(77) "p-value"
        di
        di in gr _continue _col(15) "{c |}", _col(17) "Coef.",_col(28) "Imp. SE", _col(38) "Imp. SE", _col(47) " SE", _col(58) "", _col(68) "", _col(74) ""
        di
        di _continue "{text}{dup 14:{c -}}{c +}{dup 68:{c -}}"
        /* one row per parameter: average coefficient, between / within / total SE, df, t and p-value */
        foreach name of local names {
            di
            mat qhat=Qbar[1,"`name'"]
            mat b=B["`name'","`name'"]
            mat u=Ubar["`name'","`name'"]
            mat t=T["`name'","`name'"]
            local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
            local ttest= qhat[1,1]/sqrt(t[1,1])
            di as text _continue "`name'",_col(15) "{c |}", as res _col(10) %7.4f qhat[1,1],_col(17) %9.3f sqrt(b[1,1]), _col(25) %9.3f sqrt(u[1,1]), _col(34) %9.3f sqrt(t[1,1]), _col(44) %9.1f `df', _col(53) %9.3f `ttest', _col(62) %9.3f tprob(`df',`ttest')
        }
        di
        di _continue "{text}{dup 14:{c -}}{c +}{dup 68:{c -}}"
        di
        /* second table: confidence interval for each parameter */
        local name : word 1 of `names'
        local i 1
        di in gr _continue "Variable", _col(15) "{c |}", _col(17) "[$S_level% Conf. Interval]"
        di
        di _continue "{text}{dup 14:{c -}}{c +}{dup 68:{c -}}"
        while "`name'"~="" {
            di
            mat qhat=Qbar[1,"`name'"]
            mat b=B["`name'","`name'"]
            mat u=Ubar["`name'","`name'"]
            mat t=T["`name'","`name'"]
            local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
            local ttest= qhat[1,1]/sqrt(t[1,1])
            local prob = 1-((100-$S_level)/2)/100
            local tvalue=abs( invttail(`df',`prob') )
            /* The t-distribution function could be very out here.... due to a version 6 bug!
            version 6 : local tvalue = invt(`df',`prob')
            THIS HAS BEEN REMOVED 15Aug07 as the probability is calculated on adding the two tails AND it should have been
            a single tailed value!!!
            */
            local left = qhat[1,1]-`tvalue'*sqrt(t[1,1])
            local right = qhat[1,1]+`tvalue'*sqrt(t[1,1])
            di as text _continue "`name'",_col(15) "{c |}", as res %9.4f `left', %9.4f `right'
            local i=`i'+1
            local name : word `i' of `names'
        }
        di ""
        di "{text}{dup 14:{c -}}{c BT}{dup 68:{c -}}"
    } /* end of command if statement */
    restore
end
/****************************************************
* The approximate Bayesian Bootstrap hotdecking
****************************************************/
program define _hotdeck
    version 9.0
    /*
      Replace every line of the varlist by a donor line, in place.
      Expects a variable named ipattern that is non-missing on complete
      lines and missing otherwise.
        by()     optional strata; donors are drawn within stratum
        iseed()  expression giving the seed; evaluated here
        noseed   when given, the random-number seed is left untouched
      Complete lines are sorted first and keep their own values; every other
      line copies the values of a line chosen by two rounds of uniform draws
      over the complete lines.
    */
    syntax [varlist] [using], [BY(string) Iseed(string) NOSEED]
    local iseed = `iseed'

    /* optional by-prefix so that one code path serves both cases */
    local grp ""
    if "`by'" != "" {
        confirm ex var `by'
        local grp "by `by':"
    }

    tempvar ncomplete draw redraw pick donor

    /* This is the place of difference for a set seed command ..*/
    if "`noseed'" == "" set seed `iseed'

    qui sort `by' ipattern `varlist'

    /* number of complete lines (per stratum when by() is given) */
    qui gen long `ncomplete' = (ipattern!=.)
    qui `grp' replace `ncomplete' = sum(`ncomplete')
    qui `grp' replace `ncomplete' = `ncomplete'[_N]

    /* first draw, then a second draw indexing into the first */
    qui `grp' gen long `draw' = int(uniform()*`ncomplete'+1)
    qui `grp' gen long `redraw' = int(uniform()*`ncomplete'+1)
    qui `grp' gen long `pick' = `draw'[`redraw']
    qui `grp' replace `draw' = `pick'
    /* complete lines point at themselves and are therefore unchanged */
    qui `grp' replace `draw' = _n if _n<=`ncomplete'

    /* copy the donor values variable by variable */
    foreach v of local varlist {
        qui `grp' gen `donor' = `v'[`draw']
        qui `grp' replace `v' = `donor'
        qui drop `donor'
    }
end
/*******************************************************************
* Get the parameters or a subset of them from the
* model and the subset
* the covariance variance matrix as well
* Note that this section can also handle non-regression commands
* and macro lists
*******************************************************************/
program define _parms
    /*
      Collect the parameters of interest for imputation number iter() into
      the matrices impb<iter> (1 x k estimates) and impV<iter> (k x k variances).
        parms()   list of parameters, passed through as typed
        iter()    index of the current imputation, used in the matrix names
      Two modes, chosen by whether estimation results are present in e(cmd):
        - no e(cmd): parms() is read as pairs of macro names
          (estimate variance estimate variance ...); each name is looked up
          as a local macro first and then as a global macro;
        - otherwise: each item is a column name of e(b); matching columns
          and rows are taken from e(b) and e(V).
      Exit codes raised here: 302 (variance name missing), 198 (macro
      missing) and 111 (column not found in e(b)).
    */
    syntax [varlist], [PARMS(string asis) ITER(integer 1) COMMAND(string) GENerate(string) REPlace]
    /*
    previously accepted a varlist in the parms string.. too many difficulties with multiple equation models
    so this code below is being dropped
    * local 0 "`parms'"
    * while "`parms'"~="" {
    * gettoken 0 parms: parms , parse(" ,")
    * cap syntax [varlist]
    * if _rc~=0 {
    * if "`0'"=="_cons" local vlist "`vlist' `0'" <-- just extract _cons
    * else local plist "`plist' `0'"
    * }
    * else local vlist "`vlist' `varlist'"
    * }
    */
    foreach item in `"`parms'"' {
        local vlist `"`vlist' `item'"'
    }
    /* if results were not part of a regression command */
    if "`e(cmd)'"=="" {
        local names ""
        if `iter'==1 di in red "Using Non Regression Parameters and Command"
        /* first pass: count the (estimate, variance) pairs and collect names */
        tokenize "`plist' `vlist'"
        local np=0
        while "`1'"~="" {
            local names1 ="`names1' `1'"
            local names2 ="`names2' `2'"
            if "`2'"=="" {
                di in red "Must supply variance estimate of `1'"
                exit(302)
            }
            mac shift 2
            /* plain increment: the former macro-expansion increment also
               cleared the positional macro 1 on the second pass, which ended
               this loop early when three or more pairs were supplied */
            local np = `np' + 1
        }
        mat impb`iter' = J(1,`np',0)
        mat impV`iter' = J(`np',`np',0)
        /* second pass: fill the matrices, local macro taking precedence over global */
        tokenize "`plist' `vlist'"
        local np 1
        while "`1'"~="" {
            if "$`1'"=="" & "``1''"=="" {
                di in red "Global = $`1' Local = ``1''"
                di in red "Global/local macro `1' is missing "
                exit(198)
            }
            if "``1''"~="" mat impb`iter'[1,`np'] = ``1''
            if "$`1'"~="" & "``1''"=="" mat impb`iter'[1,`np'] = $`1'
            if "$`2'"=="" & "``2''"=="" {
                di in red "Global = $`2' Local = ``2''"
                di in red "Global/local macro `2' is missing "
                exit(198)
            }
            if "``2''"~="" mat impV`iter'[`np',`np'] = ``2''
            if "$`2'"~="" & "``2''"=="" mat impV`iter'[`np',`np'] = $`2'
            local np=`np'+1
            mac shift 2
        }
        matrix colnames impb`iter'=`names1'
        matrix colnames impV`iter'=`names1'
        matrix rownames impV`iter'=`names1'
    }
    /* The regression-type output part */
    else {
        matrix myb = e(b)
        matrix myV = e(V)
        /* This next statement is to handle double quoted strings.. otherwise parms will contain one item in a macro */
        local teparms :di `parms'
        /* pass 1: pick the requested columns of e(b) and e(V) */
        local first 1
        foreach item of local teparms {
            if `first'==1 {
                cap mat impb`iter' = myb[.,"`item'"]
                if _rc==111 {
                    di as error `" Attempted to extract `item' from e(b) "'
                    mat list e(b)
                    di as error "Check the matrix of estimates and only include column names in the parameters NOT variable names"
                    exit(111)
                }
                mat impVt`iter'= myV[.,"`item'"]
            }
            else {
                mat temp = myb[.,"`item'"]
                mat impb`iter'= impb`iter' , temp
                mat drop temp
                mat temp=myV[.,"`item'"]
                mat impVt`iter'= impVt`iter' , temp
                mat drop temp
            }
            local first = `first' + 1
        }
        /* pass 2: pick the matching rows to leave a square variance matrix */
        local first 1
        foreach item of local teparms {
            if `first'==1 mat impV`iter' = impVt`iter'["`item'",.]
            else {
                mat temp=impVt`iter'["`item'",.]
                mat impV`iter'= impV`iter' \ temp
            }
            local first = `first' + 1
        }
    }
end
/*************************************************
* Look at the missing pattern in the varlist
*************************************************/
program define _misspat,rclass
    /*
      Tabulate the missing-data patterns of the varlist.
      Creates the permanent variables
        pattern   one character per variable: * missing, - not missing
        ipattern  1 on lines with no missing value in the varlist, else missing
      and, when by() is given, No_obs and No_miss holding the per-stratum
      counts of complete and incomplete lines on the last line of each stratum.
      Returns r(nmiss), the number of lines that are not fully observed, and
      r(allpat), the all-observed pattern string.
      Exits with 198 when the varlist holds no missing data at all.
      The original sort order is restored before returning.
    */
    syntax varlist [if] [in] , [BY(string) ]
    tokenize "`varlist'"
    tempvar touse2 tempid
    qui gen long `tempid'=_n
    mark `touse2' `if' `in'
    markout `touse2'
    qui gen str50 pattern=""
    local allstr ""
    while "`1'"~="" {
        /* missing() also catches the extended missing values .a to .z and
           empty strings; a comparison with . flags neither and fails with a
           type mismatch on string variables */
        qui replace pattern = cond(missing(`1'),pattern+"*",pattern+"-") if `touse2'
        local allstr="-`allstr'"
        mac shift 1
    }
    qui compress pattern
    sort pattern
    lab var pattern "Missing pattern"
    di
    di in green "Missing Patterns"
    di "{text}{dup 16:{c -}}"
    di
    di in green "Table of the Missing data patterns "
    di in green " * signifies missing and - is not missing"
    di
    di "Varlist order: `varlist'"
    tab pattern if `touse2'
    local n=r(N)
    qui count if pattern=="`allstr'" & `touse2'
    if r(N)==`n' {
        di "There is no missing data in the varlist"
        exit(198)
    }
    return scalar nmiss = `n'-r(N)
    return local allpat = "`allstr'"
    qui gen ipattern=cond(pattern=="`allstr'",1,.) if `touse2'
    /*****************************************
    * Calculate stratum missing numbers
    *****************************************/
    if "`by'"~="" {
        di
        di "{text}STRATUM information"
        di "{text}{dup 19:{c -}}"
        di
        di "{text} Listing the number observed (No_obs) and "
        di in green "the number missing (No_miss) in each stratum"
        tempvar cnt mcnt
        qui sort `by'
        /* running totals within stratum, kept only on the last line of each */
        qui by `by':gen `cnt'=sum(ipattern)
        qui by `by':gen `mcnt'=sum(ipattern==.)
        qui by `by': replace `cnt'=cond( _n==_N,`cnt',.)
        qui by `by': replace `mcnt'=cond( _n==_N,`mcnt',.)
        rename `cnt' No_obs
        rename `mcnt' No_miss
        l `by' No_obs No_miss if No_obs~=., noobs
        di
        qui count if No_obs==0
        if `r(N)'>0 {
            di in red "WARNING: `r(N)' strata with NO complete records"
            di
            di "{error}This implies that within these strata the missing data will NOT be replaced "
            di "and hence will give the wrong answers in the analysis because the analysis"
            di "command will do casewise deletion"
        }
        qui count if No_obs==1
        if `r(N)'>0 di in blue "Note: `r(N)' strata with only 1 complete record"
        qui count if (No_obs>1 & No_obs<6)
        if `r(N)'>0 di in blue "Note: `r(N)' strata with 2-5 complete records"
        di
    }
    /* I thought that the following bit of command might've sorted out the seed problem :( but I don't think so */
    qui sort `tempid'
end
/*************************************************
* Expand stata syntax
*************************************************/
program define mkvlist, rclass
    /*
      Build the list of variables to keep.
        varlist  variables that are always kept
        vlist()  additional names requested by the caller
      Returns r(vlist): the varlist, then the vlist() string as given, then
      each vlist() token again after expansion as a variable list. A token
      that is not a variable is dropped unless it is _cons.
    */
    syntax varlist, VLIST(string)
    local base "`varlist'"
    local todo "`vlist'"
    while "`todo'" != "" {
        /* peel off one token and let syntax try to read it as a varlist */
        gettoken 0 todo : todo, parse(" ,")
        capture syntax [varlist]
        if _rc == 0 {
            local vlist "`vlist' `varlist'"
        }
        else if "`0'" == "_cons" {
            local vlist "`vlist' `0'"
        }
    }
    return local vlist "`base' `vlist'"
end

View File

@ -1,220 +0,0 @@
{smcl}
{* 15 Aug 2007}{...}
{cmd:help hotdeck}
{hline}
{title:Title}
{hi:Impute missing values using the hotdeck method}
{title:Syntax}
{p 8 27}
{cmdab:hotdeck}
[{it:varlist}] [{cmd:using}] [{hi:if}{it: exp}] [{hi:in}{it: exp}]
,
[
{cmdab:by}{cmd:(}{it:varlist}{cmd:)}
{cmdab:store}
{cmdab:imp:ute}{cmd:(}{it:#}{cmd:)}
{cmdab:noise}
{cmdab:keep}{cmd:(}{it:varlist}{cmd:)}
{cmdab:com:mand}{cmd:(}{it:command}{cmd:)}
{cmdab:parms}{cmd:(}{it:varlist}{cmd:)}
{cmdab:seed}{cmd:(}{it:#}{cmd:)}
{cmdab:infiles}{cmd:(}{it:filename filename ...}{cmd:)}
]
{p}
{title:Description}
{pstd}
{hi:Hotdeck} will tabulate the missing data patterns within the {help varlist}.
A row of data with missing values in any of the variables in the {hi:varlist}
is defined as a `missing line' of data, similarly a `complete line' is one where all the
variables in the {hi:varlist} contain data. The {hi:hotdeck} procedure
replaces the {hi:varlist} variables in the `missing lines' with the
corresponding values in the `complete lines'.
{hi:Hotdeck} should be used several times within a multiple imputation
sequence since missing data
are imputed stochastically rather than deterministically. The {hi:nmiss} missing
lines in each stratum of the data described by the `by' option are replaced
by lines sampled from the {hi:nobs} complete lines in the same stratum. The
approximate Bayesian bootstrap method of Rubin and Schenker(1986) is used;
first a bootstrap sample of {hi:nobs} lines are sampled with replacement from
the complete lines, and the {hi:nmiss} missing lines are sampled at random
(again with replacement) from this bootstrap sample.
{pstd}
A major assumption with the hotdeck procedure is
that the missing data are either missing completely at random (MCAR) or
missing at random (MAR), the probability that a line is missing
varying only with respect to the categorical
variables specified in the `by' option.
{pstd}
If a dataset contains many variables with missing values then
it is possible that many of the rows of data will contain at
least one missing value. The {hi:hotdeck} procedure will not work
very well in such circumstances.
There are more
elaborate methods that {bf:only} replace missing values, rather than the whole row,
for imputed values.
These multivariate multiple imputation methods are discussed by Schafer(1997).
{pstd}
A critical point is that all variables that are used in the analysis should be included in
the variable list. This is particularly true for variables that have missing data!
Variables that predict missingness should be included in the
by option so missing data is imputed within strata.
{title:Latest Version}
{pstd}
The latest version is always kept on the SSC website. To install the latest version click
on the following link
{phang}
{stata ssc install hotdeck, replace}.
{title:Options}
{phang}
{cmdab:using} specifies the root of the imputed datasets filenames. The default is
"imp" and hence the datasets will be saved as imp1.dta, imp2.dta, ....
{phang}
{cmdab:by}{cmd:(}{it:varlist}{cmd:)} specifies categorical variables defining strata within which
the imputation is to be carried out. Missing values will be replaced by complete values only within the
strata. If within a strata there are no complete records then no data will be imputed and will lead
to the wrong answers. Make sure there are a reasonable number of complete records per strata.
{phang}
{cmdab:store} specifies whether the imputed datasets are saved to disk.
{phang}
{cmdab:imp:ute}{cmd:(}{it:#}{cmd:)} specifies the number of imputed datasets to generate. The number
needed varies according to the percentage missing and the type of data, but
generally 5 is sufficient.
{phang}
{cmdab:noise} specifies whether the individual analyses, from the {hi:command()} option,
are displayed.
{phang}
{cmdab:keep}{cmd:(}{it:varlist}{cmd:)} specifies the variables saved in the imputed datasets
in addition to the imputed variables and the by list. By default the imputed
variables and the by list are always saved.
{phang}
{cmdab:com:mand}{cmd:(}{it:command}{cmd:)} specifies the analysis performed on every imputed dataset.
{phang}
{cmdab:parms}{cmd:(}{it:varlist}{cmd:)} specifies the parameters of interest from the
analysis. If the {hi:command} is a regression command then the parameter list can
include a subset of the variables specified in the regression command. The
final output consists of the combined estimates of these parameters.
For non-standard commands that are "regression" commands the {hi:parms()} option
looks at the estimation matrix e(b) and requires the column names to identify
the coefficients of interest.
{phang}
{cmdab:seed}{cmd:(}{it:#}{cmd:)} specifies the random number generator seed. When using the {hi:seed} option
the hotdeck command must be used in the correct way. The key point is that ALL variables in the analysis command
must be in the variable list, this ensures that the correlations between the variables are maintained post
imputation.
{phang}
{cmdab:infiles}{cmd:(}{it:filename filename ...}{cmd:)} specifies a list of files that have missing
values replaced by imputed values. This is convenient when the user has
several imputed datasets and wants to analyse them and combine the results.
{title:Examples}
Impute values for y in sex/age groups.
{inp:hotdeck y, by(sex age) }
Additionally to store the imputed datasets above as {hi:imp1.dta} and {hi:imp2.dta}.
{inp:hotdeck y using imp,store by(sex age) impute(2)}
{p 0 0}
Hotdeck can also use the stored imputed datafiles {hi:imp1.dta} and {hi:imp2.dta}
and carry out the combined analysis. This analysis is displayed for the coefficient
of {hi:x} and constant term {hi:_cons}.
{inp:hotdeck y, command(logit y x) parms(x _cons) infiles(imp1 imp2)}
{p 0 0}
Do not save imputed datasets to disk but carry out a logistic regression on the imputed
datasets and display the coefficients for {hi:x} and the constant term {hi:_cons} of the model.
{inp:hotdeck y x, by(sex age) command(logit y x) parms(x _cons) impute(5)}
{title:Example - Multiple Equation Model}
{p 0 0}
Multiple equation models require more complicated {hi:parms()} statements.
The example used can be applied to all multiple equation models. The only complication
is that the name of the coefficients are different.
For the following command
{inp:xtreg kgh f1, mle}
Then inspect the matrix of coefficients
{inp:mat list e(b)}
e(b)[1,4]
kgh: kgh: sigma_u: sigma_e:
f1 _cons _cons _cons
y1 -1.6751401 77.792948 0 16.730843
Then the following command will do an imputation and analysis for the single parameter.
{inp:hotdeck kgh, by(ethn) command(xtreg kgh f1, mle) parms(kgh:f1) impute(5)}
{title:Example - mlogit}
Use this web dataset for STATA release 9.
{stata "use http://www.stata-press.com/data/r9/sysdsn3.dta"}
The simple model without handling missing data
{stata mlogit insure male}
{p 0 0}
The estimated coefficients are put automatically by STATA into the matrix e(b), note the column
headings are the parameter names that {hi:hotdeck} uses. So you can not use the simple syntax
of just {hi:parms(male)} because this refers to two parameters.
{stata mat list e(b)}
{p 0 0}
So this syntax will handle the missing data using {hi:hotdeck} imputation.
{stata "hotdeck insure male, command(mlogit insure male) parms(Prepaid:male) impute(5)"}
{p 0 0}
{hi:NOTE} hotdeck will fail when using mlogit with spaces in the category labels. This is due
to the lack of functionality in STATA's matrix commands.
{title:Author}
{p}
Adrian Mander, MRC Human Nutrition Research, Cambridge, UK.
Email {browse "mailto:adrian.mander@mrc-hnr.cam.ac.uk":adrian.mander@mrc-hnr.cam.ac.uk}
{title:See Also}
Related commands
HELP FILES Installation status SSC installation links Description
{help whotdeck} (if installed) ({stata ssc install whotdeck}) Weighted version of Hotdeck

View File

@ -1,637 +0,0 @@
*! Date : 1 Aug 2005
*! Version : 1.64
*! Authors : Adrian Mander/David Clayton
*! Email : adrian.mander@mrc-hnr.cam.ac.uk
*! Description : Hotdeck imputation
program define hotdeck6
version 6.0
syntax [varlist] [if] [in] [using/], [BY(varlist) IMPute(integer 1) STORE GENerate(string) COMmand(string) PARMS(string) REPlace NOISE KEEP(varlist) SEED(string) QUIET INFILES(string) ]
tokenize "`varlist'"
local z "`1'"
preserve
if "`if'"~="" {
qui keep `if'
}
/* Check the seed option */
if "`seed'"=="1" { local seed 2}
if "`seed'"=="" { local seed 1 }
confirm number `seed'
/* To generate a seed from the time */
if `seed'==1 {
local time = "$S_TIME"
local date = "$S_DATE"
tokenize "`time'", parse(":")
local seed1 "`1'`3'`5'"
tokenize "`date'", parse(" ")
local dat "`1'`2'`3'"
local dat1 = date("`dat'","dmy")
local seed1 "`seed1'`dat1'"
di
local l_seed "2^31-1"
local seed1 = mod(`seed1',`l_seed')
di in green "Seed is `seed1'"
set seed `seed1'
}
estimates clear
tempfile olddata
/*NOT SURE if I must implement no strings in BY() option
tokenize "`by'"
while "`1'"~="" {
confirm numeric variable `1'
mac shift 1
}
*/
tempvar touse
mark `touse' `if' `in'
markout `touse' `by', strok
di in green "DELETING all matrices...."
mat drop _all
if "`infiles'"=="" {
if "`by'"=="" { _misspat `varlist' `if' `in' }
else { _misspat `varlist' `if' `in', by(`by') }
local nfill=r(nmiss)
}
qui save "`olddata'"
if "`infiles'"=="" {
global allpat = r(allpat)
count if `touse'
local miss = (r(N)-`nfill')/r(N)
}
if "`command'"=="" {
di in red "WARNING: When the <command> option is not selected "
di in red "then no analysis is performed on the imputed datasets"
di
if "`store'"=="" {
di "ALSO STORE isnt selected so hotdeck will appear to do nothing"
exit(198)
}
}
if `impute'<1 {
di in red "The number of imputations must be more than 0 not `impute'"
exit(198)
}
if `impute'==1 & "`infiles'"=="" {
if "`store'"=="" | "`command'"~="" {
di in red "If one imputation is made then command option should NOT be used"
di in red "AND the store option must be specified"
exit(198)
}
}
if "`using'"~="" {
if "`store'"=="" {
di in red "To save datasets you must specify the STORE option"
exit(198)
}
}
if "`keep'"~="" {
if "`store'"=="" {
di in red "If you use the KEEP option you must specify the STORE option"
exit(198)
}
}
if "`noise'"~="" & "`command'"=="" {
di in red "When specifying noise you must also specify the command option"
exit(198)
}
if "`command'"~="" {
if "`parms'"=="" {
di in red "To obtain any output from the command option you must also specify "
di in red "the parameters of interest using the parms() option"
exit(198)
}
}
/************************************************
* Loop over the number of imputed data sets
* required
************************************************/
if "`seed'"~="1" {set seed `seed'}
/* This is the if statement that allows the input of imputed datafiles */
if "`infiles'"~="" {
local i 1
tokenize "`infiles'"
while "`1'"~="" {
use "`1'",replace
mac shift 1
if "`command'"=="" {
di in red "You must use the command option when using INFILES"
exit(198)
}
if "`noise'"~="" { `command' }
else { qui `command' } /* Do the analysis */
_parms, parms(`parms') command(`command') iter(`i') /* Select Parameters of interest*/
local i =`i'+1
}
local impute=`i'-1
}
else {
local i 1
while `i'<= `impute' {
use "`olddata'",clear /* Use original dataset */
qui keep if `touse'
if "`by'"~="" { _hotdeck `varlist', by(`by') }
else { _hotdeck `varlist' } /* Impute values */
if "`store'"~="" { /* Save imputed datasets */
if "`using'"=="" {
local using "imp"
}
if "`keep'"=="" {
qui keep `varlist' `by'
qui save `using'`i',replace
}
else {
mkvlist `varlist' `by', vlist(`keep')
qui keep `r(vlist)'
qui save `using'`i',replace
}
}
if "`command'"~="" {
if "`noise'"~="" { `command' }
else { qui `command' } /* Do the analysis */
_parms, parms(`parms') command(`command') iter(`i') /* Select Parameters of interest*/
}
local i=`i'+1
}
}
if "`command'"~="" {
/********************************************************
* Loop to calculate the estimates needed
*
* First get the dimensions of the parameter matrices
********************************************************/
local dim= rowsof(impV1)
mat Qbar = J(1,`dim',0)
mat Ubar = J(`dim',`dim',0)
/* calc the averaging factor */
local inv = 1/`impute'
/* calc the average coef and variance qbar and ubar */
local i 1
while `i'<= `impute' {
mat Qbar= `inv'*impb`i'+ Qbar
mat Ubar= `inv'*impV`i'+ Ubar
local i=`i'+1
}
/* calc between variances */
mat B=J(`dim',`dim',0)
local inv1 = 1/(`impute'-1)
local i 1
while `i'<= `impute' {
mat B= B + `inv1'*(impb`i' - Qbar)'*(impb`i' - Qbar)
local i=`i'+1
}
/* Calc total variance */
mat T = Ubar+(1+1/`impute')*B
cap mat tempmt=B*inv(Ubar)
if _rc==504 {
di as error "WARNING: Trying to invert variance matrix with zero elements?"
local ter = rowsof(Ubar)
mat temp = J(`ter',1,1)
mat temp2 = Ubar*temp
local tei 1
local names: colfullnames(impb1)
matrix rownames temp2 = `names'
while `tei'<=`ter' {
if temp2[`tei',1]==0 {
local var:word `tei' of "`names'"
di as txt "Variance for covariate `tei' is 0 !!"
}
local tei=`tei'+1
}
mat tempmt=B*inv(Ubar)
}
local trace=trace(tempmt)
local r1 = 1-((1+1/`impute')*`trace'/`dim')
local names: rowfullnames(impb1)
matrix rownames Qbar = `names'
local names: colfullnames(impb1)
matrix colnames Qbar = `names'
local names: rowfullnames(impV1)
matrix rownames T = `names'
matrix rownames B = `names'
matrix rownames Ubar = `names'
local names: colfullnames(impV1)
matrix colnames T = `names'
matrix colnames B = `names'
matrix colnames Ubar = `names'
mat Tsurr= `r1'*T
mat D = Qbar*inv(Tsurr)*Qbar'
local D1 = D[1,1]/`dim'
local t=`dim'*(`impute'-1)
local v1= 4+(`t'-4)*(1+(1-2/`t')*1/`r1')^2
local ftest= fprob(`dim',`v1',`D1')
/********************************************************
* The next will output the main results in Stata style
* if the normal approximation is good then you could
* use the matrix post command
********************************************************/
if "`quiet'"=="" {
if `r1'<0 {
di in red "WARNING: between se larger than within se in one or more "
di in red "parameters invalidating the global F test"
}
if `t'<4 {
di in red "WARNING: t less than 4 invalid global test increase "
di in red "parameters OR imputations"
}
}
di
di in gr _col(1) "Number of Obs.", _col(45) " = ", _N
di in gr _col(1) "No. of Imputations", _col(48) "= ", `impute'
if "`infiles'"=="" {
di in gr _col(1) "% Lines of Missing Data", _col(45) " = ", `miss'*100,"%"
}
di in gr _col(1) "F(",%6.3f `v1',",`dim')", _col(45) " = ", %9.4f `D1'
di in gr _col(1) "Prob > F " , _col(45) " = ", %9.4f `ftest'
di in gr _dup(83) "-"
local names: colfullnames(impb1)
local name : word 1 of `names'
local i 1
di in gr _continue "Variable |",_col(12) "Average",_col(21) "Between", _col(30) "Within", _col(40) "Total", _col(50) "df", _col(59) "t", _col(65) "p-value"
di
di in gr _continue" |",_col(12) "Coef.",_col(21) "Imp. SE", _col(30) "Imp. SE", _col(39) " SE", _col(50) "", _col(59) "", _col(65) ""
di
di in gr _continue "-------------+---------------------------------------------------------------------"
while "`name'"~="" {
di
mat qhat=Qbar[1,"`name'"]
mat b=B["`name'","`name'"]
mat u=Ubar["`name'","`name'"]
mat t=T["`name'","`name'"]
local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
local ttest= qhat[1,1]/sqrt(t[1,1])
di _continue "`name'",_col(9) "|", _col(10) %7.4f qhat[1,1],_col(17) %9.3f sqrt(b[1,1]), _col(25) %9.3f sqrt(u[1,1]), _col(34) %9.3f sqrt(t[1,1]), _col(44) %9.1f `df', _col(53) %9.3f `ttest', _col(62) %9.3f tprob(`df',`ttest')
local i=`i'+1
local name : word `i' of `names'
}
di
di in gr _continue "-------------+---------------------------------------------------------------------"
di
local name : word 1 of `names'
local i 1
di in gr _continue "Variable |",_col(12) "[$S_level% Conf. Interval]"
di
di in gr _continue "-------------+---------------------------------------------------------------------"
while "`name'"~="" {
di
mat qhat=Qbar[1,"`name'"]
mat b=B["`name'","`name'"]
mat u=Ubar["`name'","`name'"]
mat t=T["`name'","`name'"]
local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
local ttest= qhat[1,1]/sqrt(t[1,1])
local prob = 1-((100-$S_level)/2)/100
local tvalue = invt(`df',`prob')
local left = qhat[1,1]-`tvalue'*sqrt(t[1,1])
local right = qhat[1,1]+`tvalue'*sqrt(t[1,1])
di _continue "`name'",_col(9) "|", %9.4f `left', %9.4f `right'
local i=`i'+1
local name : word `i' of `names'
}
di ""
di in gr _dup(83) "-"
} /* end of command if statement */
restore
end
/****************************************************
* The approximate Bayesian Bootstrap hotdecking
****************************************************/
program define _hotdeck
    version 6.0
    * Approximate Bayesian bootstrap hot-deck imputation, done in place.
    *
    *   varlist   variables whose values are overwritten with donor values
    *   by()      optional stratification variables; donors are drawn
    *             within stratum
    *
    * Expects a variable named ipattern in the data that is non-missing for
    * complete cases and missing otherwise (as created by _misspat).
    * Complete cases keep their own values; every other row copies all of
    * varlist from one randomly chosen complete case.
    syntax [varlist] [using], [BY(string)]
    local z "ipattern"
    if "`by'"!="" {
        confirm ex var `by'
        * prefix applied to every per-stratum command below; empty without by()
        local byp "by `by':"
    }
    tempvar nobs bstrp b2strp temp temp2
    qui {
        * missing sorts last, so complete cases occupy rows 1..nobs of each group
        sort `by' `z' `varlist'
        gen long `nobs' = (`z'!=.)
        `byp' replace `nobs' = sum(`nobs')
        `byp' replace `nobs' = `nobs'[_N]
        * two-stage draw: bstrp is a bootstrap sample of donor rows and
        * b2strp picks from that sample
        `byp' gen long `bstrp' = int(uniform()*`nobs'+1)
        `byp' gen long `b2strp' = int(uniform()*`nobs'+1)
        `byp' gen long `temp' = `bstrp'[`b2strp']
        `byp' replace `bstrp' = `temp'
        * complete cases point at themselves
        `byp' replace `bstrp' = _n if _n<=`nobs'
        tokenize "`varlist'"
        while "`1'"~="" {
            * keep the storage type of the source variable: a default (float)
            * scratch variable would round double and long values on the way
            * through, for donors and complete cases alike
            local vtype : type `1'
            `byp' gen `vtype' `temp2' = `1'[`bstrp']
            `byp' replace `1' = `temp2'
            drop `temp2'
            mac shift 1
        }
    }
end
/*******************************************************************
* Get the parameters, or a subset of them, from the
* model, together with the matching subset of
* the variance-covariance matrix
* Note that this section can also handle non-regression commands
* and macro lists
*******************************************************************/
program define _parms
    * Collect the parameters of interest after one analysis run.
    *
    *   parms()   names to extract: coefficient names after an estimation
    *             command, or pairs (estimate variance) of macro / r() names
    *             after a non-estimation command
    *   iter()    imputation number, used as the suffix of the result matrices
    *
    * Results are left in the global matrices impb<iter> (1 x k estimates)
    * and impV<iter> (k x k variances) for the caller to combine.
    syntax [varlist], [PARMS(string) ITER(integer 1) COMMAND(string) GENerate(string) REPlace]
    * split parms into things that parse as variables (or _cons) and the rest
    local 0 "`parms'"
    while "`parms'"~="" {
        gettoken 0 parms: parms , parse(" ,")
        cap syntax [varlist]
        if _rc~=0 {
            if "`0'"=="_cons" { local vlist "`vlist' `0'" }
            else { local plist "`plist' `0'" }
        }
        else { local vlist "`vlist' `varlist'" }
    }
    * if results were not part of a regression command
    if "`e(cmd)'"=="" {
        if `iter'==1 { di in red "Using Non Regression Parameters and Command" }
        * first pass: count the (estimate, variance) pairs and collect names
        tokenize "`plist' `vlist'"
        local np=0
        while "`1'"~="" {
            * plain macro assignment (no =) so long name lists are not
            * truncated by string-expression limits
            local names1 "`names1' `1'"
            if "`2'"=="" {
                di in red "Must supply variance estimate of `1'"
                exit(302)
            }
            mac shift 2
            local np = `np'+1
        }
        mat impb`iter' = J(1,`np',0)
        mat impV`iter' = J(`np',`np',0)
        * second pass: each name is looked up first as a local style reference
        * (for example an r() result) and then as a global macro
        tokenize "`plist' `vlist'"
        local np 1
        while "`1'"~="" {
            if "$`1'"=="" & "``1''"=="" {
                di in red "Global = $`1' Local = ``1''"
                di in red "Global/local macro `1' is missing "
                exit(198)
            }
            if "``1''"~="" { mat impb`iter'[1,`np'] = ``1'' }
            if "$`1'"~="" & "``1''"=="" { mat impb`iter'[1,`np'] = $`1' }
            if "$`2'"=="" & "``2''"=="" {
                di in red "Global = $`2' Local = ``2''"
                di in red "Global/local macro `2' is missing "
                exit(198)
            }
            if "``2''"~="" { mat impV`iter'[`np',`np'] = ``2'' }
            if "$`2'"~="" & "``2''"=="" { mat impV`iter'[`np',`np'] = $`2' }
            local np=`np'+1
            mac shift 2
        }
        matrix colnames impb`iter'=`names1'
        matrix colnames impV`iter'=`names1'
        matrix rownames impV`iter'=`names1'
    }
    else {
        * working matrices use temporary names so that user matrices are never
        * overwritten and nothing is left behind, whatever the number of
        * parameters requested
        tempname b V Vcols piece
        matrix `b' = get(_b)
        matrix `V' = get(VCE)
        * coefficients: requested columns of the estimate vector, in order
        tokenize "`vlist' `plist'"
        cap mat impb`iter' = `b'[.,"`1'"]
        if _rc==111 {
            di as error "Are you sure `1' is in the model??"
            di as error "Check the matrix of estimates and only include column names in the parameters NOT variable names"
            exit(111)
        }
        * any other failure must stop here rather than leave a stale or
        * missing impb matrix for the caller
        else if _rc { error _rc }
        mac shift 1
        while "`1'"~="" {
            mat `piece' = `b'[.,"`1'"]
            mat impb`iter' = impb`iter' , `piece'
            mac shift 1
        }
        * variances: first the requested columns ...
        tokenize "`vlist' `plist'"
        mat `Vcols' = `V'[.,"`1'"]
        mac shift 1
        while "`1'"~="" {
            mat `piece' = `V'[.,"`1'"]
            mat `Vcols' = `Vcols' , `piece'
            mac shift 1
        }
        * ... then the matching rows
        tokenize "`vlist' `plist'"
        mat impV`iter' = `Vcols'["`1'",.]
        mac shift 1
        while "`1'"~="" {
            mat `piece' = `Vcols'["`1'",.]
            mat impV`iter' = impV`iter' \ `piece'
            mac shift 1
        }
    }
end
/*************************************************
* Look at the missing pattern in the varlist
*************************************************/
program define _misspat,rclass
    * Tabulate the missing-data patterns of varlist.
    *
    * Creates two permanent variables: pattern (one character per variable,
    * * for missing and - for observed) and ipattern (1 for rows with no
    * missing value, missing otherwise). With by() it also creates No_obs and
    * No_miss and lists the per-stratum counts.
    * Returns r(nmiss), the number of incomplete rows, and r(allpat), the
    * pattern string of a complete row. Exits with code 198 when nothing is
    * missing.
    syntax varlist [if] [in] , [BY(string) ]
    tempvar touse2
    mark `touse2' `if' `in'
    markout `touse2'
    local allstr ""
    qui gen str40 pattern=""
    foreach v of local varlist {
        qui replace pattern = pattern + cond(`v'==.,"*","-") if `touse2'
        local allstr = "`allstr'-"
    }
    qui compress pattern
    sort pattern
    lab var pattern "Missing pattern"
    di
    di in green "Missing Patterns"
    di in green "----------------"
    di
    di in green "Table of the Missing data patterns "
    di in green " * signifies missing and - is not missing"
    di
    di "Varlist order: `varlist'"
    tab pattern if `touse2'
    local n=r(N)
    qui count if pattern=="`allstr'" & `touse2'
    local ncomplete = r(N)
    if `ncomplete'==`n' {
        di "There is no missing data in the varlist"
        exit(198)
    }
    return scalar nmiss = `n'-`ncomplete'
    return local allpat = "`allstr'"
    qui gen ipattern=1 if pattern=="`allstr'" & `touse2'
    * nothing more to do without strata
    if "`by'"=="" { exit }
    /*****************************************
    * Calculate stratum missing numbers
    *****************************************/
    di
    di in green "STRATUM information"
    di in green "-------------------"
    di
    di in green "Listing the number observed (No_obs) and "
    di in green "the number missing (No_miss) in each stratum"
    tempvar cnt mcnt
    qui sort `by'
    * running totals within stratum, kept only on the last row of each stratum
    qui by `by':gen `cnt'=sum(ipattern)
    qui by `by':gen `mcnt'=sum(ipattern==.)
    qui by `by': replace `cnt'=. if _n!=_N
    qui by `by': replace `mcnt'=. if _n!=_N
    rename `cnt' No_obs
    rename `mcnt' No_miss
    l `by' No_obs No_miss if No_obs~=., noobs
    di
    qui count if No_obs==0
    local k = r(N)
    if `k'>0 { di in red "WARNING: `k' strata with NO observed data" }
    qui count if No_obs==1
    local k = r(N)
    if `k'>0 { di in blue "`k' strata with only 1 observed data"}
    qui count if (No_obs>1 & No_obs<6)
    local k = r(N)
    if `k'>0 { di in blue "`k' strata with 2-5 observed data"}
    di
end
/*************************************************
* Expand stata syntax
*************************************************/
program define mkvlist, rclass
    * Build the list of variables to keep when storing an imputed dataset.
    *
    *   varlist   variables that are always kept
    *   vlist()   further names; each token that parses as a varlist is
    *             appended in expanded form, and _cons is appended as is
    *
    * Returns r(vlist): varlist, then the vlist() text as given, then the
    * expansions.
    syntax varlist, VLIST(string)
    local base "`varlist'"
    local todo "`vlist'"
    while "`todo'" != "" {
        gettoken 0 todo : todo, parse(" ,")
        capture syntax [varlist]
        if _rc == 0 {
            local vlist "`vlist' `varlist'"
        }
        else if "`0'" == "_cons" {
            local vlist "`vlist' `0'"
        }
    }
    return local vlist "`base' `vlist'"
end

View File

@ -1 +0,0 @@
.h hotdeck

View File

@ -1,625 +0,0 @@
*! 1.4.1 NJC 16 May 1999
* 1.4.0 NJC 19 March 1999
* Mike Bradburn unearthed bug marked `MB'
* 1.3.0 NJC 31 March 1998
* Fred Wolfe unearthed bugs and suggested features marked `FW'
* 1.2.0 NJC 17 June 1997
program define hplot
version 6.0
* hplot: horizontally labelled plot drawn with low-level gph commands.
* One horizontal line is drawn for each observation that is used, and each
* variable in varlist is shown on that line as a point symbol on a common
* x scale. The code runs in three stages: prepare legends, gaps and the
* data range; set the layout parameters; draw axes, lines, points and titles.
#delimit ;
syntax varlist(numeric) [if] [in]
[, BOrder SOrt(string) noXaxis noYaxis PEn(string) Symbol(string)
T1title(string) T2title(string) TItle(string) TTIck XLAbel(numlist)
XLIne(numlist) XSCale(str) XTIck(numlist) Axtol(int 600) Blank flipt
Format(string) FONTC(int 290) FONTR(int 570) FONTCB(int 444)
FONTRB(int 923) GAPMag(real 1) GAPs(numlist int) GLegend(string)
GLLJ GLPOS(int -1) t2m(int 0) Grid LAP Legend(string) LIne NIT2
t1m(int 0) PTSize(int 275) Range TIM(int 0) Vat VATFmt(string)
VATPos(int 31500) Cstart(int -1) SAving(string) PENText(int 1) ] ;
#delimit cr
qui {
tempvar touse order gleg gap dmin dmax dneg dpos z
mark `touse' `if' `in'
* unmark observations for which every variable in varlist is missing
Markout2 `touse' `varlist'
gen `order' = _n
* used observations first, then the requested sort, ties in original order
gsort - `touse' `sort' `order'
count if `touse'
loc nuse = r(N)
* legend on left
if "`legend'" == "" {
tempvar legend
g str1 `legend' = " "
if "`blank'" == "" {
replace `legend' = string(_n) if `touse'
}
}
else {
confirm variable `legend'
capture confirm string variable `legend'
if _rc == 7 {
* numeric legend variable: use value labels where they exist,
* otherwise the number itself
tempvar legend2
capture decode `legend', g(`legend2')
if _rc {
gen str1 `legend2' = ""
replace `legend2' = string(`legend')
}
else {
replace `legend2' = string(`legend') /*
*/ if missing(`legend2')
}
loc legend "`legend2'"
}
}
* the width of the legend string type sets the default left margin
loc leglen : type `legend'
loc leglen = substr("`leglen'",4,.)
if `cstart' == -1 {
loc cstart = 2000 + int(9000 * `leglen' / 25)
}
* axis scale
if "`xscale'" != "" {
tokenize "`xscale'", parse(",")
if "`4'" != "" | "`2'" != "," {
di in r "invalid xscale( ) option"
exit 198
}
loc xscmin `1'
loc xscmax `3'
}
if "`xscmin'" == "" { loc min 0 }
else loc min `xscmin'
if "`xscmax'" == "" { loc max 0 }
else loc max `xscmax'
* xlabel xtick xline might extend graph range beyond data range
if "`xlabel'`xtick'`xline'" != "" {
numlist "`xlabel' `xtick' `xline'", sort
loc nn : word count `r(numlist)'
loc xmin : word 1 of `r(numlist)'
loc xmax : word `nn' of `r(numlist)'
loc min = min(`min', `xmin')
loc max = max(`max', `xmax')
}
* gap legend?
g str1 `gleg' = " "
loc glj = cond("`gllj'" != "", -1, 1)
if "`glegend'" != "" {
tokenize "`glegend'", parse("!")
loc j 1
while "`1'" != "" {
if "`1'" != "!" {
if "`1'" == "." { loc 1 " " }
loc gleg`j' "`1'"
loc j = `j' + 1
}
mac shift
}
}
* gaps between lines?
g byte `gap' = 0
if "`gaps'" != "" {
loc j 1
numlist "`gaps'", int range(>=0)
tokenize `r(numlist)'
while "`1'" != "" {
if "`1'" == "0" {
loc gleg0 "`gleg`j''"
if "`gleg0'" == "" { loc gleg0 " " }
}
else {
replace `gap' = 1 in `1' if `1' <= `nuse'
replace `gleg' = "`gleg`j''" in `1' if `1' <= `nuse'
}
loc j = `j' + 1
mac shift
}
}
count if `gap'
loc ngaps = r(N) + ("`gleg0'" != "")
* data range
tokenize `varlist'
loc nvars : word count `varlist'
g `dmin' = `1'
g `dmax' = `1'
if `nvars' >= 2 {
loc i 2
while `i' <= `nvars' {
replace `dmin' = min(`dmin', ``i'')
replace `dmax' = max(`dmax', ``i'')
loc i = `i' + 1
}
}
su `dmin' if `touse', meanonly
loc min = min(`min', r(min))
su `dmax' if `touse', meanonly
loc max = max(`max', r(max))
loc drange = `max' - `min'
* base of the lines: zero when it lies inside the plotted range,
* otherwise the end of the range nearest to zero
loc zero = cond(`min' >= 0, max(0,`min'), min(0,`max'))
g `z' = `zero'
g `dneg' = min(`dmin', `z')
g `dpos' = max(`dmax', `z')
}
* start of parameter block
loc t1start 1000 /* row for t1title */
loc t2start 1900 /* row for t2title */
loc ybeg 2400 /* start of y-axis */
loc ylength 17600
* `axtol' is space at ends of y-axis
* axtol too large => ystep negative FW
if `axtol' > `ylength' / 2 {
di in bl "axtol too large: reset to default 600"
loc axtol 600
}
* row where first line starts
loc ystart = `ybeg' + `axtol'
* step between lines: one gap defaults to one line
loc ystep = /*
*/ (`ylength' - 2 * `axtol')/(`nuse' - 1 + `ngaps' *`gapmag')
loc yend = `ybeg' + `ylength'
loc ynudge = 200 * (`fontr'/570)^2
/* text displaced downwards from lines */
loc ytick 400 /* tick length */
loc yleg 1000 /* labels down from axis */
loc yleg = `yend' + `yleg'
loc ytitle 1400 /* title down from labels */
loc ytitlef 900 /* title down from labels, flip titles */
loc xstart `cstart' /* col where first line begins */
loc xgap 400 /* gap between left legend and body of plot */
loc xbeg = `xstart' - `xgap'
if `glpos' == -1 { loc glpos `xbeg' }
loc xlength = 30000 - `xstart'
/* horizontal extent of data region */
loc xend = `xbeg' + `xgap' + `xlength'
loc xz = /*
*/ `xbeg' + `xgap' + `xlength' * (`zero' - `min') / `drange'
loc mcent = (`cstart' + 30000)/2 + `tim'
/* col where main title centred */
loc dotsp 150 /* spacing between dots */
if "`format'" == "" { loc format "%1.0f" }
if "`vatfmt'" == "" { loc vatfmt "%1.0f" }
loc ahl = 500 * `ptsize'/275 /* arrowhead length */
loc aha = _pi/6 /* arrowhead angle, between head and stem */
loc barht `ahl' /* bar height */
* default symbols and pens: one character per variable
if "`symbol'" == "" {
if `nvars' < 6 { loc symbol "46253" }
else loc symbol : di _dup(`nvars') "4"
}
else if length("`symbol'") == 1 & `nvars' > 1 {
loc symbol : di _dup(`nvars') "`symbol'"
}
* letter symbol codes are translated to digit codes; other characters
* pass through unchanged
Gphtrans `symbol'
loc symbol "`r(symbol)'"
if "`pen'" == "" { loc pen : di _dup(`nvars') "2" }
else if length("`pen'") == 1 & `nvars' > 1 {
loc pen : di _dup(`nvars') "`pen'"
}
* end of parameter block
* start gph
if "`saving'" != "" { loc saving ", saving(`saving')" }
gph open `saving' /* FW */
gph pen `pentext'
gph font `fontr' `fontc'
* y-axis
if "`yaxis'" == "" { gph line `ybeg' `xstart' `yend' `xstart' }
* ttick => top ticks
loc ttick = "`ttick'" == "ttick"
* ttick should => border FW
if `ttick' { loc border "border" }
* x-axis and labels
if "`xaxis'" == "" {
gph line `yend' `xstart' `yend' `xend'
loc ytick2 = `ybeg' - `ytick'/2
loc ytick = `yend' + `ytick'
if "`xlabel'" == "" {
gph line `yend' `xstart' `ytick' `xstart'
gph line `yend' `xend' `ytick' `xend'
if `ttick' {
gph line `ybeg' `xstart' `ytick2' `xstart'
gph line `ybeg' `xend' `ytick2' `xend' /* FW */
}
loc text = cond("`lap'" == "lap", abs(`min'), `min')
loc text : di `format' `text'
gph text `yleg' `xstart' 0 0 `text'
loc text = cond("`lap'" == "lap", abs(`max'), `max')
loc text : di `format' `text'
gph text `yleg' `xend' 0 0 `text'
}
else {
numlist "`xlabel'"
tokenize `r(numlist)'
while "`1'" != "" {
loc xtickp = /*
*/ `xbeg' + `xgap' + `xlength' * (`1' - `min')/`drange'
gph line `yend' `xtickp' `ytick' `xtickp'
if `ttick' {
gph line `ybeg' `xtickp' `ytick2' `xtickp'
}
loc text = cond("`lap'" == "lap", abs(`1'), `1')
loc text : di `format' `text'
gph text `yleg' `xtickp' 0 0 `text'
mac shift
}
}
}
* x-ticks
if "`xtick'" != "" {
numlist "`xtick'"
tokenize `r(numlist)'
while "`1'" != "" {
loc xtickp = /*
*/ `xbeg' + `xgap' + `xlength' * (`1' - `min')/`drange'
gph line `yend' `xtickp' `ytick' `xtickp'
if `ttick' {
gph line `ybeg' `xtickp' `ytick2' `xtickp'
}
mac shift
}
}
* x-lines
if "`xline'" != "" {
numlist "`xline'"
tokenize `r(numlist)'
while "`1'" != "" {
loc xli = /*
*/ `xbeg' + `xgap' + `xlength' * (`1' - `min')/`drange'
gph line `yend' `xli' `ybeg' `xli'
mac shift
}
}
* border
if "`border'" != "" {
gph line `ybeg' `xstart' `ybeg' `xend'
gph line `ybeg' `xend' `yend' `xend'
if "`xaxis'" != "" { gph line `yend' `xstart' `yend' `xend' }
}
* gap legend above first data point
if "`gleg0'" != "" {
loc y2 = `ystart' + (`gapmag' - 1) * `ystep' + `ynudge'
gph text `y2' `glpos' 0 `glj' `gleg0'
}
* for each variable
tokenize `varlist'
loc j 1
while "`1'" != "" {
loc data "`1'"
loc sy = substr("`symbol'",`j',1)
loc pe = substr("`pen'",`j',1)
loc y `ystart'
if "`gleg0'" != "" { loc y = `y' + `ystep' * `gapmag' }
* for each observation
loc i 1
while `i' <= `nuse' {
* dots and/or lines
* (guide lines are drawn once only, on the pass for the first variable)
if `j' == 1 {
if "`range'" == "range" {
/* MB: next statement needed because largest value
could be negative, of course! */
loc xmax = `xbeg' + `xgap' + /*
*/ `xlength' * (`dmax'[`i'] - `min') / `drange'
loc xmin = `xbeg' + `xgap' + /*
*/ `xlength' * (`dmin'[`i'] - `min') / `drange'
loc xz `xmin'
}
else {
loc xmax = `xbeg' + `xgap' + /*
*/ `xlength' * (`dpos'[`i'] - `min') / `drange'
loc xmin = `xbeg' + `xgap' + /*
*/ `xlength' * (`dneg'[`i'] - `min') / `drange'
}
if "`line'" == "line" {
gph line `y' `xmax' `y' `xz'
gph line `y' `xmin' `y' `xz'
}
if "`grid'" == "" {
loc xdot `xz'
loc ndots = int(abs(`xmax' - `xz') / `dotsp')
loc idot 1
while `idot' <= `ndots' {
gph point `y' `xdot' `ptsize' 0
if "`range'" == "range" {
loc xdot = `xdot' + `dotsp'
}
else loc xdot = /*
*/ `xdot' + `dotsp' * sign(`dpos'[`i'])
loc idot = `idot' + 1
}
loc xdot `xz'
loc ndots = int(abs(`xmin' - `xz') / `dotsp')
loc idot 1
while `idot' <= `ndots' {
gph point `y' `xdot' `ptsize' 0
loc xdot = `xdot' + `dotsp' * sign(`dneg'[`i'])
loc idot = `idot' + 1
}
}
else {
loc xdot `xstart'
while `xdot' < `xend' {
gph point `y' `xdot' `ptsize' 0
loc xdot = `xdot' + `dotsp'
}
}
}
* data point
gph pen `pe'
loc x = `xbeg' + `xgap' + /*
*/ `xlength' * (`data'[`i'] - `min') / `drange'
if `data'[`i'] < . & "`sy'" != "i" {
if "`sy'" == "a" {
if `j' == 1 { loc sign 1 }
else loc sign = sign(`data'[`i'] - `prev'[`i'])
Gphhah `y' `x' `sign' `ahl' `aha'
}
else if "`sy'" == ">" { Gphhah `y' `x' 1 `ahl' `aha' }
else if "`sy'" == "<" { Gphhah `y' `x' -1 `ahl' `aha' }
else Gphpt `sy' `y' `x' `barht' `ptsize'
}
* text
gph pen `pentext'
loc y2 = `y' + `ynudge'
if "`vat'" != "" & `nvars' == 1 {
loc text : di `vatfmt' `data'[`i']
gph text `y2' `vatpos' 0 1 `text'
}
if `j' == 1 {
loc text = `legend'[`i']
gph text `y2' `xbeg' 0 1 `text'
}
* gap
if `gap'[`i'] {
loc y = `y' + `ystep' * `gapmag'
if `j' == 1 {
loc text = `gleg'[`i']
loc y2 = `y' + `ynudge'
gph text `y2' `glpos' 0 `glj' `text'
}
}
loc y = `y' + `ystep'
loc i = `i' + 1
}
* next observation
loc prev `1'
loc j = `j' + 1
mac shift
}
* next variable
* t2title, left justified (defaults to key for 2 or more variables)
if "`t2title'" != "" & trim("`t2title'") == "" {
loc t2title
}
else if "`t2title'" != "" {
gph text `t2start' `xstart' 0 -1 `t2title'
}
else if `nvars' >= 2 {
loc t2 = `t2start' - `ynudge'
loc xjump = `xlength' / `nvars'
loc xjump2 = `xjump' / 50
loc x = `xstart' + `xjump2' + `t2m'
loc j 1
while `j' <= `nvars' {
loc sy = substr("`symbol'",`j',1)
loc pe = substr("`pen'",`j',1)
gph pen `pe'
if "`sy'" == "a" {
* the key arrow points the way most observations go; the variable
* looked up here is the first one whose symbol is a
loc ahv = index("`symbol'","a")
if `ahv' > 1 {
loc this : word `ahv' of `varlist'
loc ahvm1 = `ahv' - 1
local prev : word `ahvm1' of `varlist'
count if `this' >= `prev' & `touse'
loc majsign = cond(r(N) > `nuse'/2, 1, -1)
}
else loc majsign 1
loc x3 = `x' + 0.6 * `ptsize' * `majsign'
Gphhah `t2' `x3' `majsign' `ahl' `aha'
}
else if "`sy'" == ">" {
loc x3 = `x' + 0.6 * `ptsize'
Gphhah `t2' `x3' 1 `ahl' `aha'
}
else if "`sy'" == "<" {
loc x3 = `x' - 0.6 * `ptsize'
Gphhah `t2' `x3' -1 `ahl' `aha'
}
else if "`sy'" != "i" {
Gphpt `sy' `t2' `x' `barht' `ptsize'
}
loc x2 = `x' + `xjump2'
loc var : word `j' of `varlist'
if "`nit2'" == "" {
loc text : variable label `var'
if "`text'" == "" { loc text "`var'" }
}
else loc text "`var'"
gph pen `pentext'
gph text `t2start' `x2' 0 -1 `text'
loc x = `x' + `xjump'
loc j = `j' + 1
}
}
* title and t1title
if "`title'" == "" & `nvars' == 1 {
loc title : variable label `data'
if "`title'" == "" { loc title "`data'" }
}
else if "`title'" != "" & trim("`title'") == "" { loc title }
loc xL = `xstart' + `t1m'
if "`flipt'" == "" { /* default */
* t1title, left justified
gph text `t1start' `xL' 0 -1 `t1title'
* main title at bottom, centred
gph font `fontrb' `fontcb'
loc ytitle = `yleg' + `ytitle'
gph text `ytitle' `mcent' 0 0 `title'
}
else { /* flip titles from default */
* bottom title, centred (and closer to axis than default)
loc ytitle = `yleg' + `ytitlef'
gph text `ytitle' `mcent' 0 0 `t1title'
* main title at top, left justified
gph font `fontrb' `fontcb'
gph text `t1start' `xL' 0 -1 `title'
}
gph close
end
program define Gphhah /* horizontal arrow head */
    * Gphhah ytip xtip direction length angle
    *   ytip, xtip   position of the tip of the arrowhead
    *   direction    1 draws >, -1 draws <
    *   length       length of each barb
    *   angle        interior angle between shaft and barb
    * 1.0.1 NJC 18 February 1999
    * 1.0.0 NJC 27 May 1997
    version 6.0
    args ytip xtip dir len ang
    * both barbs start at the same x, one above and one below the shaft
    loc xback = `xtip' - `dir' * `len' * cos(`ang')
    loc yone = `ytip' - `dir' * `len' * sin(`ang')
    loc ytwo = `ytip' + `dir' * `len' * sin(`ang')
    gph line `yone' `xback' `ytip' `xtip'
    gph line `ytwo' `xback' `ytip' `xtip'
end
program define Gphbar /* vertical bar */
    * Gphbar yposition xposition bar_height
    * draws a vertical line of the given height centred on the position
    * 1.0.1 NJC 18 February 1999
    * 1.0.0 NJC 27 May 1997
    version 6.0
    args ymid xmid ht
    loc ytop = `ymid' - 0.5 * `ht'
    loc ybot = `ymid' + 0.5 * `ht'
    gph line `ytop' `xmid' `ybot' `xmid'
end
program define Gphcross /* cross X */ /* FW */
    * Gphcross yposition xposition bar_height
    * draws the two diagonals of a square of the given side, centred on
    * the position
    * 1.0.1 NJC 18 February 1999
    * 1.0.0 NJC 6 October 1997
    version 6.0
    args ymid xmid side
    loc left = `xmid' - 0.5 * `side'
    loc right = `xmid' + 0.5 * `side'
    loc top = `ymid' - 0.5 * `side'
    loc bottom = `ymid' + 0.5 * `side'
    gph line `top' `left' `bottom' `right'
    gph line `bottom' `left' `top' `right'
end
program def Markout2 /* marks out obs with all missing values */
    * Markout2 markvar varlist [, strok]
    * The first variable is the marker; it is set to 0 in observations where
    * every one of the remaining variables is missing. A string variable
    * counts as missing throughout unless strok is given, in which case it
    * is missing only when empty.
    * 1.0.2 NJC 16 February 1999
    * 1.0.1 NJC 25 March 1998
    version 6.0
    syntax varlist(min=1) [, Strok ]
    loc nall : word count `varlist'
    if `nall' == 1 { exit 0 }
    loc target : word 1 of `varlist'
    loc nchk = `nall' - 1
    tempvar nbad
    gen `nbad' = 0
    qui {
        loc k 2
        while `k' <= `nall' {
            loc v : word `k' of `varlist'
            loc vt : type `v'
            if substr("`vt'",1,3) != "str" {
                replace `nbad' = `nbad' + (`v' == .)
            }
            else if "`strok'" == "" {
                replace `nbad' = `nbad' + 1
            }
            else {
                replace `nbad' = `nbad' + (`v' == "")
            }
            loc k = `k' + 1
        }
        * unmark only where all of the checked variables were missing
        replace `target' = 0 if `nbad' == `nchk'
    }
end
program def Gphtrans, rclass /* transliterate ".OSTodp" -> "0123456" */
    * Gphtrans symbolstring
    * Returns r(symbol): the argument with each of the characters . O S T o d p
    * replaced by the digit 0 to 6 given by its position in that list; every
    * other character is copied unchanged.
    * 1.0.2 NJC 1 March 1999
    * 1.0.0 NJC 31 March 1998
    version 6.0
    args argin
    loc letters ".OSTodp"
    loc nchar = length("`argin'")
    loc pos 1
    while `pos' <= `nchar' {
        loc ch = substr("`argin'", `pos', 1)
        * position in the list, less one, is the digit code
        loc where = index("`letters'", "`ch'")
        if `where' > 0 { loc ch = `where' - 1 }
        loc argout "`argout'`ch'"
        loc pos = `pos' + 1
    }
    return loc symbol `argout'
end
program def Gphpt
    * Gphpt symbol yposition xposition bar_height point_size
    * Draws one point symbol. The bar and cross symbols are drawn by the
    * helper programs at a fraction of bar_height; any other symbol code is
    * passed to gph point with point_size.
    * 1.2.2 NJC 24 February 1999 smaller big cross, bigger ,
    * 1.2.1 NJC 18 February 1999
    * 1.2.0 NJC 12 Jan 1999
    * 1.1.0 NJC 24 Sept 1998
    * 1.0.0 NJC 6 April 1998
    version 6.0
    args code row col ht size
    if "`code'" == "-" { Gphhbar `row' `col' 0.5*`ht' }
    else if "`code'" == "x" { Gphcross `row' `col' 0.5*`ht' }
    else if "`code'" == "X" { Gphcross `row' `col' 0.707*`ht' }
    else if "`code'" == "," { Gphbar `row' `col' 0.6*`ht' }
    else if "`code'" == "|" { Gphbar `row' `col' `ht' }
    else gph point `row' `col' `size' `code'
end
program define Gphhbar /* horizontal bar */
    * Gphhbar yposition xposition bar_length
    * draws a horizontal line of the given length centred on the position
    * 1.0.1 NJC 18 February 1999
    * 1.0.0 NJC 24 Sept 1998
    version 6.0
    args ymid xmid len
    loc xleft = `xmid' - 0.5 * `len'
    loc xright = `xmid' + 0.5 * `len'
    gph line `ymid' `xleft' `ymid' `xright'
end

View File

@ -1,307 +0,0 @@
.-
help for ^hplot^
.-
Horizontally labelled plots
---------------------------
^hplot^ varlist [^if^ exp] [^in^ range] [ ^,^
^a^xtol^(^#^) c^start^(^#^) gapm^ag^(^#^) gap^s^(^string^)^
^g^rid ^li^ne ^pts^ize^(^#^) r^ange ^so^rt^(^string^)^
^s^ymbol^(^string^)^
^bo^rder ^f^ormat^(^format^) lap nox^axis ^noy^axis ^tti^ck
^xla^bel^(^string^) xli^ne^(^string^) xsc^ale^(^string^)^
^xti^ck^(^string^)^
^b^lank ^gl^egend^(^string^) gllj glpos(^#^) l^egend^(^legendvar^)^
^flipt nit2 t1m(^#^) t1^title^(^string^) t2m(^#^)^
^t2^title^(^string^) tim(^#^) ti^tle^(^string^)^
^fontr(^#^) fontc(^#^) fontrb(^#^) fontcb(^#^)^
^pe^n^(^string^) pent^ext^(^#^) sa^ving^(^graph_filename^)^ ]
Description
-----------
The basic form of ^hplot^ is a graph with one horizontal line for each
observation included. On that line are one or more point symbols
representing the values in varlist according to a common scale.
^hplot^ can produce a variety of horizontally labelled plots for data,
including W.S. Cleveland's dot charts or dot plots; variations on them
with continuous rather than dotted lines; D.R. McNeil's horizontal
parallel line plots; and displays for showing key quantities with or
without confidence intervals.
By default, the data in varlist are represented on horizontal dotted
lines with base at zero that extend to the maximum for each observation.
If negative values are present, dotted lines also extend to the minimum.
Point symbols are used to show actual values.
If the ^grid^ option is used, the data are represented on horizontal
dotted lines that extend over the whole data region of the graph.
If the ^line^ option is used, the data are represented on horizontal
continuous lines with base at the left-hand margin. This can be a lot
faster and may be adequate for exploratory analyses.
If the ^range^ option is used, the data are represented on horizontal
dotted or continuous lines which extend only over the range of values
for each observation, from the smallest value to the largest value.
If the data allow it, a different base may be forced using the ^xscale^
option.
A legend on the left of the data region can be from a specified
variable. If that variable is not specified, the order in the data will
be used; or, if that is not desired, the legend can be blank. The legend
is right-justified. The legend should look readable up to about 30
observations.
For understanding placement of material on the plot, it helps to know
that @gph@ draws in a space defined by 23063 rows and 32000 columns
with origin at top left. See help for @gph@.
Options
--------
Options controlling size, layout and symbol presentation:
---------------------------------------------------------
^axtol(^#^)^ controls the space between the x axis or the position of
the top border and the nearest data line. Default 600.
^cstart(^#^)^ controls the column (horizontal position) of the start of
the lines.
^gapmag(^#^)^ controls the magnitude of any gaps relative to the spacing
between lines. Default 1.
^gaps(^string^)^ places gaps after lines. ^gaps(3,6)^ places gaps after
the 3rd and 6th lines as they appear on the graph, counting down
from the top. ^0^ means a gap before the first line.
^grid^ places point symbols on horizontal dotted grid or guide lines
that extend all the way across the plot. This was originally
recommended by Cleveland if the base is not 0, and later used by him
in all examples.
^sort(^string^)^ means that observations are to be plotted in the
vertical order determined by application of @gsort@. For example,
^sort(^sortvar^)^ indicates sort in ascending order of sortvar
(highest values on bottom) and ^sort(- ^sortvar^)^ indicates
descending order (highest values on top).
^line^ replaces dotted by continuous horizontal lines from the left-hand
margin to each data point.
^ptsize(^#^)^ controls point symbol size. Default 275.
^range^ restricts the dotted or continuous lines to extend from the
minimum to the maximum of those quantities plotted on each
horizontal line.
^symbol(^string^)^ controls point symbols available. The list is most of the
standard list for @graph@, with some additions.
^.^ dot
^o^ small circle
^O^ large circle
^S^ square
^T^ triangle
^d^ diamond
^p^ plus
^i^ invisible
^|^ vertical bar
^,^ short vertical bar
^-^ short horizontal bar
^>^ arrow pointing right
^<^ arrow pointing left
^x^ small cross
^X^ large cross
^a^ arrow pointing from the value for the previous variable
to the value of the present variable
^symbol^ defaults to ^opSdT^ for up to 5 variables and ^o^ for every
variable for 6 or more variables. With 2 or more variables and a
single symbol specified, that symbol is used for every variable.
That is, ^hplot a b c, sy(|)^ expands to ^hplot a b c, sy(|||)^.
Options controlling axes, lines, labels, ticks, border:
-------------------------------------------------------
^border^ adds a border.
^format(^format^)^ controls the format with which ^xlabel^s are shown.
Default %1.0f.
^lap^ (^l^abels ^a^ll ^p^ositive) makes the labels as shown all
positive. ^xla(-40,-20,0,20,40) lap^ will place the labels
40, 20, 0, 20, 40 at the axis positions for -40, -20, 0, 20, 40.
^noxaxis^ suppresses the x axis.
^noyaxis^ suppresses the y axis.
^ttick^ produces short unlabelled ticks on the border above the x axis
that echo the labelled and unlabelled ticks on the x axis. Note that
(unlike the option in @graph@) ^ttick^ with a string is illegal, and
that ^ttick^ necessarily implies ^border^, but not conversely.
^xlabel(^string^)^ controls the labelled ticks on the x axis. Note that
(unlike the option in @graph@) ^xlabel^ without a string is illegal.
numlists may be used, such as ^1/5^ for ^1,2,3,4,5^ and ^0(10)50^
for ^0,10,20,30,40,50^.
^xline(^string^)^ specifies lines drawn for constant values of x. Note
that (unlike the option in @graph@) ^xline^ without a string is
illegal. numlists may be used, such as ^1/5^ for ^1,2,3,4,5^ and
^0(10)50^ for ^0,10,20,30,40,50^.
^xscale(^string^)^ controls the scale of the graph, except that (like
the option in @graph@) it will not cause values to be omitted (for
which purpose use ^if^).
^xtick(^string^)^ controls the unlabelled ticks on the x axis. Note that
(unlike the option in @graph@) ^xtick^ without a string is illegal.
numlists may be used, such as ^1/5^ for ^1,2,3,4,5^ and ^0(10)50^
for ^0,10,20,30,40,50^.
Options controlling legends to left and between gaps:
-----------------------------------------------------
^blank^ blanks out any legend on the left of the data region.
^glegend(^string^)^ places right-justified legend in the gaps between
lines. ^gaps(0,4) glegend(Males!Females)^ places ^Males^ in the gap
before line 1 and ^Females^ in the gap after line 4. Note that ^!^
must be used to separate legends, which thus enables the use of
commas within the legend, but has the side-effect of disallowing the
use of exclamation marks. ^.^ has the special meaning of blank.
^gllj^ makes ^glegend^ left-justified.
^glpos(^#^)^ controls the horizontal position of ^glegend^ and defaults
to an alignment with the main legend.
^legend(^legendvar^)^ specifies a variable to be used for the legend. If
legendvar is a numeric variable with labels, the labels will be used
in the legend.
Options controlling titles (^t1title^, ^t2title^, ^title^):
-----------------------------------------------------
^flipt^ flips titles: ^title^ will be shown at its (default larger) font
size and left-justified at the top of the graph, and ^t1title^ will
be shown at default font size and centred below the axis at the
bottom of the graph (but closer to the axis than the default).
^nit2^: see ^t2title^ below.
^t1title(^string^)^ controls the ^t1title^, shown at default font size
and left-justified at the top of the graph. But see ^flipt^ above.
^t2m(^#^)^ moves the ^t2title^ bodily # to the right. The default is to
start hard left to allow plenty of space for several variable labels
or names in a key, but that default may seem too far left.
^t2title(^string^)^ controls the ^t2title^, shown at default font size
and left-justified at the top of the graph. This defaults to a key
of point symbols if the number of variables is more than 2: the key,
however, is likely to be a mess if the number is more than 5. The
key uses variable labels, or variable names if either the labels do
not exist or the further option ^nit2^ is invoked. As with @graph@,
^" "^ blanks out the title.
^tim(^#^)^ moves the ^title^ bodily # to the right. The default is to
centre at whatever column would bisect the x axis.
^title(^string^)^ controls the ^title^, shown at its (default larger)
font size and centred below the axis at the bottom of the graph. But
see ^flipt^ above. As with @graph@, ^" "^ blanks out the title.
Other graph options:
--------------------
^fontr(^#^)^ and ^fontc(^#^)^ control the font used for all but the main
title and default to 570 and 290 (which are the defaults of @gph@).
Font sizes should be changed circumspectly.
^fontrb(^#^)^ and ^fontcb(^#^)^ control the font used for the main
title and default to 923 and 444.
^pen(^string^)^ controls the pens used for data. ^pen^ defaults to ^2^
for every variable. With 2 or more variables and a single pen
specified, that pen is used for every variable. For example,
^hplot a b c, pen(3)^ expands to ^hplot a b c, pen(333)^.
^pentext(^#^)^ controls the pen used for text and other non-data
elements.
^saving(^graph_filename^)^ saves the graph in a .gph file.
Examples
--------
. ^hplot reserves, l(area) xsc(0,30) xla(0(5)30)^
^t1(percent of total) ti(Oil reserves 1994) flipt border^
. ^hplot area, l(name) xsc(0,6) xla(0/6) t1(million^
^square km) ti(Areas of major drainage basins) flipt line^
^fontr(491) fontc(250)^
. ^hplot lcl mean ucl, l(name) xsc(0,6) xla(0/6)^
^t1(95% confidence intervals) sy(|O|) border^
Remarks
-------
In addition to the options above, many choices are coded into ^hplot^
as parameter values. Users may want to copy ^hplot^ and then edit
these permanently or temporarily according to taste.
Cleveland's dot plots are not the same as the histogram-like dotplots
implemented in @dotplot@.
References
----------
Cleveland, W.S. 1984. Graphical methods for data presentation: full
scale breaks, dot charts, and multibased logging. American Statistician
38, 270-80.
Cleveland, W.S. 1994. The elements of graphing data. Hobart Press,
Summit, NJ.
McNeil, D.R. 1992. On graphing paired data. American Statistician
46, 307-11.
McNeil, D.R. 1996. Epidemiological research methods. Wiley, Chichester.
Author
------
Nicholas J. Cox, University of Durham, U.K.
n.j.cox@@durham.ac.uk
Acknowledgments
---------------
Mike Bradburn, Arne Kolstad and Fred Wolfe made very helpful
comments.
Also see
--------
On-line: help for @graph@, @gph@, @format@, @gsort@, @dotplot@,
@hbar@ (if installed), @cihplot@ (if installed),
@tabhplot@ (if installed), @numlist@