Computed theoretical power for N=100 and N=200 scenarios

This commit is contained in:
2024-02-19 18:35:26 +01:00
parent ac9189d26a
commit 238852b08b
704 changed files with 261610 additions and 187 deletions

View File

@ -0,0 +1,155 @@
capture program drop calcscore
program calcscore,rclass
syntax varlist, PARTition(numlist integer >0) [CALCmethod(string) SCOrename(string)]
local y = 1
*di "{bf:Calculs des scores}"
local C = 0
foreach z in `partition' {
local C = `C' + `z'
}
local nbvars : word count `varlist'
if `C' != `nbvars' {
di in red "The sum of the numbers in the partition option is different from the number of variables precised in varlist"
exit
}
if "`scorename'" != "" {
local P:word count `partition'
local S:word count `scorename'
if `P'!=`S' {
di in red "The number of score names given is different from the number of dimensions in the partition option"
exit 119
}
foreach sco in `scorename' {
capture confirm variable `sco'
if !_rc {
di in red "`sco' is a variable of the dataset. Choose another name"
exit 119
}
}
}
/*
local cpt = 0
if "`sum'" != "" {
local cpt `cpt' + 1
}
if "`mean'" != "" {
local cpt `cpt' + 1
}
if "`stand'" != "" {
local cpt `cpt' + 1
}
if `cpt'>1 {
di in red "You must choose between mean, sum or stand (the options are exclusive)"
exit 119
}
*/
local i = 1
foreach x in `partition' {
tokenize `varlist'
if `i' == 1 local s = `x'
else local s = `s' +`x'
local liste = ""
forvalues w = `y'/`s' {
local liste `liste' ``w''
}
tempvar nonmiss
qui egen `nonmiss' = rownonmiss(`liste')
if "`scorename'" != "" {
tokenize `scorename'
local sc = "``i''"
}
else local sc = "Dim`i'"
/* if "`calc_method'" == "" {
local calc_method = "mean"
}
if "`calc_method'" != "sum" & "`calc_method'" != "mean" {
di in red "The calc_method option is invalid. Choose mean or sum."
exit 119
}
if "`calc_method'" == "sum" {
qui egen `sc' = rowmean(`liste') if `nonmiss' >= `x'/2
if "`calc_stand'" != "" {
local maxs = 0
foreach var in `liste' {
qui levelsof `var', local(levels)
local max = 0
foreach l in `levels' {
if `l'>`max' local max = `l'
}
local maxs = `maxs' + `max'
}
di "`sc' : `maxs'"
qui replace `sc' = `sc'*`nonmiss'*100/`maxs'
}
else qui replace `sc' = `sc'*`nonmiss'
}
else if "`calc_method'" == "mean" {
qui egen `sc' = rowmean(`liste') if `nonmiss' >= `x'/2
}
*/
if "`calcmethod'" == "" local calcmethod = "mean"
if "`calcmethod'" != "mean" & "`calcmethod'" != "sum" & "`calcmethod'" != "stand" {
di in red "option calcmethod incorrectly specified (choose among mean, sum and stand)"
error 198
}
if "`calcmethod'" == "sum" {
qui egen `sc' = rowmean(`liste') if `nonmiss' >= `x'/2
qui replace `sc' = `sc'*`nonmiss'
}
else if "`calcmethod'" == "stand" {
qui egen `sc' = rowmean(`liste') if `nonmiss' >= `x'/2
qui replace `sc' = `sc'*`nonmiss'
tempvar min max
egen `min' = min(`sc')
egen `max' = max(`sc')
/*
foreach var in `liste' {
/*qui levelsof `var', local(levels)
local max = 0
foreach l in `levels' {
if `l'>`max' local max = `l'
}
local maxs = `maxs' + `max'*/
local max = max(`levels')
}
*/
*di "max : "`max'
*di "min : "`min'
qui replace `sc' = (`sc'-`min')/(`max'-`min')*100
}
else {
qui egen `sc' = rowmean(`liste') if `nonmiss' >= `x'/2
}
local `i++'
local y = `s'+1
}
end
*calcscore ioc1-ioc37, partition(4 4 7 3 3 4 7 5) scorename(HA PSE W BCC AC AE LI MOC) calcmethod(stand)
*calcscore x1-x40, partition(5 5 5 5 5 5 5 5) calcmethod(stand)
*calcscore sf36_3q_intenses sf36_3q_moderees sf36_3q_soulever sf36_3q_etages sf36_3q_etage sf36_3q_pencher sf36_3q_15km sf36_3q_500m sf36_3q_100m sf36_3q_douche sf36_4q_limite_temps_travail sf36_4q_moins_choses sf36_4q_type_travail sf36_4q_effort sf36_7q_intensite_douleurs sf36_8q_douleurs_physiques sf36_1q sf36_11q_malade sf36_11q_porte_bien sf36_11q_degrade sf36_11q_excellente_sante sf36_9q_enthousiaste sf36_9q_energie sf36_9q_epuise sf36_9q_fatigue sf36_6q_vie_sociale sf36_10q_etat_mental sf36_5q_limite_temps_travail sf36_5q_moins_choses sf36_5q_accomplies_soigneusement sf36_9q_nerveux sf36_9q_triste sf36_9q_calme sf36_9q_maussade sf36_9q_heureux, partition(10 4 2 5 4 2 3 5) scorename(PF RP BP GH VT SF RE MH) calcmethod(mean)

View File

@ -0,0 +1,395 @@
*program drop calcul
program define calcul
syntax, s10(numlist)
matrix define deces=J(140,6,0)
local j=0
tcm, s10(`s10') anneepop(1989) annees(1988/1990) sexe(1)
matrix essai=r(donnees)
local TCMH89=r(TCM)
matrix deces[1,1]=essai[2..19,4]
matrix deces[25,1]=`TCMH89'
tcm, s10(`s10') anneepop(1989) annees(1988/1990) sexe(2)
matrix essai=r(donnees)
local TCMF89=r(TCM)
matrix deces[1,2]=essai[2..19,4]
matrix deces[25,2]=`TCMF89'
tcm, s10(`s10') anneepop(1982) annees(1981/1983) sexe(1)
matrix essai=r(donnees)
local TCMH82=r(TCM)
matrix deces[1,3]=essai[2..19,4]
matrix deces[20,1]=`TCMH82'
tcm, s10(`s10') anneepop(1982) annees(1981/1983) sexe(2)
matrix essai=r(donnees)
local TCMF82=r(TCM)
matrix deces[1,4]=essai[2..19,4]
matrix deces[20,2]=`TCMF82'
tcm, s10(`s10') anneepop(1983) annees(1982/1984) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[21,1]=`TCMH'
tcm, s10(`s10') anneepop(1983) annees(1982/1984) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[21,2]=`TCMF'
tcm, s10(`s10') anneepop(1984) annees(1983/1985) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[22,1]=`TCMH'
tcm, s10(`s10') anneepop(1984) annees(1983/1985) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[22,2]=`TCMF'
tcm, s10(`s10') anneepop(1985) annees(1984/1986) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[23,1]=`TCMH'
tcm, s10(`s10') anneepop(1985) annees(1984/1986) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[23,2]=`TCMF'
tcm, s10(`s10') anneepop(1986) annees(1985/1987) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[24,1]=`TCMH'
tcm, s10(`s10') anneepop(1986) annees(1985/1987) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[24,2]=`TCMF'
tcm, s10(`s10') anneepop(1992) annees(1991/1993) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[26,1]=`TCMH'
tcm, s10(`s10') anneepop(1992) annees(1991/1993) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[26,2]=`TCMF'
tcm, s10(`s10') anneepop(1993) annees(1992/1994) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[27,1]=`TCMH'
tcm, s10(`s10') anneepop(1993) annees(1992/1994) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[27,2]=`TCMF'
tcm, s10(`s10') anneepop(1994) annees(1993/1995) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[28,1]=`TCMH'
tcm, s10(`s10') anneepop(1994) annees(1993/1995) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[28,2]=`TCMF'
tcm, s10(`s10') anneepop(1995) annees(1994/1996) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[29,1]=`TCMH'
tcm, s10(`s10') anneepop(1995) annees(1994/1996) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[29,2]=`TCMF'
tcm, s10(`s10') anneepop(1996) annees(1995/1997) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[30,1]=`TCMH'
tcm, s10(`s10') anneepop(1996) annees(1995/1997) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[30,2]=`TCMF'
tcm, s10(`s10') anneepop(1997) annees(1996/1998) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[31,1]=`TCMH'
tcm, s10(`s10') anneepop(1997) annees(1996/1998) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[31,2]=`TCMF'
tcm, s10(`s10') anneepop(1998) annees(1997/1999) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
matrix deces[32,1]=`TCMH'
tcm, s10(`s10') anneepop(1998) annees(1997/1999) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[32,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1)
matrix essai=r(donnees)
local TCMH=r(TCM)
local TCMH9800reg=r(TCM)
matrix deces[33,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2)
matrix essai=r(donnees)
local TCMF=r(TCM)
local TCMF9800reg=r(TCM)
matrix deces[33,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(18) tcmcomp(`TCMH9800reg')
matrix essai=r(donnees)
local TCMH18=r(TCM)
matrix deces[35,1]=essai[2..19,2]
matrix deces[53,1]=r(TCM)
matrix deces[54,1]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(18) tcmcomp(`TCMF9800reg')
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[56,1]=essai[2..19,2]
matrix deces[74,1]=r(TCM)
matrix deces[75,1]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(28) tcmcomp(`TCMH9800reg')
matrix essai=r(donnees)
local TCMH18=r(TCM)
matrix deces[35,2]=essai[2..19,2]
matrix deces[53,2]=r(TCM)
matrix deces[54,2]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(28) tcmcomp(`TCMF9800reg')
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[56,2]=essai[2..19,2]
matrix deces[74,2]=r(TCM)
matrix deces[75,2]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(36) tcmcomp(`TCMH9800reg')
matrix essai=r(donnees)
local TCMH18=r(TCM)
matrix deces[35,3]=essai[2..19,2]
matrix deces[53,3]=r(TCM)
matrix deces[54,3]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(36) tcmcomp(`TCMF9800reg')
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[56,3]=essai[2..19,2]
matrix deces[74,3]=r(TCM)
matrix deces[75,3]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(37) tcmcomp(`TCMH9800reg')
matrix essai=r(donnees)
local TCMH18=r(TCM)
matrix deces[35,4]=essai[2..19,2]
matrix deces[53,4]=r(TCM)
matrix deces[54,4]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(37) tcmcomp(`TCMF9800reg')
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[56,4]=essai[2..19,2]
matrix deces[74,4]=r(TCM)
matrix deces[75,4]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(41) tcmcomp(`TCMH9800reg')
matrix essai=r(donnees)
local TCMH18=r(TCM)
matrix deces[35,5]=essai[2..19,2]
matrix deces[53,5]=r(TCM)
matrix deces[54,5]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(41) tcmcomp(`TCMF9800reg')
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[56,5]=essai[2..19,2]
matrix deces[74,5]=r(TCM)
matrix deces[75,5]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(45) tcmcomp(`TCMH9800reg')
matrix essai=r(donnees)
local TCMH18=r(TCM)
matrix deces[35,6]=essai[2..19,2]
matrix deces[53,6]=r(TCM)
matrix deces[54,6]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(45) tcmcomp(`TCMF9800reg')
matrix essai=r(donnees)
local TCMF=r(TCM)
matrix deces[56,6]=essai[2..19,2]
matrix deces[74,6]=r(TCM)
matrix deces[75,6]=r(pvalue)
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2411)
local TCMH=r(TCM)
matrix deces[77,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2411)
local TCMF=r(TCM)
matrix deces[77,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2412)
local TCMH=r(TCM)
matrix deces[78,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2412)
local TCMF=r(TCM)
matrix deces[78,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2413)
local TCMH=r(TCM)
matrix deces[79,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2413)
local TCMF=r(TCM)
matrix deces[79,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2414)
local TCMH=r(TCM)
matrix deces[80,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2414)
local TCMF=r(TCM)
matrix deces[80,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2421)
local TCMH=r(TCM)
matrix deces[81,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2421)
local TCMF=r(TCM)
matrix deces[81,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2422)
local TCMH=r(TCM)
matrix deces[82,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2422)
local TCMF=r(TCM)
matrix deces[82,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2423)
local TCMH=r(TCM)
matrix deces[83,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2423)
local TCMF=r(TCM)
matrix deces[83,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2424)
local TCMH=r(TCM)
matrix deces[84,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2424)
local TCMF=r(TCM)
matrix deces[84,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2431)
local TCMH=r(TCM)
matrix deces[85,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2431)
local TCMF=r(TCM)
matrix deces[85,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2432)
local TCMH=r(TCM)
matrix deces[86,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2432)
local TCMF=r(TCM)
matrix deces[86,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2433)
local TCMH=r(TCM)
matrix deces[87,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2433)
local TCMF=r(TCM)
matrix deces[87,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2434)
local TCMH=r(TCM)
matrix deces[88,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2434)
local TCMF=r(TCM)
matrix deces[88,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2441)
local TCMH=r(TCM)
matrix deces[89,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2441)
local TCMF=r(TCM)
matrix deces[89,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2442)
local TCMH=r(TCM)
matrix deces[90,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2442)
local TCMF=r(TCM)
matrix deces[90,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2443)
local TCMH=r(TCM)
matrix deces[91,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2443)
local TCMF=r(TCM)
matrix deces[91,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2444)
local TCMH=r(TCM)
matrix deces[92,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2444)
local TCMF=r(TCM)
matrix deces[92,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2451)
local TCMH=r(TCM)
matrix deces[93,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2451)
local TCMF=r(TCM)
matrix deces[93,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2452)
local TCMH=r(TCM)
matrix deces[94,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2452)
local TCMF=r(TCM)
matrix deces[94,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2453)
local TCMH=r(TCM)
matrix deces[95,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2453)
local TCMF=r(TCM)
matrix deces[95,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2461)
local TCMH=r(TCM)
matrix deces[96,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2461)
local TCMF=r(TCM)
matrix deces[96,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2462)
local TCMH=r(TCM)
matrix deces[97,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2462)
local TCMF=r(TCM)
matrix deces[97,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2463)
local TCMH=r(TCM)
matrix deces[98,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2463)
local TCMF=r(TCM)
matrix deces[98,2]=`TCMF'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(1) codegeo(2464)
local TCMH=r(TCM)
matrix deces[99,1]=`TCMH'
tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(2) codegeo(2464)
local TCMF=r(TCM)
matrix deces[99,2]=`TCMF'
use "C:\ado\personal\files\dccentre8000reduit.dta", clear
tab annee deptdom if S10==`s10' & sexe==1 , matcell(nbH)
matrix deces[101,1]=nbH
tab annee deptdom if S10==`s10' & sexe==2 , matcell(nbF)
matrix deces[121,1]=nbF
matrix list deces
end

View File

@ -0,0 +1,363 @@
capture program drop cfa
program cfa,rclass
syntax varlist, PARTition(numlist integer >0) [SCOrename(string) CFAMethod(string) CFAStand]
local C = 0
foreach z in `partition' {
local C = `C' + `z'
}
local nbvars : word count `varlist'
if `C' != `nbvars' {
di in red "The sum of the numbers in the partition option is different from the number of variables precised in varlist"
exit
}
local P:word count `partition'
if "`scorename'" !="" {
local S:word count `scorename'
if `P'!=`S' {
di in red "The number of score names given is different from the number of dimensions in the partition option"
exit
}
}
local i = 1
foreach x in `varlist' {
local var`i' = "`x'"
local `++i'
}
local name
local nname
if "`scorename'"=="" {
forvalues i = 1/`P' {
local name "Dim`i'"
local nname `nname' `name'
}
local scorename = "`nname'"
}
local upscorename = upper("`scorename'")
*capture calcscore `varlist', scorename(`upscorename') partition(`partition')
local i = 0
local y = 1
tokenize `upscorename'
foreach x in `partition' {
local `i++'
if `i' == 1 local s = `x'
else local s = `s' +`x'
local liste = ""
forvalues w = `y'/`s' {
local liste `liste' `var`w''
}
local a = "(``i'' -> `liste')"
local z `z' `a'
local y = `s'+1
}
/*
local cpt = 0
if "`cfa_ml'" != "" {
local method = "ml"
local cpt `cpt' + 1
}
if "`cfa_mlmv'" != "" {
local method = "mlmv"
local cpt `cpt' + 1
}
if "`cfa_adf'" != "" {
local method = "adf"
local cpt `cpt' + 1
}
else local method = "ml"
if `cpt'>1 {
di in red "You must choose between cfa_ml, cfa_mlmv or cfa_adf (the options are exclusive)"
exit 119
}
*/
if "`cfamethod'" == "" local cfamethod = "ml"
if "`cfamethod'" != "ml" & "`cfamethod'" != "mlmv" & "`cfamethod'" != "adf" {
di "`cfamethod'"
di in red "option cfamethod incorrectly specified (choose among ml, mlmv and adf)"
error 198
}
if "`cfastand'" != "" local cfastand = "stand"
di as result "{hline}"
di "{bf:Confirmatory factor analysis}"
di as result "{hline}"
di
qui sem `z', method(`cfamethod') `cfastand'
/*
sem (HA -> ioc1-ioc4) (PSE -> ioc5-ioc8) (W -> ioc9-ioc15) ///
(BCC -> ioc16-ioc18) (AC -> ioc19-ioc21) (AE -> ioc22-ioc25) ///
(LI -> ioc26-ioc32) (MOC -> ioc33-ioc37)
,stand
cov(e.ioc36*e.ioc37
e.ioc28*e.ioc29 e.ioc14*e.ioc15 e.ioc23*e.ioc25 e.ioc33*e.ioc34 ///
e.ioc9*e.ioc10 e.ioc6*e.ioc8 e.ioc5*e.ioc7) // method(mlmv)
*/
/* factor loadings */
matrix r = r(table)
matrix r = r[1,1...]
matrix r = r'
local n = `nbvars'*2
matrix a = r[1,1]
forvalues i=3(2)`n' {
matrix b = r[`i',1]
matrix a = a\b
}
/* standard error */
matrix r = r(table)
matrix r = r[2,1...]
matrix r = r'
local n = `nbvars'*2
matrix se = r[1,1]
forvalues i=3(2)`n' {
matrix b = r[`i',1]
matrix se = se\b
}
/* intercepts */
matrix r = r(table)
matrix r = r[1,1...]
matrix r = r'
local n = `nbvars'*2
matrix a2 = r[2,1]
forvalues i=4(2)`n' {
matrix b = r[`i',1]
matrix a2 = a2\b
}
/* variances des erreurs */
local m = `n'+1
matrix r = r(table)
matrix r = r[1,`m'...]
matrix r = r'
matrix a3 = r[1,1]
forvalues i=2/`nbvars' {
matrix b = r[`i',1]
matrix a3 = a3\b
}
/* variance des dimensions*/
matrix r = r(table)
local n = `nbvars'*3+1
matrix r = r[1,`n'...]
matrix r = r'
matrix var = r[1,1]
forvalues i=2/`P' {
matrix b = r[`i',1]
matrix var = var\b
}
local i = 1
foreach v in `varlist' {
local var`i' = abbrev("`v'",10)
local `++i'
}
local i = 1
foreach s in `scorename' {
local s`i' = abbrev("`s'",10)
local sc `sc' `s`i''
local `++i'
}
local max = 10
local dec = `max'+5
local max2 = 10
local dec2 = `dec'+`max2'+5
local a = e(N)
di "{text:Number of used individuals: `a'}"
di
di _col(`=`dec2'+17+4') "{bf:Estimation:}"
di as result "{bf:Item}" _c
di _col(`dec') "{bf:Dimension}" _c
*local col = `dec'+17
di _col(`dec2') "{bf:Factor}" _c
*local col = `dec2'+17
di _col(`=`dec2'+14') "{bf:Standard}" _c
*local col = `col'+17
di _col(`=`dec2'+28') "{bf:Intercept}" _c
*local col = `col'+13
if "`cfastand'" == "" {
di _col(`=`dec2'+42') "{bf:Variance of}" _c
di _col(`=`dec2'+56') "{bf:Variance of}"
di _col(`dec2') "{bf:loading}" _c
*local col = `dec2'+17
di _col(`=`dec2'+14') "{bf:error}" _c
*local col = `col'+30
di _col(`=`dec2'+42') "{bf:error}" _c
di _col(`=`dec2'+56') "{bf:dimension}"
local h = `dec2'+66
}
else {
di _col(`=`dec2'+42') "{bf:Variance of}"
di _col(`dec2') "{bf:loading}" _c
*local col = `dec2'+17
di _col(`=`dec2'+14') "{bf:error}" _c
*local col = `col'+30
di _col(`=`dec2'+42') "{bf:errors}"
local h = `dec2'+52
}
di "{hline `h'}"
local i = 1
local y = 1
foreach x in `partition' {
if `i' == 1 local s = `x'
else local s = `s' +`x'
forvalues z = `y'/`s' {
tokenize `sc'
di "{bf:`var`z''}"_c
di _col(`dec') "{bf:``i''}" _c
local t = a[`z',1]
local t : di %7.2f `t'
*local col = `dec'+17
di _col(`dec2') "{text:`t'}" _c
local t = se[`z',1]
local t : di %8.2f `t'
*local col = `dec2'+9
di _col(`=`dec2'+14') "{text:`t'}" _c
local t = a2[`z',1]
local t : di %9.2f `t'
*local col = `col'+17
di _col(`=`dec2'+28') "{text:`t'}" _c
local t = a3[`z',1]
local t : di %11.2f `t'
*local col = `col'+13
if "`cfastand'" == "" & `z' == `y'{
di _col(`=`dec2'+42') "{text:`t'}" _c
local t = var[`i',1]
local t : di %11.2f `t'
*local col = `dec2'+17+17+13+14
di _col(`=`dec2'+56') "{text:`t'}"
}
else di _col(`=`dec2'+42') "{text:`t'}"
}
di
local `i++'
local y = `s'+1
}
qui estat gof, stats(all)
local chi2 = r(chi2_ms)
local p = r(p_ms)
local ddl = r(df_ms)
local ratio = `chi2'/`ddl'
local rmsea = r(rmsea)
local lb = r(lb90_rmsea)
local ub = r(ub90_rmsea)
local nfi = 1-(r(chi2_ms)/r(chi2_bs))
local rni = 1-(r(chi2_ms)-r(df_ms))/(r(chi2_bs)-r(df_bs))
local cfi = r(cfi)
local ifi = (r(chi2_bs)-r(chi2_ms))/(r(chi2_bs)-r(df_ms))
local mci = exp(-0.5*((r(chi2_ms)-r(df_ms))/(e(N)-1)))
local srmr = r(srmr)
di
di "{bf:Goodness of fit}"
di
di as result _col(4) "chi2" _c
di as result _col(20) "ddl" _c
di as result _col(28) "chi2/ddl" _c
di as result _col(42) "RMSEA [90% CI]" _c
di as result _col(64) "SRMR" _c
di as result _col(74) "NFI" _c
di as result _col(84) "RNI" _c
di as result _col(94) "CFI" _c
di as result _col(104) "IFI" _c
di as result _col(114) "MCI"
*di as result "`P' dimensions" _c
local t : di %7.2f `chi2'
di "{text:`t'}" _c
local t : di %3.0f `ddl'
di _col(20) "{text:`t'}" _c
local t : di %7.1f `ratio'
di _col(29) "{text:`t'}" _c
local t : di %5.3f `rmsea'
local l : di %5.3f `lb'
local u : di %5.3f `up'
di _col(40) "{text:`t' [`l' ; `u']}" _c
local t : di %5.3f `srmr'
di _col(63) "{text:`t'}" _c
local t : di %5.3f `nfi'
di _col(72) "{text:`t'}" _c
local t : di %5.3f `rni'
di _col(82) "{text:`t'}" _c
local t : di %5.3f `cfi'
di _col(92) "{text:`t'}" _c
local t : di %5.3f `ifi'
di _col(102) "{text:`t'}" _c
local t : di %5.3f `mci'
di _col(112) "{text:`t'}"
local p : di %5.3f `p'
di "{text:(p-value = `p')}"
di as result
/*
matrix ind = (`chi2',`ddl',`ratio',`rmsea',`nfi',`rni',`cfi',`ifi',`srmr')
matrix colnames ind = "chi2" "ddl" "chi2/ddl" "RMSEA" "NFI" "RNI" "CFI" "IFI" "SRMR"
matrix rownames ind = ""
di
di "{bf:Goodness of fit}"
matrix list ind, format(%6.3f) noheader
*/
end
*cfa ioc1-ioc37, partition(4 4 7 3 3 4 7 5) scorename(HAaaaaaaaaaaaaaaaaaaaaaaaaaz PSE W BCC AC AE LI MOC) cfamethod(ml) //cfastand
*cfa x1-x40, partition(5 5 5 5 5 5 5 5) cfastand

View File

@ -0,0 +1,168 @@
*!Version 1.1
*!Data management utility: check for existence of variables in a dataset.
*!Authors: Amadou Bassirou DIALLO (World Bank) and Jean-Benoit Hardouin (Regional Health Observatory of Orl<72>ans)
program checkfor2 , rclass
version 8
syntax anything [if] [in] [, noList Tolerance(real 0) TAble noSUm GENMiss(namelist min=1 max=1) MISsing(string)]
marksample touse
tempname rat
local av
local unav
local manymissings
local avnum
quietly count if `touse'
local tot = r(N)
qui isvar `anything'
local badlist `r(badlist)'
local varlist `r(varlist)'
di _n
if "`table'"!="" {
if "`badlist'"!="" {
di _col(4) in green "{hline 39}"
di _col(4)in green "Unavailable variables: "
foreach i of local badlist {
di _col(4) in ye "`i'"
}
di _col(4) in green "{hline 39}"
di
}
di _col(4) in green "{hline 39}"
display _col(4) in gr "Existing" _col(15) in gr "Rate of"
display _col(4) in gr "Variable" _col(14) "missings" _col(26) "Type" _col(34) "Available"
di _col(4) in green "{hline 39}"
}
tokenize `varlist'
local nbvar : word count `varlist'
forvalues i=1/`nbvar' {
capture assert missing(``i'') if `touse'
local ty: type ``i''
local tty = substr("`ty'", 1, 3)
if !_rc {
if "`table'"=="" {
display in ye "``i''" in gr " is empty in the database." in ye " ``i''" in gr " is not added to the available list."
}
else {
display _col(4) in gr "`=abbrev("``i''",8)'" _col(15) in ye "100.00%" _col(26) "`ty'"
}
local manymissings `manymissings' ``i''
}
else {
if "`table'"=="" {
display in ye "``i''" in gr " exists and is not empty."
}
*Consider type
if "`tty'" == "str" {
qui count if (``i'' == ""|``i''=="`missing'") & `touse'
local num = r(N)
scalar `rat' = (`num'/`tot')*100
}
else {
local avnum `avnum' ``i''
capture confirm number `missing'
if _rc!=0 {
quietly count if ``i'' >= . & `touse'
}
else {
quietly count if (``i'' >= .|``i''==`missing') & `touse'
}
local num = r(N)
scalar `rat' = (`num'/`tot')*100
}
if "`table'"=="" {
display in ye "``i''" in gr " has " in ye r(N) in gr " missings."
display in gr "Ratio number of missings of" in ye " ``i''" in gr " to total number of observations: " in ye %6.2f `rat' "%"
}
if `rat' <= `tolerance' {
local av `av' ``i''
if "`table'"=="" {
display in ye "``i''" in gr " is added to the available list."
}
else {
display _col(4) in gr "`=abbrev("``i''",8)'" in ye _col(15) %6.2f `rat' "%" _col(26) "`ty'" _col(34) "X"
}
}
else {
local manymissings `manymissings' ``i''
if "`table'"=="" {
display in ye "``i''" in gr " has too many missings, compared to the tolerance level."
display in ye "``i''" in gr " is not added to the available list."
}
else {
display _col(4) in gr "`=abbrev("``i''",8)'" _col(15) in ye %6.2f `rat' "%" _col(26) "`ty'"
}
}
}
if "`table'"=="" {
di
}
}
if "`table'"!="" {
di _col(4) in green "{hline 39}"
}
return local available `av'
return local unavailable `badlist'
return local manymissings `manymissings'
if "`avnum'" ~= ""&"`sum'"=="" {
display _newline
display in ye _col(14) "Unweighted summary statistics for available variables:" _n
capture confirm number `missing'
if _rc!=0 {
summarize `avnum' if `touse'
}
else {
foreach i of local avnum {
summarize `i' if `touse'&`i'!=`missing'
}
}
}
if "`list'"== "" {
display _newline
display in ye _d(97) "_"
display _newline
if "`badlist'"~="" {
display in gr "Unavailable variables: " in ye _col(45) "`badlist'" _n
}
if "`av'"~="" {
display in gr "Available variables: " in ye _col(45) "`av'" _n
}
if "`manymissings'"~="" {
display in gr "Available variables but with too missings: " in ye _col(45) "`manymissings'" _n
}
display in ye _d(97) "_"
}
if "`genmiss'" !="" {
capture confirm variable `genmiss'
if _rc!=0 {
qui gen `genmiss' = 0
local nbav : word count `av'
tokenize `av'
forvalues i=1/`nbav' {
local ty: type ``i''
local tty = substr("`ty'", 1, 3)
if "`tty'" == "str" {
qui replace `genmiss'=`genmiss'+1 if ``i''=="."
}
else {
qui replace `genmiss'=`genmiss'+1 if ``i''>=.
}
}
}
else {
di in green "The variable" in ye " `genmiss' " in green "already exists".
}
}
end

View File

@ -0,0 +1,88 @@
{smcl}
{hline}
help for {cmd:checkfor2} {right:Amadou B. DIALLO}
{right:Jean-Benoit HARDOUIN}
{hline}
{title:Allows checking whether a variable exists or not in a dataset.}
{p 4 8 2}{cmd:checkfor2} {it:anything} [{cmd:,}
{cmdab:t:olerance}({it:#}) {cmdab:ta:ble} {cmdab:nol:ist} {cmdab:nosu:m}
{cmdab:genm:iss}({it:newvarname}) {cmdab:mis:sing}({it:string})]
{title:Description}
{p 4 4 2}{cmd:checkfor2} is a data management routine to check for existence of variables
within a (usually big) data set.
{p 4 4 2}{cmd:checkfor2} searchs through the data whether each variable exists.
The variables are clustered between unavailable variables, available variables with
a little amount of missing values and available variables with too many missing values.
{p 4 4 2}{cmd:isvar} must be installed ({stata ssc install isvar:ssc install isvar}).
{title:Options}
{p 4 4 2}{it:anything} is composed of variable names or lists of variables,
{p 4 4 2}{cmd:tolerance} is the tolerance level (in percentage) to consider a variable as available, with default 0,
{p 4 4 2}{cmd:nolist} avoids displaying availability status at the end of the process,
{p 4 4 2}{cmd:nosum} avoids displaying summary statistics of available variables,
{p 4 4 2}{cmd:table} displays the results in a table (instead as text),
{p 4 4 2}{cmd:genmiss} creates a new variable containing the number of missing values among the available variables,
{p 4 4 2}{cmd:missing} defines a specific value or string considered as a missing value.
{title:Saved results}
{p 4 4 2} {cmd:r(unavailable)} names of unavailable variables.{p_end}
{p 4 4 2} {cmd:r(available)} names of available variables with a small amount of missing values.{p_end}
{p 4 4 2} {cmd:r(manymissings)} names of variables present but with too missings.{p_end}
{title:Examples}
{p 4 4 2}{cmd:. use mydata, clear }{p_end}
{p 4 4 2}{cmd:. checkfor2 x y z , mis(99) genmiss(countmiss) }{p_end}
{p 4 4 2}{cmd:. su `r(available)' }{p_end}
{p 4 4 2}{cmd:. tab countmiss }{p_end}
{p 4 4 2}{cmd:. u bigdataset in 1/100, clear // Big data set}{p_end}
{p 4 4 2}{cmd:. checkfor2 v1 v2 v3 xx yy , nosum tol(5) tab}{p_end}
{p 4 4 2}{cmd:. use `r(available)' using bigdataset, clear }{p_end}
{title:Remarks}
{p 4 4 2}{cmd:checkfor2} and its primary version ({cmd:checkfor}) have been primarily written for comparable surveys such as the Demography and
Health Surveys (DHS) or the Multiple Indicator Cluster Surveys (MICS). But this could easily applied
to any other survey.
{title:Authors}
{p 4 4 2}Amadou Bassirou DIALLO.
Poverty and Health Division, PREM, The World Bank.{p_end}
{p 4 4 2}Email: {browse "mailto:adiallo5@worldbank.org":adiallo5@worldbank.org}
{p 4 4 2}Jean-Benoit HARDOUIN.
Regional Health Observatory of Orl<72>ans, France.{p_end}
{p 4 4 2}Email: {browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
{title:Aknowledgements}
{p 4 4 2}We would like to thank Christophe Rockmore and also Nick Cox
and Kit Baum for their comments.
{title:Also see}
{p 4 13 2}Online: help for {help checkfor}, {help isvar}, {help nmissing}, {help npresent}, {help missing} and {help dropmiss} if installed.{p_end}

View File

@ -0,0 +1,133 @@
*!Version 1.1
*!Data management utility: check for existence of variables in a dataset.
*!Authors: Amadou Bassirou DIALLO (World Bank) and Jean-Benoit Hardouin (Regional Health Observatory of Orl<72>ans)
program checkvars, rclass
version 8
syntax anything [if] [in] [, noList Tolerance(real 0) TAble noSUm GENMiss(namelist min=1 max=1)]
marksample touse
tempname rat
local av
local unav
local manymissings
quietly count if `touse'
local tot = r(N)
qui isvar `anything'
local badlist `r(badlist)'
local varlist `r(varlist)'
if "`table'"!="" {
if "`badlist'"!="" {
di _col(4)in green "Unavailable variables: " in ye "`badlist'"
di
}
di _col(4) in green "{hline 29}"
display _col(4) in gr "Existing" _col(15) in gr "Rate of"
display _col(4) in gr "Variable" _col(14) "missings" _col(24) "Available"
di _col(4) in green "{hline 29}"
}
tokenize `varlist'
local nbvar:word count `varlist'
forvalues i=1/`nbvar' {
capture assert missing(``i'') if `touse'
if !_rc {
if "`table'"=="" {
display in ye "``i''" in gr " is empty in the database." in ye " ``i''" in gr ///
" is not added to the available list"
}
else {
display _col(4) in gr "``i''" _col(14) "100.00%"
}
}
else {
if "`table'"=="" {
display in ye "``i''" in gr " exists and is not empty."
}
*if "`available'"~= "" {
quietly count if ``i'' >= . & `touse'
local num = r(N)
scalar `rat' = (`num'/`tot')*100
if "`table'"=="" {
display in ye "``i''" in gr " has " in ye r(N) in gr " missings."
display in gr "Ratio number of missings of" in ye " ``i''" in gr ///
" to total number of observations: " in ye %6.2f `rat' "%"
}
if `rat' <= `tolerance' {
local av `av' ``i''
if "`table'"=="" {
display in ye "``i''" in gr " is added to the available list."
}
else {
display _col(4) in gr "``i''" in ye _col(15) %6.2f `rat' "%" _col(32) "X"
}
}
else {
local manymissings `manymissings' ``i''
if "`table'"=="" {
display in ye "``i''" in gr " has too many missings, compared to the tolerance level."
display in ye "``i''" in gr " is not added to the available list."
}
else {
display _col(4) in gr "``i''" _col(15) in ye %6.2f `rat' "%"
}
}
*}
}
if "`table'"=="" {
di
}
}
if "`table'"!="" {
di _col(4) in green "{hline 29}"
}
return local available `av'
return local unavailable `badlist'
return local manymissings `manymissings'
if "`av'" ~= ""&"`sum'"=="" {
display _newline
display in ye _col(14) "Unweighted summary statistics for available variables:" _n
summarize `av' if `touse'
}
if "`list'"== "" {
display _newline
display in ye _d(97) "_"
display _newline
if "`badlist'"~="" {
display in gr "Unavailable variables: " in ye _col(45) "`badlist'" _n
}
if "`av'"~="" {
display in gr "Available variables: " in ye _col(45) "`av'" _n
}
if "`manymissings'"~="" {
display in gr "Available variables but with too missings: " in ye _col(45) "`manymissings'" _n
}
display in ye _d(97) "_"
}
if "`genmiss'" !="" {
capture confirm variable `genmiss'
if _rc!=0 {
qui gen `genmiss'=0
local nbav:word count `av'
tokenize `av'
forvalues i=1/`nbav' {
qui replace `genmiss'=`genmiss'+1 if ``i''>=.
}
}
else {
di in green "The variable" in ye " `genmiss' " in green "already exists".
}
}
end

View File

@ -0,0 +1,90 @@
{smcl}
{hline}
help for {cmd:checkvars} {right:Amadou B. DIALLO}
{right:Jean-Benoit HARDOUIN}
{hline}
{title:Allows checking whether a variable exists or not in a dataset.}
{p 4 8 2}{cmd:checkvars} {it:anything} [{cmd:,}
{cmdab:t:olerance}({it:#}) {cmdab:ta:ble} {cmdab:nol:ist} {cmdab:nosu:m}
{cmdab:genm:iss}({it:newvarname})]
{title:Description}
{p 4 4 2}{cmd:checkvars} is a routine to check for existence of variables
within a (usually big) data set.
{p 4 4 2}{cmd:checkvars} searchs through the data whether each variable exists.
The variables are clustered between unavailable variables, available variables with
a little amount of missing values and available variables with too many missing values.
{p 4 4 2}{cmd:isvar} must be installed ({stata ssc install isvar:ssc install isvar}).
{title:Options}
{p 4 4 2}{it:anything} is composed of variable names or lists of variables,
{p 4 4 2}{cmd:tolerance} is the tolerance level (in percentage) to consider a variable as available, with default 0,
{p 4 4 2}{cmd:nolist} avoids displaying availability status at the end of the process,
{p 4 4 2}{cmd:nosum} avoids displaying summary statistics of available variables,
{p 4 4 2}{cmd:table} displays the results in a table (instead as text),
{p 4 4 2}{cmd:genmiss} creates a new variable containing the number of missing values among the available variables.
{title:Saved results}
{p 4 4 2} {cmd:r(unavailable)} names of unavailable variables.{p_end}
{p 4 4 2} {cmd:r(available)} names of available variables with a small amount of missing values.{p_end}
{p 4 4 2} {cmd:r(manymissings)} names of variables but with too missings.{p_end}
{title:Examples}
{p 4 4 2}{cmd:. use mydata, clear }{p_end}
{p 4 4 2}{cmd:. checkvars x y z ,genmiss(countmiss) }{p_end}
{p 4 4 2}{cmd:. su `r(available)' }{p_end}
{p 4 4 2}{cmd:. tab countmiss }{p_end}
{p 4 4 2}{cmd:. u bigdataset in 1/100, clear // Big data set}{p_end}
{p 4 4 2}{cmd:. checkvars v1 v2 v3 xx yy , nosum tol(5) tab}{p_end}
{p 4 4 2}{cmd:. use `r(available)' using bigdataset, clear }{p_end}
{title:Remarks}
{p 4 4 2}{cmd:checkvars} and its primary versions ({cmd:checkfor} and {cmd:checkfor2}) have been primarily written for comparable surveys such as the Demography and
Health Surveys (DHS) or the Multiple Indicator Cluster Surveys (MICS). But this could easily applied
to any other survey.
{title:Authors}
{p 4 4 2}Amadou Bassirou DIALLO.
Poverty and Health Specialist. AFTPM, The World Bank.{p_end}
{p 4 4 2}Email: {browse "mailto:adiallo5@worldbank.org":adiallo5@worldbank.org}
{p 4 4 2}Jean-Benoit HARDOUIN.
Regional Health Observatory of Orl<72>ans, France.{p_end}
{p 4 4 2}Email: {browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
{title:Aknowledgements}
{p 4 4 2}We would like to thank Christophe Rockmore and also Nick Cox
and Kit Baum for their comments.
{title:Also see}
{p 4 13 2}Online: help for {help checkfor}, {help checkfor2}, {help isvar}, {help nmissing}, {help npresent}, {help missing} and {help dropmiss} if installed.{p_end}

View File

@ -0,0 +1,957 @@
*! Version 2.17 10July2019
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.14 : May 20th, 2010 /*dim and std options for biplots*/
*
* Historic
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
* Version 2.13 (2010-05-12): Jean-Benoit Hardouin /*corrections of bugs in KERNEL option and with METHOD(centroid)*/
* Version 2.14 (2010-05-20): Jean-Benoit Hardouin /*DIM and STD options for biplots*/
* Version 2.15 (2014-04-14): Jean-Benoit Hardouin /*save and use options*/
* Version 2.16 (2014-04-30): Jean-Benoit Hardouin, Bastien Perrot /*HTML option*/
* Version 2.17 (2019-07-10): Jean-Benoit Hardouin /*filesave and dirsave options*/
*
* Jean-benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* INSERM UMR 1246-SPHERE "Methods in Patient Centered Outcomes and Health Research", Nantes University, University of Tours
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.sphere-nantes.fr
*
* Copyright 2005-2006, 2010, 2014, 2019 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 10
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string) std dim(string) save(string) use(string) FILESave DIRSave(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"==""&"`use'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
if "`filesave'"!="" {
if "`dirsave'"=="" {
local dirsave `c(pwd)'
}
local fsb saving(`dirsave'//bar,replace)
local fsd saving(`dirsave'//dendrogram,replace)
local fsbi saving(`dirsave'//biplot,replace)
}
tempname matclus vp indexes
/*********TESTS**********/
if "`use'"!="" {
local error=0
capture matrix `vp'=`use'_vp
if _rc!=0 {
local error=_rc
}
capture matrix `matclus'=`use'_matclus
if _rc!=0 {
local error=_rc
}
local varlist $`use'_varlist
local method $`use'_method
local kernel $`use'_kernel
if "`varlist'"==""|"`method'"=="" {
local error=1
}
if `error'!=0 {
di in red "You cannot use the {hi:use} option without a preliminary use of the {hi:save} option"
error 198
exit
}
}
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
local eigen2=0
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
qui graph twoway (bar v3 id, name(bar,replace) `fsb' vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
*matrix list clv
qui cluster dendro clv, name (dendrogram,replace) `fsd' hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
qui cluster dendro clv, name(dendrogram,replace) `fsd' xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
qui graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
qui pca `list`g'' [`weight'`exp'] ,cov
local trace=e(trace)
local explained`g'=`tmp`g''*`var'/`trace'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
qui set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
if "`dim'"=="" {
local dim 1 2
}
qui qui biplotvlab `latent' `add', name(biplot,replace) `fsbi' norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1) `std' dim(`dim')
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
qui set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
if "`save'"!="" {
qui matrix `save'_vp=`vp'
qui matrix `save'_matclus=`matclus'
qui global `save'_varlist `varlist'
qui global `save'_method `method'
qui global `save'_kernel `kernel'
}
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

View File

@ -0,0 +1,122 @@
{smcl}
{* 29 juillet 2019}{* version 2.17}{...}
{hline}
help for {hi:clv}{right:Jean-Benoit Hardouin}
{hline}
{title:Clustering around latent variables }
{p 8 14 2}{cmd:clv} [{it:varlist}] [{cmd:if} {it:exp}] [{cmd:in} {it:range}] [{cmd:weight}]
[{cmd:,} {cmdab:nostand:ardized} {cmdab:ker:nel}({it:numlist}) {cmdab:meth:od}({it:keyword}) {cmdab:cons:olidation}({it:#}) {cmd:genlv}(string) {cmdab:rep:lace}
{cmdab:noden:dro} {cmdab:saved:endro}({it:filename}[,replace]) {cmdab:cut:number}({it:#}) {cmdab:show:count} {cmdab:texts:ize}({it:string}) {cmdab:deltaT}
{cmdab:hor:izontal} {cmdab:abb:rev}({it:#}) {cmdab:tit:le}({it:string}) {cmdab:cap:tion}({it:string})
{cmdab:bar} {cmdab:nobip:lot} {cmdab:add:var} {cmd:std} {cmd:dim}({it:string}) {cmdab:files:ave} {cmdab:dirs:ave}({it:string})]
{title:Description}
{p 4 8 2}{cmd:clv} clusters variables around latent components. The variables are clustered by
seeking to minimize at each step the decrease of the T criterion, computed as the sum of the
first eigenvalues of the matrices of data of all the clusters. A hierarchical cluster analysis
based on this criterion is performed. A iterative consolidation procedure can be subsequently run which
allows each variable to be assigned to the latent component it is the most correlated with.
{title:Options}
{p 0 8 2}{cmd:Options concerning the method CLV}
{p 4 8 2}{cmd:nostandardized} uses centered variables instead of standardized variables.
{p 4 8 2}{cmd:kernel} defines one or several kernels of variables (variables which are clustered together in an initial step). The first number #k1 indicates that the first #k1 variables are clustered together, the second number #k2 indicates that the following #k2 variables are clustered together...
{p 4 8 2}{cmd:method} indicates the method to cluster the variables among {it:classical} (by default) for the method described by Vigneau and Qannari,
{it:polychoric} for a use of the matrix of polychoric coefficients of correlation (instead of Pearson coefficients of correlation), {it:v2} for a modified
algorithm wich search to minimize the maximum second eigenvalue among the clusters of 2 variables and more, {it:polychoricv2} which correspond to the {it:v2}
option with the matrix of polychoric coefficients of correlation, and {it:centroid} which is defined by Vigneau and Qannari as an adaptation of CLV when
the sign of the correlation coefficients between the variables is important.
{p 4 8 2}{cmd:consolidation} performs a consolidation procedure with the obtained partition into the specified number of clusters (by default, no consolidation procedure is performed).
{p 4 8 2}{cmd:genlv} saves the latent variables in new variables with the defined string as prefix (followed by a number). This option must be used in conjonction with the {cmd:consolidation} option.
{p 4 8 2}{cmd:replace} allows replacing the created variables with the {cmd:genlv} option if they already exist.
{p 0 8 2}{cmd:Options concerning the drawing of the dendrogram}
{p 4 8 2}{cmd:nodendro} avoids to display of the dendrogram.
{p 4 8 2}{cmd:savedendro} saves the dendrogram in the file defined by this option. If this file already exists, it is possible to replace it with the {cmd:replace} option.
{p 4 8 2}{cmd:cutnumber} defines the maximal number of clusters displayed in the dendrogram (40 by default).
{p 4 8 2}{cmd:showcount} displays the number of variables in each cluster (useful with the {cmd:cutnumber} option).
{p 4 8 2}{cmd:textsize} defines the size of the labels of the variables on the dendrogram (see {help textsizestyle}).
{p 4 8 2}{cmd:deltaT} uses the variation of the T criterion as height variable for the dendrogram.
{p 4 8 2}{cmd:horizontal} displays an horizontal (instead a vertical) dendrogram.
{p 4 8 2}{cmd:abbrev} defines the length of the variables labels on the dendrogram (15 characters by default).
{p 4 8 2}{cmd:title} defines the title of the dendrogram.
{p 4 8 2}{cmd:caption} defines the caption of the axis of the dendrogram which indicates the names of the variables.
{p 0 8 2}{cmd:Options concerning the others graphs}
{p 4 8 2}{cmd:bar} displays a chart of the decrease in the T criterion at each step.
{p 4 8 2}{cmd:nobiplot} avoids to display a biplot of the latent variables with the {cmd:consolidation} option.
{p 4 8 2}{cmd:addvar} allows drawing the items on the graphical representation on the biplot.
{p 4 8 2}{cmd:std} allows standardizing the latent variables for the graphical representation on the biplot.
{p 4 8 2}{cmd:dim}({it:string}) allows choosing the axes represented on the biplot.
{p 4 8 2}{cmd:filesave} allows saving the graphs in gph files on the default directory or on the directory defined by the {cmd:dirsave} option.
{p 4 8 2}{cmd:dirsave}({it:string}) allows determining the directory to save the graphs (usefull with the {cmd:filesave} option).
{p 4 8 2} If no {it:varlist} is indicated, the procedure uses the varlist from the last {cmd:clv} procedure, but does not perform the hierarchical cluster analysis.
{title:Notes}
{p 4 8 2} The classifications around latent variables (CLV) is defined by its authors (Vigneau and Qannari, 2003) only for continuous variables. Results with binary or ordinal variables must be interpreted with precautions.
{p 4 8 2} Only {cmd:fweights} are allowed. The biplots are disabled if weights are used.
{p 4 8 2} In this procedure, all the individuals with at least one missing value are omitted.
{p 4 8 2} With the {it:polychoric} and {it:polychoricv2} methods, the {cmd:nostandardized} option is disabled.
{p 4 8 2} This module uses the following modules downloadable on SSC: {stata ssc describe polychoric}, {stata ssc describe biplotvlab} and {stata ssc describe genscore}
{title:Example}
{p 4 8 2}{cmd:. clv var1-var15} /*performs the HCA procedure*/
{p 4 8 2}{cmd:. clv var1-var15, cons(6) bar nodendro meth(centroid)} /* performs the HCA procedure based on the centroid method followed by a consolidation procedure with 6 clusters*/
{p 4 8 2}{cmd:. clv, cons(3) addvar} /*performs only the consolidation procedure with 3 clusters, based on the preceeding HCA procedure*/
{title:Aknowledgements}
{p 4 8 2} The author thanks Ronan Conroy for all the propositions of improvements.
{title:Reference}
{p 4 8 2} Vigneau E. and Qannari E. M. Clustering of variables around latent components. Communications in Statistics - Simulation and Computation. 32(4): 1131-1150, 2003.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}INSERM UMR 1246-SPHERE "MethodS in Patients-centered outcomes and HEalth ResEarch"{p_end}
{p 4 8 2}Nantes University - University of Tours{p_end}
{p 4 8 2}Institute for Research in Health 2 (IRS2), Nantes, France{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Website {browse "http://www.anaqol.org":AnaQol}

View File

@ -0,0 +1,907 @@
*! Version 2.12 1December2006
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.12 : December 1st, 2006 /*savedendro option*/
*
* Historic
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
*
* Jean-benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* Department of Biostatistics - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005-2006 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp indexes
/*********TESTS**********/
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
graph twoway (bar v3 id, name(bar,replace) vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1)
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

View File

@ -0,0 +1,916 @@
*! Version 2.14 20May2010
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.14 : May 20th, 2010 /*dim and std options for biplots*/
*
* Historic
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
* Version 2.13 (2010-05-12): Jean-Benoit Hardouin /*corrections of bugs in KERNEL option and with METHOD(centroid)*/
* Version 2.14 (2010-05-20): Jean-Benoit Hardouin /*DIM and STD options for biplots*/
*
* Jean-benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* Department of Biostatistics - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005-2006, 2010 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string) std dim(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp indexes
/*********TESTS**********/
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
local eigen2=0
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
graph twoway (bar v3 id, name(bar,replace) vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
qui pca `list`g'' [`weight'`exp'] ,cov
local trace=e(trace)
local explained`g'=`tmp`g''*`var'/`trace'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
qui set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
if "`dim'"=="" {
local dim 1 2
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1) `std' dim(`dim')
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
qui set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

View File

@ -0,0 +1,946 @@
*! Version 2.15 14April2014
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.14 : May 20th, 2010 /*dim and std options for biplots*/
*
* Historic
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
* Version 2.13 (2010-05-12): Jean-Benoit Hardouin /*corrections of bugs in KERNEL option and with METHOD(centroid)*/
* Version 2.14 (2010-05-20): Jean-Benoit Hardouin /*DIM and STD options for biplots*/
* Version 2.15 (2014-04-14): Jean-Benoit Hardouin /*save and use options*/
*
* Jean-benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* Department of Biostatistics - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.sphere-nantes.fr
*
* Copyright 2005-2006, 2010, 2014 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv215,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string) std dim(string) save(string) use(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"==""&"`use'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp indexes
/*********TESTS**********/
if "`use'"!="" {
local error=0
capture matrix `vp'=`use'_vp
if _rc!=0 {
local error=_rc
}
capture matrix `matclus'=`use'_matclus
if _rc!=0 {
local error=_rc
}
local varlist $`use'_varlist
local method $`use'_method
local kernel $`use'_kernel
if "`varlist'"==""|"`method'"=="" {
local error=1
}
if `error'!=0 {
di in red "You cannot use the {hi:use} option without a preliminary use of the {hi:save} option"
error 198
exit
}
}
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
local eigen2=0
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
graph twoway (bar v3 id, name(bar,replace) vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
qui pca `list`g'' [`weight'`exp'] ,cov
local trace=e(trace)
local explained`g'=`tmp`g''*`var'/`trace'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
qui set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
if "`dim'"=="" {
local dim 1 2
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1) `std' dim(`dim')
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
qui set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
if "`save'"!="" {
qui matrix `save'_vp=`vp'
qui matrix `save'_matclus=`matclus'
qui global `save'_varlist `varlist'
qui global `save'_method `method'
qui global `save'_kernel `kernel'
}
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,806 @@
*! Version 2.7 6October2005
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.7 : October 6, 2005 /*return, multiple graphs, polychoric+consolidation*/
*
* Historic
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
*
* Jean-benoit Hardouin, Regional Health Observatory of Orl<72>ans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [, CUTnumber(int 30) bar CONSolidation(int 0) noDENdro noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar]
preserve
tempfile clvfile
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"=="" {
*set trace on
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
/*
if "`method'"=="polychoric" {
di in red "The {hi:consolidation} is not possible with the {hi:polychoric} option"
error 198
exit
}
*/
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i''
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui count
local nbind=r(N)
local cons=`consolidation'
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i''
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
di
di in green "{hline 30}"
di in green "TOTAL VARIANCE: " in ye %14.3f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %7.0f `nbind'
di in green "METHOD:" in ye _col(`=31-length("`method'")') "`=upper("`method'")'"
di in green "{hline 30}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',10,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i''
matrix `vp'[`i',1]=r(Var)
}
else {
matrix `vp'[`i',1]=1
}
matrix `vp'[`i',8]=`i'
matrix `vp'[`i',9]=`totvar'
matrix `vp'[`i',10]=100
}
matrix `vp'[`nbitems',3]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 89}"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation"
di in green "{hline 89}"
}
else {
local crit T
di in green "{hline 100}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal"
}
else {
di in green _col(84) "Current"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation"
di in green "{hline 100}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj'
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk'
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk'
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'',cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k''
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
/*
local t1=`vp'[`j',1]
local t2=`vp'[`k',1]
di "`ev'=`t1'+`t2'-`lambda1'"
*/
local ev2=max(`vp'[`j',5],`vp'[`k',5],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'',cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k''
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12'
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
*set trace off
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
if "`method'"!="centroid" {
matrix `vp'[`clus',1]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*T CRITERION*/
matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',5]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
}
else {
matrix `vp'[`clus',1]=0 /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE G CRITERION*/
matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*G CRITERION*/
matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',5]=0 /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
}
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',9]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
}
else {
local relv=(`minegenval'-`vp'[`=`clus'-1',2])/`vp'[`=`clus'-1',3]*100
local percent %
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `i'>1 {
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv'-`threshold'[`=`nbitems'-`i'+2',1]
}
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) _col(84) %5.2f `relv' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) %8.4f `vp'[`clus',5] _col(95) %5.2f `relv' "`percent'"
}
}
}
local i=2*`nbitems'-1
local relv=(`vp'[`i',3]-`vp'[`i',2])/`vp'[`i',3]*100
if "`method'"=="centroid" {
di in ye _col(84) %5.2f `relv' "`percent'"
}
else {
di in ye _col(95) %5.2f `relv' "`percent'"
}
matrix `threshold'[1,1]=`relv'
matrix `threshold'[1,2]=`relv'-`threshold'[2,1]
matrix `threshold'[1,3]=`vp'[`i',3]
*matrix list `threshold'
local best=0
local maxbest=0
local best2=0
local maxbest2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 100}"
di in green "Proposed best partitions: "
di in green "Based on the variation of the T criterion"
di in green _col(10) "1. Partitions in " in ye `best' in green " clusters"
di in green _col(10) "2. Partitions in " in ye `best2' in green " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`nbitems'-1 {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in green "Based on a research of a threshold"
di in green _col(10) "1. Partitions in " in ye `bestt' in green " clusters"
di in green _col(10) "2. Partitions in " in ye `bestt2' in green " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',2]=`totvar'-`T'
matrix `vp'[`i',9]=`T'
matrix `vp'[`i',10]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
}
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `vp' ,names(v)
qui drop in 1/`nbitems'
qui gen id=`nbitems'-_n
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v2 "T variation"
graph twoway bar v2 id, name(bar,replace) vert ,ylabel(0(0.5)2) xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',6]<=`nbitems') {
local first=`vp'[`ligne',6]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',7]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',2] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',10] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',8] in 1
}
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`dendro'"=="" {
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0)) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') `cut'
}
}
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/*
if `cons'!=0&("`method'"=="polychoric"|"`method'"=="polychoricv2") {
di in ye "The {hi:consolidation} is not possible with the {hi:polychoric} methods"
local cons=0
}
*/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
*cluster generate cut = groups(2/9) , name(clv)
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=1
local max=0
*set trace on
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "GROUP " %2.0f `g' _c
local col=`col'+10
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g''
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'',cov
matrix `Ev'=e(Ev)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'',score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local critT=`critT'+`lambda1'
}
else {
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su
local critT=`critT'+`r(Var)'
}
}
}
}
di
forvalues i=1/`max' {
local col=1
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+10
}
di
}
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr''
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g''
local tmpcorr=r(rho)
if ((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid") {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`cons' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h''
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"=="" {
local max=max(`matsize',`nbind')
set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1)
}
else {
di in green "There is more than 800 individuals, so the {hi:biplot} is disabled"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
set matsize `matsize'
use `clvfile',replace
capture cluster delete clv,zap
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
end

View File

@ -0,0 +1,798 @@
*! Version 2.9 9December2005
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.9 : December 9, 2005 /*save the latent variables*/
*
* Historic
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
*
* Jean-benoit Hardouin, Regional Health Observatory of Orl<72>ans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 30) bar CONSolidation(int 0) noDENdro noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string)]
preserve
tempfile clvfile
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
di
di in green "{hline 30}"
di in green "TOTAL VARIANCE: " in ye %14.3f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %7.0f `nbind'
di in green "METHOD:" in ye _col(`=31-length("`method'")') "`=upper("`method'")'"
di in green "{hline 30}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',10,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',1]=r(Var)
}
else {
matrix `vp'[`i',1]=1
}
matrix `vp'[`i',8]=`i'
matrix `vp'[`i',9]=`totvar'
matrix `vp'[`i',10]=100
}
matrix `vp'[`nbitems',3]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 89}"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation"
di in green "{hline 89}"
}
else {
local crit T
di in green "{hline 100}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal"
}
else {
di in green _col(84) "Current"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation"
di in green "{hline 100}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
local ev2=max(`vp'[`j',5],`vp'[`k',5],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local eigen2=`lambda2'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval2=`ev2'
local eigen2=`ev2'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
*set trace off
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
if "`method'"!="centroid" {
matrix `vp'[`clus',1]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*T CRITERION*/
matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',5]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
}
else {
matrix `vp'[`clus',1]=0 /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE G CRITERION*/
matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*G CRITERION*/
matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',5]=0 /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
}
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',9]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
}
else {
local relv=(`minegenval'-`vp'[`=`clus'-1',2])/`vp'[`=`clus'-1',3]*100
local percent %
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `i'>1 {
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv'-`threshold'[`=`nbitems'-`i'+2',1]
}
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) _col(84) %5.2f `relv' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) %8.4f `vp'[`clus',5] _col(95) %5.2f `relv' "`percent'"
}
}
}
local i=2*`nbitems'-1
local relv=(`vp'[`i',3]-`vp'[`i',2])/`vp'[`i',3]*100
if "`method'"=="centroid" {
di in ye _col(84) %5.2f `relv' "`percent'"
}
else {
di in ye _col(95) %5.2f `relv' "`percent'"
}
matrix `threshold'[1,1]=`relv'
matrix `threshold'[1,2]=`relv'-`threshold'[2,1]
matrix `threshold'[1,3]=`vp'[`i',3]
*matrix list `threshold'
local best=0
local maxbest=0
local best2=0
local maxbest2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 100}"
di
di in green "{hline 24}"
di in green "PROPOSED BEST PARTITIONS"
di in green "{hline 24}"
di
di in yellow _col(4) "Based on the variation of the T criterion"
di in green _col(10) "1. Partitions in " in ye `best' in green " clusters"
di in green _col(10) "2. Partitions in " in ye `best2' in green " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`nbitems'-1 {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold"
di in green _col(10) "1. Partitions in " in ye `bestt' in green " clusters"
di in green _col(10) "2. Partitions in " in ye `bestt2' in green " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',2]=`totvar'-`T'
matrix `vp'[`i',9]=`T'
matrix `vp'[`i',10]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
}
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `vp' ,names(v)
qui drop in 1/`nbitems'
qui gen id=`nbitems'-_n
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v2 "T variation"
graph twoway bar v2 id, name(bar,replace) vert ,ylabel(0(0.5)2) xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',6]<=`nbitems') {
local first=`vp'[`ligne',6]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',7]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',2] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',10] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',8] in 1
}
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`dendro'"=="" {
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0)) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') `cut'
}
}
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
*cluster generate cut = groups(2/9) , name(clv)
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=1
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "GROUP " %2.0f `g' _c
local col=`col'+10
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local critT=`critT'+`lambda1'
}
else {
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
forvalues i=1/`max' {
local col=1
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+10
}
di
}
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1)
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
set matsize `matsize'
use `clvfile',replace
capture cluster delete clv,zap
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
end

View File

@ -0,0 +1,291 @@
program define compart,rclass
version 8
syntax varlist [if] [in] [fweight iweight] [,part(numlist) Matrix(string) type(string) DETails noSTANDardized VARiables SQUare]
preserve
unab varlist:`varlist'
di "`varlist'"
tokenize `varlist'
marksample touse
local nbvar:word count `varlist'
qui count
local nbind=r(N)
tempname p
qui gen `p'=1
qui su `p' [`weight'`exp'] if `touse'
local nbind=r(N)
if "`square'"=="" {
local quad=1
}
else {
local quad=2
}
if "`type'"!=""&"`type'"!="polychoric" {
di in red "The type of the matrix is not authorized. Please correct your {hi:type} option."
error 198
}
if "`type'"!=""&"`matrix'"!="" {
di in red "You cannot define in the same time the {hi:type} and the {hi:matrix} options"
error 198
}
/* DEFINITION OF THE PARTITION OF THE VARIABLES*/
local newpart
foreach i in `part' {
if `i'!=0 {
local newpart `newpart' `i'
}
}
local part `newpart'
local meme=0
local diff=0
local nbpart:word count `part'
forvalues i=1/`nbpart' {
local iti:word `i' of `part'
local meme=`meme'+`iti'*(`iti'-1)/2
forvalues j=`=`i'+1'/`nbpart' {
local itj:word `j' of `part'
local diff=`diff'+`iti'*`itj'
}
}
local perc=`meme'/(`meme'+`diff')
di "meme: `meme' ; diff: `diff' ; perc: `perc'"
local test=0
local last0=0
forvalues i=1/`nbpart' {
local first`i'=`last`=`i'-1''+1
local size`i':word `i' of `part'
local last`i'=`first`i''+`size`i''-1
local test=`test'+`size`i''
local list`i'
forvalues j=`first`i''/`last`i'' {
local list`i' `list`i'' ``j''
}
}
if `test'!=`nbvar' {
di in red "{p}The described partition of the variables is composed of a number of variables different of the number of variables of varlist.{p_end}"
exit 198
}
/* BY DEFAULT, STANDARDIZATION*/
if "`standardized'"=="" {
forvalues i=1/`nbvar' {
qui su ``i'' [`weight'`exp']
qui replace ``i''=(``i''-r(mean))/r(sd)
}
}
tempname Cov W
if "`matrix'"==""&"`type'"!="polychoric" {
/* COVARIANCE OR CORRELATION MATRIX*/
qui matrix accum `Cov'=`varlist' [`weight'`exp'],nocons dev
qui matrix `Cov'=`Cov'/(`nbind'-1)
}
else if "`type'"=="polychoric" {
qui polychoric `varlist'
qui matrix `Cov'=r(R)
}
else {
qui matrix `Cov'=`matrix'
}
/* WE SAVE THE MATRIX AND WE COMPUTE THE AVERAGE COVARIANCE */
qui matrix `W'=`Cov'
local sum=0
forvalues i=1/`nbvar' {
forvalues j=`=`i'+1'/`nbvar' {
local sum=`sum'+ `W'[`i',`j']^`quad'
}
}
/* WE SAVE THE DATA AND WE COMPUTES THE USED PERCENTILES OF THE COVARIANCE*/
tempfile compartfile
qui save `compartfile',replace
drop _all
set obs `=`nbvar'*(`nbvar'-1)'
local n=1
qui gen i=.
qui gen j=.
qui gen corr=.
forvalues i=1/`nbvar' {
forvalues j=`=`i'+1'/`nbvar' {
qui replace i=`i' in `n'
qui replace j=`j' in `n'
qui replace corr=`W'[`i',`j']^`quad' in `n'
local ++n
}
}
matrix list `W'
su corr
sort corr
centile corr,centile(`=100-`perc'*100')
local centile=r(c_1)
if `diff'!=0 {
local perc2=(`meme'+1)/(`meme'+`diff')
centile corr,centile(`=100-`perc2'*100')
local centile2=r(c_1)
local centile=(`centile'+`centile2')/2
}
qui use `compartfile',clear
/***************************************************/
if `nbpart'==1 {
local mean=0
* local mean=(2*`sum')/(`nbvar'*(`nbvar'-1))
}
else {
local mean=(2*`sum')/(`nbvar'*(`nbvar'-1))
local mean=0
*local mean=`centile'
}
/*THE MATRIX IS CENTERED*/
forvalues i=1/`nbvar' {
matrix `W'[`i',`i']=0
forvalues j=`=`i'+1'/`nbvar' {
matrix `W'[`i',`j']=(`W'[`i',`j']^`quad'-`mean')
matrix `W'[`j',`i']=`W'[`i',`j']
}
}
/*WE COMPUTE THE INDEX D*/
local C=0
local C1=0
local C2=0
local minrho=2
local summeme=0
local sumdiff=0
if "`square'"!="" {
local maxrho=0
}
else {
local maxrho=-2
}
*set trace on
forvalues i=1/`nbpart' {
forvalues j=1/`nbpart' {
forvalues k=`first`i''/`last`i'' {
forvalues l=`first`j''/`last`j'' {
if `i'!=`j' {
if `k'>`l' {
local sumdiff=`sumdiff'+`Cov'[`k',`l']
}
if (`Cov'[`k',`l'])^`quad'>(`maxrho')^`quad' {
local maxrho=(`Cov'[`k',`l'])
}
matrix `W'[`k',`l']=-(`W'[`k',`l'])
}
else if (`Cov'[`k',`l'])^`quad'<(`minrho')^`quad'&`k'!=`l' {
local minrho=`Cov'[`k',`l']
}
if `i'==`j'&`k'>`l' {
local summeme=`summeme'+`Cov'[`k',`l']
}
local C=`C'+`W'[`k',`l']
local C1=`C1'+abs(`W'[`k',`l'])
if `W'[`k',`l']>+0 {
local ++C2
}
}
}
}
}
if `meme'!=0 {
local summeme=`summeme'/`meme'
}
if `diff'!=0 {
local sumdiff=`sumdiff'/`diff'
}
local diffsum=`summeme'-`sumdiff'
set trace off
di "Summeme: `summeme' ; Sumdiff: `sumdiff'"
local minrho=(`minrho')^(`quad')
local maxrho=(`maxrho')^(`quad')
local C=sign(`C')*(abs(`C'))^(1/`quad')/(`nbvar'*(`nbvar'-1))+`mean'
local C1=(`C1')^(1/`quad')/(`nbvar'*(`nbvar'-1))
local C2=(`C2')/(`nbvar'*(`nbvar'-1))*100
return local Pcov=`=`C'/`C1''
return local Pel=`=`C2'/100'
if `nbpart'==1 {
local C=`C'*(`nbvar'+1)/`nbvar'
}
*di "C:" `C' " C1: " `C1' " C2: " `C2'
if `nbpart'==1 {
local maxrho=0
}
if `nbpart'==`nbvar' {
local minrho=0
}
di " C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')"
local C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')
di in green "{hline 80}"
di in green "Number of individuals: " _col(71) in ye %8.0f `nbind'
di in green "Number of variables: " _col(71) in ye %8.0f `nbvar'
di in green "COMPART index: " _col(71) in ye %8.6f `C'
di in green "Proportion of the covariances explained by the COMPART index: " _col(73) in ye %6.2f `=abs(`C')/`C1'*100' "%"
di in green "Proportion of positive elements in the matrix: " _col(73) in ye %6.2f `C2' "%"
di in green "Minimum correlation coefficient for 2 variables of the same group: " _col(74) in ye %5.2f `minrho'
di in green "Maximum correlation coefficient for 2 variables of two different groups: " _col(74) in ye %5.2f `maxrho'
di in green "Average correlation coefficient: " _col(74) in ye %5.2f `mean'
di in green "{hline 80}"
di
if "`details'"!="" {
di in green "Matrix of the coefficients"
di in green "{hline 26}"
matrix list `W' ,noheader format(%7.4f)
di
}
if "`variables'"!="" {
di in green "Details for each variable"
di in green "{hline 26}"
di
di in green "{hline 80}"
di in green "Items" _col(17) "COMPART" _col(26) "Problematic items"
di in green "{hline 80}"
tempname Cvar
matrix `Cvar'=J(1,`nbvar',0)
forvalues i=1/`nbvar' {
local C`i'=0
local pourri`i'
forvalues j=1/`nbvar' {
local C`i'=`C`i''+`W'[`i',`j']
if `W'[`i',`j']<0 {
local pourri`i' `pourri`i'' ``j''
}
}
local C`i'=`C`i''/(`nbvar'-1)
matrix `Cvar'[1,`i']=`C`i''
di in ye abbrev("``i''",14) _col(15) %9.6f `C`i'' _c
if "`pourri`i''"!="" {
di in ye _col(26) "`pourri`i''"
}
else {
di
}
}
di in green "{hline 80}"
matrix colnames `Cvar'=`varlist'
matrix rownames `Cvar'=Compart
return matrix Cvar=`Cvar'
}
local test=`maxrho'-`minrho'
local diffsum=(`summeme'*`meme'-`diff'*`sumdiff')/(`meme'+`diff')
local diffsum=`summeme'/*-`sumdiff'*/
return local compart `C'
return local mean `mean'
return local list `varlist'
return local part `part'
restore
end

View File

@ -0,0 +1,120 @@
program define compart2,rclass
version 9
syntax varlist [if] [in] [fweight iweight] [,part(numlist) type(string)]
preserve
unab varlist:`varlist'
di "`varlist'"
tokenize `varlist'
marksample touse
local nbvar:word count `varlist'
qui count
local nbind=r(N)
tempname p
qui gen `p'=1
qui su `p' [`weight'`exp'] if `touse'
local nbind=r(N)
if "`square'"=="" {
local quad=1
}
else {
local quad=2
}
if "`type'"=="" {
local type classical
}
if "`type'"!="classical"&"`type'"!="centroid"&"`type'"!="polychoric" {
di in red "The type of the matrix is not authorized. Please correct your {hi:type} option."
error 198
}
/* DEFINITION OF THE PARTITION OF THE VARIABLES*/
local newpart
foreach i in `part' {
if `i'!=0 {
local newpart `newpart' `i'
}
}
local part `newpart'
local meme=0
local diff=0
local nbpart:word count `part'
forvalues i=1/`nbpart' {
local iti:word `i' of `part'
local meme=`meme'+`iti'*(`iti'-1)/2
forvalues j=`=`i'+1'/`nbpart' {
local itj:word `j' of `part'
local diff=`diff'+`iti'*`itj'
}
}
local perc=`meme'/(`meme'+`diff')
di "meme: `meme' ; diff: `diff' ; perc: `perc'"
local test=0
local last0=0
forvalues i=1/`nbpart' {
local first`i'=`last`=`i'-1''+1
local size`i':word `i' of `part'
local last`i'=`first`i''+`size`i''-1
local test=`test'+`size`i''
local list`i'
forvalues j=`first`i''/`last`i'' {
local list`i' `list`i'' ``j''
}
}
if `test'!=`nbvar' {
di in red "{p}The described partition of the variables is composed of a number of variables different of the number of variables of varlist.{p_end}"
exit 198
}
forvalues g=1/`nbpart' {
tempname f1`g'
if `size`g''>1 {
if "`type'"=="classical" {
qui pca `list`g'',cov
qui predict `f1`g''
}
else if "`type'"=="polychoric" {
qui polychoricpca `list`g'',score(`f1`g'') nscore(1)
rename `f1`g''1 `f1`g''
}
else if "`type'"=="centroid" {
qui genscore `list`g'', score(`f1`g'') mean
}
}
else if `size`g''==1 {
qui gen `f1`g''=`list`g''
}
}
local minrho=2
local maxrho=-2
forvalue i=1/`nbvar' {
forvalues g=1/`nbpart' {
qui corr ``i'' `f1`g''
if `i'>=`first`g''&`i'<=`last`g'' {
if r(rho)<`minrho' {
local minrho=r(rho)
}
}
else if r(rho)>`maxrho' {
local maxrho=r(rho)
}
}
}
di "C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')"
local C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')
local C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')
*local C=`minrho'
di "C= `C' min=`minrho' max=`maxrho'"
return local compart `C'
*return local mean `mean'
*return local list `varlist'
return local part `part'
restore
end

View File

@ -0,0 +1,81 @@
capture program drop conc
program conc,rclass
syntax varlist, comp(varlist) [tconc(real 0.4)]
di as result "{hline}"
di "{bf:Concurrent validity}"
di as result "{hline}"
di
local n : word count `varlist'
local p : word count `comp'
matrix m = J(`n',`p',.)
matrix rownames m = `varlist'
matrix colnames m = `comp'
local r = 1
foreach i in `varlist' {
local c = 1
foreach j in `comp' {
qui corr `i' `j'
mat e = r(C)
local f = e[2,1]
mat m[`r',`c'] = `f'
local `++c'
}
local `++r'
}
*mat li m, format(%3.2f) noheader
tokenize `varlist'
local maxv = length("`1'")
forvalues i=1/`n' {
local lenv = length("``i''")
if `lenv' > `maxv' local maxv = `lenv'
}
local decv = `maxv'+6
tokenize `comp'
local maxc = length("`1'")
forvalues i=1/`p' {
local lenc = length("``i''")
if `lenc' > `maxc' local maxc = `lenc'
}
local decc = `maxc'+4
local col = `decv'
foreach c in `comp' {
di as result _col(`col') "`c'" _c
local col = `col'+`decc'
}
di
local i = 1
foreach x in `varlist' {
local var`i' = "`x'"
local `++i'
}
forvalues i=1/`n' {
di as result "`var`i''" _c
local col = `decv'
forvalues j=1/`p' {
local t = m[`i',`j']
if `t' > `tconc' | `t' < -`tconc' {
di as result _col(`=`col'-1') %5.2f `t' _c
}
else di as text _col(`=`col'-1') %5.2f `t' _c
local col = `col'+`decc'
}
di
}
end
*conc HA-MOC, comp(ioc1 ioc2) tconc(0.4)
*conc x1-x40, comp(x1 x2) tconc(0.4)

View File

@ -0,0 +1,298 @@
capture program drop convdiv
program convdiv
syntax varlist, PARTition(numlist integer >0) [SCOrename(string) TCONVdiv(real 0.4) convdivboxplots]
preserve
qui set autotabgraphs on
local C = 0
foreach z in `partition' {
local C = `C' + `z'
}
local nbvars : word count `varlist'
if `C' != `nbvars' {
di in red "The sum of the numbers in the partition option is different from the number of variables precised in varlist"
exit 119
}
local P:word count `partition'
if "`scorename'" !="" {
local S:word count `scorename'
if `P'!=`S' {
di in red "The number of score names given is different from the number of dimensions in the partition option"
exit 119
}
}
qui detect `varlist', partition(`partition')
matrix A = r(Corrrestscores)
matrix B = r(Corrscores)
local i = 1
local y = 1
foreach x in `partition' {
if `i' == 1 local s = `x'
else local s = `s' +`x'
forvalues z = `y'/`s' {
matrix B[`z',`i'] = A[`z',`i']
}
local `i++'
local y = `s'+1
}
if "`scorename'"!="" {
matrix colnames B = `scorename'
}
else {
local name
local nname
forvalues i = 1/`P' {
local name "Dim`i'"
local nname `nname' `name'
}
local scorename = "`nname'"
matrix colnames B = `scorename'
}
/* coupure noms des scores */
/*
local i = 1
foreach s in `scorename' {
local len = length("`s'")
if `len' > 5 {
local c = substr("`s'",1,4)
local d = substr("`s'",-1,1)
local s`i' "`c'" "~" "`d'"
}
else local s`i' = "`s'"
local sc `sc' `s`i''
local `++i'
}
*/
/* coupure noms des items */
/*
local i = 1
foreach s in `varlist' {
local len = length("`s'")
if `len' > 10 {
local c = substr("`s'",1,9)
local d = substr("`s'",-1,1)
local var`i' "`c'" "~" "`d'"
}
else local var`i' = "`s'"
local `++i'
}
*/
local i = 1
foreach v in `varlist' {
local var`i' = abbrev("`v'",10)
local `++i'
}
local i = 1
foreach s in `scorename' {
local s`i' = abbrev("`s'",7)
local sc `sc' `s`i''
local `++i'
}
di as result "{hline}"
di "{bf:Correlation matrix}"
di "{hline}"
di
/*
local i = 1
foreach x in `varlist' {
local var`i' = "`x'"
local `++i'
}
*/
/*
tokenize `sc'
local max = 3
forvalues j=1/`P' {
local len`j' = length("`s`j''")
if `len`j'' > `max' local max = `len`j''
}
local maxit = 1
forvalues i=1/`nbvars' {
local len = length("`var`i''")
if `len' > `maxit' local maxit = `len'
}
*/
local dec = 10
local col = `dec'
local decit = 14
local colit = `decit'
local col1 = `decit'
forvalues i=1/`P' {
di _col(`col1') "{bf:`s`i''}" _c
local col1 = `col1' + `dec'
}
di
local h = (`P'-1)*`dec'+`decit'+4
di "{hline `h'}"
local i = 1
local j = 1
local y = 1
foreach p in `partition' {
if `j' == 1 local s = `p'
else local s = `s' +`p'
forvalues z = `y'/`s' {
di as text "{bf:`var`z''}" _c
local col = `decit'-1
local dd = `z' // [counting cptdiv (one per item)]
forvalues k = 1/`P' {
local t = B[`z',`k']
local t : di %6.3f `t'
if `k' == `i' {
if `t' < `tconvdiv' {
di in red _col(`col') "{bf:`t'}" _c
local cptconv = `cptconv'+1
local col = `col' + `dec'
}
else {
di _col(`col') "{bf:`t'}" _c
local col = `col' + `dec'
}
}
else {
if B[`z',`k'] > B[`z',`i'] {
di in red _col(`col') "`t'" _c
if `dd' == `z' local cptdiv = `cptdiv'+1 // [one per item]
local dd = 0
local col = `col' + `dec'
}
else {
di as text _col(`col') "{text:`t'}"_c
local col = `col' + `dec'
}
}
}
di
}
di "{dup `h':-}"
local `i++'
local `j++'
local y = `s'+1
}
local y = 1
local h = 1
local np : word count `partition'
foreach p in `partition' {
if `h' == 1 local s = `p'
else local s = `s' +`p'
forvalues j = 1/`np' {
mat C_`h'_`j' = B[`y'..`s',`j']
tempvar tp_`h'_`j'
mat colnames C_`h'_`j' = `tp_`h'_`j''
svmat C_`h'_`j', names(col)
*rename C_`h'_`j'
*mat li C_`h'_`j'
}
local `++h'
local y = `s'+1
}
if "`convdivboxplots'" != "" {
forvalues h = 1/`np' {
tokenize `scorename'
local call = ""
local callbox = ""
local callleg = ""
forvalues j = 1/`np' {
*rename C_`h'_`j' _``j''
*di "`tp_`h'_`j''"
local call `call' /*_``j''*/ `tp_`h'_`j''
*if `h' == `j' local color = "blue"
*else local color = ""
local callbox `callbox' box(`j',fcolor(`color') lcolor(`color')) marker(`j', mcolor(`color'))
local lab = "``j''"
local lab = `"`lab'"'
local callleg `callleg' `j' "`lab'"
*di `"`callleg'"'
*di "`call'"
}
graph box `call', name("Conv_div_``h''",replace) `callbox' legend(order(`"`callleg'"') stack rows(1) size(small)) title(Correlations between items of ``h'' and dimensions) yline(`tconvdiv', lpattern(dot) lcolor(black))
qui set autotabgraphs on
*drop `call'
}
}
/*
foreach var of varlist ioc1 ioc2 ioc3 {
loc varlab `""`:var l `var''""'
loc varlabs `"`varlabs'`varlab'"'
di "`varlab'"
}
*/
/*
svmat B
local y = 1
local h = 1
qui gen d = 0
foreach p in `partition' {
if `h' == 1 local s = `p'
else local s = `s' +`p'
replace d = `h' in `y'/`s'
local `++h'
local y = `s'+1
}
twoway (scatter B1 d)
*/
local t : di %5.3f `tconvdiv'
local p1 = (`nbvars'-`cptconv')/`nbvars'*100
local p1 : di %4.1f `p1'
local p2 = (`nbvars'-`cptdiv')/`nbvars'*100
local p2 : di %4.1f `p2'
di
di as result "Convergent validity:" _c
di as text " `=`nbvars'-`cptconv''/`nbvars' items (`p1'%) have a correlation coefficient with the score of "
di _col(22) "their own dimension greater than `t'"
di
di as result "Divergent validity:" _c
di as text " `=`nbvars'-`cptdiv''/`nbvars' items (`p2'%) have a correlation coefficient with the score"
di _col(22) "of their own dimension greater than those computed with other scores."
end
*convdiv ioc1-ioc37, partition(4 4 7 3 3 4 7 5) scorename(Hddfdfdffda PSE W BCC Afdfdfererdfc AE LI MOC) tconvdiv(0.4) // convdivboxplots
*convdiv x1-x40, partition(5 5 5 5 5 5 5 5) scorename(Hdfda PSE W BCC Afdfdfererdfc AE LI MOC) tconvdiv(0.4) // convdivboxplots