
********************************************************************************
***  Do-File zum Lehrbuch "Statistik. Eine Einführung für Sozialwissenschaftler"
***  (c) 2014 Wolfgang Ludwig-Mayerhofer, Uta Liebeskind, Ferdinand Geißler
********************************************************************************

************************************
*** Kap. 5: Bivariate Analysen  ****
************************************

* Pin the interpreter version first so everything below runs under it
* (adjust the version number if necessary).
version 13.0
* Close a log that may still be open; capture suppresses the error if none is.
capture log close
* Start with empty memory and without --more-- pauses in the output.
set more off
clear


* ---------------------------------------------------------------------------- *


*** 5.1 - Analysis of contingency tables

* Examples 5.1 and 5.2 (Tables 5.2 and 5.4, Figs. 5.1 and 5.2)
use "glhsteach.dta", clear

* Map koho onto three labelled cohort groups; every other value becomes missing.
recode koho (1940 = 1 "1939-1941") (1955 = 2 "1954-1956") ///
(1971 = 3 "1971") (else = .), generate(gebkoho)

* Absolute frequencies first, then the same table with column percentages.
tabulate jobtil25 gebkoho
tabulate jobtil25 gebkoho, column

* catplot is a user-written command. Install it from SSC only when it cannot
* be found, so the do-file does not need network access on every run.
capture which catplot
if _rc ssc install catplot

* Stacked bars of absolute frequencies. name(..., replace) allows the do-file
* to be rerun in the same session without a "graph already exists" error.
catplot jobtil25 gebkoho, asyvars stack recast(bar) name(bi_cat1, replace) ///
ylabel(,angle(0) labsize(*1.1)) var2opts(label(labsize(*1.1))) ///
title("Absolute Häufigkeiten") l1title("Häufigkeit", size(*1.1)) ///
graphregion(color(white)) bar(1, bcolor(white) blcolor(black)) ///
bar(2, bcolor(gs10) blcolor(black))
* Switch off drawing of the scale-axis title on the graph just created.
gr_edit .scaleaxis.title.draw_view.setstyle, style(no)

* Same layout, but with percentages computed within each category of gebkoho.
catplot jobtil25 gebkoho, asyvars stack recast(bar) percent(gebkoho) ///
name(bi_cat2, replace) ylabel(,angle(0) labsize(*1.1)) ///
var2opts(label(labsize(*1.1))) ///
title("Bedingte Anteilswerte") l1title("Prozent", size(*1.1)) ///
graphregion(color(white)) bar(1, bcolor(white) blcolor(black)) ///
bar(2, bcolor(gs10) blcolor(black))
gr_edit .scaleaxis.title.draw_view.setstyle, style(no)

* Put both bar charts into one figure and save it as PDF.
graph combine bi_cat1 bi_cat2, graphregion(color(white))
graph export "catplot.pdf", replace


* spineplot is a user-written command. Install it from SSC only when it cannot
* be found, so the do-file does not need network access on every run.
capture which spineplot
if _rc ssc install spineplot

* Number of observations in each combination of cohort and jobtil25; the
* variable is handed to spineplot's text() option below.
by gebkoho jobtil25, sort: gen N = _N

spineplot jobtil25 gebkoho, text(N, mlabcolor(black) mlabsize(*1.3)) ///
legend(textwidth(vhuge)) ///
xti("Anteil Geburtskohorte" " ", axis(1)) ///
xti(" " "Geburtskohorte", axis(2)) ///
yti("Erwerbsbeteiligung bis 25 Jahre" " ", axis(2)) ///
bar1(color(gs10) lcolor(black))  bar2(color(white) lcolor(black)) ///
graphregion(color(white))

graph export "spineplot.pdf", replace


* Examples 5.3, 5.4 and 5.5
* Immediate form of cs: the four cell counts are passed in csi's positional
* order (#a #b #c #d). The or option additionally reports the odds ratio.
local cells 61 18 416 332
csi `cells', or


* Example 5.6 (chi-squared test)
* Recode 32, 43, 60 and 72 onto 31, 42, 50 and 71, so each pair of codes
* forms a single category in the new variable j1egp_r.
recode job1_egp (32=31) (43=42) (60=50) (72=71), gen(j1egp_r)

* Value labels for the collapsed categories (the label for code 42 had lost
* its umlaut and is restored here).
label define lj1egp_r 10 "I: Obere Dienstklasse" ///
20 "II: Untere Dienstklasse" 31 "IIIa+b: Nicht-manuelle Routineberufe" ///
42 "IVb+c: Kleine Selbständige" 50 "V+VI: Meister und Facharbeiter" ///
71 "VIIa+b: An-/ungelernte Arbeiter"
label values j1egp_r lj1egp_r

* Cross-tabulation with Pearson's chi-squared test, restricted to
* observations with a non-missing gebkoho.
tabulate j1egp_r sex if gebkoho != . , chi2



*** 5.2 - Association between two metric variables

* Example 5.8, Fig. 5.6a
use "oecd_llg.dta", clear

* Correlation coefficient, covariance and a quick default scatter plot.
correlate enrol_cc lfp07_moms
correlate enrol_cc lfp07_moms, covariance
scatter enrol_cc lfp07_moms

* Reproducing the scatter plot exactly as printed in the book takes more work.
* Two-letter marker labels; countries not listed below keep the placeholder ".".
generate str country2 = "."
replace country2 = "AU" if country == "Australia"
replace country2 = "AT" if country == "Austria"
replace country2 = "BE" if country == "Belgium"
replace country2 = "CA" if country == "Canada"
replace country2 = "DK" if country == "Denmark"
replace country2 = "FI" if country == "Finland"
replace country2 = "FR" if country == "France"
replace country2 = "DE" if country == "Germany"
replace country2 = "GR" if country == "Greece"
replace country2 = "IE" if country == "Ireland"
replace country2 = "IT" if country == "Italy"
replace country2 = "JP" if country == "Japan"
replace country2 = "NL" if country == "Netherlands"
replace country2 = "NZ" if country == "New Zealand"
replace country2 = "NO" if country == "Norway"
replace country2 = "PT" if country == "Portugal"
replace country2 = "ES" if country == "Spain"
replace country2 = "SE" if country == "Sweden"
replace country2 = "CH" if country == "Switzerland"
replace country2 = "GB" if country == "UK"
replace country2 = "US" if country == "US"
set scheme s1mono

* Scatter plot illustrating the covariance.
* The two means are drawn as reference lines through the point cloud.
summarize enrol_cc
local y = r(mean)
summarize lfp07_moms
local x = r(mean)
* The four scatteri overlays draw a rectangle with hard-coded corners
* (y from 24.8 to 33.755, x from 63.1 to 68.085).
* NOTE(review): the corners presumably are one observation and the two means;
* confirm against the data before changing the input file.
* The duplicated mlabcolor(gs0) option was removed and the umlauts missing
* from the axis titles were restored.
graph twoway scatter enrol_cc lfp07_moms, ///
  mlab(country2) mlabposition(12) mlabsize(*1.2) yline(`y') ///
  xline(`x') ylab(0(10)70, nogrid labsize(*1.1)) yscale(range(0 70)) ///
  ylabel(,angle(0)) xlab(, labsize(*1.1)) ///
  msize(*1.1) mcolor(gs0) mlabcolor(gs0) ///
  xtitle(" " "Müttererwerbsbeteiligung", size(*1.1)) ///
  ytitle("Betreuungsquote für Kinder" "unter sechs Jahren", size(*1.1)) || ///
       scatteri 24.8 63.1 33.755 63.1, recast(line) lwidth(*1.5) ///
         lcolor(gs0) legend(off) || ///
       scatteri 24.8 68.085 33.755 68.085, recast(line) lwidth(*1.5) ///
         lcolor(gs0) legend(off) || ///
       scatteri 24.8 63.1 24.8 68.085, recast(line) lwidth(*1.5) ///
         lcolor(gs0) legend(off) || ///
       scatteri 33.755 68.085 33.755 63.1, recast(line) lwidth(*1.5) ///
         lcolor(gs0) legend(off)

graph export "OECD_lfp-enrolment_scatter1.pdf", replace


* Example 5.9, Fig. 5.6b
use "oecd_llg.dta", clear

* Same analysis without Germany and Austria: enr_neu is missing for both
* countries, which drops them from the plots below.
correlate enrol_cc lfp07_moms if country != "Germany" & country != "Austria"
gen enr_neu = enrol_cc if country != "Germany" & country != "Austria"
scatter enr_neu lfp07_moms

* Reproducing the scatter plot exactly as printed in the book takes more work.
* Two-letter marker labels; countries not listed below keep the placeholder ".".
generate str country2 = "."
replace country2 = "AU" if country == "Australia"
replace country2 = "AT" if country == "Austria"
replace country2 = "BE" if country == "Belgium"
replace country2 = "CA" if country == "Canada"
replace country2 = "DK" if country == "Denmark"
replace country2 = "FI" if country == "Finland"
replace country2 = "FR" if country == "France"
replace country2 = "DE" if country == "Germany"
replace country2 = "GR" if country == "Greece"
replace country2 = "IE" if country == "Ireland"
replace country2 = "IT" if country == "Italy"
replace country2 = "JP" if country == "Japan"
replace country2 = "NL" if country == "Netherlands"
replace country2 = "NZ" if country == "New Zealand"
replace country2 = "NO" if country == "Norway"
replace country2 = "PT" if country == "Portugal"
replace country2 = "ES" if country == "Spain"
replace country2 = "SE" if country == "Sweden"
replace country2 = "CH" if country == "Switzerland"
replace country2 = "GB" if country == "UK"
replace country2 = "US" if country == "US"
set scheme s1mono

* Means for the reference lines, computed without Germany and Austria.
summarize enr_neu
local y = r(mean)
summarize lfp07_moms if country!="Germany" & country!="Austria"
local x = r(mean)
* The duplicated mlabcolor(gs0) option was removed and the umlauts missing
* from the axis titles were restored.
graph twoway scatter enr_neu lfp07_moms, ///
  mlab(country2) mlabposition(12) mlabsize(*1.2) yline(`y') xline(`x') ///
  ylab(0(10)70, nogrid labsize(*1.1)) yscale(range(0 70)) ///
  ylabel(,angle(0)) xlab(, labsize(*1.1)) ///
  msize(*1.1) mcolor(gs0) mlabcolor(gs0) ///
  xtitle(" " "Müttererwerbsbeteiligung", size(*1.1)) ///
  ytitle("Betreuungsquote für Kinder" "unter sechs Jahren", size(*1.1))

graph export "OECD_lfp-enrolment_scatter2.pdf", replace



*** 5.3 - Associations between ordinal variables

* Example 5.12
* Ten observations of two ordinal items, entered directly.
clear
input up8603 up8604
1 2
2 2
2 2
2 2
2 4
3 3
3 3
3 5
4 3
5 3
end

* Spearman's rank correlation, Kendall's tau and Goodman-Kruskal's gamma.
spearman up8603 up8604
ktau up8603 up8604
tabulate up8603 up8604, gamma

* somersd is a user-written command. Install it from SSC only when it cannot
* be found, so the do-file does not need network access on every run.
capture which somersd
if _rc ssc install somersd
* somersd treats the first variable listed as the predictor.
somersd up8604 up8603


* Example 5.13
/* polychoric is a user-written command. findit only opens a Viewer window
   from which the package has to be installed by hand; it installs nothing
   itself. The search is therefore run only when the command is missing, and
   the do-file then stops with a clear message instead of failing on the
   polychoric call. */
capture which polychoric
if _rc {
    findit polychoric
    display as error "polychoric is not installed: install it from the Viewer window opened by findit, then rerun this do-file."
    exit 199
}
* Uses the two variables entered with input above, which are still in memory.
polychoric up8603 up8604



*** 5.4 - Analysis of variance

* Examples 5.14 and 5.15
use "oecd_llg.dta", clear

* Outcome and grouping variable, named once and reused below.
local outcome lfp08
local groups  EA1990

* One-way ANOVA, followed by effect sizes for the fitted model.
anova `outcome' `groups'
estat esize
* Robust tests for equality of variances across the groups.
robvar `outcome', by(`groups')


* ---------------------------------------------------------------------------- *


* End of the do-file.
exit
