
********************************************************************************
***  Do-File zum Lehrbuch "Statistik. Eine Einführung für Sozialwissenschaftler"
***  (c) 2014 Wolfgang Ludwig-Mayerhofer, Uta Liebeskind, Ferdinand Geißler
********************************************************************************

************************************
*** Kap. 6: Regressionsanalyse  ****
************************************

* Session setup: drop the data in memory, switch off output paging, pin the
* interpreter version and close a log file if one is still open.
clear
set more off
version 13.0    // adjust the version number if necessary
cap log close


* ---------------------------------------------------------------------------- *


*** 6.1 - Idee der linearen Regressionsanalyse

* Example 6.1 (Fig. 6.1)
use "oecd_llg.dta", clear

* Flag the observations that are non-missing on all three analysis variables.
generate touse = !missing(enrol_cc, lfp07_mom, pubexp_child05)

* Two-letter marker labels for the scatter plots. Every list entry has the
* form "country name=code"; the code is always the last two characters.
* Countries that are not listed keep the placeholder ".".
generate str country2 = "."
foreach pair in "Australia=AU" "Austria=AT" "Belgium=BE" "Canada=CA"          ///
                "Denmark=DK" "Finland=FI" "France=FR" "Germany=DE"            ///
                "Greece=GR" "Ireland=IE" "Italy=IT" "Japan=JP"                ///
                "Netherlands=NL" "New Zealand=NZ" "Norway=NO" "Portugal=PT"   ///
                "Spain=ES" "Sweden=SE" "Switzerland=CH" "UK=GB" "US=US" {
    local cname = substr("`pair'", 1, strlen("`pair'") - 3)
    local code  = substr("`pair'", -2, 2)
    replace country2 = "`code'" if country == "`cname'"
}

* Scatter plot of enrol_cc (y) against lfp07_mom (x) for the complete cases,
* with the two-letter country code printed above each marker.
* Changes against the previous version: the umlauts in the axis titles are
* restored, the repeated mlabcolor() is given once, and the two y-label
* option lists are merged into a single ylabel().
* NOTE(review): the umlauts require this do-file to be saved as UTF-8
* (Stata 14 or newer) -- confirm the target Stata version.
twoway scatter enrol_cc lfp07_mom if touse,                                   ///
    mlabel(country2) mlabposition(12) mlabsize(*1.5)                          ///
    msize(*1.5) mcolor(gs0) mlabcolor(gs0)                                    ///
    ylabel(0(10)70, nogrid labsize(*1.5) angle(0)) yscale(range(0 70))        ///
    xlabel(, labsize(*1.5))                                                   ///
    plotregion(style(none)) graphregion(color(white)) legend(off)             ///
    xtitle(" " "Müttererwerbsbeteiligung", size(*1.5))                        ///
    ytitle("Betreuungsquote für Kinder" "unter sechs Jahren", size(*1.5))
graph export "scatterplot1.pdf", replace

* Same scatter plot as scatterplot1.pdf, overlaid with the fitted line of the
* bivariate linear regression (lfit) on the same complete cases.
* Changes against the previous version: the umlauts in the axis titles are
* restored, the repeated mlabcolor() is given once, the two y-label option
* lists are merged, and the graph-level options are moved out of the lfit
* option list to the end of the twoway command.
* NOTE(review): the umlauts require this do-file to be saved as UTF-8
* (Stata 14 or newer) -- confirm the target Stata version.
twoway (scatter enrol_cc lfp07_mom if touse,                                  ///
            mlabel(country2) mlabposition(12) mlabsize(*1.5)                  ///
            msize(*1.5) mcolor(gs0) mlabcolor(gs0))                           ///
       (lfit enrol_cc lfp07_mom if touse,                                     ///
            lcolor(gs0) lwidth(*1.5)),                                        ///
    ylabel(0(10)70, nogrid labsize(*1.5) angle(0)) yscale(range(0 70))        ///
    xlabel(, labsize(*1.5))                                                   ///
    plotregion(style(none)) graphregion(color(white)) legend(off)             ///
    xtitle(" " "Müttererwerbsbeteiligung", size(*1.5))                        ///
    ytitle("Betreuungsquote für Kinder" "unter sechs Jahren", size(*1.5))
graph export "scatterplot2.pdf", replace

* Bivariate linear regression on the complete cases; the coefficients stored
* in _b[] are reused for the prediction below.
regress enrol_cc lfp07_mom if touse


* Figure 6.2
* Predicted value and decomposition of the deviation for the Netherlands.
* The country is looked up by name instead of by a fixed row number, so the
* result does not depend on the sort order of the data set. The assert stops
* the script if the data do not contain exactly one row for the Netherlands.
count if country == "Netherlands"
assert r(N) == 1

summarize lfp07_mom if country == "Netherlands", meanonly
scalar xd = r(mean)     // single observation, so the mean is its value
summarize enrol_cc if country == "Netherlands", meanonly
scalar yd = r(mean)

* Mean of y in the estimation sample (touse already implies that both
* variables are non-missing). summarize is r-class and leaves _b[] intact.
summarize enrol_cc if touse
scalar ymean = r(mean)

* scalar() makes sure the scalars are used even if a variable of the same
* name exists in the data.
scalar predd = _b[_cons] + scalar(xd) * _b[lfp07_mom]


display scalar(xd)		// x value of the Netherlands
display scalar(ymean)	// mean of the y variable
display scalar(yd)		// y value of the Netherlands
display scalar(predd)	// predicted y value for the Netherlands

display scalar(yd) - scalar(ymean) 		// total deviation, Netherlands
display scalar(predd) - scalar(ymean) 	// explained deviation, Netherlands
display scalar(yd) - scalar(predd) 		// unexplained deviation, Netherlands



*** 6.2 - Aufstellen eines Regressionsmodells in der Praxis

* Example 6.2 (Table 6.1)
* NOTE(review): the umlauts in the labels below were restored; they require
* this do-file to be saved as UTF-8 (Stata 14 or newer) -- confirm.
use "soep_llg.dta", clear

* Recode the school-leaving certificate into four ordered categories; codes
* 5 and 7 become the extended missing values .b and .c.
recode upsbil (6 = 1 "Kein Abschluss") (1 = 2 "Hauptschule") ///
(2 = 3 "Realschule") (3 4 = 4 "(Fach-) Abitur") ///
(5 = .b "Anderer Abschluss") (7 = .c "Noch kein Abschluss"), gen(edu)
label variable edu "Höchster Schulabschluss"

* One indicator variable per category: edu_1 ... edu_4
tabulate edu, generate(edu_)
label variable edu_1 "Kein Abschluss"
label variable edu_2 "Hauptschule"
label variable edu_3 "Realschule"
label variable edu_4 "(Fach-) Abitur"

label variable alter "Alter"
label variable bweight "Körpergewicht in kg"

* Declare the survey design: PSUs, sampling weights and strata
svyset psu [pweight=pw], strata(strat)
svydes

* Flag the observations that are non-missing on all model variables
generate touse = !missing(pcs, bweight, alter, geschl, edu_1, edu_2, edu_3, edu_4)

* Design-based linear regression; edu_4 is omitted and therefore serves as
* the reference category.
svy: regress pcs bweight alter geschl edu_1 edu_2 edu_3 if touse
estimates store mod1

/* With a few preparatory steps the regression table can also be saved as a
     ready-formatted LaTeX table. esttab is not an official Stata command; it
     is provided by the user-written estout package.
     NOTE(review): the umlauts in the labels below were restored; they require
     this do-file to be saved as UTF-8 (Stata 14 or newer) and a LaTeX
     document that reads UTF-8 input -- confirm. */

* Prefix the labels of the education indicators with "- " so that they appear
* as items below the category heading set in refcat().
foreach v of varlist edu_1 edu_2 edu_3 edu_4 {
    label variable `v' `"- `: variable label `v''"'
}

label variable pcs "Physische Gesundheit"
label variable bweight "Körpergewicht (in kg)"
label variable alter "Alter"
label variable geschl "Männlich \emph{(Ref.: Weiblich)}"

* The table reports standard errors, so the display format is passed through
* se() directly; the former t(%12.3f) next to se only suggested that
* t statistics were shown.
esttab mod1 ///
using "reg_table1.tex", ///
replace varlabels(_cons "Konstante") ///
refcat(bweight "\emph{Haupteffekt:}" ///
alter "\emph{Kontrollvariablen:}" ///
edu_1 "{Höchster Schulabschluss \emph{(Ref.: (Fach-) Abitur)}:}", nolabel) ///
title("Multiple Lineare Regression - Physische Gesundheit") ///
label ///
b(%12.3f) se(%12.3f) ///
stat(N r2, fmt(0 3) labels(`"Beobachtungen"' `"\(R^{2}\)"')) ///
star(* 0.10 ** 0.05 *** 0.01) ///
addnotes(Datenbasis: SOEP 2004; gewichtet)


* Example 6.3: model with a linear and a squared term for alter
svy: regress pcs bweight c.alter##c.alter geschl edu_1 edu_2 edu_3 if touse

* Predictions for alter = 17, 22, ..., 92
margins, at(alter=(17(5)95))

* Plot the predictions without confidence intervals. The vertical axis is
* labelled through l1title(); the regular y-axis title is hidden afterwards
* with gr_edit.
marginsplot, noci                                                             ///
    title(" ")                                                                ///
    plot1opts(msize(*1.2) lwidth(*1.2) mcolor(gs0))                           ///
    xlabel(20(10)95, labsize(*1.2))                                           ///
    ylabel(30 (5) 60, nogrid labsize(*1.2) angle(0))                          ///
    xtitle(" " "Alter", size(*1.2))                                           ///
    l1title("Vorhersage physische Gesundheit", size(*1.2))                    ///
    graphregion(color(white))
gr_edit .yaxis1.title.draw_view.setstyle, style(no)
graph export "marginsplot1.pdf", replace


* Example 6.4: interaction between bweight (continuous) and geschl
svy: regress pcs c.bweight##i.geschl c.alter edu_1 edu_2 edu_3 if touse

* Predictions for each level of geschl at bweight = 40, 50, ..., 120
margins geschl, at(bweight=(40(10)120))

* Line plot of the predictions without confidence intervals: first level of
* geschl dashed, second level solid. The vertical axis is labelled through
* l1title(); the regular y-axis title is hidden afterwards with gr_edit.
* Changes against the previous version: the umlaut in the x-axis title is
* restored and the two y-label option lists are merged into one ylabel().
* NOTE(review): the umlaut requires this do-file to be saved as UTF-8
* (Stata 14 or newer) -- confirm the target Stata version.
marginsplot, noci recast(line)                                                ///
    legend(size(*1.2))                                                        ///
    plot1opts(lpattern(dash) lwidth(*1.2))                                    ///
    plot2opts(lpattern(solid) lwidth(*1.2))                                   ///
    title(" ")                                                                ///
    xlabel(40(10)120, labsize(*1.2))                                          ///
    ylabel(42 (2) 52, nogrid angle(0) labsize(*1.2))                          ///
    l1title("Vorhersage physische Gesundheit", size(*1.2))                    ///
    xtitle(" " "Körpergewicht (in kg)", size(*1.2))                           ///
    graphregion(color(white))
gr_edit .yaxis1.title.draw_view.setstyle, style(no)
graph export "marginsplot2.pdf", replace



*** 6.3 - Inferenzstatistik und Regressionsdiagnostik

* Example 6.5
use "oecd_llg.dta", clear

* Flag the observations that are non-missing on all three analysis variables,
* so that both models below are estimated on the same cases.
generate touse = !missing(enrol_cc, lfp07_mom, pubexp_child05)

* Bivariate model first, then the model with the additional predictor
foreach rhs in "lfp07_mom" "lfp07_mom pubexp_child05" {
    regress enrol_cc `rhs' if touse
}


* ---------------------------------------------------------------------------- *


exit
