# Shared utilities library: plotting theme and save_plot
  source("c:/users/matous/desktop/cp oddluzeni data/kódy/utils.R")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
  library("tidyverse")
  library("AER")
## Warning: package 'AER' was built under R version 4.4.3
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## Loading required package: survival
  options(scipen=10)

  # Vote-count table (pst4p.csv, ';'-delimited). The columns used further down
  # are OBEC, KSTRANA, POC_HLASU and the HLASY_* preference-vote columns.
  vysledky <- read_csv2(
    file = "PS2025data20251005_csv/csv/pst4p.csv"
  )
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 179877 Columns: 51── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl (51): ID_OKRSKY, TYP_FORM, OPRAVA, CHYBA, OKRES, OBEC, OKRSEK, KC_1, KST...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  vysledky
## # A tibble: 179,877 × 51
##    ID_OKRSKY TYP_FORM OPRAVA CHYBA OKRES   OBEC OKRSEK  KC_1 KSTRANA POC_HLASU
##        <dbl>    <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl>   <dbl>     <dbl>
##  1         1        2      0     0  7204 500011      1     6       3         2
##  2         1        2      0     0  7204 500011      1     6       4         1
##  3         1        2      0     0  7204 500011      1     6       6        41
##  4         1        2      0     0  7204 500011      1     6       8         5
##  5         1        2      0     0  7204 500011      1     6      10         4
##  6         1        2      0     0  7204 500011      1     6      11       114
##  7         1        2      0     0  7204 500011      1     6      12         5
##  8         1        2      0     0  7204 500011      1     6      14         1
##  9         1        2      0     0  7204 500011      1     6      15         5
## 10         1        2      0     0  7204 500011      1     6      16        42
## # ℹ 179,867 more rows
## # ℹ 41 more variables: KC_2 <dbl>, HLASY_01 <dbl>, HLASY_02 <dbl>,
## #   HLASY_03 <dbl>, HLASY_04 <dbl>, HLASY_05 <dbl>, HLASY_06 <dbl>,
## #   HLASY_07 <dbl>, HLASY_08 <dbl>, HLASY_09 <dbl>, HLASY_10 <dbl>,
## #   HLASY_11 <dbl>, HLASY_12 <dbl>, HLASY_13 <dbl>, HLASY_14 <dbl>,
## #   HLASY_15 <dbl>, HLASY_16 <dbl>, HLASY_17 <dbl>, HLASY_18 <dbl>,
## #   HLASY_19 <dbl>, HLASY_20 <dbl>, HLASY_21 <dbl>, HLASY_22 <dbl>, …
  # Candidate register (psrk.csv), read as windows-1250 so that the Czech
  # names and titles decode correctly.
  kandidati <- read_csv2(
    file = "PS2025reg20251005_csv/csv/psrk.csv",
    locale = locale(date_names = "cs", encoding = "windows-1250")
  )
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 4475 Columns: 19── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr  (9): JMENO, PRIJMENI, TITULPRED, TITULZA, POVOLANI, BYDLISTEN, PLATNOST...
## dbl (10): VOLKRAJ, KSTRANA, PORCISLO, VEK, PSTRANA, NSTRANA, POCHLASU, SKRUT...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  kandidati
## # A tibble: 4,475 × 19
##    VOLKRAJ KSTRANA PORCISLO JMENO    PRIJMENI  TITULPRED TITULZA   VEK POVOLANI 
##      <dbl>   <dbl>    <dbl> <chr>    <chr>     <chr>     <chr>   <dbl> <chr>    
##  1       1       1        1 Miloslav Zientek   <NA>      <NA>       49 IT exper…
##  2       1       1        2 Michal   Kotrba    <NA>      <NA>       46 manažer …
##  3       1       1        3 Adam     Křikava   <NA>      <NA>       51 majitel …
##  4       1       1        4 Miroslav Stejskal  <NA>      <NA>       46 manažer …
##  5       1       1        5 Tereza   Sodomková <NA>      <NA>       38 manažerk…
##  6       1       1        6 Dominika Švarcová  <NA>      <NA>       27 marketin…
##  7       1       3        1 Vladimír Štěpán    Ing.      <NA>       74 expert v…
##  8       1       3        2 Josef    Smítka    <NA>      <NA>       59 jednatel…
##  9       1       3        3 Jiří     Hudec     <NA>      <NA>       66 důchodce 
## 10       1       3        4 Radek    Vaisocher Bc.       <NA>       51 podnikat…
## # ℹ 4,465 more rows
## # ℹ 10 more variables: BYDLISTEN <chr>, PSTRANA <dbl>, NSTRANA <dbl>,
## #   PLATNOST <chr>, POCHLASU <dbl>, POCPROC <chr>, MANDAT <chr>,
## #   SKRUTINIUM <dbl>, PORADIMAND <dbl>, PORADINAHR <dbl>
  # Party/list register (psrkl.csv); ZKRATKAK8 is later used as the short
  # party label joined on KSTRANA.
  strany <- read_csv2(
    file = "PS2025reg20251005_csv/csv/psrkl.csv",
    locale = locale(date_names = "cs", encoding = "windows-1250")
  )
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 26 Columns: 13── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (8): NAZEVCELK, NAZEV_STRK, ZKRATKAK30, ZKRATKAK8, SLOZENI, STAVREG, PLA...
## dbl (4): KSTRANA, VSTRANA, POCSTRVKO, POCMANDCR
## lgl (1): SLOZNEPLAT
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  strany
## # A tibble: 26 × 13
##    KSTRANA VSTRANA NAZEVCELK   NAZEV_STRK ZKRATKAK30 ZKRATKAK8 POCSTRVKO SLOZENI
##      <dbl>   <dbl> <chr>       <chr>      <chr>      <chr>         <dbl> <chr>  
##  1       1    1706 "\"Rebelov… "\"Rebelo… "\"Rebelo… Rebelové          1 1706   
##  2       2    1190 "Moravské … "Moravské… "Moravské… MZH               1 1190   
##  3       3    1282 "Jasný Sig… "Jasný Si… "Jasný Si… JaSaN             1 1282   
##  4       4    1704 "VÝZVA 202… "VÝZVA 20… "VÝZVA 20… VÝZVA             1 1704   
##  5       5    1061 "SMS – Stá… "SMS – St… "SMS – St… SMSka             1 1061   
##  6       6    1114 "Svoboda a… "Svoboda … "Svoboda … SPD               1 1114   
##  7       7     759 "ČSSD – Če… "ČSSD – Č… "Česká su… ČSSD              1 759    
##  8       8    1245 "PŘÍSAHA o… "PŘÍSAHA … "PŘÍSAHA … PŘÍSAHA           1 1245   
##  9       9      74 "Levice"    "Levice"   "Levice"   Levice            1 074    
## 10      10    1283 "Česká rep… "Česká re… "Česká re… ČR1               1 1283   
## # ℹ 16 more rows
## # ℹ 5 more variables: STAVREG <chr>, PLAT_STR <chr>, SLOZNEPLAT <lgl>,
## #   POCMANDCR <dbl>, NAZEVPLNY <chr>
  # Code list of electoral regions (psvolkr.csv): VOLKRAJ code and region name.
  kraje <- read_csv2(
    file = "PS2025ciselniky20251005_csv/csv/psvolkr.csv",
    locale = locale(date_names = "cs", encoding = "windows-1250")
  )
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 14 Columns: 4── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): NAZVOLKRAJ
## dbl (3): VOLKRAJ, KRAJ, MAXKAND
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  kraje
## # A tibble: 14 × 4
##    VOLKRAJ NAZVOLKRAJ       KRAJ MAXKAND
##      <dbl> <chr>           <dbl>   <dbl>
##  1       1 Hl. m. Praha     1100      36
##  2       2 Středočeský      2100      34
##  3       3 Jihočeský        3100      22
##  4       4 Plzeňský         3200      20
##  5       5 Karlovarský      4100      14
##  6       6 Ústecký          4200      26
##  7       7 Liberecký        5100      17
##  8       8 Královéhradecký  5200      20
##  9       9 Pardubický       5300      19
## 10      10 Vysočina         6100      20
## 11      11 Jihomoravský     6200      34
## 12      12 Olomoucký        7100      23
## 13      13 Zlínský          7200      22
## 14      14 Moravskoslezský  8100      36
  # Municipality code list (pscoco.csv), reduced to the two columns needed to
  # map a municipality code (OBEC) to its electoral region (VOLKRAJ).
  obce <- read_csv2(
    file = "PS2025ciselniky20251005_csv/csv/pscoco.csv",
    locale = locale(date_names = "cs", encoding = "windows-1250")
  ) %>%
    select(OBEC, VOLKRAJ)
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 6389 Columns: 10── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): NAZEVOBCE
## dbl (9): KRAJ, OKRES, CPOU, ORP, OBEC, VOLKRAJ, MINOKRSEK1, MAXOKRSEK1, OBEC...
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  # KSTRANA codes of the party lists kept in the analysis (used to filter
  # the results table).
  zahrnute_strany <- c(
    6, 11, 16, 20,
    22, 23, 25
  )
   
    
    
  # Rank the highest academic title found in one title string.
  #
  # titul: a single string holding a candidate's titles (e.g. the TITULPRED or
  #   TITULZA value), or NA when there is none.
  # Returns a number: 0 = no / unrecognised title, 1 = Bc./BcA./DiS.,
  #   2 = master-level titles (Ing., Mgr., MUDr., ...), 3 = Ph.D.,
  #   4 = doc./CSc., 5 = prof.
  #
  # Titles are matched as whole words, case-insensitively, with an optional
  # inner dot where the abbreviation has one. The previous patterns used
  # unescaped dots (so "prof." also matched e.g. "profe") and spelled three
  # titles in a way that never matches their usual form ("Csc.", "RnDr.",
  # "PaeDr." instead of "CSc.", "RNDr.", "PaedDr.").
  nejvyssi_titul <- function(titul) {
    if (is.na(titul)) {
      return(0)
    }

    # Ordered from the highest level down: the first matching level wins.
    vzory <- c(
      "5" = "prof",
      "4" = "doc|CSc",
      "3" = "Ph\\.?D",
      "2" = paste0(
        "Ing|Mgr|M\\.?A|MBA|LL\\.?M|MUDr|JUDr|PhDr|RNDr|MgA|MVDr|",
        "PharmDr|Paed?Dr|MSc"
      ),
      "1" = "Bc|BcA|DiS"
    )

    for (uroven in names(vzory)) {
      # \b on both sides keeps e.g. "Bc" from matching inside a longer word.
      vzor <- paste0("\\b(", vzory[[uroven]], ")\\b")
      if (grepl(vzor, titul, ignore.case = TRUE, perl = TRUE)) {
        return(as.numeric(uroven))
      }
    }

    0
  }
  
  # One row per candidate of the included parties: vote rows are summed to
  # (electoral region, party), the HLASY_nn columns are turned into one row
  # per list position, and candidate / party attributes are joined on.
  vysledky_clean <- vysledky %>%
    select(OBEC, KSTRANA, POC_HLASU, starts_with("HLASY_")) %>%
    inner_join(obce, by = join_by(OBEC)) %>%
    filter(KSTRANA %in% zahrnute_strany) %>%
    summarise(
      across(c(POC_HLASU, starts_with("HLASY_")), sum),
      .by = c(VOLKRAJ, KSTRANA)
    ) %>%
    # HLASY_07 -> list position 7
    pivot_longer(
      cols = starts_with("HLASY_"),
      names_to = "PORCISLO",
      values_to = "preferencni_hlasy",
      names_transform = function(nazev) as.integer(str_remove(nazev, "HLASY_"))
    ) %>%
    mutate(procento_preferencnich_hlasu = preferencni_hlasy / POC_HLASU) %>%
    inner_join(
      select(
        kandidati,
        VOLKRAJ, KSTRANA, PORCISLO, JMENO, PRIJMENI, TITULPRED, TITULZA, VEK, PSTRANA
      ),
      by = join_by(VOLKRAJ, KSTRANA, PORCISLO)
    ) %>%
    inner_join(
      select(strany, KSTRANA, STRANA = ZKRATKAK8),
      by = join_by(KSTRANA)
    ) %>%
    mutate(
      # Highest title across the titles written before and after the name.
      nejvyssi_titul = map2_int(
        TITULPRED, TITULZA,
        function(pred, za) max(nejvyssi_titul(pred), nejvyssi_titul(za))
      ),
      # Gender is derived from the surname ending, plus a few listed surnames.
      pohlavi = factor(
        if_else(
          str_detect(PRIJMENI, "(ová|á|Demetrashivili|Decroix|Rázga|Crkvenjaš)$"),
          "F",
          "M"
        ),
        levels = c("M", "F")
      )
    ) %>%
    # Position of the candidate among same-gender candidates on the same list.
    mutate(
      pohlavi_na_kandidatce = row_number(PORCISLO),
      .by = c(VOLKRAJ, KSTRANA, pohlavi)
    )

  vysledky_clean
## # A tibble: 2,396 × 16
##    VOLKRAJ KSTRANA POC_HLASU PORCISLO preferencni_hlasy procento_preferencnich…¹
##      <dbl>   <dbl>     <dbl>    <dbl>             <dbl>                    <dbl>
##  1      13       6     27784        1              3259                   0.117 
##  2      13       6     27784        2              1871                   0.0673
##  3      13       6     27784        3               785                   0.0283
##  4      13       6     27784        4              1089                   0.0392
##  5      13       6     27784        5               685                   0.0247
##  6      13       6     27784        6               435                   0.0157
##  7      13       6     27784        7               310                   0.0112
##  8      13       6     27784        8              2253                   0.0811
##  9      13       6     27784        9               552                   0.0199
## 10      13       6     27784       10               439                   0.0158
## # ℹ 2,386 more rows
## # ℹ abbreviated name: ¹​procento_preferencnich_hlasu
## # ℹ 10 more variables: JMENO <chr>, PRIJMENI <chr>, TITULPRED <chr>,
## #   TITULZA <chr>, VEK <dbl>, PSTRANA <dbl>, STRANA <chr>,
## #   nejvyssi_titul <int>, pohlavi <fct>, pohlavi_na_kandidatce <int>
  # Tobit model (left-censored at 0) of a candidate's share of preference votes
  # on list-position band, age <= 30, highest title and gender, with the age
  # and gender effects interacted with party. Restricted to positions 1-15.
  #
  # include.lowest = TRUE is required: with breaks c(1, 3, 5, 10, 15) the first
  # interval is otherwise (1,3], so PORCISLO == 1 becomes NA and every list
  # leader is silently dropped. The summary printed below in the rendered file
  # predates this change; it reports 98 rows deleted due to missingness.
  # To exclude list leaders on purpose, filter PORCISLO > 1 explicitly instead.
  model <- tobit(
    procento_preferencnich_hlasu ~
      I(cut(PORCISLO, c(1, 3, 5, 10, 15), include.lowest = TRUE)) +
      I(VEK <= 30) * STRANA +
      nejvyssi_titul +
      pohlavi * STRANA,
    data = vysledky_clean %>% filter(PORCISLO <= 15),
    left = 0
  )

  summary(model)
## 
## Call:
## tobit(formula = procento_preferencnich_hlasu ~ I(cut(PORCISLO, 
##     c(1, 3, 5, 10, 15))) + I(VEK <= 30) * STRANA + nejvyssi_titul + 
##     pohlavi * STRANA, left = 0, data = vysledky_clean %>% filter(PORCISLO <= 
##     15))
## 
## Observations: (98 observations deleted due to missingness)
##          Total  Left-censored     Uncensored Right-censored 
##           1365              2           1363              0 
## 
## Coefficients:
##                                               Estimate Std. Error  z value
## (Intercept)                                  0.0456026  0.0024427   18.669
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(3,5]   -0.0207586  0.0020745  -10.007
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(5,10]  -0.0363358  0.0017450  -20.823
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(10,15] -0.0446298  0.0017664  -25.265
## I(VEK <= 30)TRUE                             0.0041220  0.0063788    0.646
## STRANAAUTO                                   0.0005756  0.0024613    0.234
## STRANAPiráti                                 0.0173375  0.0026742    6.483
## STRANASPD                                    0.0034914  0.0024901    1.402
## STRANASPOLU                                  0.0281675  0.0024588   11.456
## STRANAStačilo!                               0.0155291  0.0024754    6.273
## STRANASTAN                                   0.0122896  0.0025904    4.744
## nejvyssi_titul                               0.0028495  0.0005638    5.054
## pohlaviF                                     0.0034035  0.0033186    1.026
## I(VEK <= 30)TRUE:STRANAAUTO                  0.0045219  0.0082317    0.549
## I(VEK <= 30)TRUE:STRANAPiráti                0.0261316  0.0076116    3.433
## I(VEK <= 30)TRUE:STRANASPD                   0.0045764  0.0097586    0.469
## I(VEK <= 30)TRUE:STRANASPOLU                 0.0009624  0.0078684    0.122
## I(VEK <= 30)TRUE:STRANAStačilo!             -0.0053179  0.0088262   -0.603
## I(VEK <= 30)TRUE:STRANASTAN                  0.0226573  0.0077453    2.925
## STRANAAUTO:pohlaviF                         -0.0014557  0.0051763   -0.281
## STRANAPiráti:pohlaviF                        0.0304413  0.0044703    6.810
## STRANASPD:pohlaviF                          -0.0013954  0.0047059   -0.297
## STRANASPOLU:pohlaviF                         0.0038348  0.0048932    0.784
## STRANAStačilo!:pohlaviF                      0.0082897  0.0049128    1.687
## STRANASTAN:pohlaviF                          0.0105757  0.0045604    2.319
## Log(scale)                                  -3.8888377  0.0191607 -202.959
##                                                     Pr(>|z|)    
## (Intercept)                                          < 2e-16 ***
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(3,5]            < 2e-16 ***
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(5,10]           < 2e-16 ***
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(10,15]          < 2e-16 ***
## I(VEK <= 30)TRUE                                    0.518151    
## STRANAAUTO                                          0.815100    
## STRANAPiráti                                0.00000000008975 ***
## STRANASPD                                           0.160869    
## STRANASPOLU                                          < 2e-16 ***
## STRANAStačilo!                              0.00000000035348 ***
## STRANASTAN                                  0.00000209240244 ***
## nejvyssi_titul                              0.00000043296589 ***
## pohlaviF                                            0.305085    
## I(VEK <= 30)TRUE:STRANAAUTO                         0.582777    
## I(VEK <= 30)TRUE:STRANAPiráti                       0.000597 ***
## I(VEK <= 30)TRUE:STRANASPD                          0.639098    
## I(VEK <= 30)TRUE:STRANASPOLU                        0.902649    
## I(VEK <= 30)TRUE:STRANAStačilo!                     0.546829    
## I(VEK <= 30)TRUE:STRANASTAN                         0.003441 ** 
## STRANAAUTO:pohlaviF                                 0.778534    
## STRANAPiráti:pohlaviF                       0.00000000000978 ***
## STRANASPD:pohlaviF                                  0.766837    
## STRANASPOLU:pohlaviF                                0.433217    
## STRANAStačilo!:pohlaviF                             0.091532 .  
## STRANASTAN:pohlaviF                                 0.020393 *  
## Log(scale)                                           < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale: 0.02047 
## 
## Gaussian distribution
## Number of Newton-Raphson Iterations: 5 
## Log-likelihood:  3364 on 26 Df
## Wald-statistic:  1656 on 24 Df, p-value: < 2.22e-16
  # Same tobit specification restricted to Piráti and STAN, additionally
  # interacting gender with the highest title.
  #
  # include.lowest = TRUE keeps PORCISLO == 1 in the first position band; with
  # the default the first interval is (1,3] and list leaders turn into NA and
  # are dropped. The summary printed below in the rendered file predates this
  # change; it reports 28 rows deleted due to missingness.
  summary(tobit(
    procento_preferencnich_hlasu ~
      I(cut(PORCISLO, c(1, 3, 5, 10, 15), include.lowest = TRUE)) +
      I(VEK <= 30) * STRANA +
      pohlavi * nejvyssi_titul +
      pohlavi * STRANA,
    data = vysledky_clean %>%
      filter(PORCISLO <= 15, STRANA %in% c("Piráti", "STAN")),
    left = 0
  ))
## 
## Call:
## tobit(formula = procento_preferencnich_hlasu ~ I(cut(PORCISLO, 
##     c(1, 3, 5, 10, 15))) + I(VEK <= 30) * STRANA + pohlavi * 
##     nejvyssi_titul + pohlavi * STRANA, left = 0, data = vysledky_clean %>% 
##     filter(PORCISLO <= 15, STRANA %in% c("Piráti", "STAN")))
## 
## Observations: (28 observations deleted due to missingness)
##          Total  Left-censored     Uncensored Right-censored 
##            390              0            390              0 
## 
## Coefficients:
##                                               Estimate Std. Error  z value
## (Intercept)                                  0.0686965  0.0043477   15.800
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(3,5]   -0.0255292  0.0044819   -5.696
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(5,10]  -0.0451459  0.0037580  -12.013
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(10,15] -0.0554026  0.0037621  -14.727
## I(VEK <= 30)TRUE                             0.0316995  0.0049358    6.422
## STRANASTAN                                  -0.0052068  0.0031955   -1.629
## pohlaviF                                     0.0310849  0.0050674    6.134
## nejvyssi_titul                               0.0043963  0.0014976    2.936
## I(VEK <= 30)TRUE:STRANASTAN                 -0.0036620  0.0070385   -0.520
## pohlaviF:nejvyssi_titul                      0.0009958  0.0025056    0.397
## STRANASTAN:pohlaviF                         -0.0191228  0.0049896   -3.833
## Log(scale)                                  -3.7492350  0.0358057 -104.710
##                                                   Pr(>|z|)    
## (Intercept)                                        < 2e-16 ***
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(3,5]   0.000000012258 ***
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(5,10]         < 2e-16 ***
## I(cut(PORCISLO, c(1, 3, 5, 10, 15)))(10,15]        < 2e-16 ***
## I(VEK <= 30)TRUE                            0.000000000134 ***
## STRANASTAN                                        0.103221    
## pohlaviF                                    0.000000000855 ***
## nejvyssi_titul                                    0.003330 ** 
## I(VEK <= 30)TRUE:STRANASTAN                       0.602869    
## pohlaviF:nejvyssi_titul                           0.691052    
## STRANASTAN:pohlaviF                               0.000127 ***
## Log(scale)                                         < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Scale: 0.02354 
## 
## Gaussian distribution
## Number of Newton-Raphson Iterations: 5 
## Log-likelihood: 908.8 on 12 Df
## Wald-statistic: 502.3 on 10 Df, p-value: < 2.22e-16
# Coefficient table of `model`: one row per term with estimate, standard
# error, z statistic and p-value (columns 1-4 of the summary matrix).
koef <- local({
  koef_matice <- summary(model)$coefficients
  tibble(
    variable = rownames(koef_matice),
    estimate = koef_matice[, 1],
    se = koef_matice[, 2],
    z = koef_matice[, 3],
    p = koef_matice[, 4]
  )
})

  # Two-sided normal critical values for the 90 %, 95 % and 99 % intervals.
  crit_val90 <- qnorm(0.95)
  crit_val95 <- qnorm(0.975)
  crit_val99 <- qnorm(0.995)

  # Plot the party x gender interaction coefficients with three nested
  # confidence intervals drawn on top of each other.
  # NOTE(review): the ":pohlavi" filter keeps only interaction terms, which all
  # contain "STRANA<name>:", so the replace_na(..., "ANO") fallback looks
  # unreachable and the baseline party is not shown — confirm this is intended.
  save_plot(
    "Kroužky ženy",
    plot = koef %>%
      filter(str_detect(variable, ":pohlavi")) %>%
      mutate(
        strana = replace_na(str_extract(variable, "STRANA([\\w!]+):", 1), "ANO")
      ) %>%
      ggplot(aes(x = reorder(strana, desc(estimate)), y = estimate)) +
      geom_hline(yintercept = 0) +
      geom_pointrange(
        aes(ymin = estimate - crit_val90 * se, ymax = estimate + crit_val90 * se),
        alpha = 0.5, linewidth = 1.25, size = 0.75, colour = "#515"
      ) +
      geom_pointrange(
        aes(ymin = estimate - crit_val95 * se, ymax = estimate + crit_val95 * se),
        alpha = 0.5, linewidth = 1.25, size = 0.75, colour = "#515"
      ) +
      geom_pointrange(
        aes(ymin = estimate - crit_val99 * se, ymax = estimate + crit_val99 * se),
        alpha = 0.5, linewidth = 1.25, size = 0.75, colour = "#515"
      ) +
      scale_y_continuous(
        labels = scales::percent_format(accuracy = 1),
        breaks = c(seq(-0.05, 0.05, 0.01))
      ) +
      theme_mf() +
      labs(
        title = "Ženy a mladé kandidáty\\*ky kroužkovali častěji pouze voliči Pirátů a STANu",
        subtitle = "",
        caption = "Výsledky očištěné od vlivu pořadí na kandidátce, věku a dosaženého vzdělání pomocí Tobit modelu.\nLinky zobrazují statistickou chybu s 90%, 95% a 99% CI.",
        x = "",
        y = "Boost preferenčních hlasů u žen"
      )
  )

# Share of women per party among the first 15 list positions, computed twice:
# as a share of preference votes and as a share of candidates. The result has
# one row per (STRANA, name) with the ratio in `pomer_zen`.
pohlavi <- vysledky_clean %>%
  filter(PORCISLO <= 15) %>%
  summarise(
    preferencni_hlasy = sum(preferencni_hlasy),
    pocet_kandidatu = n(),
    .by = c(STRANA, pohlavi)
  ) %>%
  pivot_longer(
    cols = c(preferencni_hlasy, pocet_kandidatu),
    names_to = "name",
    values_to = "value"
  ) %>%
  # One column per gender (M, F) so the ratio can be taken row-wise.
  pivot_wider(names_from = pohlavi, values_from = value) %>%
  mutate(pomer_zen = `F` / (`F` + M), .keep = "unused")

  pohlavi
## # A tibble: 14 × 3
##    STRANA   name              pomer_zen
##    <chr>    <chr>                 <dbl>
##  1 SPD      preferencni_hlasy     0.253
##  2 SPD      pocet_kandidatu       0.268
##  3 SPOLU    preferencni_hlasy     0.221
##  4 SPOLU    pocet_kandidatu       0.206
##  5 Piráti   preferencni_hlasy     0.509
##  6 Piráti   pocet_kandidatu       0.388
##  7 AUTO     preferencni_hlasy     0.138
##  8 AUTO     pocet_kandidatu       0.163
##  9 ANO      preferencni_hlasy     0.278
## 10 ANO      pocet_kandidatu       0.282
## 11 STAN     preferencni_hlasy     0.455
## 12 STAN     pocet_kandidatu       0.335
## 13 Stačilo! preferencni_hlasy     0.370
## 14 Stačilo! pocet_kandidatu       0.220
  # Plot "Pozice a křížky": mean share of preference votes for the n-th man /
  # n-th woman on a list, for Piráti, STAN and all other parties pooled.
  #
  # The within-gender rank is computed per party list (VOLKRAJ x KSTRANA)
  # BEFORE the parties are lumped into "Ostatní". Ranking after the lumping
  # (as before) ordered the candidates of five different lists as if they were
  # one list, so the "Ostatní" panel did not show the n-th woman/man of a list.
  save_plot(
    "Pozice a křížky",
    plot = vysledky_clean %>%
      mutate(
        .by = c(VOLKRAJ, KSTRANA, pohlavi),
        nta_zena_muz = row_number(PORCISLO)
      ) %>%
      mutate(
        STRANA = case_when(
          STRANA == "Piráti" ~ "Piráti",
          STRANA == "STAN" ~ "STAN",
          .default = "Ostatní"
        ),
        pohlavi = if_else(pohlavi == "M", "Muži", "Ženy")
      ) %>%
      summarise(
        .by = c(STRANA, pohlavi, nta_zena_muz),
        procento_preferencnich_hlasu = mean(procento_preferencnich_hlasu)
      ) %>%
      filter(nta_zena_muz <= 10) %>%
      ggplot(aes(
        x = as.factor(nta_zena_muz),
        y = procento_preferencnich_hlasu,
        colour = pohlavi,
        group = pohlavi
      )) +
      geom_point() +
      geom_line() +
      facet_wrap(~fct_rev(STRANA)) +
      #facet_grid(fct_rev(STRANA) ~ pohlavi) +
      scale_y_continuous(labels = scales::percent, limits = c(0, NA)) +
      scale_colour_manual(values = c("#55a", "#e55")) +
      theme_mf() +
      labs(
        title = "Nezdá se, že by voliči Pirátů a STANu kroužkovali jen první dvě / čtyři ženy -- distribuci kroužků mají podobnou jako u mužů",
        x = "Pořadí kandidáta*ky daného genderu na kandidátce",
        y = "Průměrné procento preferenčních hlasů",
        caption = "Průměrné procento preferenčních hlasů podle pořadí na kandidátce napříč všemi volebními kraji",
        colour = ""
      ) +
      theme(legend.position = "top"),
    save_width = 8,
    save_height = 5
  )

pohlavi
## # A tibble: 14 × 3
##    STRANA   name              pomer_zen
##    <chr>    <chr>                 <dbl>
##  1 SPD      preferencni_hlasy     0.253
##  2 SPD      pocet_kandidatu       0.268
##  3 SPOLU    preferencni_hlasy     0.221
##  4 SPOLU    pocet_kandidatu       0.206
##  5 Piráti   preferencni_hlasy     0.509
##  6 Piráti   pocet_kandidatu       0.388
##  7 AUTO     preferencni_hlasy     0.138
##  8 AUTO     pocet_kandidatu       0.163
##  9 ANO      preferencni_hlasy     0.278
## 10 ANO      pocet_kandidatu       0.282
## 11 STAN     preferencni_hlasy     0.455
## 12 STAN     pocet_kandidatu       0.335
## 13 Stačilo! preferencni_hlasy     0.370
## 14 Stačilo! pocet_kandidatu       0.220
# Plot "Poměry kandidátek a kroužků": per party, the share of preference votes
# given to women next to the share of women among candidates, with a dashed
# 50 % reference line.
save_plot(
  "Poměry kandidátek a kroužků",
  plot = pohlavi %>%
    ggplot(aes(
      x = reorder(STRANA, desc(pomer_zen)),
      y = pomer_zen,
      fill = fct_rev(name)
    )) +
    geom_hline(yintercept = 0.5, linetype = "dashed") +
    geom_col(position = "dodge") +
    theme_mf() +
    scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
    # `labels` is spelled out in full; the previous `label =` only worked
    # through partial argument matching. The label order follows
    # fct_rev(name): preferencni_hlasy first, then pocet_kandidatu.
    scale_fill_manual(
      values = c("#338", "#a55"),
      labels = c("Procento preferenčních hlasů ženám", "Procento žen na kandidátkách")
    ) +
    labs(
      title = "I u Pirátů a STANu udělili voliči ženám jen asi polovinu preferenčních hlasů. Každá žena ale dostala více hlasů, protože se hlasy rozdělily mezi méně žen",
      x = "",
      y = "", #"Procento preferenčních hlasů pro ženy\nProcento žen na kandidátkách",
      fill = ""
    ) +
    theme(legend.position = "top")
)

# Plot "Kroužky pohlaví": per party, the share of preference votes given to
# women, as single bars with a dashed 50 % reference line.
save_plot(
  "Kroužky pohlaví",
  plot = filter(pohlavi, name == "preferencni_hlasy") %>%
    ggplot(aes(x = reorder(STRANA, desc(pomer_zen)), y = pomer_zen)) +
    geom_hline(yintercept = 0.5, linetype = "dashed") +
    geom_col(fill = "#338") +
    theme_mf() +
    scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
    labs(
      title = "I Piráti a STAN udělili ženám jen asi polovinu preferenčních hlasů. Každá žena ale dostala více hlasů, protože se hlasy rozdělily mezi méně žen",
      x = "",
      y = "Procento preferenčních hlasů pro ženy"
    )
)