# Per-group linear regression.
#
# Reads dfs.xlsx, drops incomplete rows, then fits y ~ x1 + x2 + x3
# separately for every value of `obs` and collects the coefficients and
# R-squared of each fit in `tbl` (one row per obs value).

library(dplyr)
library(tidyr)

df <- readxl::read_excel("dfs.xlsx")

# Show the rows that have a missing value in any of the predictors.
# print() is explicit so the rows are also displayed under source().
df %>%
  filter(is.na(x1) | is.na(x2) | is.na(x3)) %>%
  print()

# Drop the columns that are not used below, then remove every row with a
# missing value in x1, x2 or x3.
df <- df %>%
  select(-year, -category) %>%
  drop_na(x1, x2, x3)

# Method #1: explicit for loop over the per-obs subsets.
df_lst <- split(df, df$obs)
n <- length(df_lst)

# Preallocate the result: one row per obs value, columns named up front.
# NOTE: this is a matrix, so a non-numeric `obs` coerces every cell
# (coefficients included) to character.
tbl <- matrix(
  NA_real_,
  nrow = n,
  ncol = 6,
  dimnames = list(
    NULL,
    c("obs", "intercept", "x1", "x2", "x3", "r.squared")
  )
)

# seq_len(n) is empty when there are no groups; 1:length(df_lst) would
# iterate over c(1, 0) and fail on df_lst[[1]].
for (i in seq_len(n)) {
  grp <- df_lst[[i]]

  # split() grouped by obs, so each subset holds exactly one obs value.
  obs <- unique(grp$obs)

  # The predictors are spelled out so the coefficient order always matches
  # the column names of `tbl`, independent of the column order in the data.
  fit <- lm(y ~ x1 + x2 + x3, data = grp)

  tbl[i, ] <- c(obs, coef(fit), summary(fit)$r.squared)
}
当然，我个人通常更习惯将 broom 与 tidyr 配合使用，这样可以根据实际情况筛选出需要的统计量。可以参考下面这个回答：
https://stackoverflow.com/questions/22713325/fitting-several-regression-models-with-dplyr