我的数据是2004年1月到2019年12月的月度时间序列数据,共192个样本点。我想用for循环进行1到7步预测,采用expanding(扩展)窗口的方式滚动训练数据,模型用的是SARIMAX模型。因为是多步预测,我想先把解释变量预测出来,再用它们去预测被解释变量,但是我写的代码在预测x的时候出现了问题。问了很久AI,还是没搞懂问题出在哪。求求大家帮帮忙呀  [em17]
下面是我的代码:
# Sample window: rows 1-192 of `usadata`, i.e. 2004-01 through 2019-12.
# `usadata_y` keeps columns 5 and 9; the forecasting loop later in this script
# trains on its second column (column 9 of `usadata`) and compares
# exp(prediction) against column 5, so column 9 is presumably the log of
# column 5 -- TODO confirm against the data dictionary.
usadata_y <- usadata[seq_len(192), c(5, 9)]
# Candidate explanatory variables (10 columns).
usadata_x <- usadata[seq_len(192), c(10:14, 16, 18, 31:33)]
# Min-max normalisation, applied column by column.
#
# Args:
#   mat: numeric matrix or all-numeric data frame (one variable per column).
# Returns:
#   For inputs with two or more rows, a numeric matrix of the same shape in
#   which every column is rescaled to [0, 1]. A constant column (max == min)
#   is mapped to all zeros instead of the NaN that 0 / 0 would produce, so the
#   result can still be passed on as a regressor matrix. Columns containing
#   NA come back as all NA, as before.
min_max_norm_col <- function(mat) {
  apply(mat, 2, function(x) {
    rng <- range(x)
    span <- rng[2] - rng[1]
    if (isTRUE(span == 0)) {
      # Constant column: x - min(x) is already all zeros (names preserved).
      return(x - rng[1])
    }
    (x - rng[1]) / span
  })
}
## 1. Log-scale response: forecast the regressors first, then the response.
##
## Expanding-window evaluation. For every horizon `steps` (1..7) and every
## forecast origin `i`, the response is regressed on the regressors lagged by
## `steps` months, each regressor is forecast with its own ARIMA model, and
## the resulting regressor path is fed to the SARIMAX forecast.
##
## Result arrays are indexed [origin i, horizon steps(, regressor j)].
n_cols <- ncol(usadata_x)
arima_predict_x <- array(NA, c(36, 7, n_cols)) # steps: 7, obs: 36
test_x <- array(NA, c(36, 7, n_cols))
sarimax_mutil_step_forecast_values <- array(NA, c(36, 7))
sarimax_predict <- array(NA, c(36, 7))
real_values <- array(NA, c(36, 7))
dimnames(test_x) <- list(NULL, NULL, colnames(usadata_x))

for (steps in 1:7) {
  for (i in 1:(37 - steps)) {
    cat("steps", steps, "观测点", i, "\n")

    train_end <- 155 + i          # last observed row for this origin
    n_train <- train_end - steps  # rows left after lagging by `steps`

    # Response training set. It starts at row 1 + steps, so the ts() start is
    # shifted by the same number of months to keep the calendar aligned.
    train_data <- usadata_y[(1 + steps):train_end, 2]
    train_data_in <- ts(train_data, frequency = 12,
                        start = c(2004, 1 + steps))

    # Normalise using only data available at the forecast origin.
    datax1 <- min_max_norm_col(usadata_x[1:train_end, ])
    datax2 <- as.data.frame(datax1)

    # Regressor training set, lagged by `steps` relative to the response.
    train_x <- as.matrix(datax2[1:n_train, ])

    # Forecast every regressor and keep the WHOLE path (horizons 1..steps).
    # forecast() on a model with xreg needs one row per horizon and one
    # column per regressor; a single row is not enough when steps > 1.
    # NOTE(review): because the regressors are lagged by `steps`, the rows
    # needed here are datax2[(n_train + 1):train_end, ], which are already
    # observed -- consider using them directly instead of forecasts.
    x_path <- matrix(NA_real_, nrow = steps, ncol = n_cols,
                     dimnames = list(NULL, colnames(train_x)))
    for (j in seq_len(n_cols)) {
      arima_model_x_j <- auto.arima(train_x[, j])
      x_path[, j] <- as.numeric(forecast(arima_model_x_j, h = steps)$mean)
      arima_predict_x[i, steps, j] <- x_path[steps, j]
    }
    test_x[i, steps, ] <- arima_predict_x[i, steps, ]

    # Future regressors as a steps x n_cols matrix whose column names match
    # train_x. The original as.matrix(<vector>) produced an n_cols x 1 column
    # matrix, which does not match the regressors the model was fitted with.
    xreg_for_forecast <- x_path

    # Fit the regression with seasonal ARIMA errors via auto.arima.
    sarima_model <- auto.arima(train_data_in, xreg = train_x, seasonal = TRUE)
    # With xreg supplied the horizon is taken from nrow(xreg); h is kept only
    # for readability.
    sarimax_predict[i, steps] <- forecast(sarima_model,
                                          xreg = xreg_for_forecast,
                                          h = steps)$mean[steps]
    # Back-transform from the (presumed) log scale -- TODO confirm that
    # column 2 of usadata_y is log(column 5 of usadata).
    sarimax_mutil_step_forecast_values[i, steps] <-
      exp(sarimax_predict[i, steps])
    real_values[i, steps] <- usadata[(156 + i + steps - 1), 5]
  }
}
复制代码
|