前言: 这篇通过MARKDOWN写的课程,是为了教学国内的学子开发R工具包,其实R可以采用面向过程的简单函数组合发布工具包,比如国内其他人发的 pedquant package。当然国际当前最顶级的是如 rugarch 或者 VineCopula 这样调用 C/C++ 语言的工具包,前者是 S4 class package ,后者是 S3 class package.本文通过展示 ssgraph 工具包的内部来展示一个s3 class 工具包,包括它的C++ 语言函数。这个工具包说简单也简单只有一个 R 主函数和多个C++ 程序函数。说复杂也复杂,在R调用 .C读取 C++ 文件的时候读取了 Rstudio 编译过后的C++ 动态链接库文件 ,我没记错的话应该叫 .so 动态链接库。这和我们使用MATLAB 编译动态链接库 MATLAB 的.mexw64 或者以前最标准的 .dll 动态链接库文件不太一样。
作者:Daniel tulips liu . 刘旭东 Copyright © tulipsliu
2020教学001课
ssgraph NAMESPACE
useDynLib( ssgraph, .registration = TRUE )
import( "BDgraph" )
export( ssgraph,
summary.ssgraph,
plot.ssgraph,
print.ssgraph
)
S3method( "summary", "ssgraph" )
S3method( "plot" , "ssgraph" )
S3method( "print" , "ssgraph" )
ssgraph/R ssgraph.R
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
# Copyright (C) 2018 - 2019 Reza Mohammadi |
# |
# This file is part of ssgraph package. |
# |
# ssgraph is free software: you can redistribute it and/or modify it under |
# the terms of the GNU General Public License as published by the Free |
# Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>. |
# |
# Maintainer: Reza Mohammadi <a.mohammadi@uva.nl> |
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
# R code for Graphcial models based on spike and slab priors |
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
ssgraph = function( data, n = NULL, method = "ggm", not.cont = NULL, iter = 5000,
burnin = iter / 2, var1 = 4e-04, var2 = 1, lambda = 1, g.prior = 0.5,
g.start = "full", sig.start = NULL, save = FALSE, print = 1000,
cores = NULL )
{
if( iter < burnin ) stop( " Number of iteration must be more than number of burn-in" )
if( var1 <= 0 ) stop( " 'var1' must be more than 0" )
if( var2 <= 0 ) stop( " 'var2' must be more than 0" )
burnin <- floor( burnin )
if( print > iter ) print = iter
cores = BDgraph::get_cores( cores = cores )
list_S_n_p = BDgraph::get_S_n_p( data = data, method = method, n = n, not.cont = not.cont )
S = list_S_n_p $ S
n = list_S_n_p $ n
p = list_S_n_p $ p
method = list_S_n_p $ method
colnames_data = list_S_n_p $ colnames_data
if( method == "gcgm" )
{
not.cont = list_S_n_p $ not.cont
R = list_S_n_p $ R
Z = list_S_n_p $ Z
data = list_S_n_p $ data
gcgm_NA = list_S_n_p $ gcgm_NA
}
g_prior = BDgraph::get_g_prior( g.prior = g.prior, p = p )
G = BDgraph::get_g_start( g.start = g.start, g_prior = g_prior, p = p )
if( ( class( g.start ) == "bdgraph" ) | ( class( g.start ) == "ssgraph" ) )
K <- g.start $ last_K
if( class( g.start ) == "sim" )
K <- g.start $ K
if( ( class( g.start ) != "sim" ) & ( class( g.start ) != "bdgraph" ) & ( class( g.start ) != "ssgraph" ) )
{
if( is.null( sig.start ) ) sigma = S else sigma = sig.start
K = solve( sigma ) # precision or concentration matrix (omega)
}
if( save == TRUE )
{
qp1 = ( p * ( p - 1 ) / 2 ) + 1
string_g = paste( c( rep( 0, qp1 ) ), collapse = '' )
sample_graphs = c( rep ( string_g, iter - burnin ) ) # vector of numbers like "10100"
graph_weights = c( rep ( 0, iter - burnin ) ) # waiting time for every state
all_graphs = c( rep ( 0, iter - burnin ) ) # vector of numbers like "10100"
all_weights = c( rep ( 1, iter - burnin ) ) # waiting time for every state
size_sample_g = 0
}
if( ( save == TRUE ) && ( p > 50 & iter > 20000 ) )
{
cat( " WARNING: Memory needs to run this function is around " )
print( ( iter - burnin ) * utils::ob ject.size( string_g ), units = "auto" )
}
p_links = matrix( 0, p, p )
K_hat = matrix( 0, p, p )
cat( paste( c( iter, " MCMC sampling ... in progress: \n" ), collapse = "" ) )
## - - main BDMCMC algorithms implemented in C++ - - - - - - - - - - - - - - - - - - - - - - - - -|
if( save == FALSE )
{
if( method == "ggm" )
{
result = .C( "ggm_spike_slab_ma", as.integer(iter), as.integer(burnin), G = as.integer(G), K = as.double(K), as.double(S), as.integer(p),
K_hat = as.double(K_hat), p_links = as.double(p_links), as.integer(n),
as.double(var1), as.double(var2), as.double(lambda), as.double(g_prior), as.integer(print), PACKAGE = "ssgraph" )
}
if( method == "gcgm" )
{
not_continuous = not.cont
result = .C( "gcgm_spike_slab_ma", as.integer(iter), as.integer(burnin), G = as.integer(G), K = as.double(K), as.double(S), as.integer(p),
K_hat = as.double(K_hat), p_links = as.double(p_links), as.integer(n),
as.double(Z), as.integer(R), as.integer(not_continuous), as.integer(gcgm_NA),
as.double(var1), as.double(var2), as.double(lambda), as.double(g_prior), as.integer(print), PACKAGE = "ssgraph" )
}
}else{
if( method == "ggm" )
{
result = .C( "ggm_spike_slab_map", as.integer(iter), as.integer(burnin), G = as.integer(G), K = as.double(K), as.double(S), as.integer(p),
K_hat = as.double(K_hat), p_links = as.double(p_links), as.integer(n),
all_graphs = as.integer(all_graphs), all_weights = as.double(all_weights),
sample_graphs = as.character(sample_graphs), graph_weights = as.double(graph_weights), size_sample_g = as.integer(size_sample_g),
as.double(var1), as.double(var2), as.double(lambda), as.double(g_prior), as.integer(print), PACKAGE = "ssgraph" )
}
if( method == "gcgm" )
{
not_continuous = not.cont
result = .C( "gcgm_spike_slab_map", as.integer(iter), as.integer(burnin), G = as.integer(G), K = as.double(K), as.double(S), as.integer(p),
K_hat = as.double(K_hat), p_links = as.double(p_links), as.integer(n),
all_graphs = as.integer(all_graphs), all_weights = as.double(all_weights),
sample_graphs = as.character(sample_graphs), graph_weights = as.double(graph_weights), size_sample_g = as.integer(size_sample_g),
as.double(Z), as.integer(R), as.integer(not_continuous), as.integer(gcgm_NA),
as.double(var1), as.double(var2), as.double(lambda), as.double(g_prior), as.integer(print), PACKAGE = "ssgraph" )
}
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
p_links = matrix( result $ p_links, p, p, dimnames = list( colnames_data, colnames_data ) )
K_hat = matrix( result $ K_hat , p, p, dimnames = list( colnames_data, colnames_data ) )
last_graph = matrix( result $ G , p, p, dimnames = list( colnames_data, colnames_data ) )
last_K = matrix( result $ K , p, p, dimnames = list( colnames_data, colnames_data ) )
p_links[ lower.tri( p_links, diag = TRUE ) ] = 0
nmc = iter - burnin
p_links = p_links / nmc
K_hat = K_hat / nmc
if( save == TRUE )
{
size_sample_g = result $ size_sample_g
sample_graphs = result $ sample_graphs[ 1 : size_sample_g ]
graph_weights = result $ graph_weights[ 1 : size_sample_g ]
all_graphs = result $ all_graphs + 1
all_weights = result $ all_weights
output = list( p_links = p_links, K_hat = K_hat, last_graph = last_graph, last_K = last_K,
sample_graphs = sample_graphs, graph_weights = graph_weights,
all_graphs = all_graphs, all_weights = all_weights )
}else{
output = list( p_links = p_links, K_hat = K_hat, last_graph = last_graph, last_K = last_K )
}
class( output ) = "ssgraph"
return( output )
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
# Summary for the ssgraph boject |
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
summary.ssgraph = function( ob ject, round = 2, vis = TRUE, ... )
{
p_links = ob ject $ p_links
selected_g = BDgraph::select( p_links, cut = 0.5 )
if( vis == TRUE )
{
if( !is.null( ob ject $ graph_weights ) )
op = graphics::par( mfrow = c( 2, 2 ), pty = "s", omi = c( 0.3, 0.3, 0.3, 0.3 ), mai = c( 0.3, 0.3, 0.3, 0.3 ) )
# - - - plot selected graph
sub_g = "Graph with edge posterior probability > 0.5"
BDgraph::plot.graph( selected_g, main = "Selected graph", sub = sub_g, ... )
if( !is.null( ob ject $ graph_weights ) )
{
sample_graphs = ob ject $ sample_graphs
graph_weights = ob ject $ graph_weights
sum_gWeights = sum( graph_weights )
# - - - plot posterior distribution of graph
graph_prob = graph_weights / sum_gWeights
graphics::plot( x = 1 : length( graph_weights ), y = graph_prob, type = "h", col = "gray60",
main = "Posterior probability of graphs",
ylab = "Pr(graph|data)", xlab = "graph", ylim = c( 0, max( graph_prob ) ) )
# - - - plot posterior distribution of graph size
sizesample_graphs = sapply( sample_graphs, function( x ) length( which( unlist( strsplit( as.character( x ), "" ) ) == 1 ) ) )
xx <- unique( sizesample_graphs )
weightsg <- vector()
for( i in 1 : length( xx ) ) weightsg[ i ] <- sum( graph_weights[ which( sizesample_graphs == xx[ i ] ) ] )
prob_zg = weightsg / sum_gWeights
graphics::plot( x = xx, y = prob_zg, type = "h", col = "gray10",
main = "Posterior probability of graphs size",
ylab = "Pr(graph size|data)", xlab = "Graph size",
ylim = c( 0, max( prob_zg ) ) )
# - - - plot trace of graph size
all_graphs = ob ject $ all_graphs
sizeall_graphs = sizesample_graphs[ all_graphs ]
graphics::plot( x = 1 : length( all_graphs ), sizeall_graphs, type = "l", col = "gray40",
main = "Trace of graph size", ylab = "Graph size", xlab = "Iteration" )
graphics::par( op )
}
}
return( list( selected_g = selected_g, p_links = round( p_links, round ), K_hat = round( ob ject $ K_hat, round ) ) )
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
# Plot for the ssgraph boject |
## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
plot.ssgraph = function( x, cut = 0.5, ... )
{
BDgraph::plot.graph( x, cut = cut, sub = paste0( "Edge posterior probability = ", cut ), ... )
}
调用C++ 文件的主函数
if( method == "gcgm" )
{
not_continuous = not.cont
result = .C( "gcgm_spike_slab_ma", as.integer(iter), as.integer(burnin), G = as.integer(G), K = as.double(K), as.double(S), as.integer(p),
K_hat = as.double(K_hat), p_links = as.double(p_links), as.integer(n),
as.double(Z), as.integer(R), as.integer(not_continuous), as.integer(gcgm_NA),
as.double(var1), as.double(var2), as.double(lambda), as.double(g_prior), as.integer(print), PACKAGE = "ssgraph" )
}
注意这里通过R 的内部函数 .C 调用C++ 函数,如果对R 精通的会知道这个。
ssgraph/src C++ file
copula.cpp
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Copyright (C) 2018 - 2019 Reza Mohammadi |
// |
// This file is part of ssgraph package. |
// |
// ssgraph is free software: you can redistribute it and/or modify it under |
// the terms of the GNU General Public License as published by the Free |
// Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>. |
// |
// Maintainer: Reza Mohammadi <a.mohammadi@uva.nl> |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#include "copula.h"
extern "C" {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Computing mean for copula function
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void get_mean( double Z[], double K[], double *mu_ij, double *sigma, int *i, int *j, int *n, int *p )
{
int k, dim = *p, number = *n, row = *i, col = *j, jxp = col * dim;
double mu = 0.0;
for( k = 0; k < col; k++ ) mu += Z[ k * number + row ] * K[ jxp + k ];
for( k = col + 1; k < dim; k++ ) mu += Z[ k * number + row ] * K[ jxp + k ];
*mu_ij = - mu * *sigma;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Computing bounds for copula function
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void get_bounds( double Z[], int R[], double *lb, double *ub, int *i, int *j, int *n )
{
int kj, col = *j, number = *n, ij = col * number + *i;
double low_b = -1e308, upper_b = +1e308;
for( int k = 0; k < number; k++ )
{
kj = col * number + k;
if( R[ kj ] < R[ ij ] )
low_b = max( Z[ kj ], low_b );
else if( R[ kj ] > R[ ij ] )
upper_b = min( Z[ kj ], upper_b );
}
*lb = low_b;
*ub = upper_b;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// copula for BDMCMC sampling algorithm
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void copula( double Z[], double K[], int R[], int not_continuous[], int *n, int *p )
{
int number = *n, dim = *p, dimp1 = dim + 1, i, j, jxn;
double sigma, sd_j, mu_ij, lb, ub, runif_value, pnorm_lb, pnorm_ub;
for( j = 0; j < dim; j++ )
{
if( not_continuous[ j ] )
{
jxn = j * number;
sigma = 1.0 / K[ j * dimp1 ]; // 1.0 / K[ j * dim + j ];
sd_j = sqrt( sigma );
for( i = 0; i < number; i++ )
{
get_mean( Z, K, &mu_ij, &sigma, &i, &j, &number, &dim );
get_bounds( Z, R, &lb, &ub, &i, &j, &number );
pnorm_lb = Rf_pnorm5( lb, mu_ij, sd_j, TRUE, FALSE );
pnorm_ub = Rf_pnorm5( ub, mu_ij, sd_j, TRUE, FALSE );
//runif_value = runif( pnorm_lb, pnorm_ub );
runif_value = pnorm_lb + unif_rand() * ( pnorm_ub - pnorm_lb );
Z[ jxn + i ] = Rf_qnorm5( runif_value, mu_ij, sd_j, TRUE, FALSE );
}
}
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Computing bounds for copula function for data with missing values
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void get_bounds_NA( double Z[], int R[], double *lb, double *ub, int *i, int *j, int *n )
{
int kj, number = *n, col = *j, ij = col * number + *i;
double low_b = -1e308, upper_b = +1e308;
for( int k = 0; k < number; k++ )
{
kj = col * number + k;
if( R[ kj ] != 0 )
{
if( R[ kj ] < R[ ij ] )
low_b = max( Z[ kj ], low_b );
else if( R[ kj ] > R[ ij ] )
upper_b = min( Z[ kj ], upper_b );
}
}
*lb = low_b;
*ub = upper_b;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// copula for data with missing values
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void copula_NA( double Z[], double K[], int R[], int not_continuous[], int *n, int *p )
{
int number = *n, dim = *p, nxp = number * dim, dimp1 = dim + 1;
#pragma omp parallel
{
double sigma, sd_j, mu_ij, lb, ub, runif_value, pnorm_lb, pnorm_ub;
int i, j;
#pragma omp for
for( int counter = 0; counter < nxp; counter++ )
{
j = counter / number;
i = counter % number;
sigma = 1.0 / K[ j * dimp1 ]; // 1.0 / K[ j * dim + j ];
sd_j = sqrt( sigma );
get_mean( Z, K, &mu_ij, &sigma, &i, &j, &number, &dim );
if( R[ counter ] != 0 )
{
get_bounds_NA( Z, R, &lb, &ub, &i, &j, &number );
pnorm_lb = Rf_pnorm5( lb, mu_ij, sd_j, TRUE, FALSE );
pnorm_ub = Rf_pnorm5( ub, mu_ij, sd_j, TRUE, FALSE );
//runif_value = runif( pnorm_lb, pnorm_ub );
runif_value = pnorm_lb + unif_rand() * ( pnorm_ub - pnorm_lb );
Z[ counter ] = Rf_qnorm5( runif_value, mu_ij, sd_j, TRUE, FALSE );
}else
Z[ counter ] = mu_ij + norm_rand() * sd_j; // rnorm( mu_ij, sd_j );
}
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Calculating S for the MCMC sampling algorithm
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void get_S( double K[], double Z[], int R[], int not_continuous[], double S[], int *gcgm, int *n, int *p )
{
int dim = *p;
( *gcgm == 0 ) ? copula( Z, K, R, not_continuous, n, &dim ) : copula_NA( Z, K, R, not_continuous, n, &dim );
// S <- t(Z) %*% Z; NOTE, I'm using Ds instead of S, for saving memory
double alpha = 1.0, beta = 0.0;
char transA = 'T', transB = 'N';
F77_NAME(dgemm)( &transA, &transB, &dim, &dim, n, &alpha, Z, n, Z, n, &beta, S, &dim FCONE FCONE );
}
} // End of exturn "C"
ggm_spike_slab.cpp
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Copyright (C) 2018 - 2019 Reza Mohammadi |
// |
// This file is part of ssgraph package. |
// |
// ssgraph is free software: you can redistribute it and/or modify it under |
// the terms of the GNU General Public License as published by the Free |
// Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>. |
// |
// Maintainer: Reza Mohammadi <a.mohammadi@uva.nl> |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#include "matrix.h"
extern "C" {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// MCMC sampling algorithm for Gaussian Graphical models using spike-and-slab priors
// it is for Bayesian model averaging (MA)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void ggm_spike_slab_ma( int *iter, int *burnin, int G[], double K[], double S[], int *p,
double K_hat[], double p_links[], int *n,
double *v1, double *v2, double *lambda, double g_prior[], int *print )
{
double lambda_c = *lambda;
double a_gam = 0.5 * *n + 1.0;
double sqrt_v1 = sqrt( *v1 );
double sqrt_v2 = sqrt( *v2 );
double inv_v1 = 1.0 / *v1;
double inv_v2 = 1.0 / *v2;
double alpha = 1.0, beta = 0.0, alpha1 = -1.0;
double w1, w2, prob_e, inv_V_ij, sigmaii_inv, Sii_lambda, b_gam, gam, K0ii;
int print_c = *print, iteration = *iter, burn_in = *burnin;
int info, one = 1, dim = *p, pxp = dim * dim;
int p1 = dim - 1, p1xp1 = p1 * p1, i, row_i, ii, k, ik, k_p1, G_ij;
char transT = 'T', transN = 'N', sideU = 'U', diagN = 'N';
vector<double> sigma( pxp );
vector<double> copyK( pxp );
memcpy( ©K[0], K, sizeof( double ) * pxp );
inverse( ©K[0], &sigma[0], &dim );
vector<double> sigma_11( p1xp1 ); // sigma_11 = sigma[ -i, -i ]
vector<double> sigma_12( p1 ); // sigma_12 = sigma[ -i, i ]
vector<double> K_11_inv( p1xp1 );
vector<double> K_u( p1xp1 ); // K_u = Sii_lambda * K_11_inv
vector<double> chol_K_u( p1xp1 ); // chol_K_u = chol( K_u )
vector<double> inv_chol_K_u( p1xp1 ); // inv_chol_K_u = solve( chol_K_u )
vector<double> sig_K_12( p1xp1, 0.0 ); // sig_K_12 = inv_chol_K_u %*% t( inv_chol_K_u )
gcgm_spike_slab.cpp
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Copyright (C) 2018 - 2019 Reza Mohammadi |
// |
// This file is part of ssgraph package. |
// |
// ssgraph is free software: you can redistribute it and/or modify it under |
// the terms of the GNU General Public License as published by the Free |
// Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>. |
// |
// Maintainer: Reza Mohammadi <a.mohammadi@uva.nl> |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#include "matrix.h"
#include "copula.h"
extern "C" {
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// MCMC sampling algorithm for Gaussian copula graphical models using spike-and-slab priors
// it is for Bayesian model averaging (MA)
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void gcgm_spike_slab_ma( int *iter, int *burnin, int G[], double K[], double S[], int *p,
double K_hat[], double p_links[], int *n,
double Z[], int R[], int not_continuous[], int *gcgm,
double *v1, double *v2, double *lambda, double g_prior[], int *print )
{
double lambda_c = *lambda;
double a_gam = 0.5 * *n + 1.0;
double sqrt_v1 = sqrt( *v1 );
double sqrt_v2 = sqrt( *v2 );
double inv_v1 = 1.0 / *v1;
double inv_v2 = 1.0 / *v2;
double alpha = 1.0, beta = 0.0, alpha1 = -1.0;
double w1, w2, prob_e, inv_V_ij, sigmaii_inv, Sii_lambda, b_gam, gam, K0ii;
int print_c = *print, iteration = *iter, burn_in = *burnin;
int info, one = 1, dim = *p, pxp = dim * dim;
int p1 = dim - 1, p1xp1 = p1 * p1, i, row_i, ii, k, ik, k_p1, G_ij;
char transT = 'T', transN = 'N', sideU = 'U', diagN = 'N';
vector<double> sigma( pxp );
vector<double> copyK( pxp );
memcpy( ©K[0], K, sizeof( double ) * pxp );
inverse( ©K[0], &sigma[0], &dim );
vector<double> sigma_11( p1xp1 ); // sigma_11 = sigma[ -i, -i ]
vector<double> sigma_12( p1 ); // sigma_12 = sigma[ -i, i ]
vector<double> K_11_inv( p1xp1 );
vector<double> K_u( p1xp1 ); // K_u = Sii_lambda * K_11_inv
vector<double> chol_K_u( p1xp1 ); // chol_K_u = chol( K_u )
vector<double> inv_chol_K_u( p1xp1 ); // inv_chol_K_u = solve( chol_K_u )
vector<double> sig_K_12( p1xp1, 0.0 ); // sig_K_12 = inv_chol_K_u %*% t( inv_chol_K_u )
vector<double> mean( p1 ); // mean = -sig_K_12 %*% S[ -i, i ]
vector<double> S_12( p1 );
vector<double> K_12( p1 ); // K_12 = ssgraph::rmvnorm( n = 1, mean = mean, sigma = sig_K_12 )
// vector<double> K_11_inv_X_K_12( p1 ); // K_11_inv_X_K_12 = K_11_inv %*% K_12;
// -- Main loop for birth-death MCMC - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
GetRNGstate();
for( int i_mcmc = 0; i_mcmc < iteration; i_mcmc++ )
{
if( ( i_mcmc + 1 ) % print_c == 0 ) Rprintf( " Iteration %d \n", i_mcmc + 1 );
// - - - STEP 1: copula - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
get_S( K, Z, R, not_continuous, S, gcgm, n, &dim );
// - - - STEP 2: updating graph G and precision matrix K, row by row - - - - - - - - - - - |
for( row_i = 0; row_i < dim; row_i++ )
{
ii = row_i * dim + row_i;
// - - updating precision matrix - - - - - - - - - - - - - - - - - - - - - - - - - - - |
sub_matrices1( &sigma[0], &sigma_12[0], &sigma_11[0], &row_i, &dim );
// K_11_inv = sigma_11 - sigma_12 %*% t( sigma_12 ) / sigma[ i, i ]
sigmaii_inv = - 1.0 / sigma[ ii ];
memcpy( &K_11_inv[0], &sigma_11[0], sizeof( double ) * p1xp1 );
F77_NAME(dger)( &p1, &p1, &sigmaii_inv, &sigma_12[0], &one, &sigma_12[0], &one, &K_11_inv[0], &p1 );
Sii_lambda = S[ ii ] + lambda_c; // Sii_lambda = S[ i, i ] + lambda
for( k = 0; k < p1xp1; k++ )
K_u[ k ] = Sii_lambda * K_11_inv[ k ];
for( k = 0; k < row_i; k++ )
{
inv_V_ij = ( G[ k * dim + row_i ] == 0 ) ? inv_v1 : inv_v2;
K_u[ k * p1 + k ] += inv_V_ij;
}
for( k = row_i + 1; k < dim; k++ )
{
k_p1 = k - 1;
inv_V_ij = ( G[ k * dim + row_i ] == 0 ) ? inv_v1 : inv_v2;
K_u[ k_p1 * p1 + k_p1 ] += inv_V_ij;
}
// - - sampling K_12 from N( mean, sig_K_12 ) - - - - - - - - - - - - - - - - - - - - -|
cholesky( &K_u[0], &chol_K_u[0], &p1 );
memcpy( &inv_chol_K_u[0], &chol_K_u[0], sizeof( double ) * p1xp1 );
F77_NAME(dtrtri)( &sideU, &diagN, &p1, &inv_chol_K_u[0], &p1, &info FCONE FCONE );
F77_NAME(dgemm)( &transN, &transT, &p1, &p1, &p1, &alpha, &inv_chol_K_u[0], &p1, &inv_chol_K_u[0], &p1, &beta, &sig_K_12[0], &p1 FCONE FCONE );
sub_row_mins( S, &S_12[0], &row_i, &dim ); // S_12 = S[ -i, i ]
F77_NAME(dgemv)( &transN, &p1, &p1, &alpha1, &sig_K_12[0], &p1, &S_12[0], &one, &beta, &mean[0], &one FCONE );
rmvnorm_chol( &K_12[0], &mean[0], &inv_chol_K_u[0], &p1 );
rmvnorm_chol( &K_12[0], &mean[0], &inv_chol_K_u[0], &p1 );
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
for( k = 0; k < row_i; k++ )
{
K[ k * dim + row_i ] = K_12[ k ]; // K[ -i, i ] = K_12
K[ row_i * dim + k ] = K_12[ k ]; // K[ i, -i ] = K_12
}
for( k = row_i + 1; k < dim; k++ )
{
k_p1 = k - 1;
K[ k * dim + row_i ] = K_12[ k_p1 ];
K[ row_i * dim + k ] = K_12[ k_p1 ];
}
b_gam = Sii_lambda * 0.5; // b_gam = ( S[ i, i ] + lambda ) * 0.5
gam = Rf_rgamma( a_gam, 1.0 / b_gam ); // gam = rgamma( n = 1, shape = a_gam, scale = 1 / b_gam )
vector<double> K_11_inv_X_K_12( p1 ); // K_11_inv_X_K_12 = K_11_inv %*% K_12;
F77_NAME(dgemv)( &transN, &p1, &p1, &alpha, &K_11_inv[0], &p1, &K_12[0], &one, &beta, &K_11_inv_X_K_12[0], &one FCONE );
K0ii = F77_NAME(ddot)( &p1, &K_12[0], &one, &K_11_inv_X_K_12[0], &one );
K[ ii ] = gam + K0ii; // K[ i, i ] = gam + t( K_12 ) %*% K_11_inv_X_K_12
// - - updating graph matrix - - - - - - - - - - - - - - - - - - - - - - - - - - - - --|
for( k = 0; k < row_i; k++ )
{
ik = k * dim + row_i;
w1 = ( 1.0 - g_prior[ ik ] ) * exp( - 0.5 * K_12[ k ] * K_12[ k ] * inv_v1 ) / sqrt_v1;
w2 = g_prior[ ik ] * exp( - 0.5 * K_12[ k ] * K_12[ k ] * inv_v2 ) / sqrt_v2;
prob_e = w2 / ( w1 + w2 );
G_ij = ( unif_rand() < prob_e ) ? 1 : 0;
G[ row_i * dim + k ] = G_ij; // G[ k, i ] = G_ij;
G[ k * dim + row_i ] = G_ij; // G[ i, k ] = G_ij;
}
for( k = row_i + 1; k < dim; k++ )
{
k_p1 = k - 1;
ik = k * dim + row_i;
w1 = ( 1.0 - g_prior[ ik ] ) * exp( - 0.5 * K_12[ k_p1 ] * K_12[ k_p1 ] * inv_v1 ) / sqrt_v1;
w2 = g_prior[ ik ] * exp( - 0.5 * K_12[ k_p1 ] * K_12[ k_p1 ] * inv_v2 ) / sqrt_v2;
prob_e = w2 / ( w1 + w2 );
G_ij = ( unif_rand() < prob_e ) ? 1 : 0;
G[ row_i * dim + k ] = G_ij; // G[ k, i ] = G_ij;
G[ k * dim + row_i ] = G_ij; // G[ i, k ] = G_ij;
}
// - - updating Covariance matrix according to one-column change of precision matrix --|
update_sigma( &sigma[0], &row_i, &K_11_inv[0], &K_11_inv_X_K_12[0], &gam, &dim );
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
}
// - - - saving result - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
if( i_mcmc >= burn_in )
{
counter = 0;
for( j = 1; j < dim; j++ )
for( i = 0; i < j; i++ )
char_g[ counter++ ] = G[ j * dim + i ] + '0';
for( i = 0; i < pxp ; i++ )
{
K_hat[ i ] += K[ i ];
p_links[ i ] += G[ i ];
}
string_g = string( char_g.begin(), char_g.end() );
this_one = false;
for( i = 0; i < size_sample_graph; i++ )
if( sample_graphs_C[ i ] == string_g )
{
graph_weights[ i ]++; // += all_weights[count_all_g];
all_graphs[ count_all_g ] = i;
this_one = true;
break;
}
if( !this_one || size_sample_graph == 0 )
{
sample_graphs_C[ size_sample_graph ] = string_g;
graph_weights[ size_sample_graph ] = all_weights[ count_all_g ];
all_graphs[ count_all_g ] = size_sample_graph;
size_sample_graph++;
}
count_all_g++;
}
// - - - End of saving result - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -|
}
PutRNGstate();
// -- End of main loop for MCMC algorithm - - - - - - - - - - - - - - - - - - - - - - - - - - -|
for( int i = 0; i < ( iteration - burn_in ); i++ )
{
sample_graphs_C[ i ].copy( sample_graphs[ i ], qp, 0 );
sample_graphs[ i ][ qp ] = '\0';
}
*size_sample_g = size_sample_graph;
}
} // End of exturn "C"
ssgraph/src 最重要的文件
matrix.h头文件
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Copyright (C) 2018 - 2019 Reza Mohammadi |
// |
// This file is part of ssgraph package. |
// |
// ssgraph is free software: you can redistribute it and/or modify it under |
// the terms of the GNU General Public License as published by the Free |
// Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>. |
// |
// Maintainer: Reza Mohammadi <a.mohammadi@uva.nl> |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#ifndef matrix_H
#define matrix_H
#include "util.h"
extern "C" {
void sub_row_mins( double A[], double sub_A[], int *sub, int *p );
void sub_matrices1( double A[], double A12[], double A22[], int *sub, int *p );
void sub_matrix_22( double A[], double A22[], int *sub, int *p );
void inverse( double A[], double A_inv[], int *p );
void cholesky( double A[], double U[], int *p );
void update_sigma( double sigma[], int *sub, double K_11_inv[], double K_11_inv_X_K_12[], double *gam, int *p );
void rmvnorm_chol( double sample[], double mean[], double chol_sig[], int *p );
}
#endif
matrix.cpp主函数文件
这个文件是比较核心的,以后我会抽空讲,大概今年八月或者九月,我会把R工具包开发尤其是S3 class 工具包里面加入C++ 语言讲清楚,为什么R工具包里面需要编译C++/FORTRAN语言文件?
© 因为C++语言的速度是R语言的一百倍。
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Copyright (C) 2018 - 2019 Reza Mohammadi |
// |
// This file is part of ssgraph package. |
// |
// ssgraph is free software: you can redistribute it and/or modify it under |
// the terms of the GNU General Public License as published by the Free |
// Software Foundation; see <https://cran.r-project.org/web/licenses/GPL-3>. |
// |
// Maintainer: Reza Mohammadi <a.mohammadi@uva.nl> |
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
#include "matrix.h"
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Takes square matrix A (p x p) and
// retrieves vector sub_A which is 'sub' th row of matrix A, minus 'sub' element
// Likes A[j, -j] in R
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void sub_row_mins( double A[], double sub_A[], int *sub, int *p )
{
int subj = *sub, pdim = *p, subxp = subj * pdim;
memcpy( sub_A , A + subxp , sizeof( double ) * subj );
memcpy( sub_A + subj, A + subxp + subj + 1, sizeof( double ) * ( pdim - subj - 1 ) );
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Takes symmatric matrix A (p x p) and
// retrieves A12(1x(p-1)) and A22((p-1)x(p-1))
// Like A12=A[j, -j], and A22=A[-j, -j] in R
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void sub_matrices1( double A[], double A12[], double A22[], int *sub, int *p )
{
int pdim = *p, p1 = pdim - 1, psub = *sub, subxp = psub * pdim, mpsub = pdim - psub - 1;
int size_psub = sizeof( double ) * psub;
int size_mpsub = sizeof( double ) * mpsub;
memcpy( A12, A + subxp, size_psub );
memcpy( A12 + psub, A + subxp + psub + 1, size_mpsub );
#pragma omp parallel
{
int i, ixpdim, ixp1;
#pragma omp for
for( i = 0; i < psub; i++ )
{
ixpdim = i * pdim;
ixp1 = i * p1;
memcpy( A22 + ixp1 , A + ixpdim , size_psub );
memcpy( A22 + ixp1 + psub, A + ixpdim + psub + 1, size_mpsub );
}
}
#pragma omp parallel
{
int i, ixpdim, ixp1;
#pragma omp for
for( i = psub + 1; i < pdim; i++ )
{
ixpdim = i * pdim;
ixp1 = ( i - 1 ) * p1;
memcpy( A22 + ixp1 , A + ixpdim , size_psub );
memcpy( A22 + ixp1 + psub, A + ixpdim + psub + 1, size_mpsub );
}
}
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// inverse function for symmetric positive-definite matrices (p x p)
// WARNING: Matrix you pass is overwritten with the result
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void inverse( double A[], double A_inv[], int *p )
{
int info, dim = *p;
char uplo = 'U';
// creating an identity matrix
#pragma omp parallel for
for( int i = 0; i < dim; i++ )
for( int j = 0; j < dim; j++ )
A_inv[ j * dim + i ] = ( i == j );
// LAPACK function: computes solution to A * X = B, where A is symmetric positive definite matrix
F77_NAME(dposv)( &uplo, &dim, &dim, A, &dim, A_inv, &dim, &info FCONE );
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Cholesky decomposition of symmetric positive-definite matrix
// A = U' %*% U
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void cholesky( double A[], double U[], int *p )
{
char uplo = 'U';
int info, dim = *p;
memcpy( U, A, sizeof( double ) * dim * dim );
F77_NAME(dpotrf)( &uplo, &dim, U, &dim, &info FCONE );
#pragma omp parallel for
for( int i = 0; i < dim; i++ )
for( int j = 0; j < i; j++ )
U[ j * dim + i ] = 0.0;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// update sigma for gm_spike_slab functions
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void update_sigma( double sigma[], int *sub, double K_11_inv[], double K_11_inv_X_K_12[], double *gam, int *p )
{
int i = *sub, dim = *p, p1 = dim - 1; //, one, p1xp1 = p1 * p1;
double alpha_gam = 1.0 / *gam;
double alpha_ij = - alpha_gam;
// sigma[ -i, -i ] = K_11_inv + K_11_inv_X_K_12 %*% t( K_11_inv_X_K_12 ) / gam;
#pragma omp parallel for
for( int col = 0; col < i; col++ )
{
int col_x_p = col * dim;
int col_x_p1 = col * p1;
for( int row = 0; row < i; row++ )
sigma[ col_x_p + row ] = K_11_inv[ col_x_p1 + row ] + K_11_inv_X_K_12[ col ] * K_11_inv_X_K_12[ row ] * alpha_gam;
for( int row = i; row < p1; row++ )
sigma[ col_x_p + ( row + 1 ) ] = K_11_inv[ col_x_p1 + row ] + K_11_inv_X_K_12[ col ] * K_11_inv_X_K_12[ row ] * alpha_gam;
}
#pragma omp parallel for
for( int col = i; col < p1; col++ )
{
int col1_x_p = ( col + 1 ) * dim;
int col_x_p1 = col * p1;
for( int row = 0; row < i ; row++ )
sigma[ col1_x_p + row ] = K_11_inv[ col_x_p1 + row ] + K_11_inv_X_K_12[ col ] * K_11_inv_X_K_12[ row ] * alpha_gam;
for( int row = i; row < p1; row++ )
sigma[ col1_x_p + ( row + 1 ) ] = K_11_inv[ col_x_p1 + row ] + K_11_inv_X_K_12[ col ] * K_11_inv_X_K_12[ row ] * alpha_gam;
}
/*
vector<double> copy_sigma_11( p1xp1 );
memcpy( ©_sigma_11[0], &K_11_inv[0], sizeof( double ) * p1xp1 );
// K_11_inv := K_11_inv + K_11_inv_X_K_12 %*% t( K_11_inv_X_K_12 ) / gam;
F77_NAME(dger)( &p1, &p1, &alpha_gam, &K_11_inv_X_K_12[0], &one, &K_11_inv_X_K_12[0], &one, ©_sigma_11[0], &p1 );
// sigma[ -i, -i ] = sigma_11
#pragma omp parallel for
for( int col = 0; col < i; col++ )
{
for( int row = 0; row < i; row++ ) sigma[ col * dim + row ] = copy_sigma_11[ col * p1 + row ];
for( int row = i; row < p1; row++ ) sigma[ col * dim + ( row + 1 ) ] = copy_sigma_11[ col * p1 + row ];
//memcpy( &sigma[0] + col * dim , ©_sigma_11[0] + col * p1 , sizeof( double ) * i );
//memcpy( &sigma[0] + col * dim + i + 1, ©_sigma_11[0] + col * p1 + i, sizeof( double ) * ( p1 - i ) );
}
#pragma omp parallel for
for( int col = i; col < p1; col++ )
{
for( int row = 0; row < i ; row++ ) sigma[ ( col + 1 ) * dim + row ] = copy_sigma_11[ col * p1 + row ];
for( int row = i; row < p1; row++ ) sigma[ ( col + 1 ) * dim + ( row + 1 ) ] = copy_sigma_11[ col * p1 + row ];
//memcpy( &sigma[0] + ( col + 1 ) * dim , ©_sigma_11[0] + col * p1 , sizeof( double ) * i );
//memcpy( &sigma[0] + ( col + 1 ) * dim + i + 1, ©_sigma_11[0] + col * p1 + i, sizeof( double ) * ( p1 - i ) );
}
*/
#pragma omp parallel for
for( int k = 0; k < i; k++ )
{
sigma[ i * dim + k ] = K_11_inv_X_K_12[ k ] * alpha_ij;
sigma[ k * dim + i ] = K_11_inv_X_K_12[ k ] * alpha_ij;
}
#pragma omp parallel for
for( int k = ( i + 1 ); k < dim; k++ )
{
int k_p1 = k - 1;
sigma[ k * dim + i ] = K_11_inv_X_K_12[ k_p1 ] * alpha_ij;
sigma[ i * dim + k ] = K_11_inv_X_K_12[ k_p1 ] * alpha_ij;
}
/*
F77_NAME(dscal)( &p1, &alpha_ij, &K_11_inv_X_K_12[0], &one );
memcpy( &sigma[0] + i * dim , &K_11_inv_X_K_12[0] , sizeof( double ) * i );
memcpy( &sigma[0] + i * dim + i + 1, &K_11_inv_X_K_12[0] + i, sizeof( double ) * ( p1 - i ) );
for( int k = 0; k < i; k++ ) sigma[ k * dim + i ] = K_11_inv_X_K_12[ k ];
for( int k = ( i + 1 ); k < dim; k++ ) sigma[ k * dim + i ] = K_11_inv_X_K_12[ k - 1 ];
*/
sigma[ i * dim + i ] = alpha_gam; // sigma[ i, i ] = 1 / gam;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
// Simulate one sample from multivarate normal distribution R ~ N_p( mu, sig )
// where chol_sig = chol( sig )
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
void rmvnorm_chol( double sample[], double mean[], double chol_sig[], int *p )
{
// GetRNGstate();
int p1 = *p, one = 1;
char transN = 'N';
double alpha = 1.0, beta1 = 1.0;
// vector<double> chol_sig( p1 * p1 );
// cholesky( &sig[0], &chol_sig[0], &p1 );
//for( int row = 0; row < p1; row++ )
// for( int col = 0; col < row; col++ )
// chol_sig[ row * p1 + col ] = 0.0;
vector<double> z_N( p1 );
for( int row = 0; row < p1; row++ ) z_N[ row ] = norm_rand();
memcpy( &sample[0], &mean[0], sizeof( double ) * p1 );
F77_NAME(dgemv)( &transN, &p1, &p1, &alpha, &chol_sig[0], &p1, &z_N[0], &one, &beta1, &sample[0], &one FCONE );
// PutRNGstate();
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
总结与未来展望
- 将为国内未来五年培育一批达到国际顶级计量经济学家和计量金融学者奉献自己的绵薄之力
Enjoy
Yours
Danile tulips liu ,(刘旭东, 丹尼尔·郁金香·刘)
Tencent: 280201722