### ---- set up ----

rm(list=ls())
gc()

library(ergm) ; library(sna)

### ---- data ----

### loading R environment: includes data and model results

load("Diviak_Ocelik_data.RData")

## when using this dataset, make sure that you use the complete bibliographical citation

## more complex models (models 3 and 4) take substantially more time to compute
## it might be therefore more convenient to inspect the models from the saved objects

## note that we used 4.1.2 version for model fitting; you may get somewhat different 
## results when using the more recent versions of the ergm package

### relational data stored in 68x68 adjacency matrices (binary directed networks)

str(info) # expert information exchange network
info

str(collab) # political collaboration network
collab

str(infl) # perceived influence network
infl

### attribute data stored as case-by-variable matrices

head(att) 
## political beliefs scores measured on continuous scale <0,1> 
## 0 = very strong pro-coal position
## 1 = very strong anti-coal position
## organizational type attribute is categorical
table(att$orgType)

### network data stored as a network object or matrices

## converting a network matrix into a network object
## allows to include dyadic and nodal attributes used in the modelling
infoNet <- network(info, directed = T)
infoNet

## information network is a dependent variable
## collaboration and influence networks will be used as dyadic covariates
## it is sufficient to keep them as adjacency matrices

## attaching nodal attributes to the information network
infoNet %v% "beliefs" <- att$beliefs  
infoNet %v% "orgType" <- att$orgType
infoNet # network object now includes also nodal attributes

## accessing nodal (vertex) attributes

## vector of policy belief scores for each organization
get.vertex.attribute(infoNet, "beliefs")

## vector of organizational type categories for each organization
get.vertex.attribute(infoNet, "orgType")

### ---- descriptives ----

### let's look at basic descriptive measures

## starting with our dependent variable: expert information network

dyad.census(info) # dyad census
## note there are 157 mutual dyads including 314 ties
## 314 reciprocal ties + 473 asymmetric ties = 787 ties in total

triad.census(info, mode = "digraph") # triad census
## 030T shows the number of transitive triads
## 030C shows the number of cyclic triads

## network density (number of observed ties / number of all possible ties)
gden(info, mode = "digraph") 

## reciprocity measured as a share of the reciprocal ties on the total number of ties 
grecip(info, measure = "edgewise") 
2*dyad.census(info)[1] / sum(info)

## transitivity measured as an average clustering coefficient
## it takes a mean value of all nodes' neighborhood densities
## the measure is not implemented in the sna package, thus we use igraph package
info.igraph <- igraph::graph_from_adjacency_matrix(info, mode = "directed")
igraph::transitivity(info.igraph, type = "average")

## calculating average degree
## mean total degree (includes both incoming and outgoing ties)
mean ( degree(info, gmode = "digraph", cmode = "freeman") )
## on average, actors are directly connected to 23 out of 68 (34%) nodes 
## standard deviation of the total degree
sd ( degree(info, gmode = "digraph", cmode = "freeman") )

## mean indegree and outdegree 
mean ( degree(info, gmode = "digraph", cmode = "indegree") )
mean ( degree(info, gmode = "digraph", cmode = "outdegree") )
## since the number of indegrees equals the number of outdegrees
## the mean indegree and outdegree are the same 

## plotting degree distribution 
par( mfrow = c(1,2) )
plot( table( degree(info, gmode = "digraph", cmode = "indegree") ), 
      main = "indegree distribution", ylab = "" )
plot( table( degree(info, gmode = "digraph", cmode = "outdegree") ), 
      main = "outdegree distribution", ylab = "" )
par( mfrow = c(1,1) )

## note that for indegree, we observe a few outliers 
## these actors (esp. Ministry of Environment with 43 ties) are "targeted" by others
## in attempts to influence the actors' position

## for outdegree, we observed 11 actors not sending any ties, i.e.
## not providing any expert information to others

## both distributions are right-skewed, which is typical for many social networks
## this indicates uneven distribution of ties within the network (centralization)

## now, let's make a quick overview of the two dyadic covariates:
## collaboration and perceived influence networks

gden(collab, mode = "digraph") # collaboration network density
gden(infl, mode = "digraph") # perceived influence network density

## mean degree 
mean ( degree(collab, gmode = "digraph", cmode = "freeman") )
mean ( degree(infl, gmode = "digraph", cmode = "freeman") )

## indegree and outdegree distributions 
par( mfrow = c(2,2) )
plot( table( degree(collab, gmode = "digraph", cmode = "indegree") ), 
      main = "collaboration: indegree distribution", ylab = "" )
plot( table( degree(collab, gmode = "digraph", cmode = "outdegree") ), 
      main = "collaboration: outdegree distribution", ylab = "" )
plot( table( degree(infl, gmode = "digraph", cmode = "indegree") ), 
      main = "influence: indegree distribution", ylab = "" )
plot( table( degree(infl, gmode = "digraph", cmode = "outdegree") ), 
      main = "influence: outdegree distribution", ylab = "" )
par( mfrow = c(1,1) )

## finally, let's look at network correlations
## the correlation magnitude indicates potential entrainment effects
gcor(info, collab)
gcor(info, infl)
gcor(infl, collab)
## these results indicate there might be a stronger entrainment based on 
## collaboration than influence

## however, all such assumptions need to be tested by using ERGM to control 
## for other possible confounding effects

### ---- ERGM ----

help("ergm-terms") # overview of ERGM effects
search.ergmTerms(keyword = "homophily") # looking for specific terms
?control.ergm # algorithmic settings

### ---- model 0 ----

### model 0 includes only edges (intercept-like) term

## using ergm function for estimation
model0 <- ergm(infoNet ~ edges)
summary(model0) # overview of the model

## returns coefficient values
model0$coefficients # includes only intercept (edges term)

## converting from logarithmic odds (logits) to probabilities
exp(model0$coefficients) / ( 1 + exp(model0$coefficients) )
plogis(model0$coefficients) # same result
## note that the 0.17 probability of edge occurrence corresponds with
## the observed density
gden(infoNet, mode = "digraph")
## the empty or null model thus captures a baseline tendency of tie formation

### ---- model 1 ----

### model 1 adds mutual term capturing reciprocity

## summary function can be used also to calculate count of configurations
summary(infoNet ~ mutual) # returns number of observed reciprocated dyads
dyad.census(infoNet) # same result

## the number of ties in reciprocal (mutual) dyads can be obtained as
## the sum of the product of a matrix with its transpose
sum( info * t(info) ) 
## the number of reciprocal dyads is then half of this value
sum( info * t(info) ) / 2

## using ergm function for estimation 
model1 <- ergm(infoNet ~ edges + mutual, 
               # sets a seed to make the output reproducible
               control = control.ergm(seed = 42) 
              )
summary(model1)

## what is the probability that newly formed tie will form reciprocated dyad?
model1$coefficients # we need to consider both edges and mutual coefficient
## thus we sum up the coefficients of the edges and mutual terms
exp( sum(model1$coefficients) ) / ( 1 + exp( sum(model1$coefficients) ) ) 
## or more succinctly with the plogis function
plogis( sum(model1$coefficients) )
## the investigated probability is about 40% 

### ---- model 2 ----

### model 2 adds collaboration and perceived influence entertainment mechanisms

## entertainment is a dyadic mechanism operationalized by 
## edge covariate (edgecov) term
model2 <- ergm(infoNet ~ edges + mutual +
                 # 1st argument: adjacency matrix, 2nd argument: name of the effect
                 # collaboration-based entrainment
                 edgecov(collab, attrname = 'collaboration') +
                 # influence-based entrainment
                 edgecov(infl, attrname = 'influence'),
               control = control.ergm(seed = 42)
               )
summary(model2)
## reciprocity (mutual) and two entrainment effects are in operation

## let's evaluate the model convergence
mcmc.diagnostics(model2) 
## mcmc.diagnostic returns both printed and graphical output

## most importantly, we look at the "P-val." in the printed output 
## values > 0.05 indicate that the sample statistics 
## are not statistically significantly different from the observed network
## except reciprocity (mutual)
## in the graphical output, there needs to be a "zigzag" pattern at trace plots
## and bell-like curves at histograms 

### ---- model 3 ----

### model 3 adds attribute mechanisms, specifically:

## attractivity: tendency of actors' with a certain attribute to receive more or less ties 
## activity: tendency of actors' with a certain attribute to send more or less ties 
## homophily: tendency of actors' with a shared attribute to be mutually tied

model3 <- ergm(infoNet ~ edges + mutual +
                 # 1st argument: adjacency matrix, 2nd argument: name of the effect
                 # collaboration-based entrainment
                 edgecov(collab, attrname = 'collaboration') +
                 # influence-based entrainment
                 edgecov(infl, attrname = 'influence') +
                 # attractivity based on org type (reference category: ENGOs)
                 nodeifactor('orgType') + 
                 # activity based on org type (reference category: ENGOs)
                 nodeofactor('orgType') + 
                 # inter-organizational (categorical) homophily
                 nodematch('orgType') +
                 # control term (nodal covariate) for belief/ideological homophily
                 nodecov('beliefs') + 
                 # belief homophily: absolute difference of attribute values
                 absdiff('beliefs'),
               # init argument sets coefficients from the loaded model as initial values 
               control = control.ergm(seed = 42, init = coef(model3))
               )
summary(model3)
## all the mechanisms included in the previous model are present
## plus some of the attribute mechanisms, in comparison to ENGOs:
## lower attractivity of parties and marginally professional NGOs
## lower activity of industry orgs, parties, research orgs, and state orgs
## belief homophily, note that the coefficient is negative
## it means that the greater is the absolute difference in actors' beliefs, 
## the less likely is the tie formation between them

## let's examine convergence of the model
mcmc.diagnostics(model3)
## we see p-values < 0.05 for several model terms, specifically:  
## NGO-based attractivity (nodeifactor.orgType.ngo), 
## party-based attractivity (nodeifactor.orgType.party), 
## state-based attractivity (nodeifactor.orgType.state), 
## research-based attractivity (nodeifactor.orgType.research), 
## industry-based activity (nodeofactor.orgType.industry)
## party-based activity (nodeofactor.orgType.party)
## state-based activity (nodeofactor.orgType.state)
## inter-organizational homophily (nodematch.orgType)

## since the estimates in the mcmc.diagnostics typically understate model performance
## let's evaluate also the goodness-of-fit

## here we include also global network properties that are not specified by model
## (of course, it is going to be inadequate considering the convergence diagnostics)
m3gof <- gof(model3)
m3gof # printed output for goodness-of-fit statistics
# interpretation is analogical: p-values < 0.05 indicate 
## that given configurations and properties are not adequately represented
## note that fit of the model terms improved: all the corresponding p-values are > 0.05
## this is because MCMC diagnostics are calculated on the last round of simulation, 
## not the final computation of parameter estimates 
## see the last paragraph of the mcmc.diagnostics function

## graphical overview is typically quicker 
plot(m3gof)
## the black line represents the observed value of statistics
## the box plots represent the values of statistics from simulated distribution
## optimally, the black line goes through the center of the box plots
## this is what we observe in the model statistics
## while for outdegree or edge-wise share partners we observe marked deviations
## the conclusion is that the model does not represent observed network well

### ---- model 4 ----

### model 4 adds structural (endogenous) mechanisms, specifically:

## cumulative advantage on incoming and outgoing ties
## gwidegree: positive values indicate indegree decentralization, negative centralization
## gwodegree: positive values indicate outdegree decentralization, negative centralization
## geometrically weighted transitive dyad-wise shared partners (nested transitive 2-paths)
## dgwdsp ("OTP"): positive values indicate open nested transitive triangles, negative lack of them
## geometrically weighted transitive edge-wise shared partners (nested transitive triangles) 
## dgwesp ("OTP"): positive values indicate closed nested transitive triangles, negative lack of them
## geometrically weighted cyclical edge-wise shared partners (nested cyclical triangles) 
## dgwesp ("ITP"): positive values indicate closed nested cyclical triangles, negative lack of them

## therefore:
## transitive closure = positive dgwesp("OTP") and negative dgwdsp("OTP")
## transitive closure indicates hierarchy
## cyclical closure = positive dgwesp("ITP") and negative dgwdsp("OTP")
## cyclical closure indicates lack of hierarchy

model4 <- ergm(infoNet ~ edges + mutual +
                 # 1st argument: adjacency matrix, 2nd argument: name of the effect
                 # collaboration-based entrainment
                 edgecov(collab, attrname = 'collaboration') +
                 # influence-based entrainment
                 edgecov(infl, attrname = 'influence') +
                 # attractiveness based on org type (reference category: ENGOs)
                 nodeifactor('orgType') + 
                 # activity based on org type (reference category: ENGOs)
                 nodeofactor('orgType') + 
                 # inter-organizational (categorical) homophily
                 nodematch('orgType') +
                 # control term (nodal covariate) for belief/ideological homophily
                 nodecov('beliefs') + 
                 # belief homophily: absolute difference of attribute values
                 absdiff('beliefs') +
                 # cumulative advantage (centralization) on indegree
                 gwidegree(fixed = T, decay = 0.7) + 
                 # cumulative advantage (centralization) on outdegree
                 gwodegree(fixed = T, decay = 0.7) +
                 # nested transitive 2-paths (open triangles)
                 dgwdsp(fixed = T, decay = 0.7, type = "OTP") +
                 # nested cyclical closed triangles
                 dgwesp(fixed = T, decay = 0.7, type = "OTP") + 
                 # nested transitive closed triangles
                 dgwesp(fixed = T, decay = 0.7, type = "ITP"),
              # increasing sample size to 3000 (more accurate estimation)
              control = control.ergm(seed = 42, init = coef(model4), MCMC.samplesize = 3000) 
              )

## before interpreting the model, we should start with the diagnostics
## let's first look at the convergence
mcmc.diagnostics(model4)
## we see p-values < 0.05 for several model terms, specifically:
## influence-based entrainment (edgecov.influence)
## industry-based attractivity (nodeifactor.orgType.industry)
## research-based attractivity (nodeifactor.orgType.research)
## industry-based activity (nodeofactor.orgType.industry)
## NGO-based activity (nodeofactor.orgType.ngo)
## inter-organizational homophily (nodematch.orgType)
## lower-order control (nodal covariate) for belief homophily (nodecov.beliefs)
## belief homophily (absdiff.beliefs)
## and all structural configurations except cumulative advantage on outdegre (gwodeg.fixed.0.7)
## and reciprocity (mutual)

## however, since this is not the final refinement of the parameter estimates
## let's continue with the evaluation of goodness-of-fit
m4gof <- gof(model4)
m4gof
plot(m4gof)
## none of the terms included in the model is statistically significantly (s.s.) different
## from the observed network (p-value < 0.05; black line goes through the boxplots)
## global properties from the simulated distribution exhibit just slight deviations
## this applies especially to higher values of edge-wise shared partners
## overall, the model captures adequately the observed network structure
## i.e., the included mechanisms are sufficient to explain the tie formation in the network
## since the goodness-of-fit is adequate, we can continue with the results interpretation

summary(model4)
## as in the previous models, we see that both entrainment mechanisms and
## reciprocity increase the probability of tie formation
## regarding the attribute mechanisms, in comparison to ENGOs (reference category)
## political parties are less likely to send as well as receive ties to others
## professional NGOs are marginally less likely to receive ties, and 
## state organizations are less likely to send ties to others
## both homophily mechanisms are present, i.e.: 
## organizations of the same type are more likely be tied (to exchange expert info)
## organizations with similar beliefs are more likely be tied 
## as for the structural mechanisms, there are tendencies to:
## reciprocity (included in all the previous non-null models)
## negative cumulative advantage on indegree (decentralization), i.e.
## low-degree nodes are more likely to received another tie than high-degree nodes
## transitive closure given by the s.s. positive effect of dgwesp("OTP"), i.e.
## geometrically weighted (gw) transitive edge-wise shared partners (nested transitive triangles)
## and negative effect of gw transitive dyad-wise shared partners (nested transitive 2-paths)
## the gw cyclical edge-wise shared partners and cumulative advantage on outdegree 
## are not present

### ---- summarizing table ----

### finally, let's produce an overview of all 5 model specifications

library(texreg)

## plotting a table including all 5 models
screenreg( list(model0, model1, model2, model3, model4), single.row = TRUE)

# ---- end of the script ----