### ---- set up ---- rm(list=ls()) gc() library(ergm) ; library(sna) ### ---- data ---- ### loading R environment: includes data and model results load("Diviak_Ocelik_data.RData") ## when using this dataset, make sure that you use the complete bibliographical citation ## more complex models (models 3 and 4) take substantially more time to compute ## it might be therefore more convenient to inspect the models from the saved objects ## note that we used 4.1.2 version for model fitting; you may get somewhat different ## results when using the more recent versions of the ergm package ### relational data stored in 68x68 adjacency matrices (binary directed networks) str(info) # expert information exchange network info str(collab) # political collaboration network collab str(infl) # perceived influence network infl ### attribute data stored as case-by-variable matrices head(att) ## political beliefs scores measured on continuous scale <0,1> ## 0 = very strong pro-coal position ## 1 = very strong anti-coal position ## organizational type attribute is categorical table(att$orgType) ### network data stored as a network object or matrices ## converting a network matrix into a network object ## allows to include dyadic and nodal attributes used in the modelling infoNet <- network(info, directed = T) infoNet ## information network is a dependent variable ## collaboration and influence networks will be used as dyadic covariates ## it is sufficient to keep them as adjacency matrices ## attaching nodal attributes to the information network infoNet %v% "beliefs" <- att$beliefs infoNet %v% "orgType" <- att$orgType infoNet # network object now includes also nodal attributes ## accessing nodal (vertex) attributes ## vector of policy belief scores for each organization get.vertex.attribute(infoNet, "beliefs") ## vector of organizational type categories for each organization get.vertex.attribute(infoNet, "orgType") ### ---- descriptives ---- ### let's look at basic descriptive measures ## starting with our dependent variable: expert information network dyad.census(info) # dyad census ## note there are 157 mutual dyads including 314 ties ## 314 reciprocal ties + 473 asymmetric ties = 787 ties in total triad.census(info, mode = "digraph") # triad census ## 030T shows the number of transitive triads ## 030C shows the number of cyclic triads ## network density (number of observed ties / number of all possible ties) gden(info, mode = "digraph") ## reciprocity measured as a share of the reciprocal ties on the total number of ties grecip(info, measure = "edgewise") 2*dyad.census(info)[1] / sum(info) ## transitivity measured as an average clustering coefficient ## it takes a mean value of all nodes' neighborhood densities ## the measure is not implemented in the sna package, thus we use igraph package info.igraph <- igraph::graph_from_adjacency_matrix(info, mode = "directed") igraph::transitivity(info.igraph, type = "average") ## calculating average degree ## mean total degree (includes both incoming and outgoing ties) mean ( degree(info, gmode = "digraph", cmode = "freeman") ) ## on average, actors are directly connected to 23 out of 68 (34%) nodes ## standard deviation of the total degree sd ( degree(info, gmode = "digraph", cmode = "freeman") ) ## mean indegree and outdegree mean ( degree(info, gmode = "digraph", cmode = "indegree") ) mean ( degree(info, gmode = "digraph", cmode = "outdegree") ) ## since the number of indegrees equals the number of outdegrees ## the mean indegree and outdegree are the same ## plotting degree distribution par( mfrow = c(1,2) ) plot( table( degree(info, gmode = "digraph", cmode = "indegree") ), main = "indegree distribution", ylab = "" ) plot( table( degree(info, gmode = "digraph", cmode = "outdegree") ), main = "outdegree distribution", ylab = "" ) par( mfrow = c(1,1) ) ## note that for indegree, we observe a few outliers ## these actors (esp. Ministry of Environment with 43 ties) are "targeted" by others ## in attempts to influence the actors' position ## for outdegree, we observed 11 actors not sending any ties, i.e. ## not providing any expert information to others ## both distributions are right-skewed, which is typical for many social networks ## this indicates uneven distribution of ties within the network (centralization) ## now, let's make a quick overview of the two dyadic covariates: ## collaboration and perceived influence networks gden(collab, mode = "digraph") # collaboration network density gden(infl, mode = "digraph") # perceived influence network density ## mean degree mean ( degree(collab, gmode = "digraph", cmode = "freeman") ) mean ( degree(infl, gmode = "digraph", cmode = "freeman") ) ## indegree and outdegree distributions par( mfrow = c(2,2) ) plot( table( degree(collab, gmode = "digraph", cmode = "indegree") ), main = "collaboration: indegree distribution", ylab = "" ) plot( table( degree(collab, gmode = "digraph", cmode = "outdegree") ), main = "collaboration: outdegree distribution", ylab = "" ) plot( table( degree(infl, gmode = "digraph", cmode = "indegree") ), main = "influence: indegree distribution", ylab = "" ) plot( table( degree(infl, gmode = "digraph", cmode = "outdegree") ), main = "influence: outdegree distribution", ylab = "" ) par( mfrow = c(1,1) ) ## finally, let's look at network correlations ## the correlation magnitude indicates potential entrainment effects gcor(info, collab) gcor(info, infl) gcor(infl, collab) ## these results indicate there might be a stronger entrainment based on ## collaboration than influence ## however, all such assumptions need to be tested by using ERGM to control ## for other possible confounding effects ### ---- ERGM ---- help("ergm-terms") # overview of ERGM effects search.ergmTerms(keyword = "homophily") # looking for specific terms ?control.ergm # algorithmic settings ### ---- model 0 ---- ### model 0 includes only edges (intercept-like) term ## using ergm function for estimation model0 <- ergm(infoNet ~ edges) summary(model0) # overview of the model ## returns coefficient values model0$coefficients # includes only intercept (edges term) ## converting from logarithmic odds (logits) to probabilities exp(model0$coefficients) / ( 1 + exp(model0$coefficients) ) plogis(model0$coefficients) # same result ## note that the 0.17 probability of edge occurrence corresponds with ## the observed density gden(infoNet, mode = "digraph") ## the empty or null model thus captures a baseline tendency of tie formation ### ---- model 1 ---- ### model 1 adds mutual term capturing reciprocity ## summary function can be used also to calculate count of configurations summary(infoNet ~ mutual) # returns number of observed reciprocated dyads dyad.census(infoNet) # same result ## the number of ties in reciprocal (mutual) dyads can be obtained as ## the sum of the product of a matrix with its transpose sum( info * t(info) ) ## the number of reciprocal dyads is then half of this value sum( info * t(info) ) / 2 ## using ergm function for estimation model1 <- ergm(infoNet ~ edges + mutual, # sets a seed to make the output reproducible control = control.ergm(seed = 42) ) summary(model1) ## what is the probability that newly formed tie will form reciprocated dyad? model1$coefficients # we need to consider both edges and mutual coefficient ## thus we sum up the coefficients of the edges and mutual terms exp( sum(model1$coefficients) ) / ( 1 + exp( sum(model1$coefficients) ) ) ## or more succinctly with the plogis function plogis( sum(model1$coefficients) ) ## the investigated probability is about 40% ### ---- model 2 ---- ### model 2 adds collaboration and perceived influence entertainment mechanisms ## entertainment is a dyadic mechanism operationalized by ## edge covariate (edgecov) term model2 <- ergm(infoNet ~ edges + mutual + # 1st argument: adjacency matrix, 2nd argument: name of the effect # collaboration-based entrainment edgecov(collab, attrname = 'collaboration') + # influence-based entrainment edgecov(infl, attrname = 'influence'), control = control.ergm(seed = 42) ) summary(model2) ## reciprocity (mutual) and two entrainment effects are in operation ## let's evaluate the model convergence mcmc.diagnostics(model2) ## mcmc.diagnostic returns both printed and graphical output ## most importantly, we look at the "P-val." in the printed output ## values > 0.05 indicate that the sample statistics ## are not statistically significantly different from the observed network ## except reciprocity (mutual) ## in the graphical output, there needs to be a "zigzag" pattern at trace plots ## and bell-like curves at histograms ### ---- model 3 ---- ### model 3 adds attribute mechanisms, specifically: ## attractivity: tendency of actors' with a certain attribute to receive more or less ties ## activity: tendency of actors' with a certain attribute to send more or less ties ## homophily: tendency of actors' with a shared attribute to be mutually tied model3 <- ergm(infoNet ~ edges + mutual + # 1st argument: adjacency matrix, 2nd argument: name of the effect # collaboration-based entrainment edgecov(collab, attrname = 'collaboration') + # influence-based entrainment edgecov(infl, attrname = 'influence') + # attractivity based on org type (reference category: ENGOs) nodeifactor('orgType') + # activity based on org type (reference category: ENGOs) nodeofactor('orgType') + # inter-organizational (categorical) homophily nodematch('orgType') + # control term (nodal covariate) for belief/ideological homophily nodecov('beliefs') + # belief homophily: absolute difference of attribute values absdiff('beliefs'), # init argument sets coefficients from the loaded model as initial values control = control.ergm(seed = 42, init = coef(model3)) ) summary(model3) ## all the mechanisms included in the previous model are present ## plus some of the attribute mechanisms, in comparison to ENGOs: ## lower attractivity of parties and marginally professional NGOs ## lower activity of industry orgs, parties, research orgs, and state orgs ## belief homophily, note that the coefficient is negative ## it means that the greater is the absolute difference in actors' beliefs, ## the less likely is the tie formation between them ## let's examine convergence of the model mcmc.diagnostics(model3) ## we see p-values < 0.05 for several model terms, specifically: ## NGO-based attractivity (nodeifactor.orgType.ngo), ## party-based attractivity (nodeifactor.orgType.party), ## state-based attractivity (nodeifactor.orgType.state), ## research-based attractivity (nodeifactor.orgType.research), ## industry-based activity (nodeofactor.orgType.industry) ## party-based activity (nodeofactor.orgType.party) ## state-based activity (nodeofactor.orgType.state) ## inter-organizational homophily (nodematch.orgType) ## since the estimates in the mcmc.diagnostics typically understate model performance ## let's evaluate also the goodness-of-fit ## here we include also global network properties that are not specified by model ## (of course, it is going to be inadequate considering the convergence diagnostics) m3gof <- gof(model3) m3gof # printed output for goodness-of-fit statistics # interpretation is analogical: p-values < 0.05 indicate ## that given configurations and properties are not adequately represented ## note that fit of the model terms improved: all the corresponding p-values are > 0.05 ## this is because MCMC diagnostics are calculated on the last round of simulation, ## not the final computation of parameter estimates ## see the last paragraph of the mcmc.diagnostics function ## graphical overview is typically quicker plot(m3gof) ## the black line represents the observed value of statistics ## the box plots represent the values of statistics from simulated distribution ## optimally, the black line goes through the center of the box plots ## this is what we observe in the model statistics ## while for outdegree or edge-wise share partners we observe marked deviations ## the conclusion is that the model does not represent observed network well ### ---- model 4 ---- ### model 4 adds structural (endogenous) mechanisms, specifically: ## cumulative advantage on incoming and outgoing ties ## gwidegree: positive values indicate indegree decentralization, negative centralization ## gwodegree: positive values indicate outdegree decentralization, negative centralization ## geometrically weighted transitive dyad-wise shared partners (nested transitive 2-paths) ## dgwdsp ("OTP"): positive values indicate open nested transitive triangles, negative lack of them ## geometrically weighted transitive edge-wise shared partners (nested transitive triangles) ## dgwesp ("OTP"): positive values indicate closed nested transitive triangles, negative lack of them ## geometrically weighted cyclical edge-wise shared partners (nested cyclical triangles) ## dgwesp ("ITP"): positive values indicate closed nested cyclical triangles, negative lack of them ## therefore: ## transitive closure = positive dgwesp("OTP") and negative dgwdsp("OTP") ## transitive closure indicates hierarchy ## cyclical closure = positive dgwesp("ITP") and negative dgwdsp("OTP") ## cyclical closure indicates lack of hierarchy model4 <- ergm(infoNet ~ edges + mutual + # 1st argument: adjacency matrix, 2nd argument: name of the effect # collaboration-based entrainment edgecov(collab, attrname = 'collaboration') + # influence-based entrainment edgecov(infl, attrname = 'influence') + # attractiveness based on org type (reference category: ENGOs) nodeifactor('orgType') + # activity based on org type (reference category: ENGOs) nodeofactor('orgType') + # inter-organizational (categorical) homophily nodematch('orgType') + # control term (nodal covariate) for belief/ideological homophily nodecov('beliefs') + # belief homophily: absolute difference of attribute values absdiff('beliefs') + # cumulative advantage (centralization) on indegree gwidegree(fixed = T, decay = 0.7) + # cumulative advantage (centralization) on outdegree gwodegree(fixed = T, decay = 0.7) + # nested transitive 2-paths (open triangles) dgwdsp(fixed = T, decay = 0.7, type = "OTP") + # nested cyclical closed triangles dgwesp(fixed = T, decay = 0.7, type = "OTP") + # nested transitive closed triangles dgwesp(fixed = T, decay = 0.7, type = "ITP"), # increasing sample size to 3000 (more accurate estimation) control = control.ergm(seed = 42, init = coef(model4), MCMC.samplesize = 3000) ) ## before interpreting the model, we should start with the diagnostics ## let's first look at the convergence mcmc.diagnostics(model4) ## we see p-values < 0.05 for several model terms, specifically: ## influence-based entrainment (edgecov.influence) ## industry-based attractivity (nodeifactor.orgType.industry) ## research-based attractivity (nodeifactor.orgType.research) ## industry-based activity (nodeofactor.orgType.industry) ## NGO-based activity (nodeofactor.orgType.ngo) ## inter-organizational homophily (nodematch.orgType) ## lower-order control (nodal covariate) for belief homophily (nodecov.beliefs) ## belief homophily (absdiff.beliefs) ## and all structural configurations except cumulative advantage on outdegre (gwodeg.fixed.0.7) ## and reciprocity (mutual) ## however, since this is not the final refinement of the parameter estimates ## let's continue with the evaluation of goodness-of-fit m4gof <- gof(model4) m4gof plot(m4gof) ## none of the terms included in the model is statistically significantly (s.s.) different ## from the observed network (p-value < 0.05; black line goes through the boxplots) ## global properties from the simulated distribution exhibit just slight deviations ## this applies especially to higher values of edge-wise shared partners ## overall, the model captures adequately the observed network structure ## i.e., the included mechanisms are sufficient to explain the tie formation in the network ## since the goodness-of-fit is adequate, we can continue with the results interpretation summary(model4) ## as in the previous models, we see that both entrainment mechanisms and ## reciprocity increase the probability of tie formation ## regarding the attribute mechanisms, in comparison to ENGOs (reference category) ## political parties are less likely to send as well as receive ties to others ## professional NGOs are marginally less likely to receive ties, and ## state organizations are less likely to send ties to others ## both homophily mechanisms are present, i.e.: ## organizations of the same type are more likely be tied (to exchange expert info) ## organizations with similar beliefs are more likely be tied ## as for the structural mechanisms, there are tendencies to: ## reciprocity (included in all the previous non-null models) ## negative cumulative advantage on indegree (decentralization), i.e. ## low-degree nodes are more likely to received another tie than high-degree nodes ## transitive closure given by the s.s. positive effect of dgwesp("OTP"), i.e. ## geometrically weighted (gw) transitive edge-wise shared partners (nested transitive triangles) ## and negative effect of gw transitive dyad-wise shared partners (nested transitive 2-paths) ## the gw cyclical edge-wise shared partners and cumulative advantage on outdegree ## are not present ### ---- summarizing table ---- ### finally, let's produce an overview of all 5 model specifications library(texreg) ## plotting a table including all 5 models screenreg( list(model0, model1, model2, model3, model4), single.row = TRUE) # ---- end of the script ----