User Tools

Site Tools


sna_r

Contributed by Michal Kosinski - worked for me! Update the wiki if you corrected / improved this code - and let me know, please!

  1. Import fb_friendship to your mySQL server
  2. create a result table
  CREATE TABLE sna (
    id int(11) NOT NULL AUTO_INCREMENT,
    userid bigint(20) unsigned DEFAULT NULL,
    network_size int(11) DEFAULT NULL,
    network_size_inc int(11) DEFAULT NULL,
    betweenness float DEFAULT NULL,
    n_betweenness float DEFAULT NULL,
    density float DEFAULT NULL,
    brokerage float DEFAULT NULL,
    nbrokerage float DEFAULT NULL,
    transitivity float DEFAULT NULL,
    done int(11) NOT NULL DEFAULT '0',
    PRIMARY KEY (id),
    UNIQUE KEY uix (userid)
    ) ENGINE=MyISAM;
  1. Copy into sna table the list of userids you want to analyse - choose only those that you really need - e.g. have personality scores for (it takes some time!)
  2. Use this R code to extract egocentric networks from fb_friendships table. you can also uncomment the part saving files for UCINET. Mind you - filer out networks that are too small/ too large or of too low / too large density - it might be indicative of incomplete list of nodes for a given ego.
####### libraries
require(RODBC)
rm(list=ls())
#sink(file="c:\\MPcooked\\sink.txt")
results <- NULL;
options(digits=22)

#### gets the list of tasks
con<-odbcConnect("Rodbc", uid="root", pwd="[YOUR PASS]")  #### connect with your SQL server, provide the password
idlist<- sqlQuery(con, "SELECT userid FROM sna WHERE done = 0 and network_size_inc > 10 and network_size_inc < 2000 order by network_size_inc asc")     ##### selects only those that were not done yet
#idlist <- fetch(rs,-1)
#sqlDrop(rs)
idlist <- idlist[,1]
print(paste("number of egos:",length(idlist)))
odbcClose(con)

#### starts workers
require(doSMP)
rmSessions(all.names=TRUE)
workers <- startWorkers(6)
registerDoSMP(workers)

### main loop

foreach (tasks=1:length(idlist)) %dopar% {

## Parralel connections
  library(RODBC)
  library(sna)
  con<-odbcConnect("Rodbc", uid="root", pwd="[YOUR PASS]")

   rid <- idlist[tasks]
   Fetch<- sqlQuery(con, paste("SELECT uid2 FROM fb_friendship WHERE uid1 = ",  rid, " UNION SELECT uid1 FROM fb_friendship WHERE uid2 = ",  rid, sep=""))
   if (nrow(Fetch)> 1){
  
   Fetch<-Fetch[,1]
   fno <- length(Fetch) # NUMBER OF FRIENDS
   ### only less than 2000 friends
   if (fno > 10 & fno<2000){
   network_size <- fno + 1 # NETWORK SIZE
   pairs <- fno*(fno-1)   # possible connections except ego - note that it is bidirectional network so do not divide by two!!!!!
  
   # GET friends connection from DB
   infriends <- paste("(", rid, ",", paste(Fetch, collapse=","),")", sep="")
   d <-  sqlQuery(con, paste("SELECT uid1, uid2 FROM fb_friendship WHERE uid1 IN", infriends  ,"AND uid2 IN", infriends ))
 
   d<-unique(d)    ### added to remove duplicates
   if (nrow(d)>1.1*fno & nrow(d)<50000 ){
   print(paste("EGO:", rid, "Number of connections:", nrow(d), "Number of friends:", fno))
   #prepare matrix for SNA
    rownames(d)<-NULL
    colnames(d)<-c("f1", "f2")
  ##### renaming connections so tuhey start from 1
    temp<-(cbind(c(rid,Fetch), 1:network_size))
    colnames(temp)<-c("f", "id")
    
    newd <- merge(d, temp, by.x="f1", by.y="f")
    newd <- merge(newd, temp, by.x="f2", by.y="f") 
    d<-newd[c(3,4)]
     
    ego<-unique(newd[newd[,"f1"]==rid, 3])    # ego_id_as coded in a network
    
    
    d<-rbind(as.matrix(d), as.matrix(d[,c(2,1)])) ##### reverse connections?
    d<-unique(d)    ### added to remove duplicates
    d<-cbind(d, 1) # add 1 to make sna happy
  
    d<-as.matrix(d)
    attr(d, "n") <- network_size
 
    ##### save for ucinet
#   filename<-paste("c:\\MPcooked\\networks\\", rid, ".txt", sep="") 
#   line1<- paste("DL n=" , network_size, "format = edgelist1 \n data:") 
#   write.table(line1, file = filename, quote = F, append = F, sep = " ", row.names = F, col.names = F)
#   write.table(d[,c(1,2)], file = filename, quote = F, append = T, sep = " ", row.names = F, col.names = F)
    
####### clean up before caculations
    rm(temp)
    rm(newd)
  
  # sanity check
  
   if (network_size==length(unique(c(d[,1], d[,2])))){
   
    #### density
    dens <- gden(d, mode="digraph", ignore.eval=TRUE)
  
    #### betweenness
    b <- betweenness(d, gmode="digraph", nodes= ego)/2 ### to keep it compatibile with UCI
    bnorm <- b*2/pairs # NORMALIZED B = B/PAIRS BY TWO - UNDIRECTED
  
    if (length(b)<1){b<-0
                     bnorm<-0}
    
    ##### brokerage == number of disconnected pairs
    brok<-(pairs-(nrow(d)-2*fno))/2  ### # of pairs - number of connections without conenctions with ego
  
    ###### transitivity "strong", "weakcensus", "strongcensus"
    gt <- gtrans(d, mode="digraph", measure = "weak")
  
    #### results
   query.result <- sqlQuery(con, paste(
   "UPDATE sna SET network_size=", network_size, 
   ", betweenness=", b ,
   ", n_betweenness=", bnorm*100,
   ", density=", dens,
   ", brokerage=", brok, 
   ", nbrokerage=", brok/pairs, 
   ", transitivity=", gt,
   ", done= 1 WHERE userid=", rid))
  } else {print(paste("EGO:", rid, "ERROR, FRIENDS MATRIX PROBLEM, network_size:", network_size, "NODES IN MATRIX:", length(unique(c(d[,1], d[,2])))))
          query.result <- sqlQuery(con, paste("UPDATE sna SET done= 102,network_size =",network_size," WHERE userid=", rid))}
  } else {print(paste("EGO:", rid, "ERROR, CONNECTIONS OUT OF BOUND:", nrow(d)))
          query.result <- sqlQuery(con, paste("UPDATE sna SET done= 101, network_size =",network_size," WHERE userid=", rid))}
  } else {print(paste("EGO:", rid, "ERROR, TOO MANY FRIENDS:", fno))
          query.result <- sqlQuery(con, paste("UPDATE sna SET done= 100 WHERE userid=", rid))}
  } else {print(paste("EGO:", rid, "ERROR, FETCH EMPTY"))
          query.result <- sqlQuery(con, paste("UPDATE sna SET done= 99 WHERE userid=", rid))}
  print(paste("cleaning up... MEM SIZE:", memory.size()))
  rm(d)
  rm(Fetch)
 odbcClose(con)
 for (i in 1:10) {gc()}
 }
  
 stopWorkers(workers)
 rmSessions(all.names=TRUE)
sna_r.txt · Last modified: 2012/10/15 14:49 by Michal Kosinski