#import urllib.request as urllib class Community ( ) : ''' use set operation to optimize calculation ''' def __init__ ( self , G , alpha = 1.0 ) : self . _G = G self . _alpha = alpha self . _nodes = set ( ) self . _k_in = 0 self . _k_out = 0 def add_node ( self , node ) : neighbors = set ( self . _G . neighbors ( node ) ) #print("添加令居节点",neighbors , self._nodes,neighbors & self._nodes) node_k_in = len ( neighbors & self . _nodes ) #neighbor和self._nodes公有节点的数目存入node_k_in #print("node_k_in",node_k_in) node_k_out = len ( neighbors ) - node_k_in #print("node_k_out",node_k_out) self . _nodes . add ( node ) self . _k_in += 2 * node_k_in self . _k_out = self . _k_out + node_k_out - node_k_in def remove_node ( self , node ) : neighbors = set ( self . _G . neighbors ( node ) ) community_nodes = self . _nodes #print("community_nodes",community_nodes) node_k_in = len ( neighbors & community_nodes ) node_k_out = len ( neighbors ) - node_k_in self . _nodes . remove ( node ) self . _k_in -= 2 * node_k_in self . _k_out = self . _k_out - node_k_out + node_k_in def cal_add_fitness ( self , node ) : #fitness适应度 neighbors = set ( self . _G . neighbors ( node ) ) old_k_in = self . _k_in old_k_out = self . _k_out vertex_k_in = len ( neighbors & self . _nodes ) #vertex顶点 vertex_k_out = len ( neighbors ) - vertex_k_in new_k_in = old_k_in + 2 * vertex_k_in new_k_out = old_k_out + vertex_k_out - vertex_k_in new_fitness = new_k_in / ( new_k_in + new_k_out ) ** self . _alpha #幂次 old_fitness = old_k_in / ( old_k_in + old_k_out ) ** self . _alpha return new_fitness - old_fitness def cal_remove_fitness ( self , node ) : neighbors = set ( self . _G . neighbors ( node ) ) new_k_in = self . _k_in new_k_out = self . _k_out node_k_in = len ( neighbors & self . _nodes ) node_k_out = len ( neighbors ) - node_k_in old_k_in = new_k_in - 2 * node_k_in old_k_out = new_k_out - node_k_out + node_k_in old_fitness = old_k_in / ( old_k_in + old_k_out ) ** self . _alpha new_fitness = new_k_in / ( new_k_in + new_k_out ) ** self . _alpha return new_fitness - old_fitness def recalculate ( self ) : for vid in self . _nodes : fitness = self . cal_remove_fitness ( vid ) if fitness < 0.0 : return vid return None def get_neighbors ( self ) : neighbors = set ( ) for node in self . _nodes : neighbors . update ( set ( self . _G . neighbors ( node ) ) - self . _nodes ) return neighbors def get_fitness ( self ) : return float ( self . _k_in ) / ( ( self . _k_in + self . _k_out ) ** self . _alpha ) class LFM ( ) : def __init__ ( self , G , alpha ) : self . _G = G self . _alpha = alpha def execute ( self ) : communities = [ ] print ( "嘿嘿" , list ( self . _G . node . keys ( ) ) ) print ( "---------------------" ) node_not_include = list ( self . _G . node . keys ( ) ) while ( len ( node_not_include ) != 0 ) : c = Community ( self . _G , self . _alpha ) #print("self._alpha",self._alpha)#0.9 # randomly select a seed node seed = random . choice ( node_not_include ) c . add_node ( seed ) print ( "随机选取节点是:" , seed ) to_be_examined = c . get_neighbors ( ) print ( "c.get_neighbors()" , c . get_neighbors ( ) ) while ( to_be_examined ) : #largest fitness to be added m = { } for node in to_be_examined : fitness = c . cal_add_fitness ( node ) #计算点的适应度》0加入,小于0删除 m [ node ] = fitness to_be_add = sorted ( m . items ( ) , key = lambda x : x [ 1 ] , reverse = True ) [ 0 ] #啥意思??? #适应度降序排列 #stop condition if ( to_be_add [ 1 ] < 0.0 ) : break c . add_node ( to_be_add [ 0 ] ) to_be_remove = c . recalculate ( ) while ( to_be_remove != None ) : c . remove_node ( to_be_remove ) to_be_remove = c . recalculate ( ) to_be_examined = c . get_neighbors ( ) for node in c . _nodes : if ( node in node_not_include ) : node_not_include . remove ( node ) communities . append ( c . _nodes ) return communities if ( __name__ == "__main__" ) : #G = nx.karate_club_graph()#一个边集一个点集 # G = nx.florentine_families_graph() zf = zipfile . ZipFile ( 'football.zip' ) # zipfile object txt = zf . read ( 'football.txt' ) . decode ( ) # read info file gml = zf . read ( 'football.gml' ) . decode ( ) # read gml data # throw away bogus first line with # from mejn files gml = gml . split ( '\n' ) [ 1 : ] G = nx . parse_gml ( gml ) # parse gml data print ( txt ) # print degree for each team - number of games for n , d in G . degree ( ) : print ( '%s %d' % ( n , d ) ) options = { 'node_color' : 'red' , 'node_size' : 50 , 'line_color' : 'grey' , 'linewidths' : 0 , 'width' : 0.1 , nx . draw ( G , ** options ) #networkx.draw(G, with_labels=True) plt . show ( ) algorithm = LFM ( G , 0.9 ) communities = algorithm . execute ( ) for c in communities : print ( len ( c ) , sorted ( c ) )

社区网络:
初步社区网络结构图
重叠社区划分结果:
社区划分结果

基于LFM的重叠社区发现算法python代码实现import randomimport networkx as nximport matplotlib.pyplot as pltimport zipfile#import urllib.request as urllibclass Community(): ''' use set operation to optimize ca... 解决java.lang.IllegalArgumentException: Result Maps collection does not contain value for com.example.