LocalGraphClustering
LocalGraphClustering copied to clipboard
Graph drawing tools
We have a number of common visualization patterns that we'd like to do. I've been using NetworkX for this, but this seems like overkill as there is a big translation between their ids and our ids, which makes the process slightly tedious and error-prone.
- standard graph drawing given xy or xyz coordinates for each vertex.
- a standard graph drawing (xy or xyz coords) for each vertex and a subset of nodes highlighted.
- a standard graph drawing (xy or xyz coords) for each vertex and a vector of data highlighted (e.g. a float value for each node).
G = GraphLocal()
Here xy and xyz are arrays with one row per vertex, holding 2 (xy) or 3 (xyz) coordinates.
G.draw(xy)
G.draw(xyz)
G.draw(xy, nodemarkersize=0)
G.draw(xy, set=S)
G.draw(xy, set=S)
G.draw(xy, values=f)
G.draw(xyz, groups=g) # this is a partition.
Parameters
G.draw(coords, ...)
coords: an n-by-2 or n-by-3 array with coordinates for each node of the graph.
Optional parameters:
alpha: [0, 1] the overall alpha scaling of the plot
nodealpha: [0, 1]
edgealpha:
setalpha:
nodecolor:
edgecolor:
setcolor:
nodesize:
linewidth:
ax=None (default) will create a new figure, or this will plot in ax if not None.
Return a dictionary with:
fig, ax, nodes, edges, setnodes, setedges, groupnodes, groupedges
these are the handles to the actual plot elements, so that you could change
values after the fact.
"""
Here is some of the code I'm using to do this now.
def ourncp(N, rholist=None):
    """Run the approximate-PageRank NCP computation on a NetworkX graph.

    ``N`` is converted with ``lgc.GraphLocal().from_networkx(N)``, wrapped in
    ``lgc.NCPData`` and handed to ``approxPageRank`` with ``deep``,
    ``neighborhoods`` and ``localmins`` switched off, ``nthreads=64`` and
    ``timeout=30``.

    Parameters
    ----------
    N : graph accepted by ``GraphLocal.from_networkx``.
    rholist : list of rho values forwarded to ``approxPageRank``.
        Defaults to ``[1e-2, 1e-3, 1e-4]``.

    Returns
    -------
    Whatever ``approxPageRank`` returns (used as the NCP object by the
    callers in this file).
    """
    # Build the default per call: a list literal in the signature would be
    # one shared object that any callee could mutate for later calls.
    if rholist is None:
        rholist = [1e-2, 1e-3, 1e-4]
    graph = lgc.GraphLocal().from_networkx(N)
    return lgc.NCPData(graph).approxPageRank(
        rholist=rholist, deep=False,
        neighborhoods=False, localmins=False, nthreads=64, timeout=30)
def ncp_min_feature_by_group_binned(df, feature, group, edges=None, nbins=50, log=False):
    """Bin the rows of ``df`` by ``df[group]`` and reduce every bin.

    Each bin is reduced with ``lgc.ncpplots._ncp_min(rows, feature)``.

    Parameters
    ----------
    df : pandas DataFrame containing the ``group`` column.
    feature : forwarded to ``lgc.ncpplots._ncp_min`` for each bin.
    group : name of the column whose values are binned.
    edges : optional bin edges for ``pd.cut``. When given they are used
        as-is and ``nbins`` / ``log`` are ignored.
    nbins : number of histogram bins used when ``edges`` is None.
    log : when truthy (and ``edges`` is None) the edges come from a histogram
        of ``log10`` of the values and are mapped back with ``10 ** edge``,
        i.e. they are evenly spaced on a log scale.

    Returns
    -------
    (reduced, edges) : the ``groupby(...).apply(...)`` result and the bin
        edges that were used, so a caller can reuse them for another data set.
    """
    if edges is None:
        # Only derive edges when none were supplied; this also avoids taking
        # log10 of the data for no reason when the caller brings its own bins.
        # The histogram edges do not depend on the order of the values, so no
        # sort is needed.
        values = df[group].values.astype(np.float64)
        if log:  # truthiness, so np.True_ and friends select the log path too
            edges = np.power(10.0, np.histogram(np.log10(values), bins=nbins)[1])
        else:
            edges = np.histogram(values, bins=nbins)[1]
    buckets = pd.cut(df[group], edges)
    reduced = df.groupby(buckets).apply(
        lambda rows: lgc.ncpplots._ncp_min(rows, feature))
    return reduced, edges
def minline(ncp, feature, group, nbins=100, edges=None, log=True):
    """Return the per-bin minimum line of ``feature`` against ``group``.

    The NCP results are read with ``ncp.as_data_frame()``, binned and reduced
    by ``ncp_min_feature_by_group_binned``, rows containing NaN are dropped,
    and the remaining (group, feature) pairs are ordered by group value.

    Parameters
    ----------
    ncp : object exposing ``as_data_frame()``.
    feature, group : column names forwarded to the binning helper.
    nbins, edges, log : forwarded unchanged to the binning helper.

    Returns
    -------
    (x, y, edges) : two lists with the group and feature values sorted by
        group, plus the bin edges returned by the binning helper.
    """
    ncpdata = ncp.as_data_frame()
    dfmin, edges = ncp_min_feature_by_group_binned(
        ncpdata, feature, group, nbins=nbins, edges=edges, log=log)
    dfmin = dfmin.dropna(axis=0)
    # sorted() is stable, so rows with equal group values keep their order.
    pairs = sorted(zip(dfmin[group], dfmin[feature]), key=lambda pair: pair[0])
    x = [pair[0] for pair in pairs]
    y = [pair[1] for pair in pairs]
    return x, y, edges
def ncpplot(N, **kwargs):
    """Compute NCP data for ``N`` and plot it with ``cond_by_size``.

    ``kwargs`` are forwarded to ``ourncp``. The NCP object returned by
    ``ourncp`` is handed back to the caller; the value returned by
    ``cond_by_size()`` is discarded.
    """
    result = ourncp(N, **kwargs)
    plots = lgc.NCPPlots(result)
    plots.cond_by_size()
    return result
def ncpplotline(ax, N, name, nbins=20, **kwargs):
    """Plot the ``output_cond`` by ``output_sizeeff`` minimum line for ``N`` on ``ax``.

    ``kwargs`` are forwarded to ``ourncp``; ``name`` is used as the line
    label. After plotting, ``ax`` is switched to log-log scaling.

    Returns
    -------
    (ncp, linedata) : the NCP object from ``ourncp`` and whatever
        ``feature_by_group_min_line`` returned.
    """
    ncp = ourncp(N, **kwargs)
    plotter = lgc.NCPPlots(ncp)
    linedata = plotter.feature_by_group_min_line(
        "output_cond", "output_sizeeff", ax=ax, label=name, nbins=nbins)
    ax.loglog()
    return ncp, linedata
def matrix2dict(A, G):
    """Map every node of ``G`` to a row of ``A``, converted to a list.

    The i-th node yielded by ``G.nodes()`` is paired with ``A[i]``, which
    gives the ``{node: coordinates}`` layout dictionary that the drawing
    helpers in this file pass as ``pos``.
    """
    rows_by_node = {}
    for row_index, node in enumerate(G.nodes()):
        rows_by_node[node] = list(A[row_index])
    return rows_by_node
def fiedler_view(N):
    """Draw ``N`` in 2D using the first output of ``eig2_nL(..., dim=2)`` as layout.

    Returns
    -------
    (drawn, F) : the return value of ``nx.draw`` and the coordinate array
        ``F`` that was used for the node positions.
    """
    graph = lgc.GraphLocal().from_networkx(N)
    F = lgc.algorithms.eig2_nL(graph, dim=2)[0]
    layout = matrix2dict(F, N)
    drawn = nx.draw(N, layout, node_size=12, width=0.5, alpha=0.5)
    return drawn, F
def ncpsetview(N, pos, ax=None, ncp=None, nbins=20, rholist=None):
    """Draw a 4x5 grid of small graph plots, one per set on the NCP min line.

    Parameters
    ----------
    N : NetworkX graph to draw.
    pos : node positions passed to the ``nx.draw_networkx_*`` calls.
    ax : optional axes that receives the min-line plot. When None the line is
        drawn on the first grid panel and wiped again (see below).
    ncp : optional precomputed NCP object. When None it is computed with
        ``approxPageRank(rholist=rholist)``.
    nbins : forwarded to ``feature_by_group_min_line``.
    rholist : rho values used only when ``ncp`` is None.
        Defaults to ``[1e-2, 1e-3, 1e-4]``.

    Returns
    -------
    The matplotlib figure holding the grid.
    """
    # Build the default per call instead of sharing one list object between
    # calls through the signature.
    if rholist is None:
        rholist = [1e-2, 1e-3, 1e-4]

    fig, axs = plt.subplots(4, 5, figsize=(8, 8))
    axs = axs.flatten()

    # We need to be a bit hacky here because feature_by_group_min_line
    # assumes you are plotting. So if ax is None we do not want the line at
    # all; instead we plot it onto the first panel, which will get a graph
    # drawing later anyway, and then clear that panel.
    lineax = axs[0] if ax is None else ax

    if ncp is None:
        ncp = lgc.NCPData(lgc.GraphLocal().from_networkx(N)).approxPageRank(rholist=rholist)

    # Node-id map: position i holds the NetworkX node for index i.
    id2nx = list(N.nodes())

    setdata = lgc.NCPPlots(ncp).feature_by_group_min_line(
        "output_cond", "output_sizeeff", ax=lineax, label="", nbins=nbins)

    if ax is None:
        lineax.clear()

    # A separate loop name keeps the ``ax`` argument intact.
    for panel in axs:
        panel.axis('off')

    all_nodes = set(N)  # loop-invariant, so build it once
    # NOTE(review): the grid has 20 panels; more than 20 entries in setdata
    # would index past the end of axs -- confirm the entry count is bounded.
    for i, d in enumerate(setdata):
        S, cond = ncp.output_set(int(d[2]))  # get the output set
        SN = [id2nx[v] for v in S]
        R = all_nodes - set(SN)
        # Make sure we always highlight the smaller side.
        if len(R) < len(SN):
            R, SN = SN, R  # swap!
        nx.draw_networkx_nodes(N, pos, ax=axs[i], alpha=0.5, node_size=2, node_color='k', nodelist=list(R))
        nx.draw_networkx_nodes(N, pos, ax=axs[i], node_size=14, nodelist=list(SN), node_color='r')
        nx.draw_networkx_edges(N, pos, ax=axs[i], alpha=0.5, width=0.5)
        axs[i].set_title('|S|=' + str(len(S)) + '\ncond=%.4f'%(d[1]),fontsize=10)
        axs[i].axis('tight')
    return fig
There is some additional stuff in the above, but it has the things you need :)
And in 3d
# From https://www.idtools.com.au/3d-network-graphs-python-mplot3d-toolkit/
from mpl_toolkits.mplot3d import Axes3D
def draw3d(G, pos, **kwargs):
    """Draw ``G`` as a 3D scatter of nodes with straight-line edges.

    Parameters
    ----------
    G : graph exposing ``edges()`` that yields node pairs.
    pos : mapping from node to an indexable with at least three coordinates.
    kwargs : accepted but not used.

    Returns
    -------
    The newly created matplotlib figure.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # Split the layout into one coordinate list per axis.
    xs, ys, zs = [], [], []
    for _, point in pos.items():
        xs.append(point[0])
        ys.append(point[1])
        zs.append(point[2])
    ax.scatter(xs, ys, zs, alpha=0.5)

    for edge in G.edges():
        tail = pos[edge[0]]
        head = pos[edge[1]]
        x = np.array((tail[0], head[0]))
        y = np.array((tail[1], head[1]))
        z = np.array((tail[2], head[2]))
        # Plot the connecting line
        ax.plot(x, y, z, c='black', alpha=0.5, linewidth=0.5)

    #ax.view_init(30, 0)
    ax.set_axis_off()
    return fig
def fiedler_view3(N):
    """Draw ``N`` in 3D using the first output of ``eig2_nL(..., dim=3)`` as layout.

    Returns
    -------
    (fig, F) : the figure produced by ``draw3d`` and the coordinate array
        ``F`` used for the node positions.
    """
    graph = lgc.GraphLocal().from_networkx(N)
    F = lgc.algorithms.eig2_nL(graph, dim=3)[0]
    layout = matrix2dict(F, N)
    fig = draw3d(N, layout)
    return fig, F
# This is still not done...
def ncpsetview3(N, pos, ax=None, ncp=None, nbins=20, rholist=None):
    """3D variant of the set grid: one scatter panel per set on the NCP min line.

    Parameters
    ----------
    N : NetworkX graph; its node order defines the index used for each point.
    pos : mapping from node to an indexable with at least three coordinates.
    ax : optional axes that receives the min-line plot. When None the line is
        drawn on the first grid panel and wiped again.
    ncp : optional precomputed NCP object. When None it is computed with
        ``approxPageRank(rholist=rholist)``.
    nbins : must be 20 for now (the grid is fixed at 4x5 panels).
    rholist : rho values used only when ``ncp`` is None.
        Defaults to ``[1e-2, 1e-3, 1e-4]``.

    Returns
    -------
    The matplotlib figure holding the grid.
    """
    assert nbins == 20  # todo make this flexible
    # Build the default per call instead of sharing one list object between
    # calls through the signature.
    if rholist is None:
        rholist = [1e-2, 1e-3, 1e-4]

    fig = plt.figure(figsize=(8, 8))
    axs = [fig.add_subplot(4, 5, i + 1, projection='3d') for i in range(4 * 5)]

    # We need to be a bit hacky here because feature_by_group_min_line
    # assumes you are plotting. So if ax is None we do not want the line at
    # all; instead we plot it onto the first panel, which will get a graph
    # drawing later anyway, and then clear that panel.
    lineax = axs[0] if ax is None else ax

    if ncp is None:
        ncp = lgc.NCPData(lgc.GraphLocal().from_networkx(N)).approxPageRank(rholist=rholist)

    setdata = lgc.NCPPlots(ncp).feature_by_group_min_line(
        "output_cond", "output_sizeeff", ax=lineax, label="", nbins=nbins)

    if ax is None:
        lineax.clear()

    # A separate loop name keeps the ``ax`` argument intact.
    for panel in axs:
        panel.axis('off')

    # Convert coordinates: entry i belongs to the i-th node of N.nodes().
    xs = [pos[v][0] for v in N.nodes()]
    ys = [pos[v][1] for v in N.nodes()]
    zs = [pos[v][2] for v in N.nodes()]

    all_ids = set(range(len(pos)))  # loop-invariant, so build it once
    for i, d in enumerate(setdata):
        S, cond = ncp.output_set(int(d[2]))  # get the output set
        # For this we are going to use indices from 0 to N.
        R = all_ids - set(S)
        # Make sure we always highlight the smaller side.
        if len(R) < len(S):
            R, S = S, R  # swap!
        axs[i].scatter([xs[v] for v in R], [ys[v] for v in R], [zs[v] for v in R], alpha=0.1, c='k')
        axs[i].scatter([xs[v] for v in S], [ys[v] for v in S], [zs[v] for v in S], alpha=0.5, c='r')
        axs[i].set_title('|S|=' + str(len(S)) + '\ncond=%.4f'%(d[1]),fontsize=10)
        axs[i].axis('tight')
    return fig