Blist Multilingual Theme
*

NetworkX - Visualizing PyTorch Geometric Datasets

Posted on *  •  9 minutes  • 1888 words
import networkx as nx

In NetworkX, nodes can be any hashable object (e.g., text, an image, an XML object, another graph, or a customized node object).

NetworkX includes many graph generator functions and facilities to read and write graphs in many formats.

Create a graph

# Start from an empty undirected graph.
G = nx.Graph()
# nx.draw(G)  # => would draw an empty canvas

# Add one node at a time ...
G.add_node(1)
# nx.draw(G)
# ... or several at once from a collection of nodes.
G.add_nodes_from([2,3,4,5])
# nx.draw(G)
# Add a single edge between two nodes.
G.add_edge(5,1)
# nx.draw(G)
G.add_nodes_from([6,7,8])
# Add several edges at once from a list of (u, v) pairs.
G.add_edges_from([(6,7),(7,8), (1,4), (2,3), (3,6),(5,6)])
# Draw the finished graph (8 nodes, 7 edges).
nx.draw(G)

png

print(f"Number of nodes = {G.number_of_nodes()}")
print(f"Number of edges = {G.number_of_edges()}")
print(G.edges)
print(G.nodes)
Number of nodes = 8
Number of edges = 7
[(1, 5), (1, 4), (2, 3), (3, 6), (5, 6), (6, 7), (7, 8)]
[1, 2, 3, 4, 5, 6, 7, 8]

Export graph as JSON

from networkx.readwrite import json_graph
json_data = json_graph.node_link_data(G)
json_data
{'directed': False,
 'multigraph': False,
 'graph': {},
 'nodes': [{'id': 1},
  {'id': 2},
  {'id': 3},
  {'id': 4},
  {'id': 5},
  {'id': 6},
  {'id': 7},
  {'id': 8}],
 'links': [{'source': 1, 'target': 5},
  {'source': 1, 'target': 4},
  {'source': 2, 'target': 3},
  {'source': 3, 'target': 6},
  {'source': 5, 'target': 6},
  {'source': 6, 'target': 7},
  {'source': 7, 'target': 8}]}

Read a json graph

json_data_to_graph = json_graph.node_link_graph(json_data)
json_data_to_graph
<networkx.classes.graph.Graph at 0x7e7f04523580>

Generate Graphml

for line in nx.generate_graphml(G):
  print(line)
<graphml xmlns="http://graphml.graphdrawing.org/xmlns" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
  <graph edgedefault="undirected">
    <node id="1" />
    <node id="2" />
    <node id="3" />
    <node id="4" />
    <node id="5" />
    <node id="6" />
    <node id="7" />
    <node id="8" />
    <edge source="1" target="5" />
    <edge source="1" target="4" />
    <edge source="2" target="3" />
    <edge source="3" target="6" />
    <edge source="5" target="6" />
    <edge source="6" target="7" />
    <edge source="7" target="8" />
  </graph>
</graphml>

Remove Nodes or Clear graph

G.remove_nodes_from([1,3])
G.clear()

More of it

G.add_edges_from([(1,2),(1,3)])
print(G.edges())
print(G.nodes())
nx.draw(G)
[(1, 2), (1, 3)]
[1, 2, 3]

png

G.add_node("spam")       # adds node "spam"
G.add_nodes_from("spam") # adds 4 nodes: 's', 'p', 'a', 'm'
print(G.edges())
print(G.nodes())
#nx.draw(G)

[(1, 2), (1, 3)]
[1, 2, 3, 'spam', 's', 'p', 'a', 'm']

Order, Density and Degree of Graph

Order => the number of nodes in the graph.

Density of an undirected graph:

$$ d = \frac{2e}{n(n-1)} $$ where $n$ is the number of nodes and $e$ is the number of edges

Degree : Returns a degree view

# Number of Nodes
G.order()
8
from networkx.classes.function import density
density(G)
0.07142857142857142
from networkx.classes.function import degree
degree(G, nbunch=None, weight=None)
DegreeView({1: 2, 2: 1, 3: 1, 'spam': 0, 's': 0, 'p': 0, 'a': 0, 'm': 0})
# In the graph G above, the DegreeView contains five 0's, two 1's and one 2, so the histogram is [5, 2, 1]
from networkx.classes.function import degree_histogram
degree_histogram(G)
[5, 2, 1]
from networkx.classes.function import neighbors
neighbors(G, 'spam')
<dict_keyiterator at 0x7e7f0442e070>
for node in G.nodes():
  print(node, G.neighbors(node))
1 <dict_keyiterator object at 0x7e7f044dc400>
2 <dict_keyiterator object at 0x7e7f044dc400>
3 <dict_keyiterator object at 0x7e7f044dc400>
spam <dict_keyiterator object at 0x7e7f044dc400>
s <dict_keyiterator object at 0x7e7f044dc400>
p <dict_keyiterator object at 0x7e7f044dc400>
a <dict_keyiterator object at 0x7e7f044dc400>
m <dict_keyiterator object at 0x7e7f044dc400>
for node in G.nodes():
  print(node, list(G.neighbors(node)))
1 [2, 3]
2 [1]
3 [1]
spam []
s []
p []
a []
m []

Graph Generators

https://networkx.org/documentation/stable/reference/generators.html

G.clear()
G = nx.binomial_tree(4)
nx.draw(G)

png

# default is n=3, i.e. a 9x9 Sudoku grid = 81 nodes
G = nx.sudoku_graph()
print(G.number_of_nodes())
print(G.number_of_edges())
print("--------------------")
A = nx.adjacency_matrix(G)
print(A.todense())
nx.draw(G)
81
810
--------------------
[[0 1 1 ... 0 0 0]
 [1 0 1 ... 0 0 0]
 [1 1 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 1 1]
 [0 0 0 ... 1 0 1]
 [0 0 0 ... 1 1 0]]

png

Directed Graphs

Neighbor And Adjacency

G = nx.DiGraph()
G.add_edge('a', 'b', weight=1)
G.add_edge('c', 'b', weight=5)
G.add_edge('m', 'n', weight=25)
G.add_edge('m', 'b', weight=50)
nx.draw(G)
print(nx.is_weighted(G))
print(nx.is_directed(G))
print(G.order())
print(G.number_of_edges())
print(G.number_of_nodes())
print(G.edges)
print(G.nodes)
True
True
5
4
5
[('a', 'b'), ('c', 'b'), ('m', 'n'), ('m', 'b')]
['a', 'b', 'c', 'm', 'n']

png

# In this DiGraph, neighbors() only follows outgoing edges, so nodes that
# are pure targets (such as 'b') report an empty list.
separator = "==========="

print(list(G.neighbors('a')))
print(separator)

for vertex in G.nodes():
    successors = list(G.neighbors(vertex))
    print(vertex, successors)

print(separator)
print(list(G.neighbors('m')))
['b']
===========
a ['b']
b []
c ['b']
m ['n', 'b']
n []
===========
['n', 'b']
# Same listing again, this time with a header line before each node's neighbors.
for vertex in G.nodes:
    header = 'Neighbors for ' + vertex + ':'
    print(header)
    print(list(G.neighbors(vertex)))
Neighbors for a:
['b']
Neighbors for b:
[]
Neighbors for c:
['b']
Neighbors for m:
['n', 'b']
Neighbors for n:
[]
A = nx.adjacency_matrix(G)
print(A)
print("====")
print(A.todense())
  (0, 1)	1
  (2, 1)	5
  (3, 1)	50
  (3, 4)	25
====
[[ 0  1  0  0  0]
 [ 0  0  0  0  0]
 [ 0  5  0  0  0]
 [ 0 50  0  0 25]
 [ 0  0  0  0  0]]
# Check for self-loops: any non-zero entry on the diagonal is a self-loop
A.diagonal()
array([0, 0, 0, 0, 0])
# From G to numpy array gives A
A = nx.to_numpy_array(G)
print(A)
[[ 0.  1.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  5.  0.  0.  0.]
 [ 0. 50.  0.  0. 25.]
 [ 0.  0.  0.  0.  0.]]

Multiple Edge Attributes

import numpy as np

# Build a small path graph 0-1-2-3 whose edges carry different attribute sets:
# only "weight", only "cost", and both.
G = nx.Graph()
G.add_edge(0, 1, weight=10)
G.add_edge(1, 2, cost=5)
G.add_edge(2, 3, weight=3, cost=-4.0)
# One structured field per edge attribute to export.
dtype = np.dtype([("weight", int), ("cost", float)])
# To create adjacency matrices from structured dtypes, use `weight=None`
A = nx.to_numpy_array(G, dtype=dtype, weight=None)
# Each field of A is a separate 4x4 adjacency matrix. In the printed output,
# an edge that lacks the attribute shows up as 1 (e.g. "weight" for edge 1-2,
# "cost" for edge 0-1), while node pairs without an edge are 0.
print("weight --------")
print(A["weight"])
print("cost --------")
print(A["cost"])

print(G.edges)
print(G.nodes)
weight --------
[[ 0 10  0  0]
 [10  0  1  0]
 [ 0  1  0  3]
 [ 0  0  3  0]]
cost --------
[[ 0.  1.  0.  0.]
 [ 1.  0.  5.  0.]
 [ 0.  5.  0. -4.]
 [ 0.  0. -4.  0.]]
[(0, 1), (1, 2), (2, 3)]
[0, 1, 2, 3]
G = nx.Graph(
    [
        ("A", "B", {"cost": 1, "weight": 7}),
        ("C", "E", {"cost": 9, "weight": 10}),
    ]
)
print(G.edges)
print(G.nodes)
print("---------- #For entire graph ----------")
df = nx.to_pandas_edgelist(G)
print(df)

print("--------#for selected list BE------------")
df = nx.to_pandas_edgelist(G, nodelist=["B", "E"]) #for selected list
print(df)
print("--------------------")
df = nx.to_pandas_edgelist(G, nodelist=["A", "C"])
print(df)
print("--------------------")
df[["source", "target", "cost", "weight"]]
[('A', 'B'), ('C', 'E')]
['A', 'B', 'C', 'E']
---------- #For entire graph ----------
  source target  cost  weight
0      A      B     1       7
1      C      E     9      10
--------#for selected list BE------------
  source target  cost  weight
0      B      A     1       7
1      E      C     9      10
--------------------
  source target  cost  weight
0      A      B     1       7
1      C      E     9      10
--------------------
# Only weights
A = nx.adjacency_matrix(G)
print(A.todense())
[[ 0  7  0  0]
 [ 7  0  0  0]
 [ 0  0  0 10]
 [ 0  0 10  0]]

Viewing Datasets from PyTorch Geometric

1. Enzyme Dataset

!python -c "import torch; print(torch.__version__)"
2.1.0+cu121
!pip install torch-scatter -f https://data.pyg.org/whl/torch-2.1.0+cu121.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-2.1.0+cu121.html
!pip install torch-geometric
from torch_geometric.datasets import TUDataset
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')
Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Processing...
Done!
print(f"{dataset} : {len(dataset)}")
print(f"Num classes : {dataset.num_classes}")
print(f"Num classes : {dataset.num_node_features}")
ENZYMES(600) : 600
Num classes : 6
Num classes : 3
data = dataset[0]
data
Data(edge_index=[2, 168], x=[37, 3], y=[1])
from torch_geometric.utils import to_networkx
print(type(data))

networkX_graph = to_networkx(data)
print(type(networkX_graph))
<class 'torch_geometric.data.data.Data'>
<class 'networkx.classes.digraph.DiGraph'>
import networkx as nx
nx.draw(networkX_graph)

png

2. Karate Dataset

# Notebook setup: render plots inline and load the KarateClub dataset.
%matplotlib inline
import matplotlib.pyplot as plt
from torch_geometric.datasets import KarateClub
dataset = KarateClub()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')
print(f'Number of Node Features: {dataset.num_node_features}')
print(f'Number of Edge Features: {dataset.num_edge_features}')
Dataset: KarateClub():
======================
Number of graphs: 1
Number of features: 34
Number of classes: 4
Number of Node Features: 34
Number of Edge Features: 0
data = dataset[0]  # Get the first graph object.
print(data)
# (1) The edge_index property holds the information about the graph connectivity, i.e., a tuple of source and destination node indices for each edge.
# (2) node features as x (each of the 34 nodes is assigned a 34-dim feature vector)
# (3) node labels as y (each node is assigned to exactly one class).
Data(x=[34, 34], edge_index=[2, 156], y=[34], train_mask=[34])
print('==============================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
# edge_index lists every undirected edge in both directions (e.g. 0->1 and
# 1->0), so num_edges / num_nodes is the mean number of neighbors per node.
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is Directed: {data.is_directed()}')
print(f'Is undirected: {data.is_undirected()}')
print('==============================================================')

print(f'Edge weight: {data.edge_weight}')
# NOTE: contains_isolated_nodes() is deprecated and triggers the UserWarning
# shown in the output; has_isolated_nodes() (already printed above) is the
# replacement named by that warning.
print(f'Graph contains isolated nodes: {data.contains_isolated_nodes()}')

print('==============================================================')
# train_mask is a boolean tensor with one entry per node; summing it counts
# the nodes flagged for training.
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')

==============================================================
Number of nodes: 34
Number of edges: 156
Average node degree: 4.59
Has isolated nodes: False
Has self-loops: False
Is Directed: False
Is undirected: True
==============================================================
Edge weight: None
Graph contains isolated nodes: False
==============================================================
Number of training nodes: 4
Training node label rate: 0.12


/usr/local/lib/python3.10/dist-packages/torch_geometric/deprecation.py:26: UserWarning: 'contains_isolated_nodes' is deprecated, use 'has_isolated_nodes' instead
  warnings.warn(out)
data.to_dict()
{'x': tensor([[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 1., 0.],
         [0., 0., 0.,  ..., 0., 0., 1.]]),
 'edge_index': tensor([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  1,
           1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,
           3,  3,  3,  3,  3,  4,  4,  4,  5,  5,  5,  5,  6,  6,  6,  6,  7,  7,
           7,  7,  8,  8,  8,  8,  8,  9,  9, 10, 10, 10, 11, 12, 12, 13, 13, 13,
          13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21,
          21, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27,
          27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31,
          31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33,
          33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33],
         [ 1,  2,  3,  4,  5,  6,  7,  8, 10, 11, 12, 13, 17, 19, 21, 31,  0,  2,
           3,  7, 13, 17, 19, 21, 30,  0,  1,  3,  7,  8,  9, 13, 27, 28, 32,  0,
           1,  2,  7, 12, 13,  0,  6, 10,  0,  6, 10, 16,  0,  4,  5, 16,  0,  1,
           2,  3,  0,  2, 30, 32, 33,  2, 33,  0,  4,  5,  0,  0,  3,  0,  1,  2,
           3, 33, 32, 33, 32, 33,  5,  6,  0,  1, 32, 33,  0,  1, 33, 32, 33,  0,
           1, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33,  2, 23,
          24, 33,  2, 31, 33, 23, 26, 32, 33,  1,  8, 32, 33,  0, 24, 25, 28, 32,
          33,  2,  8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33,  8,  9, 13, 14, 15,
          18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32]]),
 'y': tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,
         2, 2, 0, 0, 2, 0, 0, 2, 0, 0]),
 'train_mask': tensor([ True, False, False, False,  True, False, False, False,  True, False,
         False, False, False, False, False, False, False, False, False, False,
         False, False, False, False,  True, False, False, False, False, False,
         False, False, False, False])}
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
<IPython.core.display.Javascript object>
def visualize_graph(G, color):
    """Draw ``G`` on a square, tick-free figure with nodes coloured by ``color``.

    Args:
        G: Graph to draw; it is passed to ``nx.spring_layout`` and
            ``nx.draw_networkx``.
        color: Per-node colour values, mapped through the "Set2" colormap.

    The spring layout is seeded with 42 so repeated calls place the nodes
    identically. Node labels are not drawn.
    """
    layout = nx.spring_layout(G, seed=42)

    plt.figure(figsize=(5, 5))
    # Hide both axes' ticks: the coordinates of a spring layout carry no meaning.
    plt.xticks([])
    plt.yticks([])

    draw_options = {
        "pos": layout,
        "with_labels": False,
        "node_color": color,
        "cmap": "Set2",
    }
    nx.draw_networkx(G, **draw_options)
    plt.show()

karate_undirected_graph = to_networkx(data, to_undirected=True)
visualize_graph(karate_undirected_graph, color=data.y)

png

plt.figure(figsize=(5,5))
nx.draw(karate_undirected_graph, cmap=plt.get_cmap('viridis'), with_labels=True, node_color=data.y, font_color='white')
# 4 Classes are visible

png


Follow me

I work on everything - molecular simulations, data science and coding