import os
import random

import networkx as nx
import pandas as pd
from faker import Faker
from pykeen.pipeline import pipeline
from pykeen.triples import TriplesFactory
random.seed(2023)

# Create a synthetic small-world graph: 500 nodes, average degree 4,
# rewiring probability 0.5 (Watts-Strogatz model), seeded for reproducibility.
N_NODES = 500
G = nx.watts_strogatz_graph(n=N_NODES, k=4, p=0.5, seed=42)

# Generate exactly one unique fake name per node. Faker can emit duplicate
# names, so loop until we have enough uniques — a fixed iteration count
# (the original `range(501)`) could finish with fewer than 500 names,
# leaving some nodes unlabeled.
faker = Faker()
Faker.seed(0)
node_names = []
seen = set()  # O(1) membership test instead of scanning the list
while len(node_names) < N_NODES:
    name = faker.name()
    if name not in seen:
        seen.add(name)
        node_names.append(name)

# Relabel integer node ids 0..N-1 with the generated names.
mapping = dict(enumerate(node_names))
G = nx.relabel_nodes(G, mapping)

# Watts-Strogatz should not produce self-loops, but remove any defensively.
G.remove_edges_from(nx.selfloop_edges(G))
# Edge relation labels.
attributes = ['friend', 'family', 'acquaintance',
              'colleague', 'classmate', 'neighbor',
              'schoolmate']
# Sampling probability for each relation label. Keys now match `attributes`
# exactly (the original dict mislabeled two entries as 'university' and
# 'school', and only worked by positional accident of dict ordering).
prob_dist = {'friend': 0.3, 'family': 0.05, 'acquaintance': 0.2,
             'colleague': 0.05, 'classmate': 0.15, 'neighbor': 0.15,
             'schoolmate': 0.1}
# Look weights up by label so order can never silently diverge, and hoist
# the invariant list out of the loop.
weights = [prob_dist[a] for a in attributes]
# Assign a randomly sampled relation label to every edge.
for u, v in G.edges:
    G[u][v]['relation'] = random.choices(attributes, weights=weights, k=1)[0]
# Flatten the labeled graph into (head, relation, tail) triples and
# load them into a DataFrame with the conventional h/r/t columns.
triples = [
    [u, data['relation'], v]
    for u, v, data in G.edges(data=True)
]
df = pd.DataFrame(triples, columns=['h', 'r', 't'])
# Wrap the labeled triple array in a PyKEEN TriplesFactory, then carve
# out a reproducible 80/20 train/test split.
labeled_array = df.values
tf = TriplesFactory.from_labeled_triples(labeled_array)
training, testing = tf.split([0.8, 0.2], random_state=42)
# Train a TransR link-prediction model with PyKEEN (CPU, fixed seed,
# Bernoulli negative sampling with 5 negatives per positive).
result = pipeline(
    training=training,
    testing=testing,
    model="TransR",
    model_kwargs=dict(embedding_dim=128),
    optimizer="adamw",
    training_kwargs=dict(num_epochs=50, use_tqdm_batch=False),
    random_seed=42,
    device='cpu',
    negative_sampler='bernoulli',
    negative_sampler_kwargs=dict(num_negs_per_pos=5),
)

# Persist rank-based evaluation metrics. Create the output directory
# first: pandas' to_csv raises OSError when the target directory is missing.
os.makedirs('output', exist_ok=True)
df_metrics = result.metric_results.to_df()
df_metrics.to_csv('output/df_metrics.csv')