After struggling with the amazing facebookresearch/PyTorch-BigGraph project and its impossible API, I managed to get a grip on how to run it (thanks to a simple stand-alone example).
My system restrictions do not allow me to train the dense (embedding) representation of all edges, so from time to time I need to load past embeddings and train the model using both new edges and existing nodes. Note that the nodes in the past and new edge lists do not necessarily overlap.
I tried to work out how to do this from here (see the context section), so far with no success.
The following is stand-alone PBG code that turns `batch_edges` into a list of node embeddings; however, I need it to use the pre-trained node list `past_trained_nodes`.
import os
import shutil
from pathlib import Path
from torchbiggraph.config import parse_config
from torchbiggraph.converters.importers import TSVEdgelistReader, convert_input_data
from torchbiggraph.train import train
from torchbiggraph.util import SubprocessInitializer, setup_logging
# Embedding size and on-disk locations used by this example.
DIMENSION = 4
DATA_DIR = "data"
GRAPH_PATH = f"{DATA_DIR}/output1.tsv"
MODEL_DIR = "model"

# Raw (unparsed) PyTorch-BigGraph configuration for a single entity type "n"
# with one partition and a single relation between "n" nodes.
raw_config = {
    # Where entities, partitioned edges and checkpoints are stored.
    "entity_path": DATA_DIR,
    "edge_paths": [f"{DATA_DIR}/edges_partitioned"],
    "checkpoint_path": MODEL_DIR,
    # Graph schema.
    "entities": {"n": {"num_partitions": 1}},
    "relations": [
        {
            "name": "doesnt_matter",
            "lhs": "n",
            "rhs": "n",
            "operator": "complex_diagonal",
        },
    ],
    "dynamic_relations": False,
    # Model and training hyper-parameters.
    "dimension": DIMENSION,
    "global_emb": False,
    "comparator": "dot",
    "num_epochs": 7,
    "num_uniform_negs": 1000,
    "loss_fn": "softmax",
    "lr": 0.1,
    "eval_fraction": 0.0,
}
# New edges for this training batch, one [lhs, rhs] node pair per entry.
batch_edges = [
    ["A", "B"],
    ["B", "C"],
    ["C", "D"],
    ["D", "B"],
    ["B", "D"],
]

# Pre-trained node embeddings the model should reuse. Note that node "A"
# appears in batch_edges while node "F" does not.
# Not all past nodes are available here, as some are gained from data.
past_trained_nodes = {
    "A": [0.5, 0.3, 1.5, 8.1],
    "F": [3, 0.6, 1.2, 4.3],
}
# Start from a clean slate: remove the data and model directories left behind
# by a previous run. A missing directory is expected on the first run and is
# ignored; any other failure (e.g. a permission error) is raised instead of
# being silently swallowed, so the script never continues on top of stale
# files it failed to delete.
for stale_dir in (DATA_DIR, MODEL_DIR):
    try:
        shutil.rmtree(stale_dir)
    except FileNotFoundError:
        pass
# Write the batch as a tab-separated edge list, one edge per line.
# The file is opened with the platform default encoding, as in the original.
# NOTE(review): confirm the TSV importer reads it back with the same encoding.
os.makedirs(DATA_DIR, exist_ok=True)
with open(GRAPH_PATH, 'w') as f:
    f.writelines('\t'.join(edge) + '\n' for edge in batch_edges)
setup_logging()
config = parse_config(raw_config)
subprocess_init = SubprocessInitializer()

# Import the raw TSV edge list: column 0 holds the left-hand-side node,
# column 1 the right-hand-side node, and there is no relation column.
# Presumably the converted output lands under config.entity_path and
# config.edge_paths -- verify against the convert_input_data documentation.
input_edge_paths = [Path(GRAPH_PATH)]
edgelist_reader = TSVEdgelistReader(lhs_col=0, rel_col=None, rhs_col=1)
convert_input_data(
    config.entities,
    config.relations,
    config.entity_path,
    config.edge_paths,
    input_edge_paths,
    edgelist_reader,
    dynamic_relations=config.dynamic_relations,
)

# Train on the converted data using the parsed configuration.
# NOTE(review): if train() starts worker processes, this script may need an
# `if __name__ == "__main__":` guard -- confirm against the library docs.
train(config, subprocess_init=subprocess_init)
How can I use my pre-trained nodes in the current model?
Thanks in advance!