Temporal Triangles ODBC#

This notebook walks through running the temporal triangles benchmark on xGT using MySQL.

from trovares_connector import ODBCConnector, SQLODBCDriver
import xgt
import time
import pyodbc
import requests

Connect to xGT and MySQL#

connection_string = 'Driver={MariaDB};Server=127.0.0.1;Port=3306;Database=test;Uid=test;Pwd=foo;'
xgt_server = xgt.Connection()
odbc_driver = SQLODBCDriver(connection_string)   
c = ODBCConnector(xgt_server, odbc_driver)       
pyodbc_driver = pyodbc.connect(connection_string)
cursor = pyodbc_driver.cursor()

Transfer the dataset locally#

file_object = requests.get('https://datasets.trovares.com/TT/tt.1M')
with open('tt.1M', 'wb') as local_file:
    local_file.write(file_object.content)

Load the data into MySQL#

cursor.execute("DROP TABLE IF EXISTS Edges")
cursor.execute("CREATE TABLE Edges (src BIGINT, trg BIGINT, timestamp BIGINT)")
cursor.execute("LOAD DATA LOCAL INFILE './tt.1M' INTO TABLE Edges FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';") 
pyodbc_driver.commit()

Query to run#

threshold=15
query = """
    MATCH (v1)-[e1:Edges]->(v2)
              -[e2:Edges]->(v3)
              -[e3:Edges]->(v1)
    WHERE v1 <> v2 AND v1 <> v3 AND v2 <> v3
      AND e1.timestamp <= e2.timestamp
      AND e2.timestamp <= e3.timestamp
      AND e3.timestamp - e1.timestamp < {:d}
    RETURN v1.key as v1id,
           e1.timestamp as e1_timestamp,
           v2.key as v2id,
           e2.timestamp as e2_timestamp,
           v3.key as v3id,
           e3.timestamp as e3_timestamp,
           e3.timestamp - e1.timestamp as delta
  """.format(threshold)

Transfer data to xGT#

t0 = time.time()
c.transfer_to_xgt([('Edges', ('Vertex', 'Vertex', 'src', 'trg'))], easy_edges=True)
t_duration = time.time() - t0
print(f"Transfer to xgt time: {t_duration:,.2f}")
Transferring: [############################################################] 1000000/1000000 in 0:00:00.8s (1306231.1/s, eta: 0:00:00.0s)     
Transfer to xgt time: 0.85

Run query on xGT#

t0 = time.time()
job = xgt_server.run_job(query)
q_duration = time.time() - t0
print(job.get_data())
print(f"Query to xgt time: {q_duration:,.2f}")
print(f"Total xgt time including transfer: {t_duration + q_duration:,.2f}")
[[26563, 7598, 51565, 7598, 15727, 7612, 14]]
Query to xgt time: 0.57
Total xgt time including transfer: 1.43