Non-Graph analytics ODBC#

This notebook walks through using cypher queries on tables from a MySQL database. It provides an alternative to accelerating analytics instead of building a database index.

from trovares_connector import ODBCConnector, SQLODBCDriver
import xgt
import time
import pyodbc
import requests

Connect to xGT and MySQL#

connection_string = 'Driver={MariaDB};Server=127.0.0.1;Port=3306;Database=test;Uid=test;Pwd=foo;'
xgt_server = xgt.Connection()
odbc_driver = SQLODBCDriver(connection_string)   
c = ODBCConnector(xgt_server, odbc_driver)       
pyodbc_driver = pyodbc.connect(connection_string)
cursor = pyodbc_driver.cursor()

Transfer the dataset locally#

file_object = requests.get('https://datasets.trovares.com/TT/tt.1M')
with open('tt.1M', 'wb') as local_file:
    local_file.write(file_object.content)

Load the data into MySQL#

t0 = time.time()
cursor.execute("DROP TABLE IF EXISTS Events")
cursor.execute("CREATE TABLE Events (src BIGINT, trg BIGINT, timestamp BIGINT)")
cursor.execute("LOAD DATA LOCAL INFILE './tt.1M' INTO TABLE Events FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';") 
pyodbc_driver.commit()
t_duration = time.time() - t0
print(f"Load time: {t_duration:,.2f}")
Load time: 2.36

Query to run#

cypher_query = "MATCH (row:Events) WHERE row.timestamp < 1 return count(*)"
sql_query = "SELECT COUNT(*) FROM Events WHERE timestamp < 1"

Transfer data to xGT#

t0 = time.time()
c.transfer_to_xgt(['Events'])
t_duration = time.time() - t0
print(f"Transfer to xgt time: {t_duration:,.2f}")
Transferring: [############################################################] 1000000/1000000 in 0:00:00.7s (1393061.2/s, eta: 0:00:00.0s)     
Transfer to xgt time: 0.73

Run query on xGT#

t0 = time.time()
job = xgt_server.run_job(cypher_query)
q_duration = time.time() - t0
print(job.get_data())
print(f"Query to xgt time: {q_duration:,.2f}")
print(f"Total xgt time including transfer: {t_duration + q_duration:,.2f}")
[[94]]
Query to xgt time: 0.01
Total xgt time including transfer: 0.74

Run query in MySQL#

t0 = time.time()
cursor.execute(sql_query)
rows = cursor.fetchall()
for row in rows:
    print(row[0])
n_duration = time.time() - t0
print(f"MySQL query time: {n_duration:,.2f}")
94
MySQL query time: 0.28
print(f"Query speedup: {n_duration/q_duration:,.2f}X")
Query speedup: 24.79X