Transfer or backup data to MySQL#

This notebook walks through transferring data from xGT to MySQL.

from trovares_connector import ODBCConnector, SQLODBCDriver
import xgt
import time
import pyodbc
import requests
import os

Connect to xGT and MySQL#

connection_string = 'Driver={MariaDB};Server=127.0.0.1;Port=3306;Database=test;Uid=test;Pwd=foo;'
xgt_server = xgt.Connection()
xgt_server.set_default_namespace('odbc')
odbc_driver = SQLODBCDriver(connection_string)   
c = ODBCConnector(xgt_server, odbc_driver)       
pyodbc_driver = pyodbc.connect(connection_string)
cursor = pyodbc_driver.cursor()

Transfer the dataset locally#

file_object = requests.get('https://datasets.trovares.com/TT/tt.10M')
with open('tt.10M', 'wb') as local_file:
    local_file.write(file_object.content)

Load the data into MySQL#

cursor.execute("DROP TABLE IF EXISTS Events1")
cursor.execute("DROP TABLE IF EXISTS Events2")
cursor.execute("CREATE TABLE Events1 (src BIGINT, trg BIGINT, timestamp BIGINT)")
cursor.execute("CREATE TABLE Events2 (src BIGINT, trg BIGINT, timestamp BIGINT)")
t0 = time.time()
cursor.execute("LOAD DATA LOCAL INFILE './tt.10M' INTO TABLE Events1 FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';") 
pyodbc_driver.commit()
t_duration = time.time() - t0
print(f"Load time: {t_duration:,.2f}")
Load time: 25.33

Query to run#

cypher_query = "MATCH (row:Events) WHERE row.timestamp < 1 return count(*)"
sql_query = "SELECT COUNT(*) FROM Events WHERE timestamp < 1"

Transfer data to MySQL#

file = 'xgtd://' + os.getcwd() + '/tt.10M'
xgt_server.drop_frame('Events2')
t0 = time.time()
xgt_server.create_table_frame_from_data(file, 'Events2',
                                        schema=[('src', xgt.INT), ('trg', xgt.INT), ('timestamp', xgt.INT)])
t_duration = time.time() - t0
print(f"Load xgt time: {t_duration:,.2f}")
t0 = time.time()
c.transfer_to_odbc(tables=['Events2'])
t_duration = time.time() - t0
print(f"Transfer to MySQL time: {t_duration:,.2f}")
Load xgt time: 0.94
Transferring: [############################################################] 10000000/10000000 in 0:00:33.3s (300334.1/s, eta: 0:00:00.0s)     
Transfer to MySQL time: 33.30

Here we see the performance for transfer is comparable to loading from CSV in MySQL and that CSV loading is much faster in xGT compared to MySQL.