In my previous Community posts I have presented Data Lineage Python scripts that help understanding connections between all Strategy objects like Reports, Attributes, Metrics, Filters, Tables and others. Those connections were "hypothetical" meaning that relation between an attribute and table exist, but in reality this connection may never been used in real data queries (because other, faster, relations to other tables exist).
This example focuses on real relations and real SQL queries. Please see below Python code that you can use to extract this information from your projects. This has been tested with Strategy One update 11 and mstrio 11.3.10.101
Please note - I've added color formatting in the code below ( http://hilite.me/). Code copied from here might not work due to indentation issues. In that case, use attached file with sample code. Also this CRM is adding ';' after URLs.
from mstrio.project_objects import list_olap_cubes, OlapCube, SuperCube, list_all_cubes, list_reports, Report
from mstrio import connection
#from mstrio.project_objects.report import list_reports
import re
import datetime
import getpass
import csv, json, pickle, itertools
import pandas as pd
# libraries that you should install to parse SQL and get tables
from sql_metadata import Parser
# Define a variable which can be later used in a script
SERVER_URL = "https://env-XXXXXX.customer.cloud.Strategy.com/StrategyLibrary"
USERNAME = "mstr"
PASSWORD = getpass.getpass(prompt='Password ')
PROJECT_NAME = "Strategy Tutorial"
def export_to_csv(level_id, df):
df.to_csv(f"{level_id}.csv", index=False, encoding='utf-8', sep=",", quoting=csv.QUOTE_ALL, escapechar="\\", header=True)
def unique_list(alist): # sorts a list of lists
alist.sort()
alist=list(alist for alist,_ in itertools.groupby(alist))
return alist
#conn = get_connection(workstationData, project_name=PROJECT_NAME)
conn = connection.Connection(SERVER_URL, username=USERNAME, password=PASSWORD, project_name=PROJECT_NAME)
conn.connect()
# List OLAP Cubes with different conditions
#list_of_all_cubes = list_all_cubes(connection=conn)
list_of_all_cubes_as_dicts = list_all_cubes(connection=conn, to_dictionary=True)
print("\nFound " + str(len(list_of_all_cubes_as_dicts)) + " cubes\n")
# Limit number of cubes to be analyzed, 779 is data import cube (might contain custom SQL code)
cubes_subtypes=[776, 779]
list_of_all_cubes_as_dicts=[cu for cu in list_of_all_cubes_as_dicts if cu['subtype'] in cubes_subtypes]
print(f"Number of cubes: {len(list_of_all_cubes_as_dicts)}")
# Print
for cube in list_of_all_cubes_as_dicts:
print(f"{cube['name']} - ID: {cube['id']} - type: {cube['type']} - subtype: {cube['subtype']} - ext_type: {cube['ext_type']}")
tables_list, columns_list=[],[]
cubes_list=list_of_all_cubes_as_dicts[:]
pattern = r"(select\s+.*?)\n\n"
for i,cube in enumerate(cubes_list):
cube_id, cube_name, cube_supbtype = cube["id"], cube["name"], cube["subtype"]
print(f"\n{'='*5}| {i+1} / {len(cubes_list)} - {cube_name} - id: {cube_id} |{'='*30}")
try:
current_cube = OlapCube(conn, cube_id)
sql_view = current_cube.export_sql_view()
#print(sql_view)
except:
print(f"Failed to get the SQL for cube {cube_name} {cube_id}")
sql_view = ""
if sql_view:
matches = re.findall(pattern, sql_view, flags=re.DOTALL | re.IGNORECASE)
for match in matches:
match=" ".join(match.split())
parser = Parser(match)
for table in parser.tables:
if not table.startswith(("ZZ","*")):
#print(f"\"{cube_id}\",\"{cube_name}\",\"{table}\"")
tables_list.append([PROJECT_NAME,cube_id,cube_name,cube_supbtype,table])
for column in parser.columns:
if not column.startswith(("ZZ","*")):
#print(f"\"{cube_id}\",\"{cube_name}\",\"{column}\"")
columns_list.append([PROJECT_NAME,cube_id,cube_name,cube_supbtype,column])
print("FINITO")
### Export to CSV
tables_list=unique_list(tables_list)
columns_list=unique_list(columns_list)
headers=["Project Name", "Cube ID", "Cube Name", "Cube Suntype", "Table Name"]
df = pd.DataFrame(tables_list, columns = headers)
headers=["Project Name", "Cube ID", "Cube Name", "Cube Subtype", "Column Name"]
dfc = pd.DataFrame(columns_list, columns = headers)
export_to_csv("tables", df)
export_to_csv("columns", dfc)
# List all reports in the project
# Future development
reports = list_reports(connection=conn, to_dictionary=True)
rep=Report(conn, "CEF3C2B64463B7202F49AF81FCA9BE35")