Starting with the release of Strategy ONE (March 2024), dossiers are also known as dashboards.
This example leverages the open-source H2O.ai engine to predict those customers who are most at risk of stopping telco service. When predictive analytics are made available in the Strategy platform, organizations can more easily take action to prevent customer attrition: offer an appropriate bundled service, identify the root cause more quickly, or more precisely articulate their competitive differentiators. These actions might surface on a call center screen, a call center supervisor screen, a marketing manager's campaign application, or within technical support. All facets of an organization can benefit from a customer-focused and enterprise-grade predictive analytics engine.
H2O was chosen for this example because it is the free, open-source advanced analytics engine behind H2O Driverless AI and H2O Enterprise. This example uses AutoML, an engine designed to be accessible to the citizen data scientist. The H2O project has over 100 contributors and is available under the Apache-2.0 license on GitHub: https://github.com/h2oai/h2o-3
# Import packages and initialize the h2o application server
# Packages used throughout the notebook. One import per line (the statements
# were previously fused onto a single line, which is not valid Python); the
# original import order is kept unchanged.
import h2o
import pandas as pd
import pandas_profiling
import matplotlib.pyplot as plt
import time
from h2o.automl import H2OAutoML
# NOTE(review): the module name `Strategy` looks like a product-rename
# artifact -- confirm it matches the module shipped by the installed mstrio-py.
from mstrio import Strategy
from IPython.core.display import display, HTML

# Initialize the h2o application server with INFO-level logging.
h2o.init(log_level="INFO")
# connect to Strategy using mstrio and view the customer history dataframe ## update the code block below with your connection info and cube IDs, or simply replace the entire block using the mstrio jupyter plugin
from mstrio.Strategy import Connection
from mstrio.cube import Cube
from mstrio.report import Report
from mstrio.dataset import Dataset
import getpass
def _load_cube(connection, cube_id):
    """Fetch the cube identified by ``cube_id`` over ``connection``.

    Returns a ``(cube, dataframe)`` pair: the ``Cube`` object after its
    ``to_dataframe()`` call, and the value of its ``dataframe`` attribute.
    """
    cube = Cube(connection, cube_id)
    cube.to_dataframe()
    return cube, cube.dataframe


# Connection settings -- replace the placeholders with your environment's values.
base_url = 'https://__your_url_here__/StrategyLibrary/api'
project_id = '__your_project_id_here__'
login_mode = 1

# Credentials are prompted for interactively; the password is read without echo.
mstr_username = input('username: ')
mstr_password = getpass.getpass('password: ')

conn = Connection(
    base_url,
    mstr_username,
    mstr_password,
    project_id=project_id,
    login_mode=login_mode,
)
conn.connect()

# Pull both cubes (current customers first, then historical outcomes).
current_customers, current_customers_df = _load_cube(
    conn, '__current_customers_cube_id_here__'
)
historical_outcomes, historical_outcomes_df = _load_cube(
    conn, '__historical_outcomes_cube_id_here__'
)
print("mstrio: data successfully loaded")

# Convert the pandas dataframes into H2OFrames for modelling and scoring.
history = h2o.H2OFrame(historical_outcomes_df)
curr = h2o.H2OFrame(current_customers_df)
all_columns = history.columns

# Column names that must not be used as predictors: the customer ID plus every
# "Row Count - ..." column. (The response column is excluded separately when
# the predictor list is built.)
ignore_columns = ["customerID"] + [
    column for column in all_columns if column.startswith("Row Count - ")
]
print("Ignore Fields: " + str(ignore_columns))

# define the response (target) field
response = "Churn"

print(history.describe())
# define model inputs, split into training and validation sets, train & validate the model
runtime = int(input("How long should we allow the model to build? (in seconds) "))

# define the predictors (include factors): every column that is neither in the
# ignore list nor the response. Filtering `all_columns` directly keeps the
# original column order (a set difference does not) and avoids removing items
# from a list while iterating over it.
excluded_columns = set(ignore_columns) | {response}
predictors = [column for column in all_columns if column not in excluded_columns]

# define training and validation splits
history[response] = history[response].asfactor()  # for binary classification, response should be cast as a factor
train, valid = history.split_frame(ratios=[.8], seed=1234)

# build the model
# http://docs.h2o.ai/h2o/latest-stable/h2o-docs/automl.html
m = H2OAutoML(max_runtime_secs=runtime, max_models=40, seed=5678)

# time the training call so the wall-clock build duration can be reported
start_time = time.time()
m.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
finish_time = time.time()
print("model build time (seconds): " + str(int(finish_time - start_time)))
# validate model accuracy using the leaderboard
lb = m.leaderboard.as_data_frame()
# sort_values returns a new dataframe, so the result has to be re-assigned for
# the ordering to take effect in the chart below.
lb = lb.sort_values(by="auc", ascending=True)
use_performance = lb["auc"].tolist()

# Shorten the model names for readability: keep only the text before the
# second underscore. Ids with fewer than two underscores are kept whole
# (slicing at str.find()'s -1 would chop off their last character).
use_models = [
    "_".join(model_id.split("_")[:2]) for model_id in lb["model_id"].tolist()
]

# horizontal bar chart of AUC per model, zoomed in on the observed AUC range
plt.barh(use_models, use_performance)
plt.xlim(min(use_performance)-.01, max(use_performance)+.002)
plt.title("Model Accuracy")
plt.xlabel("Area Under Curve")
plt.ylabel("Model")
display(HTML("<a href='http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science.html#supervised'>Model Descriptions</a>"))
plt.show()

# ROC curve and confusion matrix for the leading model
m_performance = m.leader.model_performance()
m_performance.plot(type="roc")
print(m.leader.confusion_matrix())
# Score current customers and prepare the dataframe for writeback into Strategy
# score current customers for churn using the highest performing model
# (these statements were previously fused into a single comment line and
# therefore never executed)
predict = m.leader.predict(curr)

# bind the prediction to the original dataset and convert to a dataframe
curr = curr.cbind(predict)
current_customers_df = pd.DataFrame(curr.as_data_frame(), columns=curr.names)

# drop the "Row Count - ..." helper columns before writeback
all_columns = current_customers_df.columns
row_count_columns = [
    column for column in all_columns if column.startswith("Row Count - ")
]
current_customers_df = current_customers_df.drop(columns=row_count_columns)

# preview the first rows of the scored customers
print(current_customers_df.loc[:10, ["customerID", "predict", "No", "Yes"]])
## update the code block below with your connection info and cube IDs, or simply replace the entire block using the mstrio jupyter plugin
# Write the scored customers back as a new dataset named "Scored Customers".
# One statement per line (they were previously fused onto a single line, which
# is not valid Python).
dataset = Dataset(connection=conn, name="Scored Customers")
dataset.add_table(name="Scored Customers", data_frame=current_customers_df, update_policy="add")
# replace the placeholder with the ID of the destination folder
dataset.create(folder_id="__your_folder_id_here__")
# shutdown the h2o application server
# Look up the cluster handle, then ask it to shut down.
h2o_cluster = h2o.cluster()
h2o_cluster.shutdown()
h2o jupyter matplotlib mstrio-py numpy pandas pandas-profiling
jupyter notebook --ip 0.0.0.0 --port 9999 --allow-root --no-browser
jupyter notebook