customers_who_might_be_interested_in_investments
select
customer_id
from
hive_metastore.odap_features.customer
where
transactions_sum_amount_in_last_30d >= 50000 and
investice_web_visits_count_in_last_90d > 0 and
customers_who_havent_paid_their_loans_for_3_months
select
customer_id
from
hive_metastore.odap_features.customer as c
join
hive_metastore.sdm.loans as l
where
c.gender == "M" and
l.last_payment_date < (current_date() - INTERVAL 3 MONTH)
customers_with_high_ptb_mortgage
import mlflow
from pyspark.sql.functions import struct, col
logged_model = "runs:/7eab177aa30543db8daf6e22e9dede85/ptb_mortgage"
# Load model as a Spark UDF. Override result_type if the model does not return double values.
loaded_model = mlflow.pyfunc.spark_udf(
spark, model_uri=logged_model, result_type="double"
)
# Predict ptb_mortgage, filter for high values, select customer_ids
df_final = (
df.withColumn("ptb_mortgage", loaded_model(struct(*map(col, df.columns))))
.filter(col("ptb_mortgage") > 0.7)
.select("customer_id")
)