Tabular Classification
Scikit-learn
English
hierarchical
healthcare
ehr
copd
clinical-risk
tabular
scikit-learn
clustering
unsupervised
Instructions to use stormid/copd-model-e with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use stormid/copd-model-e with Scikit-learn:
```python
from huggingface_hub import hf_hub_download
import joblib

model = joblib.load(
    hf_hub_download("stormid/copd-model-e", "sklearn_model.joblib")
)
# only load pickle files from sources you trust
# read more about it here https://skops.readthedocs.io/en/stable/persistence.html
```
- Notebooks
- Google Colab
- Kaggle
| # Import libraries | |
| import pandas as pd | |
# Base directory for all data; replace the placeholder with a real path.
file_path = '<YOUR_DATA_PATH>/'
# Sub-directory holding the input CSV files read by main().
input_file_path = f"{file_path}{'data_for_model_e_columns/'}"
def read_data(file):
    """
    Load a CSV data source into a dataframe
    --------
    :param file: string filename (anything accepted by pandas.read_csv)
    :return: dataframe with the file contents
    """
    return pd.read_csv(file)
def format_data(data, IDs, onboard):
    """
    Convert datetime columns to datetime format, filter to only include RECEIVER and scale up IDs,
    and join onboarding dates
    --------
    :param data: NIV dataframe
    :param IDs: dataframe containing Study IDs
    :param onboard: dataframe containing onboarding dates (column 'OB_date');
        it is not modified by this function
    :return: formatted dataframe: the selected NIV columns left-joined onto IDs,
        plus the onboarding columns and 'yearcensor' (OB_date + 365 days)
    """
    niv_columns = ['Study_ID', 'ie_ratio_value_50', 'ie_ratio_value_95',
                   'ie_ratio_maximum_value', 'resp_events_AHI',
                   'resp_events_HI', 'Stop_time', 'Start_time']
    # Take an explicit copy so the column assignment below acts on an
    # independent frame rather than on a slice of the caller's data.
    data = data[niv_columns].copy()
    data['Stop_time'] = pd.to_datetime(data['Stop_time'])

    # Work on a copy so the caller's onboarding dataframe keeps its original
    # columns and dtypes.
    onboard = onboard.copy()
    onboard['OB_date'] = pd.to_datetime(onboard['OB_date'])
    onboard['yearcensor'] = onboard['OB_date'] + pd.offsets.DateOffset(days=365)

    # Left joins starting from IDs: only the listed study IDs are kept, and an
    # ID without NIV rows still appears once with missing NIV values.
    data = pd.merge(IDs, data, on="Study_ID", how="left")
    data = pd.merge(data, onboard, on="Study_ID", how="left")
    return data
def filter_study_censor(data, censor_date='2021-09-01'):
    """
    Filter the dataframe to only contain data obtained before the study censor date
    --------
    :param data: dataframe with a datetime 'Stop_time' column
    :param censor_date: study censor date; rows whose 'Stop_time' is strictly
        earlier are kept (defaults to '2021-09-01')
    :return: dataframe containing data obtained before the study censor date
    """
    # Rows with a missing Stop_time compare as False and are therefore dropped.
    before_censor = data['Stop_time'] < censor_date
    return data[before_censor]
def filter_first_year(data):
    """
    Keep only the rows recorded within the first year after onboarding
    --------
    :param data: dataframe with 'Stop_time' and 'yearcensor' columns
    :return: dataframe containing only rows whose 'Stop_time' is on or before 'yearcensor'
    """
    within_first_year = data['Stop_time'] <= data['yearcensor']
    return data.loc[within_first_year]
def mean_max_summary(data, col):
    """
    Summarise the NIV parameters per group as mean, max and count
    --------
    :param data: dataframe containing the NIV parameter columns
    :param col: parameter to group on
    :return: summary dataframe with mean, max and count of each NIV parameter per group
    """
    niv_parameters = ('ie_ratio_value_50', 'ie_ratio_value_95',
                      'ie_ratio_maximum_value', 'resp_events_AHI',
                      'resp_events_HI')
    statistics = ['mean', 'max', 'count']
    aggregations = {parameter: statistics for parameter in niv_parameters}
    grouped = data.groupby(col)
    return grouped.agg(aggregations)
def calculate_summary_data(data):
    """
    Summarise the NIV parameters per study ID twice - once for data before the
    study censor date and once for data within a year of onboarding - and save
    each summary dataframe as a csv file under file_path
    --------
    :param data: dataframe
    """
    # Build both summaries before writing anything to disk.
    summaries = {
        'censor.csv': mean_max_summary(filter_study_censor(data), 'Study_ID'),
        'year.csv': mean_max_summary(filter_first_year(data), 'Study_ID'),
    }
    output_file_path = file_path + 'NIV_ Average_parameters_to_'
    for suffix, summary in summaries.items():
        summary.to_csv(output_file_path + suffix)
def main():
    """
    Run the NIV summary pipeline: read the input CSV files, format the NIV
    data and save the per-study-ID summaries
    """
    # Read data
    niv_data = read_data(input_file_path + "NIV_data_wrangled.csv")
    onboard = read_data(input_file_path + "onboarding_dates.csv")
    rc_su1_ids = read_data(input_file_path + "RC_SU1_IDs.csv")

    # Format data
    niv_data = format_data(niv_data, rc_su1_ids, onboard)

    # Calculate and save summary NIV data to year and study censor dates for each ID
    calculate_summary_data(niv_data)


# Only run the pipeline when executed as a script, so importing this module
# (e.g. to reuse the helper functions) does not read or write any files.
if __name__ == "__main__":
    main()