Spaces:

opensyndrome
/

data-browser

Sleeping

App Files Community

anapaulagomes committed on Mar 10

Commit

5f1fdca

verified ·

1 Parent(s): 3aad961

Sync from GitHub via hub-sync

Browse files

Files changed (3) hide show

filter_engine.py +98 -76
fake_dataset.csv → toy_dataset.csv +0 -0
uv.lock +0 -0

filter_engine.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import marimo
-__generated_with = "0.20.2"
 app = marimo.App(
     width="medium",
     app_title="Open Syndrome Definition - Data Browser",
@@ -18,7 +18,7 @@ def _():
     import yaml
     from opensyndrome.filter import OSDEngine, load_profile
-    from opensyndrome.artifacts import get_schema_filepath, download_definitions, get_definition_dir
     return (
         OSDEngine,
@@ -35,13 +35,19 @@ def _():
 @app.cell
 def _(go, pl):
-    def plot_cases(_df_filtered, definitions, date_column="date", date_format="%Y-%m-%d %H:%M:%S"):
         _definitions_columns_sum = [
             pl.col(definition).sum().alias(definition) for definition in definitions
         ]
         _agg_df = (
             _df_filtered.with_columns(
-                pl.col(date_column).str.to_datetime(format=date_format, strict=False).cast(pl.Date).dt.truncate("1mo").alias("_month")
             )
             .group_by("_month")
             .agg(_definitions_columns_sum)
@@ -94,9 +100,7 @@ def _(go, pl):
 @app.cell
 def _(mo):
-    mo.md(r"""
-    # Open Syndrome Definition 👩🏽‍🔬
-    """)
     return
@@ -115,8 +119,8 @@ def _(mo):
 @app.cell
 def _(Path):
     EXAMPLE_DATASETS = {
-        "Fake dataset - generated with ChatGPT": {
-            "csv": Path("fake_dataset.csv"),
             "mapping": Path("mapping.yaml"),
             "date_column": "recording_ts",
         },
@@ -160,11 +164,7 @@ def _(EXAMPLE_DATASETS, data_source, example_picker, pl, sample_file):
             else None
         )
     else:
-        df_selected = (
-            pl.read_csv(sample_file.contents())
-            if sample_file.value
-            else None
-        )
     return (df_selected,)
@@ -223,14 +223,18 @@ def _(df_selected, initial_date_column, initial_yaml, mo):
     mo.vstack(
         [
-            mo.md("### **Mapping your data to the format**"),
             mo.md(
                 "Edit the YAML below to map your dataset columns to OSD concepts, "
                 "then click **Submit**. "
                 "Select the date column separately for the time-series view.\n\n"
                 f"Your dataset columns: `{_cols_hint}`"
             ),
-            mo.hstack([yaml_editor, mo.vstack([date_column_picker, date_format_input])], widths=[3, 1], align="start"),
         ]
     )
     return date_column_picker, date_format_input, yaml_editor
@@ -246,7 +250,10 @@ def _(df_selected, load_profile, mo, yaml, yaml_editor):
         mo.stop(True, mo.callout(mo.md(f"**Invalid YAML:** {_e}"), kind="danger"))
     if not _parsed["profiles"][0]["columns"]:
-        mo.stop(True, mo.callout(mo.md(f"You need to map **at least one column**"), kind="danger"))
     not_found = []
     for declared_column in _parsed["profiles"][0]["columns"]:
@@ -254,7 +261,12 @@ def _(df_selected, load_profile, mo, yaml, yaml_editor):
             not_found.append(declared_column)
     if not_found:
-        mo.stop(True, mo.callout(mo.md(f"**Columns not found:** {', '.join(not_found)}"), kind="danger"))
     try:
         _profile_name = _parsed["profiles"][0]["name"]
@@ -282,15 +294,6 @@ def _(get_definition_dir):
     return (definition_options,)
-@app.cell
-def _(definition_options, json):
-    def load_definition(name: str) -> dict:
-        letter_dir = name[0].lower()
-        return json.loads(definition_options[name].read_text())
-    return (load_definition,)
 @app.cell
 def _(definition_options, mo):
     definitions_dropdown = mo.ui.multiselect(
@@ -299,6 +302,30 @@ def _(definition_options, mo):
     return (definitions_dropdown,)
 @app.cell
 def _(definitions_dropdown, mo):
     mo.hstack([mo.md("**::lucide:filter:: Filters:**"), definitions_dropdown])
@@ -316,9 +343,7 @@ def _(
     profile,
 ):
     mo.stop(
-        df_selected is None
-        or df_selected.is_empty()
-        or not definitions_dropdown.value
     )
     definitions = definitions_dropdown.value
@@ -327,8 +352,7 @@ def _(
     engine = OSDEngine(profile, skip_unresolvable=True)
     defs_dict = {
-        name: json.loads(definition_options[name].read_text())
-        for name in definitions
     }
     df_filtered = engine.label(df_selected, defs_dict)
     return definitions, df_filtered
@@ -340,55 +364,31 @@ def _(definitions, df_filtered, df_selected, mo):
     _cards = [
         mo.stat(
-            label="Rows & columns",
             value=df_selected.shape[0],
         ),
         mo.stat(
             label="Columns",
             value=df_selected.shape[1],
         ),
-        mo.stat(
-            label="Syndromic Indicators",
-            value=len(definitions),
-            caption=", ".join([definition for definition in definitions]),
-            bordered=True,
-        ),
     ]
-    _title = "## Data with Open Syndrome Definitions"
-    mo.vstack(
-        [
-            mo.md(_title),
-            mo.hstack(_cards, widths="equal", align="center"),
-        ]
-    )
     return
 @app.cell
-def _(definitions, load_definition, mo):
-    mo.stop(definitions is None)
-    mo.vstack(
-        [
-            mo.md("### Definitions details"),
-            mo.md(
-                "This section shows the definitions used to filter the data. You can use them to understand how the data was filtered and what criteria were applied. 🔎"
-            ),
-            mo.ui.tabs(
-                {
-                    "JSONs": mo.accordion(
-                        {
-                            definition: mo.json(load_definition(definition))
-                            for definition in definitions
-                        }
-                    ),
-                },
-            ),
-        ]
-    )
-    return
 @app.cell
@@ -414,7 +414,7 @@ def _(
     if code_column:
         diagnosis_chart = [
-            mo.md("## Codes comparison per syndromic indicator"),
             top_n.left(),
             groupped_bar(
                 df_filtered,
@@ -426,24 +426,46 @@ def _(
     else:
         diagnosis_chart = []
     mo.vstack(
         [
-            mo.md("## Time series"),
-            plot_cases(df_filtered, definitions, date_column=date_column, date_format=date_format_input.value),
-            *diagnosis_chart
         ]
     )
     return
 @app.cell
-def _(definitions, df_filtered, df_selected, mo):
-    mo.stop(df_selected is None or definitions is None or df_filtered is None)
     mo.vstack(
         [
-            mo.md("### **Data**"),
-            mo.ui.dataframe(df_selected),
         ]
     )
     return

 import marimo
+__generated_with = "0.14.16"
 app = marimo.App(
     width="medium",
     app_title="Open Syndrome Definition - Data Browser",
     import yaml
     from opensyndrome.filter import OSDEngine, load_profile
+    from opensyndrome.artifacts import get_definition_dir
     return (
         OSDEngine,
 @app.cell
 def _(go, pl):
+    def plot_cases(
+        _df_filtered, definitions, date_column="date", date_format="%Y-%m-%d %H:%M:%S"
+    ):
         _definitions_columns_sum = [
             pl.col(definition).sum().alias(definition) for definition in definitions
         ]
         _agg_df = (
             _df_filtered.with_columns(
+                pl.col(date_column)
+                .str.to_datetime(format=date_format, strict=False)
+                .cast(pl.Date)
+                .dt.truncate("1mo")
+                .alias("_month")
             )
             .group_by("_month")
             .agg(_definitions_columns_sum)
 @app.cell
 def _(mo):
+    mo.md(r"""# Open Syndrome Definition 👩🏽‍🔬""")
     return
 @app.cell
 def _(Path):
     EXAMPLE_DATASETS = {
+        "Toy dataset": {
+            "csv": Path("toy_dataset.csv"),
             "mapping": Path("mapping.yaml"),
             "date_column": "recording_ts",
         },
             else None
         )
     else:
+        df_selected = pl.read_csv(sample_file.contents()) if sample_file.value else None
     return (df_selected,)
     mo.vstack(
         [
+            mo.md("### Mapping your data to the format"),
             mo.md(
                 "Edit the YAML below to map your dataset columns to OSD concepts, "
                 "then click **Submit**. "
                 "Select the date column separately for the time-series view.\n\n"
                 f"Your dataset columns: `{_cols_hint}`"
             ),
+            mo.hstack(
+                [yaml_editor, mo.vstack([date_column_picker, date_format_input])],
+                widths=[3, 1],
+                align="start",
+            ),
         ]
     )
     return date_column_picker, date_format_input, yaml_editor
         mo.stop(True, mo.callout(mo.md(f"**Invalid YAML:** {_e}"), kind="danger"))
     if not _parsed["profiles"][0]["columns"]:
+        mo.stop(
+            True,
+            mo.callout(mo.md("You need to map **at least one column**"), kind="danger"),
+        )
     not_found = []
     for declared_column in _parsed["profiles"][0]["columns"]:
             not_found.append(declared_column)
     if not_found:
+        mo.stop(
+            True,
+            mo.callout(
+                mo.md(f"**Columns not found:** {', '.join(not_found)}"), kind="danger"
+            ),
+        )
     try:
         _profile_name = _parsed["profiles"][0]["name"]
     return (definition_options,)
 @app.cell
 def _(definition_options, mo):
     definitions_dropdown = mo.ui.multiselect(
     return (definitions_dropdown,)
+@app.cell
+def _(mo):
+    mo.md(r"""### Data sample""")
+    return
+@app.cell
+def _(df_selected):
+    df_selected.sample(10)
+    return
+@app.cell
+def _(mo):
+    mo.md(r"""---""")
+    return
+@app.cell
+def _(mo):
+    mo.md(r"""## Data & Definitions""")
+    return
 @app.cell
 def _(definitions_dropdown, mo):
     mo.hstack([mo.md("**::lucide:filter:: Filters:**"), definitions_dropdown])
     profile,
 ):
     mo.stop(
+        df_selected is None or df_selected.is_empty() or not definitions_dropdown.value
     )
     definitions = definitions_dropdown.value
     engine = OSDEngine(profile, skip_unresolvable=True)
     defs_dict = {
+        name: json.loads(definition_options[name].read_text()) for name in definitions
     }
     df_filtered = engine.label(df_selected, defs_dict)
     return definitions, df_filtered
     _cards = [
         mo.stat(
+            label="Syndromic Indicators",
+            value=len(definitions),
+            caption=", ".join([definition for definition in definitions]),
+            bordered=True,
+        ),
+        mo.stat(
+            label="Rows",
             value=df_selected.shape[0],
         ),
         mo.stat(
             label="Columns",
             value=df_selected.shape[1],
         ),
     ]
+    mo.hstack(_cards, widths="equal", align="center")
     return
 @app.cell
+def _(definition_options, json):
+    def load_definition(name: str) -> dict:
+        return json.loads(definition_options[name].read_text())
+    return (load_definition,)
 @app.cell
     if code_column:
         diagnosis_chart = [
+            mo.md("### Codes comparison per syndromic indicator"),
             top_n.left(),
             groupped_bar(
                 df_filtered,
     else:
         diagnosis_chart = []
+    timeseries = [
+        mo.md("### Time series"),
+        plot_cases(
+            df_filtered,
+            definitions,
+            date_column=date_column,
+            date_format=date_format_input.value,
+        ),
+    ]
     mo.vstack(
         [
+            mo.md("## Findings from the data 📊"),
+            *timeseries,
+            *diagnosis_chart,
         ]
     )
     return
 @app.cell
+def _(definitions, load_definition, mo):
+    mo.stop(definitions is None)
     mo.vstack(
         [
+            mo.md("### Definitions details"),
+            mo.md(
+                "Here the definitions used to filter the data. See here what criteria were applied. 🔎"
+            ),
+            mo.accordion(
+                {
+                    "JSONs": mo.accordion(
+                        {
+                            definition: mo.json(load_definition(definition))
+                            for definition in definitions
+                        }
+                    ),
+                },
+            ),
         ]
     )
     return

fake_dataset.csv → toy_dataset.csv RENAMED Viewed

File without changes

uv.lock CHANGED Viewed

The diff for this file is too large to render. See raw diff