anapaulagomes committed on
Commit
5f1fdca
·
verified ·
1 Parent(s): 3aad961

Sync from GitHub via hub-sync

Browse files
filter_engine.py CHANGED
@@ -1,6 +1,6 @@
1
  import marimo
2
 
3
- __generated_with = "0.20.2"
4
  app = marimo.App(
5
  width="medium",
6
  app_title="Open Syndrome Definition - Data Browser",
@@ -18,7 +18,7 @@ def _():
18
 
19
  import yaml
20
  from opensyndrome.filter import OSDEngine, load_profile
21
- from opensyndrome.artifacts import get_schema_filepath, download_definitions, get_definition_dir
22
 
23
  return (
24
  OSDEngine,
@@ -35,13 +35,19 @@ def _():
35
 
36
  @app.cell
37
  def _(go, pl):
38
- def plot_cases(_df_filtered, definitions, date_column="date", date_format="%Y-%m-%d %H:%M:%S"):
 
 
39
  _definitions_columns_sum = [
40
  pl.col(definition).sum().alias(definition) for definition in definitions
41
  ]
42
  _agg_df = (
43
  _df_filtered.with_columns(
44
- pl.col(date_column).str.to_datetime(format=date_format, strict=False).cast(pl.Date).dt.truncate("1mo").alias("_month")
 
 
 
 
45
  )
46
  .group_by("_month")
47
  .agg(_definitions_columns_sum)
@@ -94,9 +100,7 @@ def _(go, pl):
94
 
95
  @app.cell
96
  def _(mo):
97
- mo.md(r"""
98
- # Open Syndrome Definition 👩🏽‍🔬
99
- """)
100
  return
101
 
102
 
@@ -115,8 +119,8 @@ def _(mo):
115
  @app.cell
116
  def _(Path):
117
  EXAMPLE_DATASETS = {
118
- "Fake dataset - generated with ChatGPT": {
119
- "csv": Path("fake_dataset.csv"),
120
  "mapping": Path("mapping.yaml"),
121
  "date_column": "recording_ts",
122
  },
@@ -160,11 +164,7 @@ def _(EXAMPLE_DATASETS, data_source, example_picker, pl, sample_file):
160
  else None
161
  )
162
  else:
163
- df_selected = (
164
- pl.read_csv(sample_file.contents())
165
- if sample_file.value
166
- else None
167
- )
168
  return (df_selected,)
169
 
170
 
@@ -223,14 +223,18 @@ def _(df_selected, initial_date_column, initial_yaml, mo):
223
 
224
  mo.vstack(
225
  [
226
- mo.md("### **Mapping your data to the format**"),
227
  mo.md(
228
  "Edit the YAML below to map your dataset columns to OSD concepts, "
229
  "then click **Submit**. "
230
  "Select the date column separately for the time-series view.\n\n"
231
  f"Your dataset columns: `{_cols_hint}`"
232
  ),
233
- mo.hstack([yaml_editor, mo.vstack([date_column_picker, date_format_input])], widths=[3, 1], align="start"),
 
 
 
 
234
  ]
235
  )
236
  return date_column_picker, date_format_input, yaml_editor
@@ -246,7 +250,10 @@ def _(df_selected, load_profile, mo, yaml, yaml_editor):
246
  mo.stop(True, mo.callout(mo.md(f"**Invalid YAML:** {_e}"), kind="danger"))
247
 
248
  if not _parsed["profiles"][0]["columns"]:
249
- mo.stop(True, mo.callout(mo.md(f"You need to map **at least one column**"), kind="danger"))
 
 
 
250
 
251
  not_found = []
252
  for declared_column in _parsed["profiles"][0]["columns"]:
@@ -254,7 +261,12 @@ def _(df_selected, load_profile, mo, yaml, yaml_editor):
254
  not_found.append(declared_column)
255
 
256
  if not_found:
257
- mo.stop(True, mo.callout(mo.md(f"**Columns not found:** {', '.join(not_found)}"), kind="danger"))
 
 
 
 
 
258
 
259
  try:
260
  _profile_name = _parsed["profiles"][0]["name"]
@@ -282,15 +294,6 @@ def _(get_definition_dir):
282
  return (definition_options,)
283
 
284
 
285
- @app.cell
286
- def _(definition_options, json):
287
- def load_definition(name: str) -> dict:
288
- letter_dir = name[0].lower()
289
- return json.loads(definition_options[name].read_text())
290
-
291
- return (load_definition,)
292
-
293
-
294
  @app.cell
295
  def _(definition_options, mo):
296
  definitions_dropdown = mo.ui.multiselect(
@@ -299,6 +302,30 @@ def _(definition_options, mo):
299
  return (definitions_dropdown,)
300
 
301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  @app.cell
303
  def _(definitions_dropdown, mo):
304
  mo.hstack([mo.md("**::lucide:filter:: Filters:**"), definitions_dropdown])
@@ -316,9 +343,7 @@ def _(
316
  profile,
317
  ):
318
  mo.stop(
319
- df_selected is None
320
- or df_selected.is_empty()
321
- or not definitions_dropdown.value
322
  )
323
 
324
  definitions = definitions_dropdown.value
@@ -327,8 +352,7 @@ def _(
327
  engine = OSDEngine(profile, skip_unresolvable=True)
328
 
329
  defs_dict = {
330
- name: json.loads(definition_options[name].read_text())
331
- for name in definitions
332
  }
333
  df_filtered = engine.label(df_selected, defs_dict)
334
  return definitions, df_filtered
@@ -340,55 +364,31 @@ def _(definitions, df_filtered, df_selected, mo):
340
 
341
  _cards = [
342
  mo.stat(
343
- label="Rows & columns",
 
 
 
 
 
 
344
  value=df_selected.shape[0],
345
  ),
346
  mo.stat(
347
  label="Columns",
348
  value=df_selected.shape[1],
349
  ),
350
- mo.stat(
351
- label="Syndromic Indicators",
352
- value=len(definitions),
353
- caption=", ".join([definition for definition in definitions]),
354
- bordered=True,
355
- ),
356
  ]
357
 
358
- _title = "## Data with Open Syndrome Definitions"
359
-
360
- mo.vstack(
361
- [
362
- mo.md(_title),
363
- mo.hstack(_cards, widths="equal", align="center"),
364
- ]
365
- )
366
  return
367
 
368
 
369
  @app.cell
370
- def _(definitions, load_definition, mo):
371
- mo.stop(definitions is None)
 
372
 
373
- mo.vstack(
374
- [
375
- mo.md("### Definitions details"),
376
- mo.md(
377
- "This section shows the definitions used to filter the data. You can use them to understand how the data was filtered and what criteria were applied. 🔎"
378
- ),
379
- mo.ui.tabs(
380
- {
381
- "JSONs": mo.accordion(
382
- {
383
- definition: mo.json(load_definition(definition))
384
- for definition in definitions
385
- }
386
- ),
387
- },
388
- ),
389
- ]
390
- )
391
- return
392
 
393
 
394
  @app.cell
@@ -414,7 +414,7 @@ def _(
414
 
415
  if code_column:
416
  diagnosis_chart = [
417
- mo.md("## Codes comparison per syndromic indicator"),
418
  top_n.left(),
419
  groupped_bar(
420
  df_filtered,
@@ -426,24 +426,46 @@ def _(
426
  else:
427
  diagnosis_chart = []
428
 
 
 
 
 
 
 
 
 
 
 
429
  mo.vstack(
430
  [
431
- mo.md("## Time series"),
432
- plot_cases(df_filtered, definitions, date_column=date_column, date_format=date_format_input.value),
433
- *diagnosis_chart
434
  ]
435
  )
436
  return
437
 
438
 
439
  @app.cell
440
- def _(definitions, df_filtered, df_selected, mo):
441
- mo.stop(df_selected is None or definitions is None or df_filtered is None)
442
 
443
  mo.vstack(
444
  [
445
- mo.md("### **Data**"),
446
- mo.ui.dataframe(df_selected),
 
 
 
 
 
 
 
 
 
 
 
 
447
  ]
448
  )
449
  return
 
1
  import marimo
2
 
3
+ __generated_with = "0.14.16"
4
  app = marimo.App(
5
  width="medium",
6
  app_title="Open Syndrome Definition - Data Browser",
 
18
 
19
  import yaml
20
  from opensyndrome.filter import OSDEngine, load_profile
21
+ from opensyndrome.artifacts import get_definition_dir
22
 
23
  return (
24
  OSDEngine,
 
35
 
36
  @app.cell
37
  def _(go, pl):
38
+ def plot_cases(
39
+ _df_filtered, definitions, date_column="date", date_format="%Y-%m-%d %H:%M:%S"
40
+ ):
41
  _definitions_columns_sum = [
42
  pl.col(definition).sum().alias(definition) for definition in definitions
43
  ]
44
  _agg_df = (
45
  _df_filtered.with_columns(
46
+ pl.col(date_column)
47
+ .str.to_datetime(format=date_format, strict=False)
48
+ .cast(pl.Date)
49
+ .dt.truncate("1mo")
50
+ .alias("_month")
51
  )
52
  .group_by("_month")
53
  .agg(_definitions_columns_sum)
 
100
 
101
  @app.cell
102
  def _(mo):
103
+ mo.md(r"""# Open Syndrome Definition 👩🏽‍🔬""")
 
 
104
  return
105
 
106
 
 
119
  @app.cell
120
  def _(Path):
121
  EXAMPLE_DATASETS = {
122
+ "Toy dataset": {
123
+ "csv": Path("toy_dataset.csv"),
124
  "mapping": Path("mapping.yaml"),
125
  "date_column": "recording_ts",
126
  },
 
164
  else None
165
  )
166
  else:
167
+ df_selected = pl.read_csv(sample_file.contents()) if sample_file.value else None
 
 
 
 
168
  return (df_selected,)
169
 
170
 
 
223
 
224
  mo.vstack(
225
  [
226
+ mo.md("### Mapping your data to the format"),
227
  mo.md(
228
  "Edit the YAML below to map your dataset columns to OSD concepts, "
229
  "then click **Submit**. "
230
  "Select the date column separately for the time-series view.\n\n"
231
  f"Your dataset columns: `{_cols_hint}`"
232
  ),
233
+ mo.hstack(
234
+ [yaml_editor, mo.vstack([date_column_picker, date_format_input])],
235
+ widths=[3, 1],
236
+ align="start",
237
+ ),
238
  ]
239
  )
240
  return date_column_picker, date_format_input, yaml_editor
 
250
  mo.stop(True, mo.callout(mo.md(f"**Invalid YAML:** {_e}"), kind="danger"))
251
 
252
  if not _parsed["profiles"][0]["columns"]:
253
+ mo.stop(
254
+ True,
255
+ mo.callout(mo.md("You need to map **at least one column**"), kind="danger"),
256
+ )
257
 
258
  not_found = []
259
  for declared_column in _parsed["profiles"][0]["columns"]:
 
261
  not_found.append(declared_column)
262
 
263
  if not_found:
264
+ mo.stop(
265
+ True,
266
+ mo.callout(
267
+ mo.md(f"**Columns not found:** {', '.join(not_found)}"), kind="danger"
268
+ ),
269
+ )
270
 
271
  try:
272
  _profile_name = _parsed["profiles"][0]["name"]
 
294
  return (definition_options,)
295
 
296
 
 
 
 
 
 
 
 
 
 
297
  @app.cell
298
  def _(definition_options, mo):
299
  definitions_dropdown = mo.ui.multiselect(
 
302
  return (definitions_dropdown,)
303
 
304
 
305
+ @app.cell
306
+ def _(mo):
307
+ mo.md(r"""### Data sample""")
308
+ return
309
+
310
+
311
+ @app.cell
312
+ def _(df_selected):
313
+ df_selected.sample(10)
314
+ return
315
+
316
+
317
+ @app.cell
318
+ def _(mo):
319
+ mo.md(r"""---""")
320
+ return
321
+
322
+
323
+ @app.cell
324
+ def _(mo):
325
+ mo.md(r"""## Data & Definitions""")
326
+ return
327
+
328
+
329
  @app.cell
330
  def _(definitions_dropdown, mo):
331
  mo.hstack([mo.md("**::lucide:filter:: Filters:**"), definitions_dropdown])
 
343
  profile,
344
  ):
345
  mo.stop(
346
+ df_selected is None or df_selected.is_empty() or not definitions_dropdown.value
 
 
347
  )
348
 
349
  definitions = definitions_dropdown.value
 
352
  engine = OSDEngine(profile, skip_unresolvable=True)
353
 
354
  defs_dict = {
355
+ name: json.loads(definition_options[name].read_text()) for name in definitions
 
356
  }
357
  df_filtered = engine.label(df_selected, defs_dict)
358
  return definitions, df_filtered
 
364
 
365
  _cards = [
366
  mo.stat(
367
+ label="Syndromic Indicators",
368
+ value=len(definitions),
369
+ caption=", ".join([definition for definition in definitions]),
370
+ bordered=True,
371
+ ),
372
+ mo.stat(
373
+ label="Rows",
374
  value=df_selected.shape[0],
375
  ),
376
  mo.stat(
377
  label="Columns",
378
  value=df_selected.shape[1],
379
  ),
 
 
 
 
 
 
380
  ]
381
 
382
+ mo.hstack(_cards, widths="equal", align="center")
 
 
 
 
 
 
 
383
  return
384
 
385
 
386
  @app.cell
387
+ def _(definition_options, json):
388
+ def load_definition(name: str) -> dict:
389
+ return json.loads(definition_options[name].read_text())
390
 
391
+ return (load_definition,)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
 
394
  @app.cell
 
414
 
415
  if code_column:
416
  diagnosis_chart = [
417
+ mo.md("### Codes comparison per syndromic indicator"),
418
  top_n.left(),
419
  groupped_bar(
420
  df_filtered,
 
426
  else:
427
  diagnosis_chart = []
428
 
429
+ timeseries = [
430
+ mo.md("### Time series"),
431
+ plot_cases(
432
+ df_filtered,
433
+ definitions,
434
+ date_column=date_column,
435
+ date_format=date_format_input.value,
436
+ ),
437
+ ]
438
+
439
  mo.vstack(
440
  [
441
+ mo.md("## Findings from the data 📊"),
442
+ *timeseries,
443
+ *diagnosis_chart,
444
  ]
445
  )
446
  return
447
 
448
 
449
  @app.cell
450
+ def _(definitions, load_definition, mo):
451
+ mo.stop(definitions is None)
452
 
453
  mo.vstack(
454
  [
455
+ mo.md("### Definitions details"),
456
+ mo.md(
457
+ "Here the definitions used to filter the data. See here what criteria were applied. 🔎"
458
+ ),
459
+ mo.accordion(
460
+ {
461
+ "JSONs": mo.accordion(
462
+ {
463
+ definition: mo.json(load_definition(definition))
464
+ for definition in definitions
465
+ }
466
+ ),
467
+ },
468
+ ),
469
  ]
470
  )
471
  return
fake_dataset.csv → toy_dataset.csv RENAMED
File without changes
uv.lock CHANGED
The diff for this file is too large to render. See raw diff