| | from graph_visualizations import * |
| | from text_visualization import WordCloudExtractor |
| | import logging |
| | from functools import partial |
| | import gradio as gr |
| |
|
| |
|
def display_representations(repos_df, repo, representation1, representation2):
    """Look up the tasks and two representation texts for one repository.

    Args:
        repos_df: DataFrame with at least the columns ``repo_name``,
            ``tasks``, ``representation`` and ``text``.
        repo: Value matched against the ``repo_name`` column.
        representation1: First value matched against ``representation``.
        representation2: Second value matched against ``representation``.

    Returns:
        A ``(tasks, text1, text2)`` tuple. ``tasks`` is the ``tasks`` cell of
        the first row belonging to ``repo``, or ``None`` when the repository
        has no rows at all. Each text is the ``text`` cell of the first row
        with the matching representation, or ``"No data available"`` when
        there is no such row.
    """
    no_data = "No data available"
    repo_data = repos_df[repos_df["repo_name"] == repo]
    # Lazy %-formatting: the frame is only rendered if INFO is enabled.
    logging.info("repo_data: %s", repo_data)

    # An unknown repository used to crash on ``.iloc[0]``; report it as
    # "nothing available" instead so the UI can show a placeholder.
    if repo_data.empty:
        return None, no_data, no_data

    def first_text(representation):
        matches = repo_data.loc[
            repo_data["representation"] == representation, "text"
        ]
        return no_data if matches.empty else matches.iloc[0]

    tasks = repo_data["tasks"].iloc[0]
    return tasks, first_text(representation1), first_text(representation2)
| |
|
| |
|
def get_representation_wordclouds(representations, repos_df):
    """Build one word cloud per representation type.

    Args:
        representations: Iterable of values to look up in the
            ``representation`` column of ``repos_df``.
        repos_df: DataFrame with ``representation`` and ``text`` columns.

    Returns:
        Dict mapping each representation to whatever
        ``WordCloudExtractor().extract_wordcloud_image`` returns for the list
        of texts of that representation.
    """

    def cloud_for(kind):
        # Gather the texts first, then hand them to a fresh extractor.
        matching_rows = repos_df[repos_df["representation"] == kind]
        corpus = list(matching_rows["text"])
        return WordCloudExtractor().extract_wordcloud_image(corpus)

    return {kind: cloud_for(kind) for kind in representations}
| |
|
| |
|
def load_embeddings_description():
    """Placeholder: currently loads nothing and returns ``None``."""
    return None
| |
|
| |
|
def display_wordclouds(representation_types, repos_df):
    """Create a "Wordclouds" heading followed by a gallery of word clouds.

    The created Gradio components are not returned, so this is presumably
    meant to be called inside an active ``gr.Blocks`` context.

    Args:
        representation_types: Representation names to build word clouds for.
        repos_df: DataFrame forwarded to ``get_representation_wordclouds``.
    """
    clouds_by_type = get_representation_wordclouds(representation_types, repos_df)
    gr.Markdown("# Wordclouds")

    # The gallery receives (image, caption) pairs, captioned by type.
    gallery_items = []
    for kind, image in clouds_by_type.items():
        gallery_items.append((image, kind))

    gr.Gallery(gallery_items, columns=[3], rows=[4], height=300)
| |
|
| |
|
def setup_repository_representations_tab(repos_df, repos, representation_types):
    """Build the tab that compares two text representations of a repository.

    Creates a repository dropdown and two representation dropdowns, a
    Markdown area for the repository's tasks, two side-by-side Markdown
    panels for the selected representations, and finally the word-cloud
    gallery. Changing any dropdown refreshes the three Markdown areas.

    Args:
        repos_df: DataFrame passed to ``display_representations`` and
            ``display_wordclouds``.
        repos: Sequence of repository names offered in the dropdown; the
            first one (if any) is preselected.
        representation_types: Choices for both representation dropdowns.
    """
    default_representation1 = "readme"
    default_representation2 = "code2doc_generated_readme"
    # ``len`` rather than truthiness so array-like inputs work too; an empty
    # list no longer crashes on ``repos[0]``.
    default_repo = repos[0] if len(repos) > 0 else None

    gr.Markdown("# Comparing repository representations")
    gr.Markdown("Select a repository and two representation types to compare them.")
    with gr.Row():
        repo = gr.Dropdown(choices=repos, label="Repository", value=default_repo)
        representation1 = gr.Dropdown(
            choices=representation_types,
            label="Representation 1",
            value=default_representation1,
        )
        representation2 = gr.Dropdown(
            choices=representation_types,
            label="Representation 2",
            value=default_representation2,
        )

    displayed_tasks = gr.Markdown(elem_id="tasks")
    with gr.Row():
        with gr.Column(
            elem_id="column1",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text1 = gr.Markdown()
        with gr.Column(
            elem_id="column2",
            variant="panel",
            scale=1,
            min_width=300,
        ):
            text2 = gr.Markdown()

    def format_tasks(tasks):
        # Joining a plain string would put a space between every character
        # ("N o   t a s k s ..."), so strings are passed through untouched.
        if tasks is None:
            return "No tasks available"
        if isinstance(tasks, str):
            return tasks
        return " ".join(map(str, tasks))

    def update_representations(repo, representation1, representation2):
        tasks, text1_content, text2_content = display_representations(
            repos_df, repo, representation1, representation2
        )
        return (
            "## Repository PapersWithCode tasks:\n" + format_tasks(tasks),
            f"### Representation 1: {representation1}\n\n{text1_content}",
            f"### Representation 2: {representation2}\n\n{text2_content}",
        )

    # Pre-populate the Markdown areas for the default selection so the tab
    # is not empty before the first dropdown change.
    if default_repo is not None:
        displayed_tasks.value, text1.value, text2.value = update_representations(
            default_repo, default_representation1, default_representation2
        )

    for component in [repo, representation1, representation2]:
        component.change(
            fn=update_representations,
            inputs=[repo, representation1, representation2],
            outputs=[displayed_tasks, text1, text2],
        )

    display_wordclouds(representation_types, repos_df)
| |
|
| |
|
def setup_tasks_tab(descriptions, task_visualizations):
    """Build the tab with task-count plots for all and selected repositories.

    Each plot has its own minimum-count slider and "Update Plots" button.
    Clicking a button calls ``task_visualizations.get_tasks_sunburst`` with
    the slider value as the positional argument and ``which_df`` set to
    ``"all"`` or ``"selected"``; the result is shown in the matching plot.
    The plots stay empty until a button is clicked.

    Args:
        descriptions: Mapping that provides ``"task_counts_description"``,
            rendered as Markdown at the top of the tab.
        task_visualizations: Object exposing ``get_tasks_sunburst``.
    """
    gr.Markdown(descriptions["task_counts_description"])

    with gr.Row():
        min_task_counts_slider_all = gr.Slider(
            minimum=50,
            maximum=1000,
            value=150,
            step=50,
            label="Minimum Task Count (All Repositories)",
        )
        update_button = gr.Button("Update Plots")
        min_task_counts_slider_selected = gr.Slider(
            minimum=10,
            maximum=100,
            value=50,
            step=10,
            label="Minimum Task Count (Selected Repositories)",
        )
        update_selected_button = gr.Button("Update Plots")

    # gr.Row accepts keyword arguments only; the former positional
    # "Task Counts" string had no effect as a title and has been dropped.
    with gr.Row():
        all_repos_tasks_plot = gr.Plot(label="All Repositories")
        selected_repos_tasks_plot = gr.Plot(label="Selected Repositories")

    update_button.click(
        fn=partial(task_visualizations.get_tasks_sunburst, which_df="all"),
        inputs=[min_task_counts_slider_all],
        outputs=[all_repos_tasks_plot],
    )

    update_selected_button.click(
        fn=partial(task_visualizations.get_tasks_sunburst, which_df="selected"),
        inputs=[min_task_counts_slider_selected],
        outputs=[selected_repos_tasks_plot],
    )
| |
|
| |
|
def setup_embeddings_tab(descriptions, embedding_visualizer):
    """Build the tab showing the task-area scatterplot and embedding plots.

    The tab contains a "Tasks by area" section followed by one section per
    embedding plot group. A group's description is rendered only when
    ``descriptions`` has a non-empty entry under the group's name.

    Args:
        descriptions: Mapping that provides ``"intro"`` and, optionally, one
            entry per plot group name.
        embedding_visualizer: Object exposing ``make_task_area_scatterplot()``
            and ``make_embedding_plots(color_col=...)``; the latter's result
            is indexed by plot group name.
    """
    section_names = (
        "Basic representations",
        "Dependency graph based representations",
        "READMEs",
    )

    # Components must be created inside the column's context to become its
    # children; passing a list of components to gr.Column() does not do that.
    with gr.Column():
        gr.Markdown("## Tasks by area")
        gr.Markdown(descriptions["intro"])
        gr.Plot(embedding_visualizer.make_task_area_scatterplot())

        embedding_plots = embedding_visualizer.make_embedding_plots(
            color_col="representation"
        )
        for plot_name in section_names:
            gr.Markdown(f"## {plot_name}")
            description = descriptions.get(plot_name)
            if description:
                gr.Markdown(description)
            gr.Plot(embedding_plots[plot_name])
| |
|
| |
|
def setup_graph_tab():
    """Build the interactive dependency-graph tab.

    Loads the graphs with ``init_graphs()`` (used here as a mapping from
    repository name to graph) and creates a repository dropdown, a layout
    dropdown, a fixed pool of edge-type checkboxes, a "Visualize Graph"
    button, a statistics textbox and the plot itself.

    The checkbox pool has a fixed size because the event wiring needs a
    static list of components; unused checkboxes are hidden. Edge types
    beyond the pool size are not selectable.
    """
    max_edge_checkboxes = 8

    gr.Markdown("# Dependency Graph Visualization")
    gr.Markdown("Select a repository to visualize its dependency graph.")
    graphs_dict = init_graphs()
    repo_names = list(graphs_dict.keys())

    def edge_types_for(repo_name):
        # Unknown (or unset) repositories simply have no edge types.
        if repo_name in graphs_dict:
            return get_available_edge_types(graphs_dict[repo_name])
        return []

    def checked_by_default(edge_type):
        # "function-function" edges start unchecked; everything else is on.
        return edge_type != "function-function"

    def plot_selected_repo(repo_name, layout_type, *edge_type_checkboxes):
        """Plot ``repo_name`` restricted to the currently checked edge types."""
        edge_types = edge_types_for(repo_name)
        # Checkbox i corresponds to edge type i; zip drops the hidden
        # checkboxes that have no edge type behind them.
        selected_edge_types = {
            edge_type
            for edge_type, is_selected in zip(edge_types, edge_type_checkboxes)
            if is_selected
        }
        fig, stats = visualize_graph(
            repo_name, graphs_dict, layout_type, selected_edge_types
        )
        return fig, stats

    def update_edge_checkboxes(repo_name):
        """Update edge type checkboxes when repository changes"""
        edge_types = edge_types_for(repo_name)
        checkboxes = []
        for i in range(max_edge_checkboxes):
            if i < len(edge_types):
                edge_type = edge_types[i]
                checkboxes.append(
                    gr.Checkbox(
                        label=edge_type,
                        value=checked_by_default(edge_type),
                        visible=True,
                    )
                )
            else:
                checkboxes.append(gr.Checkbox(visible=False))
        return checkboxes

    initial_edge_types = edge_types_for(repo_names[0]) if repo_names else []

    with gr.Row():
        with gr.Column(scale=1):
            repo_dropdown = gr.Dropdown(
                choices=repo_names,
                label="Select Repository",
                value=repo_names[0] if repo_names else None,
            )

            layout_dropdown = gr.Dropdown(
                choices=[
                    ("Spring Layout (Force-directed)", "spring"),
                    ("Circular Layout", "circular"),
                    ("Kamada-Kawai Layout", "kamada_kawai"),
                    ("Fruchterman-Reingold Layout", "fruchterman_reingold"),
                    ("Shell Layout", "shell"),
                    ("Spectral Layout", "spectral"),
                    ("Planar Layout", "planar"),
                ],
                label="Select Layout",
                value="spring",
            )

            gr.Markdown("### Edge Type Filters")
            gr.Markdown("Select which edge types to display:")

            edge_checkboxes = []
            for i in range(max_edge_checkboxes):
                if i < len(initial_edge_types):
                    edge_type = initial_edge_types[i]
                    # Same default rule as update_edge_checkboxes, so the
                    # first repository looks the same before and after a
                    # round trip through the dropdown.
                    checkbox = gr.Checkbox(
                        label=edge_type,
                        value=checked_by_default(edge_type),
                        visible=True,
                    )
                else:
                    checkbox = gr.Checkbox(label=f"Edge Type {i+1}", visible=False)
                edge_checkboxes.append(checkbox)

            visualize_btn = gr.Button("Visualize Graph", variant="primary")

            stats_text = gr.Textbox(
                label="Graph Statistics", lines=6, interactive=False
            )

        with gr.Column(scale=2):
            graph_plot = gr.Plot(label="Interactive Dependency Graph")

    all_inputs = [repo_dropdown, layout_dropdown] + edge_checkboxes
    plot_outputs = [graph_plot, stats_text]

    visualize_btn.click(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Refresh the checkboxes first and only then re-plot. Two independent
    # change handlers would both receive the checkbox values from before the
    # refresh, i.e. the previous repository's selection.
    repo_dropdown.change(
        fn=update_edge_checkboxes,
        inputs=[repo_dropdown],
        outputs=edge_checkboxes,
    ).then(
        fn=plot_selected_repo,
        inputs=all_inputs,
        outputs=plot_outputs,
    )

    # Layout and edge-type changes re-plot immediately.
    for trigger in [layout_dropdown, *edge_checkboxes]:
        trigger.change(
            fn=plot_selected_repo,
            inputs=all_inputs,
            outputs=plot_outputs,
        )
| |
|