{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# License text similarity\n",
    "\n",
    "Reads every license file from `choosealicense.com/_licenses`, computes the `EntropyNCD` distance from `textdistance` for every ordered pair of licenses, and shows the result as a heatmap."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from itertools import islice\n",
    "from pathlib import Path\n",
    "from sys import argv\n",
    "\n",
    "import pandas as pd\n",
    "import plotnine as gg\n",
    "from textdistance import EntropyNCD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory holding one text file per license, relative to the working directory.\n",
    "LICENSES_DIR = Path('choosealicense.com', '_licenses')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load license texts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map license name (file stem) -> full file content.\n",
    "# * sorted(): iterdir() yields entries in arbitrary order; sorting keeps runs reproducible.\n",
    "# * is_file(): skip sub-directories, which read_text() cannot open.\n",
    "# * encoding='utf-8': do not depend on the platform default encoding\n",
    "#   (assumes the license files are UTF-8 -- TODO confirm for other data sets).\n",
    "licenses = {\n",
    "    path.stem: path.read_text(encoding='utf-8')\n",
    "    for path in sorted(LICENSES_DIR.iterdir())\n",
    "    if path.is_file()\n",
    "}\n",
    "len(licenses)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pairwise distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the distance object once instead of once per pair.\n",
    "# qval=None tells textdistance to split the texts into words.\n",
    "ncd = EntropyNCD(qval=None)\n",
    "\n",
    "# One (name1, name2, distance) record for every ordered pair, including\n",
    "# self-pairs, so the heatmap below gets a complete square grid.\n",
    "distances = [\n",
    "    (name1, name2, ncd(content1, content2))\n",
    "    for name1, content1 in licenses.items()\n",
    "    for name2, content2 in licenses.items()\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(distances, columns=['name1', 'name2', 'distance'])\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Heatmap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(\n",
    "    gg.ggplot(df)\n",
    "    + gg.geom_tile(gg.aes(x='name1', y='name2', fill='distance'))\n",
    "    # Reuse the default continuous fill palette, but with its colours reversed.\n",
    "    + gg.scale_fill_continuous(palette=lambda *args: gg.scale_fill_continuous().palette(*args)[::-1])\n",
    "    + gg.labs(\n",
    "        title='Pairwise EntropyNCD distance between license texts',\n",
    "        x='license',\n",
    "        y='license',\n",
    "    )\n",
    "    + gg.theme(\n",
    "        figure_size=(12, 8),\n",
    "        axis_text_x=gg.element_text(angle=90),\n",
    "    )\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
Generated by dwww version 1.15 on Wed Jun 26 06:28:11 CEST 2024.