{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook demonstrates the Dirichlet distribution. Plotting code below adapted from http://blog.bogatron.net/blog/2014/02/02/visualizing-dirichlet-distributions/" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import matplotlib\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import matplotlib.tri as tri\n", "from functools import reduce\n", "\n", "corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]])\n", "triangle = tri.Triangulation(corners[:, 0], corners[:, 1])\n", "# Mid-points of triangle sides opposite of each corner\n", "midpoints = [(corners[(i + 1) % 3] + corners[(i + 2) % 3]) / 2.0 \\\n", " for i in range(3)]\n", "def xy2bc(xy, tol=1.e-3):\n", " '''Converts 2D Cartesian coordinates to barycentric.'''\n", " s = [(corners[i] - midpoints[i]).dot(xy - midpoints[i]) / 0.75 \\\n", " for i in range(3)]\n", " return np.clip(s, tol, 1.0 - tol)\n", "\n", "class Dirichlet(object):\n", " def __init__(self, alpha):\n", " from math import gamma\n", " from operator import mul\n", " self._alpha = np.array(alpha)\n", " self._coef = gamma(np.sum(self._alpha)) / \\\n", " reduce(mul, [gamma(a) for a in self._alpha])\n", " def pdf(self, x):\n", " '''Returns pdf value for `x`.'''\n", " from operator import mul\n", " return self._coef * reduce(mul, [xx ** (aa - 1)\n", " for (xx, aa)in zip(x, self._alpha)])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "mystery1 = (1,3,0.5)\n", "mystery2 = (1,10,1)\n", "mystery3 = (1,0.2,0.2)\n", "mystery1 = (10,10,10)\n", "mystery2 = (0.1,0.1,0.1)\n", "mystery3 = (1,1,1)\n", "mystery4 = (5,5,0.1)\n", "mystery5 = (5,0.1,0.1)\n", "mystery6 = (5,5,1)\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def draw_pdf_contours(dist, nlevels=200, subdiv=8, **kwargs):\n", " import math\n", "\n", " refiner = tri.UniformTriRefiner(triangle)\n", " trimesh = refiner.refine_triangulation(subdiv=subdiv)\n", " pvals = [dist.pdf(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]\n", " '''mylevels = np.arange(0.0, 20.2, 0.2)'''\n", " '''CS = plt.tricontourf(trimesh, pvals, levels=mylevels, extend='both', **kwargs)'''\n", " myplot = plt.tricontourf(trimesh, pvals, nlevels, extend='both', **kwargs)\n", " \n", " plt.axis('equal')\n", " plt.xlim(0, 1)\n", " plt.ylim(0, 0.75**0.5)\n", " plt.axis('off')\n", " CB = plt.colorbar(myplot, extend='both')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mylevels10 = np.arange(0, 10, 0.2)\n", "mylevels20 = np.arange(0, 20, 0.2)\n", "mylevels50 = np.arange(0, 50, 0.2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "draw_pdf_contours(Dirichlet([1, 1, 1]), mylevels20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "draw_pdf_contours(Dirichlet([3, 3, 3]), mylevels10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "draw_pdf_contours(Dirichlet([8, 8, 8]), mylevels10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "draw_pdf_contours(Dirichlet([0.2, 0.2, 0.2]), mylevels20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "draw_pdf_contours(Dirichlet([0.4, 0.8, 3]), mylevels50)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "draw_pdf_contours(Dirichlet([5,5,-0.1]), mylevels10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compute pdf of particular vectors under a Dirichlet with the given alpha values." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import scipy.stats\n", "scipy.stats.dirichlet.pdf([0.001, 0.998, 0.001], [0.9, 0.9, 0.9])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Draw some samples from a Dirichlet and display them as partitions of a unit-length bar." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "s = np.random.dirichlet([1,1,1], 20).transpose()\n", "plt.barh(range(20), s[0], color='k')\n", "plt.barh(range(20), s[1], left=s[0], color='g')\n", "plt.barh(range(20), s[2], left=s[0]+s[1], color='r')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(mystery6)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "m = np.random.dirichlet((0.2, 0.5, 1))\n", "print(m)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "np.random.multinomial(1000, m, size=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "alphas = (0.01, 0.01, 0.01)\n", "total = [0] * 3\n", "for i in range(0, 10):\n", " m = np.random.dirichlet(alphas)\n", " '''print(m)'''\n", " counts = np.random.multinomial(1000, m, size=1)\n", " '''print(counts)'''\n", " total = total + counts\n", "print(total)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" } }, "nbformat": 4, "nbformat_minor": 1 }