{
  "nbformat_minor": 0, 
  "nbformat": 4, 
  "cells": [
    {
      "execution_count": null, 
      "cell_type": "code", 
      "source": [
        "%matplotlib inline"
      ], 
      "outputs": [], 
      "metadata": {
        "collapsed": false
      }
    }, 
    {
      "source": [
        "\n# Words\n\n\nWords/Ladder Graph\n------------------\nGenerate  an undirected graph over the 5757 5-letter words in the\ndatafile `words_dat.txt.gz`.  Two words are connected by an edge\nif they differ in one letter, resulting in 14,135 edges. This example\nis described in Section 1.1 in Knuth's book (see [1]_ and [2]_).\n\nReferences\n----------\n.. [1] Donald E. Knuth,\n   \"The Stanford GraphBase: A Platform for Combinatorial Computing\",\n   ACM Press, New York, 1993.\n.. [2] http://www-cs-faculty.stanford.edu/~knuth/sgb.html\n\n"
      ], 
      "cell_type": "markdown", 
      "metadata": {}
    }, 
    {
      "execution_count": null, 
      "cell_type": "code", 
      "source": [
        "# Authors: Aric Hagberg (hagberg@lanl.gov),\n#          Brendt Wohlberg,\n#          hughdbrown@yahoo.com\n\n#    Copyright (C) 2004-2017 by\n#    Aric Hagberg <hagberg@lanl.gov>\n#    Dan Schult <dschult@colgate.edu>\n#    Pieter Swart <swart@lanl.gov>\n#    All rights reserved.\n#    BSD license.\n\nimport gzip\nfrom string import ascii_lowercase as lowercase\n\nimport networkx as nx\n\n#-------------------------------------------------------------------\n#   The Words/Ladder graph of Section 1.1\n#-------------------------------------------------------------------\n\n\ndef generate_graph(words):\n    G = nx.Graph(name=\"words\")\n    lookup = dict((c, lowercase.index(c)) for c in lowercase)\n\n    def edit_distance_one(word):\n        for i in range(len(word)):\n            left, c, right = word[0:i], word[i], word[i + 1:]\n            j = lookup[c]  # lowercase.index(c)\n            for cc in lowercase[j + 1:]:\n                yield left + cc + right\n    candgen = ((word, cand) for word in sorted(words)\n               for cand in edit_distance_one(word) if cand in words)\n    G.add_nodes_from(words)\n    for word, cand in candgen:\n        G.add_edge(word, cand)\n    return G\n\n\ndef words_graph():\n    \"\"\"Return the words example graph from the Stanford GraphBase\"\"\"\n    fh = gzip.open('words_dat.txt.gz', 'r')\n    words = set()\n    for line in fh.readlines():\n        line = line.decode()\n        if line.startswith('*'):\n            continue\n        w = str(line[0:5])\n        words.add(w)\n    return generate_graph(words)\n\n\nif __name__ == '__main__':\n    G = words_graph()\n    print(\"Loaded words_dat.txt containing 5757 five-letter English words.\")\n    print(\"Two words are connected if they differ in one letter.\")\n    print(\"Graph has %d nodes with %d edges\"\n          % (nx.number_of_nodes(G), nx.number_of_edges(G)))\n    print(\"%d connected components\" % nx.number_connected_components(G))\n\n    for (source, target) in [('chaos', 'order'),\n                             ('nodes', 'graph'),\n                             ('pound', 'marks')]:\n        print(\"Shortest path between %s and %s is\" % (source, target))\n        try:\n            sp = nx.shortest_path(G, source, target)\n            for n in sp:\n                print(n)\n        except nx.NetworkXNoPath:\n            print(\"None\")"
      ], 
      "outputs": [], 
      "metadata": {
        "collapsed": false
      }
    }
  ], 
  "metadata": {
    "kernelspec": {
      "display_name": "Python 2", 
      "name": "python2", 
      "language": "python"
    }, 
    "language_info": {
      "mimetype": "text/x-python", 
      "nbconvert_exporter": "python", 
      "name": "python", 
      "file_extension": ".py", 
      "version": "2.7.12", 
      "pygments_lexer": "ipython2", 
      "codemirror_mode": {
        "version": 2, 
        "name": "ipython"
      }
    }
  }
}