diff --git a/.gitignore b/.gitignore index bb14e0e..6a5bebb 100755 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.ipynb_checkpoints/ .DS_Store *.DS_Store *.pyc diff --git a/.ipynb_checkpoints/not_so_obvious_python_stuff-checkpoint.ipynb b/.ipynb_checkpoints/not_so_obvious_python_stuff-checkpoint.ipynb deleted file mode 100644 index 3ea736a..0000000 --- a/.ipynb_checkpoints/not_so_obvious_python_stuff-checkpoint.ipynb +++ /dev/null @@ -1,3160 +0,0 @@ -{ - "metadata": { - "name": "", - "signature": "sha256:9a07a78204a51f0faab65e52657f0446cd604ed470627f9c6af1ba74c047fe23" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ - { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Sebastian Raschka \n", - "last updated: 04/27/2014 ([Changelog](#changelog))\n", - "\n", - "[Link to this IPython Notebook on GitHub](https://github.com/rasbt/python_reference/blob/master/not_so_obvious_python_stuff.ipynb)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### All code was executed in Python 3.4" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# A collection of not-so-obvious Python stuff you should know!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
\n", - "A = np.array([ [1,2,3], [4,5,6], [7,8,9] ])\n", - ">>> A\n", - "array([[1, 2, 3],\n", - " [4, 5, 6],\n", - " [7, 8, 9]])\n", - "\n", - "I want my result to be:\n", - "
\n", - "array([[1],\n", - " [4],\n", - " [7]])\n", - "\n", - "with `.shape` = `(3,1)`\n", - "\n", - "\n", - "However, the default behavior of numpy is to return the column as a row vector:\n", - "\n", - "
\n", - ">>> A[:,0]\n", - "array([1, 4, 7])\n", - ">>> A[:,0].shape\n", - "(3,)\n", - "" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import numpy as np\n", - "\n", - "# 1st column, e.g., A[:,0,np.newaxis]\n", - "\n", - "def colvec_method1(A):\n", - " for col in A.T:\n", - " colvec = row[:,np.newaxis]\n", - " yield colvec" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 83 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 1st column, e.g., A[:,0:1]\n", - "\n", - "def colvec_method2(A):\n", - " for idx in range(A.shape[1]):\n", - " colvec = A[:,idx:idx+1]\n", - " yield colvec" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 82 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 1st column, e.g., A[:,0].reshape(-1,1)\n", - "\n", - "def colvec_method3(A):\n", - " for idx in range(A.shape[1]):\n", - " colvec = A[:,idx].reshape(-1,1)\n", - " yield colvec" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 81 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 1st column, e.g., np.vstack(A[:,0]\n", - "\n", - "def colvec_method4(A):\n", - " for idx in range(A.shape[1]):\n", - " colvec = np.vstack(A[:,idx])\n", - " yield colvec" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 79 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 1st column, e.g., np.row_stack(A[:,0])\n", - "\n", - "def colvec_method5(A):\n", - " for idx in range(A.shape[1]):\n", - " colvec = np.row_stack(A[:,idx])\n", - " yield colvec" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 77 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 1st column, e.g., np.column_stack((A[:,0],))\n", - "\n", - "def colvec_method6(A):\n", - " for idx in range(A.shape[1]):\n", - " colvec = np.column_stack((A[:,idx],))\n", - " 
yield colvec" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 74 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "# 1st column, e.g., A[:,[0]]\n", - "\n", - "def colvec_method7(A):\n", - " for idx in range(A.shape[1]):\n", - " colvec = A[:,[idx]]\n", - " yield colvec" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 89 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "def test_method(method, A):\n", - " for i in method(A): \n", - " assert i.shape == (A.shape[0],1), \"{}, {}\".format(i.shape, A.shape[0],1)" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 69 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import timeit\n", - "\n", - "A = np.random.random((300, 3))\n", - "\n", - "for method in [\n", - " colvec_method1, colvec_method2, \n", - " colvec_method3, colvec_method4, \n", - " colvec_method5, colvec_method6,\n", - " colvec_method7]:\n", - " print('\\nTest:', method.__name__)\n", - " %timeit test_method(colvec_method2, A)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "Test: colvec_method1\n", - "100000 loops, best of 3: 16.6 \u00b5s per loop" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "\n", - "Test: colvec_method2\n", - "10000 loops, best of 3: 16.1 \u00b5s per loop" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "\n", - "Test: colvec_method3\n", - "100000 loops, best of 3: 16.2 \u00b5s per loop" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "\n", - "Test: colvec_method4\n", - "100000 loops, best of 3: 16.4 \u00b5s per loop" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "\n", - "Test: colvec_method5\n", - "100000 loops, best of 3: 16.2 \u00b5s per loop" - ] 
- }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "\n", - "Test: colvec_method6\n", - "100000 loops, best of 3: 16.8 \u00b5s per loop" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "\n", - "Test: colvec_method7\n", - "100000 loops, best of 3: 16.3 \u00b5s per loop" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n" - ] - } - ], - "prompt_number": 91 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "
-###Links to view the IPython Notebooks
+
-- [Python benchmarks via `timeit`](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/benchmarks/timeit_tests.ipynb?create=1)
-- [Benchmarks of different palindrome functions](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/benchmarks/palindrome_timeit.ipynb?create=1)
-- [A collection of not so obvious Python stuff you should know!](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/not_so_obvious_python_stuff.ipynb?create=1)
-- [Python's scope resolution for variable names and the LEGB rule](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/scope_resolution_legb_rule.ipynb?create=1)
+- [// Python tips and tutorials](#-python-tips-and-tutorials)
+- [// Python and the web](#-python-and-the-web)
+- [// Algorithms](#-algorithms)
+- [// Plotting and Visualization](#-plotting-and-visualization)
+- [// Benchmarks](#-benchmarks)
+- [// Python and "Data Science"](#-python-and-data-science)
+- [// Useful scripts and snippets](#-useful-scripts-and-snippets)
+- [// Other](#-other)
+- [// Links](#-links)
-### Links to Markdown files
-- [A thorough guide to SQLite database operations in Python](./sqlite3_howto/README.md)
-- [Unit testing in Python - Why we want to make it a habit](./tutorials/unit_testing.md)
-- [Installing Scientific Packages for Python3 on MacOS 10.9 Mavericks](./tutorials/installing_scientific_packages.md)
+
+
+
+
+
+Python tips and tutorials [back to top]
+
+- A collection of not so obvious Python stuff you should know! [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/not_so_obvious_python_stuff.ipynb?create=1)]
+
+- Python's scope resolution for variable names and the LEGB rule [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/scope_resolution_legb_rule.ipynb?create=1)]
+
+- Key differences between Python 2.x and Python 3.x [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/key_differences_between_python_2_and_3.ipynb?create=1)]
+
+- A thorough guide to SQLite database operations in Python [[Markdown](./tutorials/sqlite3_howto/README.md)]
+
+- Unit testing in Python - Why we want to make it a habit [[Markdown](./tutorials/unit_testing.md)]
+
+- Installing Scientific Packages for Python3 on MacOS 10.9 Mavericks [[Markdown](./tutorials/installing_scientific_packages.md)]
+
+- Sorting CSV files using the Python csv module [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/sorting_csvs.ipynb)]
+
+- Using Cython with and without IPython magic [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/running_cython.ipynb)]
+
+- Parallel processing via the multiprocessing module [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/multiprocessing_intro.ipynb?create=1)]
+
+- Entry point: Data - using sci-packages to prepare data for Machine Learning tasks and other data analyses [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/python_data_entry_point.ipynb?create=1)]
+
+- Awesome things that you can do in IPython Notebooks (in progress) [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/awesome_things_ipynb.ipynb)]
+
+- A collection of useful regular expressions [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/useful_regex.ipynb)]
+
+- Quick guide for dealing with missing numbers in NumPy [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/numpy_nan_quickguide.ipynb)]
+
+- A random collection of useful Python snippets [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/python_patterns/patterns.ipynb)]
+
+- Things in pandas I wish I'd had known earlier [[IPython nb](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/things_in_pandas.ipynb)]
+
+
+
+| \n", + " | a | \n", + "b | \n", + "c | \n", + "d | \n", + "
|---|---|---|---|---|
| 995 | \n", + "995 | \n", + "995 | \n", + "995 | \n", + "995 | \n", + "
| 996 | \n", + "996 | \n", + "996 | \n", + "996 | \n", + "996 | \n", + "
| 997 | \n", + "997 | \n", + "997 | \n", + "997 | \n", + "997 | \n", + "
| 998 | \n", + "998 | \n", + "998 | \n", + "998 | \n", + "998 | \n", + "
| 999 | \n", + "999 | \n", + "999 | \n", + "999 | \n", + "999 | \n", + "
%watermark [-a AUTHOR] [-d] [-e] [-n] [-t] [-z] [-u] [-c CUSTOM_TIME]\n", + " [-v] [-p PACKAGES] [-h] [-m] [-g] [-w]\n", + "\n", + " \n", + "IPython magic function to print date/time stamps \n", + "and various system information.\n", + "\n", + "watermark version 1.2.1\n", + "\n", + "optional arguments:\n", + " -a AUTHOR, --author AUTHOR\n", + " prints author name\n", + " -d, --date prints current date as MM/DD/YYYY\n", + " -e, --eurodate prints current date as DD/MM/YYYY\n", + " -n, --datename prints date with abbrv. day and month names\n", + " -t, --time prints current time\n", + " -z, --timezone appends the local time zone\n", + " -u, --updated appends a string \"Last updated: \"\n", + " -c CUSTOM_TIME, --custom_time CUSTOM_TIME\n", + " prints a valid strftime() string\n", + " -v, --python prints Python and IPython version\n", + " -p PACKAGES, --packages PACKAGES\n", + " prints versions of specified Python modules and\n", + " packages\n", + " -h, --hostname prints the host name\n", + " -m, --machine prints system and machine info\n", + " -g, --githash prints current Git commit hash\n", + " -w, --watermark prints the current version of watermark\n", + "File: ~/.ipython/extensions/watermark.py\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
Sebastian Raschka, 03/2014
Code was executed in Python 3.4.0
True and False in the datetime modulePointed out in a nice article "A false midnight" at http://lwn.net/SubscriberLink/590299/bf73fe823974acea/:
-"it often comes as a big surprise for programmers to find (sometimes by way of a hard-to-reproduce bug) that,
unlike any other time value, midnight (i.e. datetime.time(0,0,0)) is False.
A long discussion on the python-ideas mailing list shows that, while surprising,
that behavior is desirable—at least in some quarters."
import datetime
-
-print('"datetime.time(0,0,0)" (Midnight) evaluates to', bool(datetime.time(0,0,0)))
-
-print('"datetime.time(1,0,0)" (1 am) evaluates to', bool(datetime.time(1,0,0)))
-Truemy_true_val = True
-
-
-print('my_true_val == True:', my_true_val == True)
-print('my_true_val is True:', my_true_val is True)
-
-print('my_true_val == None:', my_true_val == None)
-print('my_true_val is None:', my_true_val is None)
-
-print('my_true_val == False:', my_true_val == False)
-print('my_true_val is False:', my_true_val is False)
-
-print(my_true_val
-if my_true_val:
- print('"if my_true_val:" is True')
-else:
- print('"if my_true_val:" is False')
-
-if not my_true_val:
- print('"if not my_true_val:" is True')
-else:
- print('"if not my_true_val:" is False')
-Falsemy_false_val = False
-
-
-print('my_false_val == True:', my_false_val == True)
-print('my_false_val is True:', my_false_val is True)
-
-print('my_false_val == None:', my_false_val == None)
-print('my_false_val is None:', my_false_val is None)
-
-print('my_false_val == False:', my_false_val == False)
-print('my_false_val is False:', my_false_val is False)
-
-
-if my_false_val:
- print('"if my_false_val:" is True')
-else:
- print('"if my_false_val:" is False')
-
-if not my_false_val:
- print('"if not my_false_val:" is True')
-else:
- print('"if not my_false_val:" is False')
-None 'value'my_none_var = None
-
-print('my_none_var == True:', my_none_var == True)
-print('my_none_var is True:', my_none_var is True)
-
-print('my_none_var == None:', my_none_var == None)
-print('my_none_var is None:', my_none_var is None)
-
-print('my_none_var == False:', my_none_var == False)
-print('my_none_var is False:', my_none_var is False)
-
-
-if my_none_var:
- print('"if my_none_var:" is True')
-else:
- print('"if my_none_var:" is False')
-
-if not my_none_var:
- print('"if not my_none_var:" is True')
-else:
- print('"if not my_none_var:" is False')
-my_empty_string = ""
-
-print('my_empty_string == True:', my_empty_string == True)
-print('my_empty_string is True:', my_empty_string is True)
-
-print('my_empty_string == None:', my_empty_string == None)
-print('my_empty_string is None:', my_empty_string is None)
-
-print('my_empty_string == False:', my_empty_string == False)
-print('my_empty_string is False:', my_empty_string is False)
-
-
-if my_empty_string:
- print('"if my_empty_string:" is True')
-else:
- print('"if my_empty_string:" is False')
-
-if not my_empty_string:
- print('"if not my_empty_string:" is True')
-else:
- print('"if not my_empty_string:" is False')
-It is generally not a good idea to use the == to check for empty lists...
my_empty_list = []
-
-
-print('my_empty_list == True:', my_empty_list == True)
-print('my_empty_list is True:', my_empty_list is True)
-
-print('my_empty_list == None:', my_empty_list == None)
-print('my_empty_list is None:', my_empty_list is None)
-
-print('my_empty_list == False:', my_empty_list == False)
-print('my_empty_list is False:', my_empty_list is False)
-
-
-if my_empty_list:
- print('"if my_empty_list:" is True')
-else:
- print('"if my_empty_list:" is False')
-
-if not my_empty_list:
- print('"if not my_empty_list:" is True')
-else:
- print('"if not my_empty_list:" is False')
-
-
-
-my_zero_list = [0]
-
-
-print('my_zero_list == True:', my_zero_list == True)
-print('my_zero_list is True:', my_zero_list is True)
-
-print('my_zero_list == None:', my_zero_list == None)
-print('my_zero_list is None:', my_zero_list is None)
-
-print('my_zero_list == False:', my_zero_list == False)
-print('my_zero_list is False:', my_zero_list is False)
-
-
-if my_zero_list:
- print('"if my_zero_list:" is True')
-else:
- print('"if my_zero_list:" is False')
-
-if not my_zero_list:
- print('"if not my_zero_list:" is True')
-else:
- print('"if not my_zero_list:" is False')
-List comparisons are a handy way to show the difference between == and is.
While == is rather evaluating the equality of the value, is is checking if two objects are equal. The examples below show that we can assign a pointer to the same list object by using =, e.g., list1 = list2.
a) If we want to make a shallow copy of the list values, we have to make a little tweak: list1 = list2[:], or
b) a deepcopy via list1 = copy.deepcopy(list2)
Possibly the best explanation of shallow vs. deep copies I've read so far:
-*** "Shallow copies duplicate as little as possible. A shallow copy of a collection is a copy of the collection structure, not the elements. With a shallow copy, two collections now share the individual elements. Deep copies duplicate everything. A deep copy of a collection is two collections with all of the elements in the original collection duplicated."***
-(via S.Lott on StackOverflow)
-List modification of the original list doesn't affect
shallow copies or deep copies if the list contains literals.
from copy import deepcopy
-
-my_first_list = [1]
-my_second_list = [1]
-print('my_first_list == my_second_list:', my_first_list == my_second_list)
-print('my_first_list is my_second_list:', my_first_list is my_second_list)
-
-my_third_list = my_first_list
-print('my_first_list == my_third_list:', my_first_list == my_third_list)
-print('my_first_list is my_third_list:', my_first_list is my_third_list)
-
-my_shallow_copy = my_first_list[:]
-print('my_first_list == my_shallow_copy:', my_first_list == my_shallow_copy)
-print('my_first_list is my_shallow_copy:', my_first_list is my_shallow_copy)
-
-my_deep_copy = deepcopy(my_first_list)
-print('my_first_list == my_deep_copy:', my_first_list == my_deep_copy)
-print('my_first_list is my_deep_copy:', my_first_list is my_deep_copy)
-
-print('\nmy_third_list:', my_third_list)
-print('my_shallow_copy:', my_shallow_copy)
-print('my_deep_copy:', my_deep_copy)
-
-my_first_list[0] = 2
-print('after setting "my_first_list[0] = 2"')
-print('my_third_list:', my_third_list)
-print('my_shallow_copy:', my_shallow_copy)
-print('my_deep_copy:', my_deep_copy)
-List modification of the original list does affect
shallow copies, but not deep copies if the list contains compound objects.
my_first_list = [[1],[2]]
-my_second_list = [[1],[2]]
-print('my_first_list == my_second_list:', my_first_list == my_second_list)
-print('my_first_list is my_second_list:', my_first_list is my_second_list)
-
-my_third_list = my_first_list
-print('my_first_list == my_third_list:', my_first_list == my_third_list)
-print('my_first_list is my_third_list:', my_first_list is my_third_list)
-
-my_shallow_copy = my_first_list[:]
-print('my_first_list == my_shallow_copy:', my_first_list == my_shallow_copy)
-print('my_first_list is my_shallow_copy:', my_first_list is my_shallow_copy)
-
-my_deep_copy = deepcopy(my_first_list)
-print('my_first_list == my_deep_copy:', my_first_list == my_deep_copy)
-print('my_first_list is my_deep_copy:', my_first_list is my_deep_copy)
-
-print('\nmy_third_list:', my_third_list)
-print('my_shallow_copy:', my_shallow_copy)
-print('my_deep_copy:', my_deep_copy)
-
-my_first_list[0][0] = 2
-print('after setting "my_first_list[0][0] = 2"')
-print('my_third_list:', my_third_list)
-print('my_shallow_copy:', my_shallow_copy)
-print('my_deep_copy:', my_deep_copy)
-a = 1
-b = 1
-print('a is b', bool(a is b))
-True
-
-a = 999
-b = 999
-print('a is b', bool(a is b))
-| feature | \n", + "optional in | \n", + "mandatory in | \n", + "effect | \n", + "
|---|---|---|---|
| nested_scopes | \n", + "2.1.0b1 | \n", + "2.2 | \n", + "PEP 227:\n", + "Statically Nested Scopes | \n", + "
| generators | \n", + "2.2.0a1 | \n", + "2.3 | \n", + "PEP 255:\n", + "Simple Generators | \n", + "
| division | \n", + "2.2.0a2 | \n", + "3.0 | \n", + "PEP 238:\n", + "Changing the Division Operator | \n", + "
| absolute_import | \n", + "2.5.0a1 | \n", + "3.0 | \n", + "PEP 328:\n", + "Imports: Multi-Line and Absolute/Relative | \n", + "
| with_statement | \n", + "2.5.0a1 | \n", + "2.6 | \n", + "PEP 343:\n", + "The “with” Statement | \n", + "
| print_function | \n", + "2.6.0a2 | \n", + "3.0 | \n", + "PEP 3105:\n", + "Make print a function | \n", + "
| unicode_literals | \n", + "2.6.0a2 | \n", + "3.0 | \n", + "PEP 3112:\n", + "Bytes literals in Python 3000 | \n", + "
Python 2.7.6 \n", + "[GCC 4.0.1 (Apple Inc. build 5493)] on darwin\n", + "Type "help", "copyright", "credits" or "license" for more information.\n", + "\n", + ">>> my_input = input('enter a number: ')\n", + "\n", + "enter a number: 123\n", + "\n", + ">>> type(my_input)\n", + "<type 'int'>\n", + "\n", + ">>> my_input = raw_input('enter a number: ')\n", + "\n", + "enter a number: 123\n", + "\n", + ">>> type(my_input)\n", + "<type 'str'>\n", + "
Python 3.4.1 \n", + "[GCC 4.2.1 (Apple Inc. build 5577)] on darwin\n", + "Type "help", "copyright", "credits" or "license" for more information.\n", + "\n", + ">>> my_input = input('enter a number: ')\n", + "\n", + "enter a number: 123\n", + "\n", + ">>> type(my_input)\n", + "<class 'str'>\n", + "
+
+##This is a test
+
+Code blocks must be indented by 4 whitespaces.
+Python-Markdown has an auto-guess function which works
+pretty well:
+
+ print("Hello, World")
+ # some comment
+ for letter in "this is a test":
+ print(letter)
+
+In cases where Python-Markdown has problems figuring out which
+programming language we use, we can also add the language-tag
+explicitly. One way to do this would be:
+
+ :::python
+ print("Hello, World")
+
+or we can highlight certain lines to
+draw the reader's attention:
+
+ :::python hl_lines="1 5"
+ print("highlight me!")
+ # but not me!
+ for letter in "this is a test":
+ print(letter)
+ # I want to be highlighted, too!
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ <-- converted HTML contents go here
+
+
+
+
+
+
+If we open our [**final.html**](https://github.com/rasbt/python_reference/blob/master/tutorials/markdown_syntax_highlighting/template.html) file in our web browser now, we can see the pretty Python syntax highlighting.
+
+
+
+Code blocks must be indented by 4 whitespaces. +Python-Markdown has a auto-guess function which works +pretty well:
+print("Hello, World") +# some comment +for letter in "this is a test": + print(letter) +
In cases where Python-Markdown has problems figuring out which +programming language we use, we can also add the language-tag +explicitly. One way to do this would be:
+print("Hello, World") +
or we can highlight certain lines to +draw the reader's attention:
+print("highlight me!") +# but not me! +for letter in "this is a test": + print(letter) +# I want to be highlighted, too! +
Code blocks must be indented by 4 whitespaces. +Python-Markdown has a auto-guess function which works +pretty well:
+print("Hello, World") +# some comment +for letter in "this is a test": + print(letter) +
In cases where Python-Markdown has problems figuring out which +programming language we use, we can also add the language-tag +explicitly. One way to do this would be:
+print("Hello, World") +
or we can highlight certain lines to +draw the reader's attention:
+print("highlight me!") +# but not me! +for letter in "this is a test": + print(letter) +# I want to be highlighted, too! +
|
+ Task + |
+
+ MATLAB/Octave + |
+
+ Python + NumPy + |
+
+ R + |
+
+ Julia + |
+
+ Task + |
+
|
+ CREATING + MATRICES + |
+ |||||
|
+ Creating
+ Matrices |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(c(1,2,3,4,5,6,7,8,9),nrow=3,byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9] |
+
+ Creating
+ Matrices |
+
|
+ Creating + an 1D column vector + |
+
+ M>
+ a = [1; 2; 3] |
+
+ P> + a + = + np.array([1,2,3]).reshape(1,3) +
|
+
+ R>
+ a = matrix(c(1,2,3), nrow=3, byrow=T) |
+
+ J>
+ a=[1; 2; 3] |
+
+ Creating + an 1D column vector + |
+
|
+ Creating
+ an |
+
+ M>
+ b = [1 2 3] |
+
+ P>
+ b = np.array([1,2,3]) #
+ note that numpy doesn't have P> + b.shape +(3,) +
|
+
+ R>
+ b = matrix(c(1,2,3), ncol=3) |
+
+ J>
+ b=[1 2 3] |
+
+ Creating
+ an |
+
|
+ Creating
+ a |
+
+ M>
+ rand(3,2) |
+
+ P>
+ np.random.rand(3,2) |
+
+ R>
+ matrix(runif(3*2), ncol=2) |
+
+ J>
+ rand(3,2) |
+
+ Creating
+ a |
+
|
+ Creating
+ a |
+
+ M>
+ zeros(3,2) |
+
+ P>
+ np.zeros((3,2)) |
+
+ R>
+ mat.or.vec(3, 2) |
+
+ J>
+ zeros(3,2) |
+
+ Creating
+ a |
+
|
+ Creating
+ an |
+
+ M>
+ ones(3,2) |
+
+ P>
+ np.ones((3,2)) |
+
+ R>
+ mat.or.vec(3, 2) + 1 |
+
+ J>
+ ones(3,2) |
+
+ Creating
+ an |
+
|
+ Creating
+ an |
+
+ M>
+ eye(3) |
+
+ P>
+ np.eye(3) |
+
+ R>
+ diag(3) |
+
+ J>
+ eye(3) |
+
+ Creating
+ an |
+
|
+ Creating
+ a |
+
+ M>
+ a = [1 2 3] |
+
+ P>
+ a = np.array([1,2,3]) |
+
+ R>
+ diag(1:3) |
+
+ J>
+ a=[1, 2, 3] |
+
+ Creating
+ a |
+
|
+ ACCESSING + MATRIX ELEMENTS + |
+ |||||
|
+ Getting
+ the dimension |
+
+ M>
+ A = [1 2 3; 4 5 6] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6] ]) |
+
+ R>
+ A = matrix(1:6,nrow=2,byrow=T) R>
+ dim(A) |
+
+ J>
+ A=[1 2 3; 4 5 6] |
+
+ Getting
+ the dimension |
+
|
+ Selecting + rows + |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Selecting + rows + |
+
|
+ Selecting + columns + |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Selecting + columns + |
+
|
+ Extracting
+ rows and columns by criteria |
+
+ M>
+ A = [1 2 3; 4 5 9; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,9], [7,8,9]]) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 9; 7 8 9] |
+
+ Extracting
+ rows and columns by criteria |
+
|
+ Accessing
+ elements |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(c(1,2,3,4,5,9,7,8,9),nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Accessing
+ elements |
+
|
+ MANIPULATING + SHAPE AND DIMENSIONS + |
+ |||||
|
+ Converting |
+
+ M>
+ b = [1 2 3]
|
+
+ P>
+ b = np.array([1, 2, 3]) |
+
+ R>
+ b = matrix(c(1,2,3), ncol=3) |
+
+ J>
+ b=vec([1 2 3]) |
+
+ Converting |
+
|
+ Reshaping
+ Matrices |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([[1,2,3],[4,5,6],[7,8,9]]) P>
+ B = A.reshape(1, total_elements) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9] |
+
+ Reshaping
+ Matrices |
+
|
+ Concatenating + matrices + |
+
+ M>
+ A = [1 2 3; 4 5 6] |
+
+ P>
+ A = np.array([[1, 2, 3], [4, 5, 6]]) |
+
+ R>
+ A = matrix(1:6,nrow=2,byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6]; |
+
+ Concatenating + matrices + |
+
|
+ Stacking |
+
+ M>
+ a = [1 2 3] |
+
+ P>
+ a = np.array([1,2,3]) |
+
+ R>
+ a = matrix(1:3, ncol=3) |
+
+ J>
+ a=[1 2 3]; |
+
+ Stacking |
+
|
+ BASIC + MATRIX OPERATIONS + |
+ |||||
|
+ Matrix-scalar |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) #
+ Note that NumPy was optimized for |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T) R>
+ A + 2 |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix-scalar |
+
|
+ Matrix-matrix |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix-matrix |
+
|
+ Matrix-vector |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, ncol=3) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix-vector |
+
|
+ Element-wise |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) #
+ Note that NumPy was optimized for |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Element-wise |
+
|
+ Matrix
+ elements to power n |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix
+ elements to power n |
+
|
+ Matrix
+ to power n |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, ncol=3) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix
+ to power n |
+
|
+ Matrix + transpose + |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9] |
+
+ Matrix + transpose + |
+
|
+ Determinant
+ of a matrix: |
+
+ M>
+ A = [6 1 1; 4 -2 5; 2 8 7] |
+
+ P> A
+ = np.array([[6,1,1],[4,-2,5],[2,8,7]]) |
+
+ R>
+ A = matrix(c(6,1,1,4,-2,5,2,8,7), nrow=3, byrow=T) |
+
+ J>
+ A=[6 1 1; 4 -2 5; 2 8 7] |
+
+ Determinant
+ of a matrix: |
+
|
+ Inverse + of a matrix + |
+
+ M>
+ A = [4 7; 2 6] |
+
+ P>
+ A = np.array([[4, 7], [2, 6]]) |
+
+ R>
+ A = matrix(c(4,7,2,6), nrow=2, byrow=T) |
+
+ J>
+ A=[4 7; 2 6] |
+
+ Inverse + of a matrix + |
+
|
+ ADVANCED + MATRIX OPERATIONS + |
+ |||||
|
+ Calculating
+ the covariance matrix |
+
+ M>
+ x1 = [4.0000 4.2000 3.9000 4.3000 4.1000]’ |
+
+ P>
+ x1 = np.array([ 4, 4.2, 3.9, 4.3, 4.1]) |
+
+ R>
+ x1 = matrix(c(4, 4.2, 3.9, 4.3, 4.1), ncol=5) |
+
+ J>
+ x1=[4.0 4.2 3.9 4.3 4.1]'; |
+
+ Calculating
+ the covariance matrix |
+
|
+ Calculating |
+
+ M>
+ A = [3 1; 1 3] |
+
+ P>
+ A = np.array([[3, 1], [1, 3]]) |
+
+ R>
+ A = matrix(c(3,1,1,3), ncol=2) |
+
+ J>
+ A=[3 1; 1 3] |
+
+ Calculating |
+
|
+ Generating
+ a Gaussian dataset: |
+
+ %
+ requires statistics toolbox package |
+
+ P>
+ mean = np.array([0,0]) |
+
+ #
+ requires the ‘mass’ package |
+
+ #
+ requires the Distributions package from
+ https://github.com/JuliaStats/Distributions.jl |
+
+ Generating
+ a Gaussian dataset: |
+
* " operator would perform a matrix-matrix multiplication of NumPy matrices - same operator performs element-wise multiplication on NumPy arrays.
+Vice versa, the "`.dot()`" method is used for matrix multiplication of NumPy arrays, whereas the equivalent operation for NumPy matrices would be achieved via the " * "-operator.
+**Most people recommend the usage of the NumPy array type over NumPy matrices, since arrays are what most of the NumPy functions return.**
\ No newline at end of file
diff --git a/tutorials/matrix_cheatsheet_only.html b/tutorials/matrix_cheatsheet_only.html
new file mode 100644
index 0000000..8d9762c
--- /dev/null
+++ b/tutorials/matrix_cheatsheet_only.html
@@ -0,0 +1,1206 @@
+
+
+
+
+ |
+ Task + |
+
+ MATLAB/Octave + |
+
+ Python + NumPy + |
+
+ R + |
+
+ Julia + |
+
+ Task + |
+
|
+ CREATING + MATRICES + |
+ |||||
|
+ Creating
+ Matrices |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(c(1,2,3,4,5,6,7,8,9),nrow=3,byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9] |
+
+ Creating
+ Matrices |
+
|
+ Creating + an 1D column vector + |
+
+ M>
+ a = [1; 2; 3] |
+
+ P> + a + = + np.array([1,2,3]).reshape(1,3) +
|
+
+ R>
+ a = matrix(c(1,2,3), nrow=3, byrow=T) |
+
+ J>
+ a=[1; 2; 3] |
+
+ Creating + an 1D column vector + |
+
|
+ Creating
+ an |
+
+ M>
+ b = [1 2 3] |
+
+ P>
+ b = np.array([1,2,3]) #
+ note that numpy doesn't have P> + b.shape +(3,) +
|
+
+ R>
+ b = matrix(c(1,2,3), ncol=3) |
+
+ J>
+ b=[1 2 3] |
+
+ Creating
+ an |
+
|
+ Creating
+ a |
+
+ M>
+ rand(3,2) |
+
+ P>
+ np.random.rand(3,2) |
+
+ R>
+ matrix(runif(3*2), ncol=2) |
+
+ J>
+ rand(3,2) |
+
+ Creating
+ a |
+
|
+ Creating
+ a |
+
+ M>
+ zeros(3,2) |
+
+ P>
+ np.zeros((3,2)) |
+
+ R>
+ mat.or.vec(3, 2) |
+
+ J>
+ zeros(3,2) |
+
+ Creating
+ a |
+
|
+ Creating
+ an |
+
+ M>
+ ones(3,2) |
+
+ P>
+ np.ones((3,2)) |
+
+ R>
+ mat.or.vec(3, 2) + 1 |
+
+ J>
+ ones(3,2) |
+
+ Creating
+ an |
+
|
+ Creating
+ an |
+
+ M>
+ eye(3) |
+
+ P>
+ np.eye(3) |
+
+ R>
+ diag(3) |
+
+ J>
+ eye(3) |
+
+ Creating
+ an |
+
|
+ Creating
+ a |
+
+ M>
+ a = [1 2 3] |
+
+ P>
+ a = np.array([1,2,3]) |
+
+ R>
+ diag(1:3) |
+
+ J>
+ a=[1, 2, 3] |
+
+ Creating
+ a |
+
|
+ ACCESSING + MATRIX ELEMENTS + |
+ |||||
|
+ Getting
+ the dimension |
+
+ M>
+ A = [1 2 3; 4 5 6] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6] ]) |
+
+ R>
+ A = matrix(1:6,nrow=2,byrow=T) R>
+ dim(A) |
+
+ J>
+ A=[1 2 3; 4 5 6] |
+
+ Getting
+ the dimension |
+
|
+ Selecting + rows + |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Selecting + rows + |
+
|
+ Selecting + columns + |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Selecting + columns + |
+
|
+ Extracting
+ rows and columns by criteria |
+
+ M>
+ A = [1 2 3; 4 5 9; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,9], [7,8,9]]) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 9; 7 8 9] |
+
+ Extracting
+ rows and columns by criteria |
+
|
+ Accessing
+ elements |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(c(1,2,3,4,5,9,7,8,9),nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Accessing
+ elements |
+
|
+ MANIPULATING + SHAPE AND DIMENSIONS + |
+ |||||
|
+ Converting |
+
+ M>
+ b = [1 2 3]
|
+
+ P>
+ b = np.array([1, 2, 3]) |
+
+ R>
+ b = matrix(c(1,2,3), ncol=3) |
+
+ J>
+ b=vec([1 2 3]) |
+
+ Converting |
+
|
+ Reshaping
+ Matrices |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([[1,2,3],[4,5,6],[7,8,9]]) P>
+ B = A.reshape(1, total_elements) |
+
+ R>
+ A = matrix(1:9,nrow=3,byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9] |
+
+ Reshaping
+ Matrices |
+
|
+ Concatenating + matrices + |
+
+ M>
+ A = [1 2 3; 4 5 6] |
+
+ P>
+ A = np.array([[1, 2, 3], [4, 5, 6]]) |
+
+ R>
+ A = matrix(1:6,nrow=2,byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6]; |
+
+ Concatenating + matrices + |
+
|
+ Stacking |
+
+ M>
+ a = [1 2 3] |
+
+ P>
+ a = np.array([1,2,3]) |
+
+ R>
+ a = matrix(1:3, ncol=3) |
+
+ J>
+ a=[1 2 3]; |
+
+ Stacking |
+
|
+ BASIC + MATRIX OPERATIONS + |
+ |||||
|
+ Matrix-scalar |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) #
+ Note that NumPy was optimized for |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T) R>
+ A + 2 |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix-scalar |
+
|
+ Matrix-matrix |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix-matrix |
+
|
+ Matrix-vector |
+
+ M>
+ A = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, ncol=3) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix-vector |
+
|
+ Element-wise |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) #
+ Note that NumPy was optimized for |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Element-wise |
+
|
+ Matrix
+ elements to power n |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix
+ elements to power n |
+
|
+ Matrix
+ to power n |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, ncol=3) |
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9]; |
+
+ Matrix
+ to power n |
+
|
+ Matrix + transpose + |
+
+ M> A
+ = [1 2 3; 4 5 6; 7 8 9] |
+
+ P>
+ A = np.array([ [1,2,3], [4,5,6], [7,8,9] ]) |
+
+ R>
+ A = matrix(1:9, nrow=3, byrow=T)
|
+
+ J>
+ A=[1 2 3; 4 5 6; 7 8 9] |
+
+ Matrix + transpose + |
+
|
+ Determinant
+ of a matrix: |
+
+ M>
+ A = [6 1 1; 4 -2 5; 2 8 7] |
+
+ P> A
+ = np.array([[6,1,1],[4,-2,5],[2,8,7]]) |
+
+ R>
+ A = matrix(c(6,1,1,4,-2,5,2,8,7), nrow=3, byrow=T) |
+
+ J>
+ A=[6 1 1; 4 -2 5; 2 8 7] |
+
+ Determinant
+ of a matrix: |
+
|
+ Inverse + of a matrix + |
+
+ M>
+ A = [4 7; 2 6] |
+
+ P>
+ A = np.array([[4, 7], [2, 6]]) |
+
+ R>
+ A = matrix(c(4,7,2,6), nrow=2, byrow=T) |
+
+ J>
+ A=[4 7; 2 6] |
+
+ Inverse + of a matrix + |
+
|
+ ADVANCED + MATRIX OPERATIONS + |
+ |||||
|
+ Calculating
+ the covariance matrix |
+
+ M>
+ x1 = [4.0000 4.2000 3.9000 4.3000 4.1000]’ |
+
+ P>
+ x1 = np.array([ 4, 4.2, 3.9, 4.3, 4.1]) |
+
+ R>
+ x1 = matrix(c(4, 4.2, 3.9, 4.3, 4.1), ncol=5) |
+
+ J>
+ x1=[4.0 4.2 3.9 4.3 4.1]'; |
+
+ Calculating
+ the covariance matrix |
+
|
+ Calculating |
+
+ M>
+ A = [3 1; 1 3] |
+
+ P>
+ A = np.array([[3, 1], [1, 3]]) |
+
+ R>
+ A = matrix(c(3,1,1,3), ncol=2) |
+
+ J>
+ A=[3 1; 1 3] |
+
+ Calculating |
+
|
+ Generating
+ a Gaussian dataset: |
+
+ %
+ requires statistics toolbox package |
+
+ P>
+ mean = np.array([0,0]) |
+
+ #
+ requires the ‘mass’ package |
+
+ #
+ requires the Distributions package from
+ https://github.com/JuliaStats/Distributions.jl |
+
+ Generating
+ a Gaussian dataset: |
+
+
$[bash]> conda create -n myenv python=3\n", + "$[bash]> source activate myenv\n", + "$[bash]> conda install -n myenv numpy scipy matplotlib scikit-learn\n", + "\n", + "When we start \"python\" in your current shell session now, it will use the Python distribution in the virtual environment \"myenv\" that we have just created. To un-attach the virtual environment, you can just use\n", + "
$[bash]> source deactivate myenv\n", + "\n", + "**Note:** environments will be created in ROOT_DIR/envs by default, you can use the `-p` instead of the `-n` flag in the conda commands above in order to specify a custom path.\n", + "\n", + "**I find this procedure very convenient, especially if you are working with different distributions and versions of Python with different modules and packages installed and it is extremely useful for testing your own modules.**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065\n", + "1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050\n", + "[...]\n", + "2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520\n", + "2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680\n", + "[...]\n", + "3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630\n", + "3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
| predicted class | \n", + "\t\t||||
| class 1 | \n", + "\t\tclass 2 | \n", + "\t\tclass 3 | \n", + "\t||
| actual class | \n", + "\t\tclass 1 | \n", + "\t\tTrue positives | \n", + "\t\t||
| class 2 | \n", + "\t\tTrue positives | \n", + "\t\t|||
| class 3 | \n", + "\t\tTrue positives | \n", + "\t|||
a_namespace = {'name_a':object_1, 'name_b':object_2, ...} \n",
- "\n",
- "\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now, the tricky part is that we have multiple independent namespaces in Python, and names can be reused for different namespaces (only the objects are unique, for example:\n",
- "\n",
- "a_namespace = {'name_a':object_1, 'name_b':object_2, ...}\n",
- "b_namespace = {'name_a':object_3, 'name_b':object_4, ...}\n",
- "\n",
- "For example, everytime we call a `for-loop` or define a function, it will create its own namespace. Namespaces also have different levels of hierarchy (the so-called \"scope\"), which we will discuss in more detail in the next section."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Scope"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In the section above, we have learned that namespaces can exist independently from each other and that they are structured in a certain hierarchy, which brings us to the concept of \"scope\". The \"scope\" in Python defines the \"hierarchy level\" in which we search namespaces for certain \"name-to-object\" mappings. \n",
- "For example, let us consider the following code:"
- ]
- },
- {
- "cell_type": "code",
- "collapsed": false,
- "input": [
- "i = 1\n",
- "\n",
- "def foo():\n",
- " i = 5\n",
- " print(i, 'in foo()')\n",
- "\n",
- "print(i, 'global')\n",
- "\n",
- "foo()"
- ],
- "language": "python",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "stream": "stdout",
- "text": [
- "1 global\n",
- "5 in foo()\n"
- ]
- }
- ],
- "prompt_number": 1
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Here, we just defined the variable name `i` twice, once on the `foo` function."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- `foo_namespace = {'i':object_3, ...}` \n",
- "- `global_namespace = {'i':object_1, 'name_b':object_2, ...}`"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "So, how does Python now which namespace it has to search if we want to print the value of the variable `i`? This is where Python's LEGB-rule comes into play, which we will discuss in the next section."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Tip:\n",
- "If we want to print out the dictionary mapping of the global and local variables, we can use the\n",
- "the functions `global()` and `local()"
- ]
- },
- {
- "cell_type": "code",
- "collapsed": false,
- "input": [
- "#print(globals()) # prints global namespace\n",
- "#print(locals()) # prints local namespace\n",
- "\n",
- "glob = 1\n",
- "\n",
- "def foo():\n",
- " loc = 5\n",
- " print('loc in foo():', 'loc' in locals())\n",
- "\n",
- "foo()\n",
- "print('loc in global:', 'loc' in globals()) \n",
- "print('glob in global:', 'foo' in globals())"
- ],
- "language": "python",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "stream": "stdout",
- "text": [
- "loc in foo(): True\n",
- "loc in global: False\n",
- "glob in global: True\n"
- ]
- }
- ],
- "prompt_number": 11
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Scope resolution for variable names via the LEGB rule."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We have seen that multiple namespaces can exist independently from each other and that they can contain the same variable names on different hierachy levels. The \"scope\" defines on which hierarchy level Python searches for a particular \"variable name\" for its associated object. Now, the next question is: \"In which order does Python search the different levels of namespaces before it finds the name-to-object' mapping?\" \n",
- "To answer is: It uses the LEGB-rule, which stands for\n",
- "\n",
- "**Local -> Enclosed -> Global -> Built-in**, \n",
- "\n",
- "where the arrows should denote the direction of the namespace-hierarchy search order. \n",
- "\n",
- "- *Local* can be inside a function or class method, for example. \n",
- "- *Enclosed* can be its `enclosing` function, e.g., if a function is wrapped inside another function. \n",
- "- *Global* refers to the uppermost level of the executing script itself, and \n",
- "- *Built-in* are special names that Python reserves for itself. \n",
- "\n",
- "So, if a particular name:object mapping cannot be found in the local namespaces, the namespaces of the enclosed scope are being searched next. If the search in the enclosed scope is unsuccessful, too, Python moves on to the global namespace, and eventually, it will search the global namespaces (side note: if a name cannot found in any of the namespaces, a *NameError* will is raised).\n",
- "\n",
- "**Note**: \n",
- "Namespaces can also be further nested, for example if we import modules, or if we are defining new classes. In those cases we have to use prefixes to access those nested namespaces. Let me illustrate this concept in the following code block:"
- ]
- },
- {
- "cell_type": "code",
- "collapsed": false,
- "input": [
- "import numpy\n",
- "import math\n",
- "import scipy\n",
- "\n",
- "print(math.pi, 'from the math module')\n",
- "print(numpy.pi, 'from the numpy package')\n",
- "print(scipy.pi, 'from the scipy package')"
- ],
- "language": "python",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "stream": "stdout",
- "text": [
- "3.141592653589793 from the math module\n",
- "3.141592653589793 from the numpy package\n",
- "3.141592653589793 from the scipy package\n"
- ]
- }
- ],
- "prompt_number": 8
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "(This is also why we have to be careful if we import modules via \"`from a_module import *`\", since it loads the variable names into the global namespace and could potentially overwrite already existing variable names)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "\n",
- "raises an error\n", - "\n", - "**b)** \n", - "
\n", - "global value [ a_var outside a_func() ]\n", - "\n", - "**c)** \n", - "
global value [ a_var in a_func() ] \n", - "global value [ a_var outside a_func() ]\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[[go to solution](#solutions)]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Here is why:\n", - "\n", - "We call `a_func()` first, which is supposed to print the value of `a_var`. According to the LEGB rule, the function will first look in its own local scope (L) if `a_var` is defined there. Since `a_func()` does not define its own `a_var`, it will look one-level above in the global scope (G) in which `a_var` has been defined previously.\n", - "
raises an error\n", - "\n", - "**b)** \n", - "
local value [ a_var in a_func() ]\n", - "global value [ a_var outside a_func() ]\n", - "\n", - "**c)** \n", - "
global value [ a_var in a_func() ] \n", - "global value [ a_var outside a_func() ]\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[[go to solution](#solutions)]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Here is why:\n", - "\n", - "When we call `a_func()`, it will first look in its local scope (L) for `a_var`, since `a_var` is defined in the local scope of `a_func`, its assigned value `local variable` is printed. Note that this doesn't affect the global variable, which is in a different scope." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
global value\n", - "\n", - "**b)** \n", - "
enclosed value\n", - "\n", - "**c)** \n", - "
local value" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[[go to solution](#solutions)]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Here is why:\n", - "\n", - "Let us quickly recapitulate what we just did: We called `outer()`, which defined the variable `a_var` locally (next to an existing `a_var` in the global scope). Next, the `outer()` function called `inner()`, which in turn defined a variable with of name `a_var` as well. The `print()` function inside `inner()` searched in the local scope first (L->E) before it went up in the scope hierarchy, and therefore it printed the value that was assigned in the local scope." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Similar to the concept of the `global` keyword, which we have seen in the section above, we can use the keyword `nonlocal` inside the inner function to explicitely access a variable from the outer (enclosed) scope in order to modify its value. \n", - "Note that the `nonlocal` keyword was added in Python 3.x and is not implemented in Python 2.x (yet)." - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "a_var = 'global value'\n", - "\n", - "def outer():\n", - " a_var = 'local value'\n", - " print('outer before:', a_var)\n", - " def inner():\n", - " nonlocal a_var\n", - " a_var = 'inner value'\n", - " print('in inner():', a_var)\n", - " inner()\n", - " print(\"outer after:\", a_var)\n", - "outer()" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "outer before: local value\n", - "in inner(): inner value\n", - "outer after: inner value\n" - ] - } - ], - "prompt_number": 5 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "
raises an error (conflict with in-built `len()` function)\n", - "\n", - "**b)** \n", - "
called my len() function\n", - "Input variable is of length 13\n", - "\n", - "**c)** \n", - "
Input variable is of length 13" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "[[go to solution](#solutions)]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Here is why:\n", - "\n", - "Since the exact same names can be used to map names to different objects - as long as the names are in different name spaces - there is no problem of reusing the name `len` to define our own length function (this is just for demonstration pruposes, it is NOT recommended). As we go up in Python's L -> E -> G -> B hierarchy, the function `a_func()` finds `len()` already in the global scope first before it attempts" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "
a_namespace = {'name_a':object_1, 'name_b':object_2, ...} \n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Now, the tricky part is that we have multiple independent namespaces in Python, and names can be reused for different namespaces (only the objects are unique), for example:\n",
+ "\n",
+ "a_namespace = {'name_a':object_1, 'name_b':object_2, ...}\n",
+ "b_namespace = {'name_a':object_3, 'name_b':object_4, ...}\n",
+ "\n",
+    "For example, every time we call a `for-loop` or define a function, it will create its own namespace. Namespaces also have different levels of hierarchy (the so-called \"scope\"), which we will discuss in more detail in the next section."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Scope"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the section above, we have learned that namespaces can exist independently from each other and that they are structured in a certain hierarchy, which brings us to the concept of \"scope\". The \"scope\" in Python defines the \"hierarchy level\" in which we search namespaces for certain \"name-to-object\" mappings. \n",
+ "For example, let us consider the following code:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 global\n",
+ "5 in foo()\n"
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In practice, **it is usually a bad idea to modify global variables inside the function scope**, since it often be the cause of confusion and weird errors that are hard to debug. \n",
- "If you want to modify a global variable via a function, it is recommended to pass it as an argument and reassign the return-value. \n",
- "For example:"
+ }
+ ],
+ "source": [
+ "i = 1\n",
+ "\n",
+ "def foo():\n",
+ " i = 5\n",
+ " print(i, 'in foo()')\n",
+ "\n",
+ "print(i, 'global')\n",
+ "\n",
+ "foo()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "Here, we just defined the variable name `i` twice: once inside the `foo` function, and once in the global scope."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- `foo_namespace = {'i':object_3, ...}` \n",
+ "- `global_namespace = {'i':object_1, 'name_b':object_2, ...}`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "So, how does Python know which namespace it has to search if we want to print the value of the variable `i`? This is where Python's LEGB-rule comes into play, which we will discuss in the next section."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Tip:\n",
+ "If we want to print out the dictionary mapping of the global and local variables, we can use the\n",
+    "functions `globals()` and `locals()`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "loc in foo(): True\n",
+ "loc in global: False\n",
+ "glob in global: True\n"
]
- },
- {
- "cell_type": "code",
- "collapsed": false,
- "input": [
- "a_var = 2\n",
- "\n",
- "def a_func(some_var):\n",
- " return 2**3\n",
- "\n",
- "a_var = a_func(a_var)\n",
- "print(a_var)"
- ],
- "language": "python",
- "metadata": {},
- "outputs": [
- {
- "output_type": "stream",
- "stream": "stdout",
- "text": [
- "8\n"
- ]
- }
- ],
- "prompt_number": 42
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "\n",
- "raises an error\n", + "\n", + "**b)** \n", + "
\n", + "global value [ a_var outside a_func() ]\n", + "\n", + "**c)** \n", + "
global value [ a_var inside a_func() ] \n", + "global value [ a_var outside a_func() ]\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[[go to solution](#solutions)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is why:\n", + "\n", + "We call `a_func()` first, which is supposed to print the value of `a_var`. According to the LEGB rule, the function will first look in its own local scope (L) if `a_var` is defined there. Since `a_func()` does not define its own `a_var`, it will look one-level above in the global scope (G) in which `a_var` has been defined previously.\n", + "
raises an error\n", + "\n", + "**b)** \n", + "
local value [ a_var inside a_func() ]\n", + "global value [ a_var outside a_func() ]\n", + "\n", + "**c)** \n", + "
global value [ a_var inside a_func() ] \n", + "global value [ a_var outside a_func() ]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[[go to solution](#solutions)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is why:\n", + "\n", + "When we call `a_func()`, it will first look in its local scope (L) for `a_var`, since `a_var` is defined in the local scope of `a_func`, its assigned value `local variable` is printed. Note that this doesn't affect the global variable, which is in a different scope." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
global value\n", + "\n", + "**b)** \n", + "
enclosed value\n", + "\n", + "**c)** \n", + "
local value" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[[go to solution](#solutions)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is why:\n", + "\n", + "Let us quickly recapitulate what we just did: We called `outer()`, which defined the variable `a_var` locally (next to an existing `a_var` in the global scope). Next, the `outer()` function called `inner()`, which in turn defined a variable with of name `a_var` as well. The `print()` function inside `inner()` searched in the local scope first (L->E) before it went up in the scope hierarchy, and therefore it printed the value that was assigned in the local scope." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Similar to the concept of the `global` keyword, which we have seen in the section above, we can use the keyword `nonlocal` inside the inner function to explicitly access a variable from the outer (enclosed) scope in order to modify its value. \n", + "Note that the `nonlocal` keyword was added in Python 3.x and is not implemented in Python 2.x (yet)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "outer before: local value\n", + "in inner(): inner value\n", + "outer after: inner value\n" ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for a in range(5):\n", - " if a == 4:\n", - " print(a, '-> a in for-loop')\n", - "print(a, '-> a in global')" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "4 -> a in for-loop\n", - "4 -> a in global\n" - ] - } - ], - "prompt_number": 5 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**This also applies if we explicitely defined the `for-loop` variable in the global namespace before!** In this case it will rebind the existing variable:" + } + ], + "source": [ + "a_var = 'global value'\n", + "\n", + "def outer():\n", + " a_var = 'local value'\n", + " print('outer before:', a_var)\n", + " def inner():\n", + " nonlocal a_var\n", + " a_var = 'inner value'\n", + " print('in inner():', a_var)\n", + " inner()\n", + " print(\"outer after:\", a_var)\n", + "outer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "
raises an error (conflict with in-built `len()` function)\n", + "\n", + "**b)** \n", + "
called my len() function\n", + "Input variable is of length 13\n", + "\n", + "**c)** \n", + "
Input variable is of length 13" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[[go to solution](#solutions)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Here is why:\n", + "\n", + "Since the exact same names can be used to map names to different objects - as long as the names are in different name spaces - there is no problem of reusing the name `len` to define our own length function (this is just for demonstration pruposes, it is NOT recommended). As we go up in Python's L -> E -> G -> B hierarchy, the function `a_func()` finds `len()` already in the global scope (G) first before it attempts to search the built-in (B) namespace." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "
\n",
- "print(4, '-> i in global')\n",
- ""
+ }
+ ],
+ "source": [
+ "for a in range(5):\n",
+ " if a == 4:\n",
+ " print(a, '-> a in for-loop')\n",
+ "print(a, '-> a in global')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**This also applies if we explicitly defined the `for-loop` variable in the global namespace before!** In this case it will rebind the existing variable:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "4 -> b in for-loop\n",
+ "4 -> b in global\n"
]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This goes back to a change that was made in Python 3.x and is described in [What\u2019s New In Python 3.0](https://docs.python.org/3/whatsnew/3.0.html) as follows:\n",
- "\n",
- "\"List comprehensions no longer support the syntactic form `[... for var in item1, item2, ...]`. Use `[... for var in (item1, item2, ...)]` instead. Also note that list comprehensions have different semantics: they are closer to syntactic sugar for a generator expression inside a `list()` constructor, and in particular the loop control variables are no longer leaked into the surrounding scope.\""
+ }
+ ],
+ "source": [
+ "b = 1\n",
+ "for b in range(5):\n",
+ " if b == 4:\n",
+ " print(b, '-> b in for-loop')\n",
+ "print(b, '-> b in global')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "However, in **Python 3.x**, we can use closures to prevent the for-loop variable from leaking into the global namespace. Here is an example (executed in Python 3.4):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[0, 1, 2, 3, 4]\n",
+ "1 -> i in global\n"
]
}
],
- "metadata": {}
+ "source": [
+ "i = 1\n",
+ "print([i for i in range(5)])\n",
+ "print(i, '-> i in global')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Why did I mention \"Python 3.x\"? Well, as it happens, the same code executed in Python 2.x would print:\n",
+ "\n",
+ "\n",
+ "4 -> i in global\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This goes back to a change that was made in Python 3.x and is described in [What’s New In Python 3.0](https://docs.python.org/3/whatsnew/3.0.html) as follows:\n",
+ "\n",
+ "\"List comprehensions no longer support the syntactic form `[... for var in item1, item2, ...]`. Use `[... for var in (item1, item2, ...)]` instead. Also note that list comprehensions have different semantics: they are closer to syntactic sugar for a generator expression inside a `list()` constructor, and in particular the loop control variables are no longer leaked into the surrounding scope.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": []
}
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.5.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/tutorials/sorting_csvs.ipynb b/tutorials/sorting_csvs.ipynb
new file mode 100644
index 0000000..df1b182
--- /dev/null
+++ b/tutorials/sorting_csvs.ipynb
@@ -0,0 +1,757 @@
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:f56b7081a6e5b63610100fcfa0a226c7a0184dfe0d63128614a7a68555653428"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Sebastian Raschka](http://sebastianraschka.com) \n",
+ "last updated: 05/13/2014\n",
+ "\n",
+ "- Open in [IPython nbviewer](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/tutorials/sorting_csvs.ipynb?create=1) \n",
+ "- Link to this [IPython notebook on Github](https://github.com/rasbt/python_reference/blob/master/tutorials/sorting_csvs.ipynb) \n",
+ "- Link to the GitHub Repository [`python_reference`](https://github.com/rasbt/python_reference)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "I am looking forward to comments or suggestions, please don't hesitate to contact me via\n",
+ "[twitter](https://twitter.com/rasbt), [email](mailto:bluewoodtree@gmail.com), or [google+](https://plus.google.com/118404394130788869227).\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Sorting CSV files using the Python `csv` module"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "I wanted to summarize a way to sort CSV files by just using the [`csv` module](https://docs.python.org/3.4/library/csv.html) and other standard library Python modules \n",
+ "(you probably also want to consider using the [pandas](http://pandas.pydata.org) library if you are working with very large CSV files - I am planning to make this a separate topic)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
\n",
+ "
\n",
+ "## Sections\n",
+ "- [Reading in a CSV file](#reading)\n",
+ "- [Printing the CSV file contents](#printing)\n",
+ "- [Converting numeric cells to floats](#floats)\n",
+ "- [Sorting the CSV file](#sorting)\n",
+ "- [Marking min/max values in particular columns](#marking)\n",
+ "- [Writing out the modified table as a new CSV file](#writing)\n",
+ "- [Batch processing CSV files](#batch)\n",
+ "
\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Objective:\n",
+ "\n",
+ "Let us assume that we have an [example CSV](../Data/test.csv) file formatted like this:\n",
+ " \n",
+ "name,column1,column2,column3\n",
+ "abc,1.1,4.2,1.2\n",
+ "def,2.1,1.4,5.2\n",
+ "ghi,1.5,1.2,2.1\n",
+ "jkl,1.8,1.1,4.2\n",
+ "mno,9.4,6.6,6.2\n",
+ "pqr,1.4,8.3,8.4
\n",
+ "\n",
+ "And we want to sort particular columns and eventually mark min- or max-values in the table.\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Reading in a CSV file"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Because we will be iterating over our CSV file a couple of times, let us read in the CSV file using the `csv` module and hold the contents in memory using a Python list object (note: be careful with very large CSV files and possible memory issues associated with this approach).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import csv\n",
+ "\n",
+ "def csv_to_list(csv_file, delimiter=','):\n",
+ " \"\"\" \n",
+ " Reads in a CSV file and returns the contents as list,\n",
+ " where every row is stored as a sublist, and each element\n",
+ " in the sublist represents 1 cell in the table.\n",
+ " \n",
+ " \"\"\"\n",
+ " with open(csv_file, 'r') as csv_con:\n",
+ " reader = csv.reader(csv_con, delimiter=delimiter)\n",
+ " return list(reader)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 1
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "csv_cont = csv_to_list('../Data/test.csv')\n",
+ "\n",
+ "print('first 3 rows:')\n",
+ "for row in range(3):\n",
+ " print(csv_cont[row])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "first 3 rows:\n",
+ "['name', 'column1', 'column2', 'column3']\n",
+ "['abc', '1.1', '4.2', '1.2']\n",
+ "['def', '2.1', '1.4', '5.2']\n"
+ ]
+ }
+ ],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Printing the CSV file contents"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Also, let us define a short function that prints out the CSV file to the standard output screen in a slightly prettier format:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def print_csv(csv_content):\n",
+ " \"\"\" Prints CSV file to standard output.\"\"\"\n",
+ " print(50*'-')\n",
+ " for row in csv_content:\n",
+ " row = [str(e) for e in row]\n",
+ " print('\\t'.join(row))\n",
+ " print(50*'-')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "csv_cont = csv_to_list('../Data/test.csv')\n",
+ "\n",
+ "print('\\n\\nOriginal CSV file:')\n",
+ "print_csv(csv_cont)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "\n",
+ "Original CSV file:\n",
+ "--------------------------------------------------\n",
+ "name\tcolumn1\tcolumn2\tcolumn3\n",
+ "abc\t1.1\t4.2\t1.2\n",
+ "def\t2.1\t1.4\t5.2\n",
+ "ghi\t1.5\t1.2\t-2.1\n",
+ "jkl\t1.8\t-1.1\t4.2\n",
+ "mno\t9.4\t6.6\t6.2\n",
+ "pqr\t1.4\t8.3\t8.4\n",
+ "--------------------------------------------------\n"
+ ]
+ }
+ ],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Converting numeric cells to floats"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To avoid problems with the sorting approach that can occur when we have negative values in some cells, let us define a function that converts all numeric cells into float values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def convert_cells_to_floats(csv_cont):\n",
+ " \"\"\" \n",
+ " Converts cells to floats if possible\n",
+ " (modifies input CSV content list).\n",
+ " \n",
+ " \"\"\"\n",
+ " for row in range(len(csv_cont)):\n",
+ " for cell in range(len(csv_cont[row])):\n",
+ " try:\n",
+ " csv_cont[row][cell] = float(csv_cont[row][cell])\n",
+ " except ValueError:\n",
+ " pass "
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 5
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "print('first 3 rows:')\n",
+ "for row in range(3):\n",
+ " print(csv_cont[row])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "first 3 rows:\n",
+ "['name', 'column1', 'column2', 'column3']\n",
+ "['abc', '1.1', '4.2', '1.2']\n",
+ "['def', '2.1', '1.4', '5.2']\n"
+ ]
+ }
+ ],
+ "prompt_number": 6
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Sorting the CSV file"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using the very handy [`operator.itemgetter`](https://docs.python.org/3.4/library/operator.html#operator.itemgetter) function, we define a function that returns a CSV file contents sorted by a particular column (column index or column name)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import operator\n",
+ "\n",
+ "def sort_by_column(csv_cont, col, reverse=False):\n",
+ " \"\"\" \n",
+ " Sorts CSV contents by column name (if col argument is type str) \n",
+ " or column index (if col argument is type int). \n",
+ " \n",
+ " \"\"\"\n",
+ " header = csv_cont[0]\n",
+ " body = csv_cont[1:]\n",
+ " if isinstance(col, str): \n",
+ " col_index = header.index(col)\n",
+ " else:\n",
+ " col_index = col\n",
+ " body = sorted(body, \n",
+ " key=operator.itemgetter(col_index), \n",
+ " reverse=reverse)\n",
+ " body.insert(0, header)\n",
+ " return body"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 7
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To see how (and if) it works, let us sort the CSV file in [../Data/test.csv](../Data/test.csv) by the column name \"column3\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "csv_cont = csv_to_list('../Data/test.csv')\n",
+ "\n",
+ "print('\\n\\nOriginal CSV file:')\n",
+ "print_csv(csv_cont)\n",
+ "\n",
+ "print('\\n\\nCSV sorted by column \"column3\":')\n",
+ "convert_cells_to_floats(csv_cont)\n",
+ "csv_sorted = sort_by_column(csv_cont, 'column3')\n",
+ "print_csv(csv_sorted)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "\n",
+ "Original CSV file:\n",
+ "--------------------------------------------------\n",
+ "name\tcolumn1\tcolumn2\tcolumn3\n",
+ "abc\t1.1\t4.2\t1.2\n",
+ "def\t2.1\t1.4\t5.2\n",
+ "ghi\t1.5\t1.2\t-2.1\n",
+ "jkl\t1.8\t-1.1\t4.2\n",
+ "mno\t9.4\t6.6\t6.2\n",
+ "pqr\t1.4\t8.3\t8.4\n",
+ "--------------------------------------------------\n",
+ "\n",
+ "\n",
+ "CSV sorted by column \"column3\":\n",
+ "--------------------------------------------------\n",
+ "name\tcolumn1\tcolumn2\tcolumn3\n",
+ "ghi\t1.5\t1.2\t-2.1\n",
+ "abc\t1.1\t4.2\t1.2\n",
+ "jkl\t1.8\t-1.1\t4.2\n",
+ "def\t2.1\t1.4\t5.2\n",
+ "mno\t9.4\t6.6\t6.2\n",
+ "pqr\t1.4\t8.3\t8.4\n",
+ "--------------------------------------------------\n"
+ ]
+ }
+ ],
+ "prompt_number": 8
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Marking min/max values in particular columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To visualize minimum and maximum values in certain columns I find it quite useful to add little symbols to the cells (most people like to highlight cells with colors in e.g., Excel spreadsheets, but CSV doesn't support colors, so this is my workaround - please let me know if you figured out a better approach, I look forward to your suggestions)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def mark_minmax(csv_cont, col, mark_max=True, marker='*'):\n",
+ " \"\"\"\n",
+ " Sorts a list of CSV contents by a particular column \n",
+ " (see sort_by_column function).\n",
+ " Puts a marker on the maximum value if mark_max=True,\n",
+ " or puts a marker on the minimum value if mark_max=False\n",
+ " (modifies input CSV content list).\n",
+ " \n",
+ " \"\"\"\n",
+ " \n",
+ " sorted_csv = sort_by_column(csv_cont, col, reverse=mark_max)\n",
+ " if isinstance(col, str): \n",
+ " col_index = sorted_csv[0].index(col)\n",
+ " else:\n",
+ " col_index = col\n",
+ " sorted_csv[1][col_index] = str(sorted_csv[1][col_index]) + marker\n",
+ " return None"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 9
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def mark_all_col(csv_cont, mark_max=True, marker='*'):\n",
+ " \"\"\"\n",
+ " Marks all maximum (if mark_max=True) or minimum (if mark_max=False)\n",
+ " values in all columns of a CSV contents list - except the first column.\n",
+ " Returns a new list that is sorted by the names in the first column\n",
+ " (modifies input CSV content list).\n",
+ " \n",
+ " \"\"\"\n",
+ " for c in range(1, len(csv_cont[0])):\n",
+ " mark_minmax(csv_cont, c, mark_max, marker)\n",
+ " marked_csv = sort_by_column(csv_cont, 0, False)\n",
+ " return marked_csv"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 10
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import copy\n",
+ "\n",
+ "csv_cont = csv_to_list('../Data/test.csv')\n",
+ "\n",
+ "csv_marked = copy.deepcopy(csv_cont)\n",
+ "convert_cells_to_floats(csv_marked)\n",
+ "mark_all_col(csv_marked, mark_max=False, marker='*')\n",
+ "print_csv(csv_marked)\n",
+ "print('*: min-value')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "--------------------------------------------------\n",
+ "name\tcolumn1\tcolumn2\tcolumn3\n",
+ "abc\t1.1*\t4.2\t1.2\n",
+ "def\t2.1\t1.4\t5.2\n",
+ "ghi\t1.5\t1.2\t-2.1*\n",
+ "jkl\t1.8\t-1.1*\t4.2\n",
+ "mno\t9.4\t6.6\t6.2\n",
+ "pqr\t1.4\t8.3\t8.4\n",
+ "--------------------------------------------------\n",
+ "*: min-value\n"
+ ]
+ }
+ ],
+ "prompt_number": 12
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Writing out the modified table as a new CSV file"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After the sorting and maybe marking of minimum and maximum values, we likely want to write out the modified data table as CSV file again."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def write_csv(dest, csv_cont):\n",
+ " \"\"\" Writes a comma-delimited CSV file. \"\"\"\n",
+ "\n",
+ " with open(dest, 'w') as out_file:\n",
+ " writer = csv.writer(out_file, delimiter=',')\n",
+ " for row in csv_cont:\n",
+ " writer.writerow(row)\n",
+ "\n",
+ "write_csv('../Data/test_marked.csv', csv_marked)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 13
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let us read in the written CSV file to confirm that the formatting is correct:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "csv_cont = csv_to_list('../Data/test_marked.csv')\n",
+ "\n",
+ "print('\\n\\nWritten CSV file:')\n",
+ "print_csv(csv_cont)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "\n",
+ "Written CSV file:\n",
+ "--------------------------------------------------\n",
+ "name\tcolumn1\tcolumn2\tcolumn3\n",
+ "abc\t1.1*\t4.2\t1.2\n",
+ "def\t2.1\t1.4\t5.2\n",
+ "ghi\t1.5\t1.2\t-2.1*\n",
+ "jkl\t1.8\t-1.1*\t4.2\n",
+ "mno\t9.4\t6.6\t6.2\n",
+ "pqr\t1.4\t8.3\t8.4\n",
+ "--------------------------------------------------\n"
+ ]
+ }
+ ],
+ "prompt_number": 14
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Batch processing CSV files"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Usually, CSV files never come alone, but we have to process a whole bunch of similar formatted CSV files from some output device. \n",
+ "For example, if we want to process all CSV files in a particular input directory and want to save the processed files in a separate output directory, we can use a simple list comprehension to collect tuples of input-output file names."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import os\n",
+ "\n",
+ "in_dir = '../Data'\n",
+ "out_dir = '../Data/processed'\n",
+ "csvs = [\n",
+ " (os.path.join(in_dir, csv), \n",
+ " os.path.join(out_dir, csv))\n",
+ " for csv in os.listdir(in_dir) \n",
+ " if csv.endswith('.csv')\n",
+ " ]\n",
+ "\n",
+ "for i in csvs:\n",
+ " print(i)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "('../Data/test.csv', '../Data/processed/test.csv')\n",
+ "('../Data/test_marked.csv', '../Data/processed/test_marked.csv')\n"
+ ]
+ }
+ ],
+ "prompt_number": 12
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "Next, we can summarize the processes we want to apply to the CSV files in a simple function and loop over our file names:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "def process_csv(csv_in, csv_out):\n",
+ " \"\"\" \n",
+ " Takes an input- and output-filename of a CSV file\n",
+ " and marks minimum values for every column.\n",
+ " \n",
+ " \"\"\"\n",
+ " csv_cont = csv_to_list(csv_in)\n",
+ " csv_marked = copy.deepcopy(csv_cont)\n",
+ " convert_cells_to_floats(csv_marked)\n",
+ " mark_all_col(csv_marked, mark_max=False, marker='*')\n",
+ " write_csv(csv_out, csv_marked)"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 18
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "for inout in csvs:\n",
+ " process_csv(inout[0], inout[1])"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": []
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/sqlite3_howto/LICENSE b/tutorials/sqlite3_howto/LICENSE
similarity index 100%
rename from sqlite3_howto/LICENSE
rename to tutorials/sqlite3_howto/LICENSE
diff --git a/sqlite3_howto/README.md b/tutorials/sqlite3_howto/README.md
similarity index 89%
rename from sqlite3_howto/README.md
rename to tutorials/sqlite3_howto/README.md
index 9549f63..c596dfc 100644
--- a/sqlite3_howto/README.md
+++ b/tutorials/sqlite3_howto/README.md
@@ -6,7 +6,7 @@ _\-- written by Sebastian Raschka_ on March 7, 2014
-
+
@@ -29,7 +29,7 @@ _\-- written by Sebastian Raschka_ on March 7, 2014
• Conclusion
The complete Python code that I am using in this tutorial can be downloaded
-from my GitHub repository:
+from my GitHub repository: [https://github.com/rasbt/python_reference/tree/master/tutorials/sqlite3_howto](https://github.com/rasbt/python_reference/tree/master/tutorials/sqlite3_howto)
* * *
@@ -97,7 +97,7 @@ there is more information about PRIMARY KEYs further down in this section).
- mport sqlite3
+ import sqlite3
sqlite_file = 'my_first_db.sqlite' # name of the sqlite database file
table_name1 = 'my_table_1' # name of the table to be created
@@ -123,8 +123,7 @@ there is more information about PRIMARY KEYs further down in this section).
conn.close()
-Download the script: [create_new_db.py](https://raw.github.com/rasbt/python_sq
-lite_code/master/code/create_new_db.py)
+Download the script: [create_new_db.py](https://github.com/rasbt/python_reference/blob/master/tutorials/sqlite3_howto/code/create_new_db.py)
* * *
@@ -135,7 +134,7 @@ lite_code/master/code/create_new_db.py)
-
+
Using the code above, we created a new `.sqlite` database file with 2 tables.
Each table consists of currently one column only, which is of type INTEGER.
@@ -208,12 +207,11 @@ Let's have a look at some code:
conn.close()
-Download the script: [add_new_column.py](https://raw.github.com/rasbt/python_s
-qlite_code/master/code/add_new_column.py)
+Download the script: [add_new_column.py](https://github.com/rasbt/python_reference/blob/master/tutorials/sqlite3_howto/code/add_new_column.py)
-
+
We just added 2 more columns (`my_2nd_column` and `my_3rd_column`) to
@@ -272,10 +270,9 @@ But let us first have a look at the example code:
conn.close()
-Download the script: [update_or_insert_records.py](https://raw.github.com/rasb
-t/python_sqlite_code/master/code/update_or_insert_records.py)
+Download the script: [update_or_insert_records.py](code/update_or_insert_records.py)
-
+
Both A) `INSERT` and B) `INSERT OR IGNORE` have in common that they append new
rows to the database if a given PRIMARY KEY does not exist in the database
@@ -337,10 +334,9 @@ drop the index, which is also shown in the code below.
conn.close()
-Download the script: [create_unique_index.py](https://raw.github.com/rasbt/pyt
-hon_sqlite_code/master/code/create_unique_index.py)
+Download the script: [create_unique_index.py](code/create_unique_index.py)
-
+
@@ -403,19 +399,17 @@ row entries for all or some columns if they match certain criteria.
conn.close()
-Download the script: [selecting_entries.py](https://raw.github.com/rasbt/pytho
-n_sqlite_code/master/code/selecting_entries.py)
+Download the script: [selecting_entries.py](code/selecting_entries.py)
-
+
if we use the `.fetchall()` method, we return a list of tuples from the
database query, where each tuple represents one row entry. The print output
for the 5 different cases shown in the code above would look like this (note
that we only have a table with 1 row here):
-
+
@@ -545,12 +539,11 @@ that have been added xxx days ago.
conn.close()
-Download the script: [date_time_ops.py](https://raw.github.com/rasbt/python_sq
-lite_code/master/code/date_time_ops.py)
+Download the script: [date_time_ops.py](code/date_time_ops.py)
-
+
Some of the really convenient functions that return the current time and date
@@ -585,7 +578,7 @@ and entries that are older than 1 day via
Note that we don't have to provide the complete time stamps here, the same
syntax applies to simple dates or simple times only, too.
-
+
@@ -593,7 +586,7 @@ syntax applies to simple dates or simple times only, too.
#### Update Mar 16, 2014:
-If'd we are interested to calulate the hours between two `DATETIME()`
+If'd we are interested to calculate the hours between two `DATETIME()`
timestamps, we can could use the handy `STRFTIME()` function like this
@@ -648,10 +641,9 @@ column names):
conn.close()
-Download the script: [get_columnnames.py](https://raw.github.com/rasbt/python_
-sqlite_code/master/code/get_columnnames.py)
+Download the script: [get_columnnames.py](code/get_columnnames.py)
-
+
Since we haven't created a PRIMARY KEY column for `my_table_3`, SQLite
automatically provides an indexed `rowid` column with unique ascending integer
@@ -669,7 +661,7 @@ grab the 2nd value in each tuple of the returned list, which can be done by
after the `PRAGMA TABLE_INFO()` call. If we would print the contents of the
variable `names` now, the output would look like this:
-
+
@@ -685,53 +677,58 @@ convenient script to print a nice overview of SQLite database tables:
import sqlite3
-
+
+
def connect(sqlite_file):
""" Make connection to an SQLite database file """
conn = sqlite3.connect(sqlite_file)
c = conn.cursor()
return conn, c
-
+
+
def close(conn):
""" Commit changes and close connection to the database """
# conn.commit()
conn.close()
-
+
+
def total_rows(cursor, table_name, print_out=False):
""" Returns the total number of rows in the database """
- c.execute('SELECT COUNT(*) FROM {}'.format(table_name))
- count = c.fetchall()
+ cursor.execute('SELECT COUNT(*) FROM {}'.format(table_name))
+ count = cursor.fetchall()
if print_out:
print('\nTotal rows: {}'.format(count[0][0]))
return count[0][0]
-
+
+
def table_col_info(cursor, table_name, print_out=False):
- """
- Returns a list of tuples with column informations:
- (id, name, type, notnull, default_value, primary_key)
-
+ """ Returns a list of tuples with column informations:
+ (id, name, type, notnull, default_value, primary_key)
"""
- c.execute('PRAGMA TABLE_INFO({})'.format(table_name))
- info = c.fetchall()
-
+ cursor.execute('PRAGMA TABLE_INFO({})'.format(table_name))
+ info = cursor.fetchall()
+
if print_out:
print("\nColumn Info:\nID, Name, Type, NotNull, DefaultVal, PrimaryKey")
for col in info:
print(col)
return info
-
+
+
def values_in_col(cursor, table_name, print_out=True):
- """ Returns a dictionary with columns as keys and the number of not-null
- entries as associated values.
+ """ Returns a dictionary with columns as keys
+ and the number of not-null entries as associated values.
"""
- c.execute('PRAGMA TABLE_INFO({})'.format(table_name))
- info = c.fetchall()
+ cursor.execute('PRAGMA TABLE_INFO({})'.format(table_name))
+ info = cursor.fetchall()
col_dict = dict()
for col in info:
col_dict[col[1]] = 0
for col in col_dict:
- c.execute('SELECT ({0}) FROM {1} WHERE {0} IS NOT NULL'.format(col, table_name))
- # In my case this approach resulted in a better performance than using COUNT
+ c.execute('SELECT ({0}) FROM {1} '
+ 'WHERE {0} IS NOT NULL'.format(col, table_name))
+ # In my case this approach resulted in a
+ # better performance than using COUNT
number_rows = len(c.fetchall())
col_dict[col] = number_rows
if print_out:
@@ -739,27 +736,26 @@ convenient script to print a nice overview of SQLite database tables:
for i in col_dict.items():
print('{}: {}'.format(i[0], i[1]))
return col_dict
-
-
+
+
if __name__ == '__main__':
-
+
sqlite_file = 'my_first_db.sqlite'
table_name = 'my_table_3'
-
+
conn, c = connect(sqlite_file)
total_rows(c, table_name, print_out=True)
table_col_info(c, table_name, print_out=True)
- values_in_col(c, table_name, print_out=True) # slow on large data bases
-
+ # next line might be slow on large databases
+ values_in_col(c, table_name, print_out=True)
+
close(conn)
-
-Download the script: [print_db_info.py](https://raw.github.com/rasbt/python_sq
-lite_code/master/code/print_db_info.py)
+Download the script: [print_db_info.py](code/print_db_info.py)
-
+
-
+
diff --git a/sqlite3_howto/code/add_new_column.py b/tutorials/sqlite3_howto/code/add_new_column.py
similarity index 100%
rename from sqlite3_howto/code/add_new_column.py
rename to tutorials/sqlite3_howto/code/add_new_column.py
diff --git a/sqlite3_howto/code/create_new_db.py b/tutorials/sqlite3_howto/code/create_new_db.py
similarity index 100%
rename from sqlite3_howto/code/create_new_db.py
rename to tutorials/sqlite3_howto/code/create_new_db.py
diff --git a/sqlite3_howto/code/create_unique_index.py b/tutorials/sqlite3_howto/code/create_unique_index.py
similarity index 100%
rename from sqlite3_howto/code/create_unique_index.py
rename to tutorials/sqlite3_howto/code/create_unique_index.py
diff --git a/sqlite3_howto/code/date_time_ops.py b/tutorials/sqlite3_howto/code/date_time_ops.py
similarity index 100%
rename from sqlite3_howto/code/date_time_ops.py
rename to tutorials/sqlite3_howto/code/date_time_ops.py
diff --git a/sqlite3_howto/code/get_columnnames.py b/tutorials/sqlite3_howto/code/get_columnnames.py
similarity index 100%
rename from sqlite3_howto/code/get_columnnames.py
rename to tutorials/sqlite3_howto/code/get_columnnames.py
diff --git a/sqlite3_howto/code/print_db_info.py b/tutorials/sqlite3_howto/code/print_db_info.py
similarity index 67%
rename from sqlite3_howto/code/print_db_info.py
rename to tutorials/sqlite3_howto/code/print_db_info.py
index 22b72a8..285a635 100644
--- a/sqlite3_howto/code/print_db_info.py
+++ b/tutorials/sqlite3_howto/code/print_db_info.py
@@ -22,52 +22,57 @@
import sqlite3
+
def connect(sqlite_file):
""" Make connection to an SQLite database file """
conn = sqlite3.connect(sqlite_file)
c = conn.cursor()
return conn, c
+
def close(conn):
""" Commit changes and close connection to the database """
- #conn.commit()
+ # conn.commit()
conn.close()
+
def total_rows(cursor, table_name, print_out=False):
""" Returns the total number of rows in the database """
- c.execute('SELECT COUNT(*) FROM {}'.format(table_name))
- count = c.fetchall()
+ cursor.execute('SELECT COUNT(*) FROM {}'.format(table_name))
+ count = cursor.fetchall()
if print_out:
print('\nTotal rows: {}'.format(count[0][0]))
return count[0][0]
+
def table_col_info(cursor, table_name, print_out=False):
- """
- Returns a list of tuples with column informations:
- (id, name, type, notnull, default_value, primary_key)
-
+ """ Returns a list of tuples with column informations:
+ (id, name, type, notnull, default_value, primary_key)
"""
- c.execute('PRAGMA TABLE_INFO({})'.format(table_name))
- info = c.fetchall()
-
+ cursor.execute('PRAGMA TABLE_INFO({})'.format(table_name))
+ info = cursor.fetchall()
+
if print_out:
print("\nColumn Info:\nID, Name, Type, NotNull, DefaultVal, PrimaryKey")
for col in info:
print(col)
return info
+
def values_in_col(cursor, table_name, print_out=True):
- """ Returns a dictionary with columns as keys and the number of not-null
- entries as associated values.
+ """ Returns a dictionary with columns as keys
+ and the number of not-null entries as associated values.
"""
- c.execute('PRAGMA TABLE_INFO({})'.format(table_name))
- info = c.fetchall()
+ cursor.execute('PRAGMA TABLE_INFO({})'.format(table_name))
+ info = cursor.fetchall()
col_dict = dict()
for col in info:
col_dict[col[1]] = 0
for col in col_dict:
- c.execute('SELECT ({0}) FROM {1} WHERE {0} IS NOT NULL'.format(col, table_name))
- # In my case this approach resulted in a better performance than using COUNT
+ c.execute('SELECT ({0}) FROM {1} '
+ 'WHERE {0} IS NOT NULL'.format(col, table_name))
+ # In my case this approach resulted in a
+ # better performance than using COUNT
number_rows = len(c.fetchall())
col_dict[col] = number_rows
if print_out:
@@ -85,7 +90,7 @@ def values_in_col(cursor, table_name, print_out=True):
conn, c = connect(sqlite_file)
total_rows(c, table_name, print_out=True)
table_col_info(c, table_name, print_out=True)
- values_in_col(c, table_name, print_out=True) # slow on large data bases
-
- close(conn)
+ # next line might be slow on large databases
+ values_in_col(c, table_name, print_out=True)
+ close(conn)
diff --git a/sqlite3_howto/code/selecting_entries.py b/tutorials/sqlite3_howto/code/selecting_entries.py
similarity index 100%
rename from sqlite3_howto/code/selecting_entries.py
rename to tutorials/sqlite3_howto/code/selecting_entries.py
diff --git a/sqlite3_howto/code/update_or_insert_records.py b/tutorials/sqlite3_howto/code/update_or_insert_records.py
similarity index 94%
rename from sqlite3_howto/code/update_or_insert_records.py
rename to tutorials/sqlite3_howto/code/update_or_insert_records.py
index 37292a5..ee461ec 100644
--- a/sqlite3_howto/code/update_or_insert_records.py
+++ b/tutorials/sqlite3_howto/code/update_or_insert_records.py
@@ -1,6 +1,6 @@
# Sebastian Raschka, 2014
# Update records or insert them if they don't exist.
-# Note that this is a workaround to accomodate for missing
+# Note that this is a workaround to accommodate for missing
# SQL features in SQLite.
import sqlite3
diff --git a/sqlite3_howto/code/updating_rows.py b/tutorials/sqlite3_howto/code/updating_rows.py
similarity index 100%
rename from sqlite3_howto/code/updating_rows.py
rename to tutorials/sqlite3_howto/code/updating_rows.py
diff --git a/sqlite3_howto/code/write_from_sqlite.py b/tutorials/sqlite3_howto/code/write_from_sqlite.py
similarity index 100%
rename from sqlite3_howto/code/write_from_sqlite.py
rename to tutorials/sqlite3_howto/code/write_from_sqlite.py
diff --git a/tutorials/table_of_contents_ipython.ipynb b/tutorials/table_of_contents_ipython.ipynb
new file mode 100644
index 0000000..1245132
--- /dev/null
+++ b/tutorials/table_of_contents_ipython.ipynb
@@ -0,0 +1,281 @@
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:34307c4f0973ebef511e97c036657231fc4e230e7627cfe073d89f4046f9ce9f"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Sebastian Raschka](http://sebastianraschka.com) \n",
+ "last updated: 05/29/2014"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "I would be happy to hear your comments and suggestions. \n",
+ "Please feel free to drop me a note via\n",
+ "[twitter](https://twitter.com/rasbt), [email](mailto:bluewoodtree@gmail.com), or [google+](https://plus.google.com/118404394130788869227).\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Creating a table of contents with internal links in IPython Notebooks and Markdown documents"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Many people have asked me how I create the table of contents with internal links for my IPython notebooks and Markdown documents on GitHub. \n",
+ "Well, no (IPython) magic is involved, it is just a little bit of HTML, but I thought it might be worthwhile to write this little how-to tutorial."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
\n",
+ "For example, [click this link](#bottom) to jump to the bottom of the page.\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## The two components to create an internal link"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "So how does it work? Basically, all you need are those two components: \n",
+ "1. the destination\n",
+ "2. an internal hyperlink to the destination"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "###1. The destination"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To define the destination (i.e., the section on the page or the cell you want to jump to), you just need to insert an empty HTML anchor tag and give it an **`id`**, \n",
+ "e.g., **``** \n",
+ "\n",
+ "This anchor tag will be invisible if you render it as Markdown in the IPython notebook. \n",
+ "Note that it would also work if we use the **`name`** attribute instead of **`id`**, but since the **`name`** attribute is not supported by HTML5 anymore, I would suggest to just use the **`id`** attribute, which is also shorter to type."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "###2. The internal hyperlink"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we have to create the hyperlink to the **``** anchor tag that we just created. \n",
+ "We can either do this in ye goode olde HTML where we put a fragment identifier in form of a hash mark (`#`) in front of the name, \n",
+ "for example, **`Link to the destination'`**\n",
+ "\n",
+ "Or alternatively, we can just use the slightly more convenient Markdown syntax: \n",
+ "**`[Link to the destination](#the_destination)`**\n",
+ "\n",
+ "**That's all!**\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# One more piece of advice"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Of course it would make sense to place the empty anchor tags for you table of contents just on top of each cell that contains a heading. \n",
+ "E.g., "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`` \n",
+ "`###Section 2` \n",
+ "`some text ...` "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And I did this for a very long time ... until I figured out that it wouldn't render the Markdown properly if you convert the IPython Notebook into HTML (for example, for printing via the print preview option). \n",
+ "\n",
+ "But instead of "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "###Section 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "it would be rendered as"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`###Section 2`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "which is certainly not what we want (note that it looks normal in the IPython notebook, but not in the converted HTML version). So my favorite remedy would be to put the `id`-anchor tag into a separate cell just above the section, ideally with some line breaks for nicer visuals.\n",
+ "\n",
+ "\n",
+ "\n",
+ "### Solution 1: id-anchor tag in a separate cell\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "### Solution 2: line break between the id-anchor and text:\n",
+ "\n",
+ "\n",
+ "\n",
+ "(this alternative workaround was kindly submitted by [Ryan Morshead](https://github.com/rmorshea))\n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Solution 3: using header cells"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Alternatively, and I think this is an even better solution, is to use header cells.\n",
+ "
\n",
+ "
\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To define the hyperlink anchor tag to this \"header cell\" is just the text content of the \"header cell\" connected by dashes. E.g.,\n",
+ "\n",
+ "`[link to another section](#Another-section)`\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[Click this link and jump to the top of the page](#top)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can't see it, but this cell contains a \n",
+ "`` \n",
+ "anchor tag just below this text.\n",
+ ""
+ ]
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tutorials/things_in_pandas.ipynb b/tutorials/things_in_pandas.ipynb
new file mode 100644
index 0000000..968d734
--- /dev/null
+++ b/tutorials/things_in_pandas.ipynb
@@ -0,0 +1,3201 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Back to the GitHub repository](https://github.com/rasbt/python_reference)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Sebastian Raschka 28/01/2015 \n",
+ "\n",
+ "CPython 3.4.2\n",
+ "IPython 2.3.1\n",
+ "\n",
+ "pandas 0.15.2\n"
+ ]
+ }
+ ],
+ "source": [
+ "%load_ext watermark\n",
+ "%watermark -a 'Sebastian Raschka' -v -d -p pandas"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[More information](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/ipython_magic/watermark.ipynb) about the `watermark` magic command extension."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Things in Pandas I Wish I'd Known Earlier"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is just a small but growing collection of pandas snippets that I find occasionally and particularly useful -- consider it as my personal notebook. Suggestions, tips, and contributions are very, very welcome!"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Sections"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- [Loading Some Example Data](#Loading-Some-Example-Data)\n",
+ "- [Renaming Columns](#Renaming-Columns)\n",
+ " - [Converting Column Names to Lowercase](#Converting-Column-Names-to-Lowercase)\n",
+ " - [Renaming Particular Columns](#Renaming-Particular-Columns)\n",
+ "- [Applying Computations Rows-wise](#Applying-Computations-Rows-wise)\n",
+ " - [Changing Values in a Column](#Changing-Values-in-a-Column)\n",
+ " - [Adding a New Column](#Adding-a-New-Column)\n",
+ " - [Applying Functions to Multiple Columns](#Applying-Functions-to-Multiple-Columns)\n",
+ "- [Missing Values aka NaNs](#Missing-Values-aka-NaNs)\n",
+ " - [Counting Rows with NaNs](#Counting-Rows-with-NaNs)\n",
+ " - [Selecting NaN Rows](#Selecting-NaN-Rows)\n",
+ " - [Selecting non-NaN Rows](#Selecting-non-NaN-Rows)\n",
+ " - [Filling NaN Rows](#Filling-NaN-Rows)\n",
+ "- [Appending Rows to a DataFrame](#Appending-Rows-to-a-DataFrame)\n",
+ "- [Sorting and Reindexing DataFrames](#Sorting-and-Reindexing-DataFrames)\n",
+ "- [Updating Columns](#Updating-Columns)\n",
+ "- [Chaining Conditions - Using Bitwise Operators](#Chaining-Conditions---Using-Bitwise-Operators)\n",
+ "- [Column Types](#Column-Types)\n",
+ " - [Printing Column Types](#Printing-Column-Types)\n",
+ " - [Selecting by Column Type](#Selecting-by-Column-Type)\n",
+ " - [Converting Column Types](#Converting-Column-Types)\n",
+ "- [If-tests](#If-tests)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Loading Some Example Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "I am heavily into sports prediction (via a machine learning approach) these days. So, let us use a (very) small subset of the soccer data that I am just working with."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " PLAYER \n",
+ " SALARY \n",
+ " GP \n",
+ " G \n",
+ " A \n",
+ " SOT \n",
+ " PPG \n",
+ " P \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Sergio Agüero\\n Forward — Manchester City \n",
+ " $19.2m \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " Eden Hazard\\n Midfield — Chelsea \n",
+ " $18.9m \n",
+ " 21 \n",
+ " 8 \n",
+ " 4 \n",
+ " 17 \n",
+ " 13.05 \n",
+ " 274.04 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " Alexis Sánchez\\n Forward — Arsenal \n",
+ " $17.6m \n",
+ " NaN \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Yaya Touré\\n Midfield — Manchester City \n",
+ " $16.6m \n",
+ " 18 \n",
+ " 7 \n",
+ " 1 \n",
+ " 19 \n",
+ " 10.99 \n",
+ " 197.91 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " Ángel Di María\\n Midfield — Manchester United \n",
+ " $15.0m \n",
+ " 13 \n",
+ " 3 \n",
+ " NaN \n",
+ " 13 \n",
+ " 10.17 \n",
+ " 132.23 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " Santiago Cazorla\\n Midfield — Arsenal \n",
+ " $14.8m \n",
+ " 20 \n",
+ " 4 \n",
+ " NaN \n",
+ " 20 \n",
+ " 9.97 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " David Silva\\n Midfield — Manchester City \n",
+ " $14.3m \n",
+ " 15 \n",
+ " 6 \n",
+ " 2 \n",
+ " 11 \n",
+ " 10.35 \n",
+ " 155.26 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " Cesc Fàbregas\\n Midfield — Chelsea \n",
+ " $14.0m \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " Saido Berahino\\n Forward — West Brom \n",
+ " $13.8m \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " Steven Gerrard\\n Midfield — Liverpool \n",
+ " $13.8m \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " PLAYER SALARY GP G A SOT \\\n",
+ "0 Sergio Agüero\\n Forward — Manchester City $19.2m 16 14 3 34 \n",
+ "1 Eden Hazard\\n Midfield — Chelsea $18.9m 21 8 4 17 \n",
+ "2 Alexis Sánchez\\n Forward — Arsenal $17.6m NaN 12 7 29 \n",
+ "3 Yaya Touré\\n Midfield — Manchester City $16.6m 18 7 1 19 \n",
+ "4 Ángel Di María\\n Midfield — Manchester United $15.0m 13 3 NaN 13 \n",
+ "5 Santiago Cazorla\\n Midfield — Arsenal $14.8m 20 4 NaN 20 \n",
+ "6 David Silva\\n Midfield — Manchester City $14.3m 15 6 2 11 \n",
+ "7 Cesc Fàbregas\\n Midfield — Chelsea $14.0m 20 2 14 10 \n",
+ "8 Saido Berahino\\n Forward — West Brom $13.8m 21 9 0 20 \n",
+ "9 Steven Gerrard\\n Midfield — Liverpool $13.8m 20 5 1 11 \n",
+ "\n",
+ " PPG P \n",
+ "0 13.12 209.98 \n",
+ "1 13.05 274.04 \n",
+ "2 11.19 223.86 \n",
+ "3 10.99 197.91 \n",
+ "4 10.17 132.23 \n",
+ "5 9.97 NaN \n",
+ "6 10.35 155.26 \n",
+ "7 10.47 209.49 \n",
+ "8 7.02 147.43 \n",
+ "9 7.50 150.01 "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_csv('https://raw.githubusercontent.com/rasbt/python_reference/master/Data/some_soccer_data.csv')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Renaming Columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Converting Column Names to Lowercase"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " gp \n",
+ " g \n",
+ " a \n",
+ " sot \n",
+ " ppg \n",
+ " p \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " Cesc Fàbregas\\n Midfield — Chelsea \n",
+ " $14.0m \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " Saido Berahino\\n Forward — West Brom \n",
+ " $13.8m \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " Steven Gerrard\\n Midfield — Liverpool \n",
+ " $13.8m \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary gp g a sot ppg \\\n",
+ "7 Cesc Fàbregas\\n Midfield — Chelsea $14.0m 20 2 14 10 10.47 \n",
+ "8 Saido Berahino\\n Forward — West Brom $13.8m 21 9 0 20 7.02 \n",
+ "9 Steven Gerrard\\n Midfield — Liverpool $13.8m 20 5 1 11 7.50 \n",
+ "\n",
+ " p \n",
+ "7 209.49 \n",
+ "8 147.43 \n",
+ "9 150.01 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Converting column names to lowercase\n",
+ "\n",
+ "df.columns = [c.lower() for c in df.columns]\n",
+ "\n",
+ "# or\n",
+ "# df.rename(columns=lambda x : x.lower())\n",
+ "\n",
+ "df.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Renaming Particular Columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " Cesc Fàbregas\\n Midfield — Chelsea \n",
+ " $14.0m \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " Saido Berahino\\n Forward — West Brom \n",
+ " $13.8m \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " Steven Gerrard\\n Midfield — Liverpool \n",
+ " $13.8m \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists \\\n",
+ "7 Cesc Fàbregas\\n Midfield — Chelsea $14.0m 20 2 14 \n",
+ "8 Saido Berahino\\n Forward — West Brom $13.8m 21 9 0 \n",
+ "9 Steven Gerrard\\n Midfield — Liverpool $13.8m 20 5 1 \n",
+ "\n",
+ " shots_on_target points_per_game points \n",
+ "7 10 10.47 209.49 \n",
+ "8 20 7.02 147.43 \n",
+ "9 11 7.50 150.01 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = df.rename(columns={'p': 'points', \n",
+ " 'gp': 'games',\n",
+ " 'sot': 'shots_on_target',\n",
+ " 'g': 'goals',\n",
+ " 'ppg': 'points_per_game',\n",
+ " 'a': 'assists',})\n",
+ "\n",
+ "df.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Applying Computations Rows-wise"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Changing Values in a Column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " Santiago Cazorla\\n Midfield — Arsenal \n",
+ " 14.8 \n",
+ " 20 \n",
+ " 4 \n",
+ " NaN \n",
+ " 20 \n",
+ " 9.97 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " David Silva\\n Midfield — Manchester City \n",
+ " 14.3 \n",
+ " 15 \n",
+ " 6 \n",
+ " 2 \n",
+ " 11 \n",
+ " 10.35 \n",
+ " 155.26 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " Cesc Fàbregas\\n Midfield — Chelsea \n",
+ " 14.0 \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " Saido Berahino\\n Forward — West Brom \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " Steven Gerrard\\n Midfield — Liverpool \n",
+ " 13.8 \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists \\\n",
+ "5 Santiago Cazorla\\n Midfield — Arsenal 14.8 20 4 NaN \n",
+ "6 David Silva\\n Midfield — Manchester City 14.3 15 6 2 \n",
+ "7 Cesc Fàbregas\\n Midfield — Chelsea 14.0 20 2 14 \n",
+ "8 Saido Berahino\\n Forward — West Brom 13.8 21 9 0 \n",
+ "9 Steven Gerrard\\n Midfield — Liverpool 13.8 20 5 1 \n",
+ "\n",
+ " shots_on_target points_per_game points \n",
+ "5 20 9.97 NaN \n",
+ "6 11 10.35 155.26 \n",
+ "7 10 10.47 209.49 \n",
+ "8 20 7.02 147.43 \n",
+ "9 11 7.50 150.01 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Processing `salary` column\n",
+ "\n",
+ "df['salary'] = df['salary'].apply(lambda x: x.strip('$m'))\n",
+ "df.tail()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Adding a New Column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " Cesc Fàbregas\\n Midfield — Chelsea \n",
+ " 14.0 \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " Saido Berahino\\n Forward — West Brom \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " Steven Gerrard\\n Midfield — Liverpool \n",
+ " 13.8 \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists \\\n",
+ "7 Cesc Fàbregas\\n Midfield — Chelsea 14.0 20 2 14 \n",
+ "8 Saido Berahino\\n Forward — West Brom 13.8 21 9 0 \n",
+ "9 Steven Gerrard\\n Midfield — Liverpool 13.8 20 5 1 \n",
+ "\n",
+ " shots_on_target points_per_game points position team \n",
+ "7 10 10.47 209.49 \n",
+ "8 20 7.02 147.43 \n",
+ "9 11 7.50 150.01 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['team'] = pd.Series('', index=df.index)\n",
+ "\n",
+ "# or\n",
+ "df.insert(loc=8, column='position', value='') \n",
+ "\n",
+ "df.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " Cesc Fàbregas \n",
+ " 14.0 \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " Midfield \n",
+ " Chelsea \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " Saido Berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " Forward \n",
+ " West Brom \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " Steven Gerrard \n",
+ " 13.8 \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " Midfield \n",
+ " Liverpool \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "7 Cesc Fàbregas 14.0 20 2 14 10 \n",
+ "8 Saido Berahino 13.8 21 9 0 20 \n",
+ "9 Steven Gerrard 13.8 20 5 1 11 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "7 10.47 209.49 Midfield Chelsea \n",
+ "8 7.02 147.43 Forward West Brom \n",
+ "9 7.50 150.01 Midfield Liverpool "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Processing `player` column\n",
+ "\n",
+ "def process_player_col(text):\n",
+ " name, rest = text.split('\\n')\n",
+ " position, team = [x.strip() for x in rest.split(' — ')]\n",
+ " return pd.Series([name, team, position])\n",
+ "\n",
+ "df[['player', 'team', 'position']] = df.player.apply(process_player_col)\n",
+ "\n",
+ "# modified after tip from reddit.com/user/hharison\n",
+ "#\n",
+ "# Alternative (inferior) approach:\n",
+ "#\n",
+ "#for idx,row in df.iterrows():\n",
+ "# name, position, team = process_player_col(row['player'])\n",
+ "# df.ix[idx, 'player'], df.ix[idx, 'position'], df.ix[idx, 'team'] = name, position, team\n",
+ " \n",
+ "df.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Applying Functions to Multiple Columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " sergio agüero \n",
+ " 19.2 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " eden hazard \n",
+ " 18.9 \n",
+ " 21 \n",
+ " 8 \n",
+ " 4 \n",
+ " 17 \n",
+ " 13.05 \n",
+ " 274.04 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " alexis sánchez \n",
+ " 17.6 \n",
+ " NaN \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " yaya touré \n",
+ " 16.6 \n",
+ " 18 \n",
+ " 7 \n",
+ " 1 \n",
+ " 19 \n",
+ " 10.99 \n",
+ " 197.91 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " ángel di maría \n",
+ " 15.0 \n",
+ " 13 \n",
+ " 3 \n",
+ " NaN \n",
+ " 13 \n",
+ " 10.17 \n",
+ " 132.23 \n",
+ " midfield \n",
+ " manchester united \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "0 sergio agüero 19.2 16 14 3 34 \n",
+ "1 eden hazard 18.9 21 8 4 17 \n",
+ "2 alexis sánchez 17.6 NaN 12 7 29 \n",
+ "3 yaya touré 16.6 18 7 1 19 \n",
+ "4 ángel di maría 15.0 13 3 NaN 13 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "0 13.12 209.98 forward manchester city \n",
+ "1 13.05 274.04 midfield chelsea \n",
+ "2 11.19 223.86 forward arsenal \n",
+ "3 10.99 197.91 midfield manchester city \n",
+ "4 10.17 132.23 midfield manchester united "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cols = ['player', 'position', 'team']\n",
+ "df[cols] = df[cols].applymap(lambda x: x.lower())\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Missing Values aka NaNs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Counting Rows with NaNs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3 rows have missing values\n"
+ ]
+ }
+ ],
+ "source": [
+ "nans = df.shape[0] - df.dropna().shape[0]\n",
+ "\n",
+ "print('%d rows have missing values' % nans)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Selecting NaN Rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " ángel di maría \n",
+ " 15.0 \n",
+ " 13 \n",
+ " 3 \n",
+ " NaN \n",
+ " 13 \n",
+ " 10.17 \n",
+ " 132.23 \n",
+ " midfield \n",
+ " manchester united \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " santiago cazorla \n",
+ " 14.8 \n",
+ " 20 \n",
+ " 4 \n",
+ " NaN \n",
+ " 20 \n",
+ " 9.97 \n",
+ " NaN \n",
+ " midfield \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "4 ángel di maría 15.0 13 3 NaN 13 \n",
+ "5 santiago cazorla 14.8 20 4 NaN 20 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "4 10.17 132.23 midfield manchester united \n",
+ "5 9.97 NaN midfield arsenal "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Selecting all rows that have NaNs in the `assists` column\n",
+ "\n",
+ "df[df['assists'].isnull()]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Selecting non-NaN Rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " sergio agüero \n",
+ " 19.2 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " eden hazard \n",
+ " 18.9 \n",
+ " 21 \n",
+ " 8 \n",
+ " 4 \n",
+ " 17 \n",
+ " 13.05 \n",
+ " 274.04 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " alexis sánchez \n",
+ " 17.6 \n",
+ " NaN \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " yaya touré \n",
+ " 16.6 \n",
+ " 18 \n",
+ " 7 \n",
+ " 1 \n",
+ " 19 \n",
+ " 10.99 \n",
+ " 197.91 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " david silva \n",
+ " 14.3 \n",
+ " 15 \n",
+ " 6 \n",
+ " 2 \n",
+ " 11 \n",
+ " 10.35 \n",
+ " 155.26 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " cesc fàbregas \n",
+ " 14.0 \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " steven gerrard \n",
+ " 13.8 \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " midfield \n",
+ " liverpool \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "0 sergio agüero 19.2 16 14 3 34 \n",
+ "1 eden hazard 18.9 21 8 4 17 \n",
+ "2 alexis sánchez 17.6 NaN 12 7 29 \n",
+ "3 yaya touré 16.6 18 7 1 19 \n",
+ "6 david silva 14.3 15 6 2 11 \n",
+ "7 cesc fàbregas 14.0 20 2 14 10 \n",
+ "8 saido berahino 13.8 21 9 0 20 \n",
+ "9 steven gerrard 13.8 20 5 1 11 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "0 13.12 209.98 forward manchester city \n",
+ "1 13.05 274.04 midfield chelsea \n",
+ "2 11.19 223.86 forward arsenal \n",
+ "3 10.99 197.91 midfield manchester city \n",
+ "6 10.35 155.26 midfield manchester city \n",
+ "7 10.47 209.49 midfield chelsea \n",
+ "8 7.02 147.43 forward west brom \n",
+ "9 7.50 150.01 midfield liverpool "
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[df['assists'].notnull()]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Filling NaN Rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " sergio agüero \n",
+ " 19.2 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " eden hazard \n",
+ " 18.9 \n",
+ " 21 \n",
+ " 8 \n",
+ " 4 \n",
+ " 17 \n",
+ " 13.05 \n",
+ " 274.04 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " alexis sánchez \n",
+ " 17.6 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " yaya touré \n",
+ " 16.6 \n",
+ " 18 \n",
+ " 7 \n",
+ " 1 \n",
+ " 19 \n",
+ " 10.99 \n",
+ " 197.91 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " ángel di maría \n",
+ " 15.0 \n",
+ " 13 \n",
+ " 3 \n",
+ " 0 \n",
+ " 13 \n",
+ " 10.17 \n",
+ " 132.23 \n",
+ " midfield \n",
+ " manchester united \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " santiago cazorla \n",
+ " 14.8 \n",
+ " 20 \n",
+ " 4 \n",
+ " 0 \n",
+ " 20 \n",
+ " 9.97 \n",
+ " 0.00 \n",
+ " midfield \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " david silva \n",
+ " 14.3 \n",
+ " 15 \n",
+ " 6 \n",
+ " 2 \n",
+ " 11 \n",
+ " 10.35 \n",
+ " 155.26 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " cesc fàbregas \n",
+ " 14.0 \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " steven gerrard \n",
+ " 13.8 \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " midfield \n",
+ " liverpool \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "0 sergio agüero 19.2 16 14 3 34 \n",
+ "1 eden hazard 18.9 21 8 4 17 \n",
+ "2 alexis sánchez 17.6 0 12 7 29 \n",
+ "3 yaya touré 16.6 18 7 1 19 \n",
+ "4 ángel di maría 15.0 13 3 0 13 \n",
+ "5 santiago cazorla 14.8 20 4 0 20 \n",
+ "6 david silva 14.3 15 6 2 11 \n",
+ "7 cesc fàbregas 14.0 20 2 14 10 \n",
+ "8 saido berahino 13.8 21 9 0 20 \n",
+ "9 steven gerrard 13.8 20 5 1 11 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "0 13.12 209.98 forward manchester city \n",
+ "1 13.05 274.04 midfield chelsea \n",
+ "2 11.19 223.86 forward arsenal \n",
+ "3 10.99 197.91 midfield manchester city \n",
+ "4 10.17 132.23 midfield manchester united \n",
+ "5 9.97 0.00 midfield arsenal \n",
+ "6 10.35 155.26 midfield manchester city \n",
+ "7 10.47 209.49 midfield chelsea \n",
+ "8 7.02 147.43 forward west brom \n",
+ "9 7.50 150.01 midfield liverpool "
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Filling NaN cells with default value 0\n",
+ "\n",
+ "df.fillna(value=0, inplace=True)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Appending Rows to a DataFrame"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " steven gerrard \n",
+ " 13.8 \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " midfield \n",
+ " liverpool \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "8 saido berahino 13.8 21 9 0 20 \n",
+ "9 steven gerrard 13.8 20 5 1 11 \n",
+ "10 NaN NaN NaN NaN NaN NaN \n",
+ "\n",
+ " points_per_game points position team \n",
+ "8 7.02 147.43 forward west brom \n",
+ "9 7.50 150.01 midfield liverpool \n",
+ "10 NaN NaN NaN NaN "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Adding an \"empty\" row to the DataFrame\n",
+ "\n",
+ "import numpy as np\n",
+ "\n",
+ "df = df.append(pd.Series(\n",
+ " [np.nan]*len(df.columns), # Fill cells with NaNs\n",
+ " index=df.columns), \n",
+ " ignore_index=True)\n",
+ "\n",
+ "df.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " steven gerrard \n",
+ " 13.8 \n",
+ " 20 \n",
+ " 5 \n",
+ " 1 \n",
+ " 11 \n",
+ " 7.50 \n",
+ " 150.01 \n",
+ " midfield \n",
+ " liverpool \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " new player \n",
+ " 12.3 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "8 saido berahino 13.8 21 9 0 20 \n",
+ "9 steven gerrard 13.8 20 5 1 11 \n",
+ "10 new player 12.3 NaN NaN NaN NaN \n",
+ "\n",
+ " points_per_game points position team \n",
+ "8 7.02 147.43 forward west brom \n",
+ "9 7.50 150.01 midfield liverpool \n",
+ "10 NaN NaN NaN NaN "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Filling cells with data\n",
+ "\n",
+ "df.loc[df.index[-1], 'player'] = 'new player'\n",
+ "df.loc[df.index[-1], 'salary'] = 12.3\n",
+ "df.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Sorting and Reindexing DataFrames"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " sergio agüero \n",
+ " 19.2 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " alexis sánchez \n",
+ " 17.6 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " eden hazard \n",
+ " 18.9 \n",
+ " 21 \n",
+ " 8 \n",
+ " 4 \n",
+ " 17 \n",
+ " 13.05 \n",
+ " 274.04 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " yaya touré \n",
+ " 16.6 \n",
+ " 18 \n",
+ " 7 \n",
+ " 1 \n",
+ " 19 \n",
+ " 10.99 \n",
+ " 197.91 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "0 sergio agüero 19.2 16 14 3 34 \n",
+ "2 alexis sánchez 17.6 0 12 7 29 \n",
+ "8 saido berahino 13.8 21 9 0 20 \n",
+ "1 eden hazard 18.9 21 8 4 17 \n",
+ "3 yaya touré 16.6 18 7 1 19 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "0 13.12 209.98 forward manchester city \n",
+ "2 11.19 223.86 forward arsenal \n",
+ "8 7.02 147.43 forward west brom \n",
+ "1 13.05 274.04 midfield chelsea \n",
+ "3 10.99 197.91 midfield manchester city "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Sorting the DataFrame by a certain column (from highest to lowest)\n",
+ "\n",
+ "df.sort('goals', ascending=False, inplace=True)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " sergio agüero \n",
+ " 19.2 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " alexis sánchez \n",
+ " 17.6 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " eden hazard \n",
+ " 18.9 \n",
+ " 21 \n",
+ " 8 \n",
+ " 4 \n",
+ " 17 \n",
+ " 13.05 \n",
+ " 274.04 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " yaya touré \n",
+ " 16.6 \n",
+ " 18 \n",
+ " 7 \n",
+ " 1 \n",
+ " 19 \n",
+ " 10.99 \n",
+ " 197.91 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "1 sergio agüero 19.2 16 14 3 34 \n",
+ "2 alexis sánchez 17.6 0 12 7 29 \n",
+ "3 saido berahino 13.8 21 9 0 20 \n",
+ "4 eden hazard 18.9 21 8 4 17 \n",
+ "5 yaya touré 16.6 18 7 1 19 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "1 13.12 209.98 forward manchester city \n",
+ "2 11.19 223.86 forward arsenal \n",
+ "3 7.02 147.43 forward west brom \n",
+ "4 13.05 274.04 midfield chelsea \n",
+ "5 10.99 197.91 midfield manchester city "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Optional reindexing of the DataFrame after sorting\n",
+ "\n",
+ "df.index = range(1,len(df.index)+1)\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Updating Columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " sergio agüero \n",
+ " 20 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " alexis sánchez \n",
+ " 15 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "1 sergio agüero 20 16 14 3 34 \n",
+ "2 alexis sánchez 15 0 12 7 29 \n",
+ "3 saido berahino 13.8 21 9 0 20 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "1 13.12 209.98 forward manchester city \n",
+ "2 11.19 223.86 forward arsenal \n",
+ "3 7.02 147.43 forward west brom "
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Creating a dummy DataFrame with changes in the `salary` column\n",
+ "\n",
+ "df_2 = df.copy()\n",
+ "df_2.loc[0:2, 'salary'] = [20.0, 15.0]\n",
+ "df_2.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " player \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " sergio agüero \n",
+ " 19.2 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " alexis sánchez \n",
+ " 17.6 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " salary games goals assists shots_on_target \\\n",
+ "player \n",
+ "sergio agüero 19.2 16 14 3 34 \n",
+ "alexis sánchez 17.6 0 12 7 29 \n",
+ "saido berahino 13.8 21 9 0 20 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "player \n",
+ "sergio agüero 13.12 209.98 forward manchester city \n",
+ "alexis sánchez 11.19 223.86 forward arsenal \n",
+ "saido berahino 7.02 147.43 forward west brom "
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Temporarily use the `player` columns as indices to \n",
+ "# apply the update functions\n",
+ "\n",
+ "df.set_index('player', inplace=True)\n",
+ "df_2.set_index('player', inplace=True)\n",
+ "df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " player \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " sergio agüero \n",
+ " 20 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " alexis sánchez \n",
+ " 15 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " salary games goals assists shots_on_target \\\n",
+ "player \n",
+ "sergio agüero 20 16 14 3 34 \n",
+ "alexis sánchez 15 0 12 7 29 \n",
+ "saido berahino 13.8 21 9 0 20 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "player \n",
+ "sergio agüero 13.12 209.98 forward manchester city \n",
+ "alexis sánchez 11.19 223.86 forward arsenal \n",
+ "saido berahino 7.02 147.43 forward west brom "
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Update the `salary` column\n",
+ "df.update(other=df_2['salary'], overwrite=True)\n",
+ "df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " sergio agüero \n",
+ " 20 \n",
+ " 16 \n",
+ " 14 \n",
+ " 3 \n",
+ " 34 \n",
+ " 13.12 \n",
+ " 209.98 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " alexis sánchez \n",
+ " 15 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " 21 \n",
+ " 9 \n",
+ " 0 \n",
+ " 20 \n",
+ " 7.02 \n",
+ " 147.43 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "0 sergio agüero 20 16 14 3 34 \n",
+ "1 alexis sánchez 15 0 12 7 29 \n",
+ "2 saido berahino 13.8 21 9 0 20 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "0 13.12 209.98 forward manchester city \n",
+ "1 11.19 223.86 forward arsenal \n",
+ "2 7.02 147.43 forward west brom "
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Reset the indices\n",
+ "df.reset_index(inplace=True)\n",
+ "df.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Chaining Conditions - Using Bitwise Operators"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " alexis sánchez \n",
+ " 15 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " eden hazard \n",
+ " 18.9 \n",
+ " 21 \n",
+ " 8 \n",
+ " 4 \n",
+ " 17 \n",
+ " 13.05 \n",
+ " 274.04 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " santiago cazorla \n",
+ " 14.8 \n",
+ " 20 \n",
+ " 4 \n",
+ " 0 \n",
+ " 20 \n",
+ " 9.97 \n",
+ " 0.00 \n",
+ " midfield \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " cesc fàbregas \n",
+ " 14.0 \n",
+ " 20 \n",
+ " 2 \n",
+ " 14 \n",
+ " 10 \n",
+ " 10.47 \n",
+ " 209.49 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "1 alexis sánchez 15 0 12 7 29 \n",
+ "3 eden hazard 18.9 21 8 4 17 \n",
+ "7 santiago cazorla 14.8 20 4 0 20 \n",
+ "9 cesc fàbregas 14.0 20 2 14 10 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "1 11.19 223.86 forward arsenal \n",
+ "3 13.05 274.04 midfield chelsea \n",
+ "7 9.97 0.00 midfield arsenal \n",
+ "9 10.47 209.49 midfield chelsea "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Selecting only those players that either playing for Arsenal or Chelsea\n",
+ "\n",
+ "df[ (df['team'] == 'arsenal') | (df['team'] == 'chelsea') ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " games \n",
+ " goals \n",
+ " assists \n",
+ " shots_on_target \n",
+ " points_per_game \n",
+ " points \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " alexis sánchez \n",
+ " 15 \n",
+ " 0 \n",
+ " 12 \n",
+ " 7 \n",
+ " 29 \n",
+ " 11.19 \n",
+ " 223.86 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary games goals assists shots_on_target \\\n",
+ "1 alexis sánchez 15 0 12 7 29 \n",
+ "\n",
+ " points_per_game points position team \n",
+ "1 11.19 223.86 forward arsenal "
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Selecting forwards from Arsenal only\n",
+ "\n",
+ "df[ (df['team'] == 'arsenal') & (df['position'] == 'forward') ]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Column Types"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Printing Column Types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{dtype('float64'): ['games',\n",
+ " 'goals',\n",
+ " 'assists',\n",
+ " 'shots_on_target',\n",
+ " 'points_per_game',\n",
+ " 'points'],\n",
+ " dtype('O'): ['player', 'salary', 'position', 'team']}"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "types = df.columns.to_series().groupby(df.dtypes).groups\n",
+ "types"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Selecting by Column Type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " player \n",
+ " salary \n",
+ " position \n",
+ " team \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " sergio agüero \n",
+ " 20 \n",
+ " forward \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " alexis sánchez \n",
+ " 15 \n",
+ " forward \n",
+ " arsenal \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " saido berahino \n",
+ " 13.8 \n",
+ " forward \n",
+ " west brom \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " eden hazard \n",
+ " 18.9 \n",
+ " midfield \n",
+ " chelsea \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " yaya touré \n",
+ " 16.6 \n",
+ " midfield \n",
+ " manchester city \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " player salary position team\n",
+ "0 sergio agüero 20 forward manchester city\n",
+ "1 alexis sánchez 15 forward arsenal\n",
+ "2 saido berahino 13.8 forward west brom\n",
+ "3 eden hazard 18.9 midfield chelsea\n",
+ "4 yaya touré 16.6 midfield manchester city"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# select string columns\n",
+ "df.loc[:, (df.dtypes == np.dtype('O')).values].head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Converting Column Types"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "df['salary'] = df['salary'].astype(float)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{dtype('float64'): ['salary',\n",
+ " 'games',\n",
+ " 'goals',\n",
+ " 'assists',\n",
+ " 'shots_on_target',\n",
+ " 'points_per_game',\n",
+ " 'points'],\n",
+ " dtype('O'): ['player', 'position', 'team']}"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "types = df.columns.to_series().groupby(df.dtypes).groups\n",
+ "types"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# If-tests"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to section overview](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "I was recently asked how to do an if-test in pandas, that is, how to create an array of 1s and 0s depending on a condition, e.g., if `val` less than 0.5 -> 0, else -> 1. Using the boolean mask, that's pretty simple since `True` and `False` are integers after all."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "int(True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 2 \n",
+ " 3 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2.0 \n",
+ " 0.30 \n",
+ " 4.00 \n",
+ " 5 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 0.8 \n",
+ " 0.03 \n",
+ " 0.02 \n",
+ " 5 \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " 0 1 2 3\n",
+ "0 2.0 0.30 4.00 5\n",
+ "1 0.8 0.03 0.02 5"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "a = [[2., .3, 4., 5.], [.8, .03, 0.02, 5.]]\n",
+ "df = pd.DataFrame(a)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 2 \n",
+ " 3 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " False \n",
+ " False \n",
+ " False \n",
+ " False \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " False \n",
+ " True \n",
+ " True \n",
+ " False \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " 0 1 2 3\n",
+ "0 False False False False\n",
+ "1 False True True False"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = df <= 0.05\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 2 \n",
+ " 3 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 0 \n",
+ " 1 \n",
+ " 1 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ ""
+ ],
+ "text/plain": [
+ " 0 1 2 3\n",
+ "0 0 0 0 0\n",
+ "1 0 1 1 0"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.astype(int)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.4.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/tutorials/useful_regex.ipynb b/tutorials/useful_regex.ipynb
new file mode 100644
index 0000000..24bcf14
--- /dev/null
+++ b/tutorials/useful_regex.ipynb
@@ -0,0 +1,1070 @@
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:237609a5ef934bf65a93a410c9e5107b808049dd04b0faf2b30f9b423699ba6c"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[Sebastian Raschka](http://sebastianraschka.com) \n",
+ "\n",
+ "- [Link to this IPython notebook on Github](https://github.com/rasbt/python_reference/blob/master/tutorials/useful_regex.ipynb) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "%load_ext watermark"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 1
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "%watermark -d -v -u -t -z"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "Last updated: 06/07/2014 22:50:23 EDT\n",
+ "\n",
+ "CPython 3.4.1\n",
+ "IPython 2.1.0\n"
+ ]
+ }
+ ],
+ "prompt_number": 2
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[More information](http://nbviewer.ipython.org/github/rasbt/python_reference/blob/master/ipython_magic/watermark.ipynb) about the `watermark` magic command extension."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "I would be happy to hear your comments and suggestions. \n",
+ "Please feel free to drop me a note via\n",
+ "[twitter](https://twitter.com/rasbt), [email](mailto:bluewoodtree@gmail.com), or [google+](https://plus.google.com/+SebastianRaschka).\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 1,
+ "metadata": {},
+ "source": [
+ "A collection of useful regular expressions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Sections"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- [About the `re` module](#About-the-re-module)\n",
+ "- [Identify files via file extensions](#Identify-files-via-file-extensions)\n",
+ "- [Username validation](#Username-validation)\n",
+ "- [Checking for valid email addresses](#Checking-for-valid-email-addresses)\n",
+ "- [Check for a valid URL](#Check-for-a-valid-URL)\n",
+ "- [Checking for numbers](#Checking-for-numbers)\n",
+ "- [Validating dates](#Validating-dates)\n",
+ "- [Time](#Time)\n",
+ "- [Checking for HTML tags](#Checking-for-HTML-tags)\n",
+ "- [Checking for IP addresses](#Checking-for-IP-addresses)\n",
+ "- [Checking for MAC addresses](#Checking-for-MAC-addresses)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "About the `re` module"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The purpose of this IPython notebook is not to rewrite a detailed tutorial about regular expressions or the in-built Python `re` module, but to collect some useful regular expressions for copy&paste purposes."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The complete documentation of the Python `re` module can be found here [https://docs.python.org/3.4/howto/regex.html](https://docs.python.org/3.4/howto/regex.html). Below, I just want to list the most important methods for convenience:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "- `re.match()` : Determine if the RE matches at the beginning of the string.\n",
+ "- `re.search()` : Scan through a string, looking for any location where this RE matches.\n",
+ "- `re.findall()` : Find all substrings where the RE matches, and returns them as a list.\n",
+ "- `re.finditer()` : Find all substrings where the RE matches, and returns them as an iterator."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you are using the same regular expression multiple times, it is recommended to compile it for improved performance.\n",
+ "\n",
+ " compiled_re = re.compile(r'some_regexpr') \n",
+ " for word in text:\n",
+ " match = comp.search(compiled_re))\n",
+ " # do something with the match\n",
+ " \n",
+ "**E.g., if we want to check if a string ends with a substring:**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import re\n",
+ "\n",
+ "needle = 'needlers'\n",
+ "\n",
+ "# Python approach\n",
+ "print(bool(any([needle.endswith(e) for e in ('ly', 'ed', 'ing', 'ers')])))\n",
+ "\n",
+ "# On-the-fly Regular expression in Python\n",
+ "print(bool(re.search(r'(?:ly|ed|ing|ers)$', needle)))\n",
+ "\n",
+ "# Compiled Regular expression in Python\n",
+ "comp = re.compile(r'(?:ly|ed|ing|ers)$') \n",
+ "print(bool(comp.search(needle)))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "True\n",
+ "True\n",
+ "True\n"
+ ]
+ }
+ ],
+ "prompt_number": 3
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "%timeit -n 10000 -r 50 bool(any([needle.endswith(e) for e in ('ly', 'ed', 'ing', 'ers')]))\n",
+ "%timeit -n 10000 -r 50 bool(re.search(r'(?:ly|ed|ing|ers)$', needle))\n",
+ "%timeit -n 10000 -r 50 bool(comp.search(needle))"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "10000 loops, best of 50: 2.74 \u00b5s per loop\n",
+ "10000 loops, best of 50: 2.93 \u00b5s per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n",
+ "10000 loops, best of 50: 1.28 \u00b5s per loop"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "prompt_number": 4
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Identify files via file extensions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A regular expression to check for file extensions. \n",
+ "\n",
+ "Note: This approach is not recommended for thorough limitation of file types (parse the file header instead). However, this regex is still a useful alternative to e.g., a Python's `endswith` approach for quick pre-filtering for certain files of interest."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r'(?i)(\\w+)\\.(jpeg|jpg|png|gif|tif|svg)$'\n",
+ "\n",
+ "# remove `(?i)` to make regexpr case-sensitive\n",
+ "\n",
+ "str_true = ('test.gif', \n",
+ " 'image.jpeg', \n",
+ " 'image.jpg',\n",
+ " 'image.TIF'\n",
+ " )\n",
+ "\n",
+ "str_false = ('test.pdf',\n",
+ " 'test.gif.pdf',\n",
+ " )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 5
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Username validation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Checking for a valid user name that has a certain minimum and maximum length.\n",
+ "\n",
+ "Allowed characters:\n",
+ "- letters (upper- and lower-case)\n",
+ "- numbers\n",
+ "- dashes\n",
+ "- underscores"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "min_len = 5 # minimum length for a valid username\n",
+ "max_len = 15 # maximum length for a valid username\n",
+ "\n",
+ "pattern = r\"^(?i)[a-z0-9_-]{%s,%s}$\" %(min_len, max_len)\n",
+ "\n",
+ "# remove `(?i)` to only allow lower-case letters\n",
+ "\n",
+ "\n",
+ "\n",
+ "str_true = ('user123', '123_user', 'Username')\n",
+ " \n",
+ "str_false = ('user', 'username1234_is-way-too-long', 'user$34354')\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 6
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Checking for valid email addresses"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A regular expression that captures most email addresses."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r\"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$)\"\n",
+ "\n",
+ "str_true = ('test@mail.com',)\n",
+ " \n",
+ "str_false = ('testmail.com', '@testmail.com', 'test@mailcom')\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 7
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "source: [http://stackoverflow.com/questions/201323/using-a-regular-expression-to-validate-an-email-address](http://stackoverflow.com/questions/201323/using-a-regular-expression-to-validate-an-email-address)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Check for a valid URL"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Checks for an URL if a string ...\n",
+ "\n",
+ "- starts with `https://`, or `http://`, or `www.`\n",
+ "- or ends with a dot extension"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^(https?:\\/\\/)?([\\da-z\\.-]+)\\.([a-z\\.]{2,6})([\\/\\w \\.-]*)*\\/?$'\n",
+ "\n",
+ "str_true = ('https://github.com', \n",
+ " 'http://github.com',\n",
+ " 'www.github.com',\n",
+ " 'github.com',\n",
+ " 'test.de',\n",
+ " 'https://github.com/rasbt',\n",
+ " 'test.jpeg' # !!! \n",
+ " )\n",
+ " \n",
+ "str_false = ('testmailcom', 'http:testmailcom', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 8
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "source: [http://code.tutsplus.com/tutorials/8-regular-expressions-you-should-know--net-6149](http://code.tutsplus.com/tutorials/8-regular-expressions-you-should-know--net-6149)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Checking for numbers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Positive integers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^\\d+$'\n",
+ "\n",
+ "str_true = ('123', '1', )\n",
+ " \n",
+ "str_false = ('abc', '1.1', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 9
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Negative integers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^-\\d+$'\n",
+ "\n",
+ "str_true = ('-123', '-1', )\n",
+ " \n",
+ "str_false = ('123', '-abc', '-1.1', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 10
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "All integers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^-{0,1}\\d+$'\n",
+ "\n",
+ "str_true = ('-123', '-1', '1', '123',)\n",
+ " \n",
+ "str_false = ('123.0', '-abc', '-1.1', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 11
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Positive numbers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^\\d*\\.{0,1}\\d+$'\n",
+ "\n",
+ "str_true = ('1', '123', '1.234', )\n",
+ " \n",
+ "str_false = ('-abc', '-123', '-123.0')\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 12
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Negative numbers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^-\\d*\\.{0,1}\\d+$'\n",
+ "\n",
+ "str_true = ('-1', '-123', '-123.0', )\n",
+ " \n",
+ "str_false = ('-abc', '1', '123', '1.234', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 13
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "All numbers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^-{0,1}\\d*\\.{0,1}\\d+$'\n",
+ "\n",
+ "str_true = ('1', '123', '1.234', '-123', '-123.0')\n",
+ " \n",
+ "str_false = ('-abc')\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 14
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "source: [http://stackoverflow.com/questions/1449817/what-are-some-of-the-most-useful-regular-expressions-for-programmers](http://stackoverflow.com/questions/1449817/what-are-some-of-the-most-useful-regular-expressions-for-programmers)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Validating dates"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Validates dates in `mm/dd/yyyy` format."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = '^(0[1-9]|1[0-2])\\/(0[1-9]|1\\d|2\\d|3[01])\\/(19|20)\\d{2}$'\n",
+ "\n",
+ "str_true = ('01/08/2014', '12/30/2014', )\n",
+ " \n",
+ "str_false = ('22/08/2014', '-123', '1/8/2014', '1/08/2014', '01/8/2014')\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 15
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "12-Hour format"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r'^(1[012]|[1-9]):[0-5][0-9](\\s)?(?i)(am|pm)$'\n",
+ "\n",
+ "str_true = ('2:00pm', '7:30 AM', '12:05 am', )\n",
+ " \n",
+ "str_false = ('22:00pm', '14:00', '3:12', '03:12pm', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 29
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "24-Hour format"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r'^([0-1]{1}[0-9]{1}|20|21|22|23):[0-5]{1}[0-9]{1}$'\n",
+ "\n",
+ "str_true = ('14:00', '00:30', )\n",
+ " \n",
+ "str_false = ('22:00pm', '4:00', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 18
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Checking for HTML tags"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Also this regex is only recommended for \"filtering\" purposes and not a ultimate way to parse HTML. For more information see this excellent discussion on StackOverflow: \n",
+ "[http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/](http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags/) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r\"\"\"?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)/?>\"\"\"\n",
+ "\n",
+ "str_true = ('', '', '', '
')\n",
+ " \n",
+ "str_false = ('a>', '')\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 16
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "source: [http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/](http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx/)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Checking for IP addresses"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "IPv4"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Image source: http://en.wikipedia.org/wiki/File:Ipv4_address.svg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$'\n",
+ "\n",
+ "str_true = ('172.16.254.1', '1.2.3.4', '01.102.103.104', )\n",
+ " \n",
+ "str_false = ('17216.254.1', '1.2.3.4.5', '01 .102.103.104', )\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 8
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "source: [http://answers.oreilly.com/topic/318-how-to-match-ipv4-addresses-with-regular-expressions/](http://answers.oreilly.com/topic/318-how-to-match-ipv4-addresses-with-regular-expressions/)"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 3,
+ "metadata": {},
+ "source": [
+ "Ipv6"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Image source: http://upload.wikimedia.org/wikipedia/commons/1/15/Ipv6_address.svg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r'^\\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)(\\.(25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]?\\d)){3}))|:)))(%.+)?\\s*$'\n",
+ "\n",
+ "str_true = ('2001:470:9b36:1::2',\n",
+ " '2001:cdba:0000:0000:0000:0000:3257:9652', \n",
+ " '2001:cdba:0:0:0:0:3257:9652', \n",
+ " '2001:cdba::3257:9652', )\n",
+ " \n",
+ "str_false = ('1200::AB00:1234::2552:7777:1313', # uses `::` twice\n",
+ " '1200:0000:AB00:1234:O000:2552:7777:1313', ) # contains an O instead of 0\n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 21
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "source: [http://snipplr.com/view/43003/regex--match-ipv6-address/](http://snipplr.com/view/43003/regex--match-ipv6-address/)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "heading",
+ "level": 2,
+ "metadata": {},
+ "source": [
+ "Checking for MAC addresses"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[[back to top](#Sections)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Image source: http://upload.wikimedia.org/wikipedia/en/3/37/MACaddressV3.png "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "pattern = r'^(?i)([0-9A-F]{2}[:-]){5}([0-9A-F]{2})$'\n",
+ "\n",
+ "str_true = ('94-AE-70-A0-66-83', \n",
+ " '58-f8-1a-00-44-c8',\n",
+ " '00:A0:C9:14:C8:29'\n",
+ " , )\n",
+ " \n",
+ "str_false = ('0:00:00:00:00:00', \n",
+ " '94-AE-70-A0 -66-83', ) \n",
+ "\n",
+ "for t in str_true:\n",
+ " assert(bool(re.match(pattern, t)) == True), '%s is not True' %t\n",
+ "\n",
+ "for f in str_false:\n",
+ " assert(bool(re.match(pattern, f)) == False), '%s is not False' %f"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 29
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/useful_scripts/combinations.py b/useful_scripts/combinations.py
new file mode 100755
index 0000000..5dbe91d
--- /dev/null
+++ b/useful_scripts/combinations.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+# Sebastian Raschka 2014
+# Functions to calculate factorial, combinations, and permutations
+# bundled in a simple command line interface.
+
def factorial(n):
    """Return n! (the factorial of n) for a non-negative integer n.

    Parameters:
        n (int): non-negative integer.

    Returns:
        int: n! (1 for n == 0).

    Raises:
        ValueError: if n is negative.  The original recursive version
            recursed forever on negative input until the interpreter's
            recursion limit was hit; the iterative form below also avoids
            RecursionError for large n.
    """
    if n < 0:
        raise ValueError('factorial() is not defined for negative values')
    result = 1
    for k in range(2, n + 1):
        result *= k
    return result
+
def combinations(n, r):
    """Return "n choose r": the number of ways to pick r of n items
    where order does not matter.

    Uses exact integer arithmetic (// and math.factorial).  The original
    `int(numerator/denominator)` performed float division, which loses
    precision for moderately large n and raises OverflowError once the
    factorials exceed float range.
    """
    import math  # local import keeps the function a drop-in for the script
    return math.factorial(n) // (math.factorial(r) * math.factorial(n - r))
+
def permutations(n, r):
    """Return the number of ordered arrangements of r out of n items.

    Uses exact integer arithmetic (// and math.factorial).  The original
    `int(numerator/denominator)` performed float division, which loses
    precision for moderately large n and raises OverflowError once the
    factorials exceed float range.
    """
    import math  # local import keeps the function a drop-in for the script
    return math.factorial(n) // math.factorial(n - r)
+
+# Lightweight self-tests executed at import time.
+assert(factorial(3) == 6)
+assert(combinations(20, 8) == 125970)
+assert(permutations(30, 3) == 24360)
+
+
+
+
if __name__ == '__main__':

    import argparse

    parser = argparse.ArgumentParser(
        description='Script to calculate the number of combinations or permutations ("n choose r")',
        formatter_class=argparse.RawTextHelpFormatter,
        prog='Combinations',
        epilog='Example: ./combinations.py -c 20 3'
        )

    parser.add_argument('-c', '--combinations', type=int, metavar='NUMBER', nargs=2,
            help='Combinations: Number of ways to combine n items with sequence length r where the item order does not matter.')

    # NOTE: for permutations the order DOES matter -- the original help text
    # was a copy-paste of the combinations text and said the opposite.
    parser.add_argument('-p', '--permutations', type=int, metavar='NUMBER', nargs=2,
            help='Permutations: Number of ways to combine n items with sequence length r where the item order does matter.')

    parser.add_argument('-f', '--factorial', type=int, metavar='NUMBER', help='n! e.g., 5! = 5*4*3*2*1 = 120.')

    parser.add_argument('--version', action='version', version='%(prog)s 1.0')

    args = parser.parse_args()

    # Print usage and stop when no option was supplied.  Compare against
    # None (not truthiness) so that `-f 0` is still treated as a request.
    # SystemExit replaces quit(), which is meant for interactive sessions.
    if all(v is None for v in
           (args.combinations, args.permutations, args.factorial)):
        parser.print_help()
        raise SystemExit

    if args.factorial is not None:
        print(factorial(args.factorial))

    if args.combinations is not None:
        print(combinations(args.combinations[0], args.combinations[1]))

    # The original block printed the factorial a second time here
    # (duplicated `if args.factorial:` branch) -- removed.
    if args.permutations is not None:
        print(permutations(args.permutations[0], args.permutations[1]))
+
+
+
+
\ No newline at end of file
diff --git a/useful_scripts/conc_gzip_files.py b/useful_scripts/conc_gzip_files.py
index da849c9..b8d9b33 100644
--- a/useful_scripts/conc_gzip_files.py
+++ b/useful_scripts/conc_gzip_files.py
@@ -13,7 +13,7 @@ def conc_gzip_files(in_dir, out_file, append=False, print_progress=True):
Keyword arguments:
in_dir (str): Path of the directory with the gzip-files
out_file (str): Path to the resulting file
- append (bool): If true, it appends contents to an exisiting file,
+ append (bool): If true, it appends contents to an existing file,
else creates a new output file.
print_progress (bool): prints progress bar if true.
diff --git a/useful_scripts/find_file.py b/useful_scripts/find_file.py
new file mode 100644
index 0000000..8cbcc4d
--- /dev/null
+++ b/useful_scripts/find_file.py
@@ -0,0 +1,18 @@
+# Sebastian Raschka 2014
+#
+# A Python function to find files in a directory based on a substring search.
+
+
+import os
+
def find_files(substring, path):
    """Return the full paths of all entries in *path* whose
    filename contains *substring* (same order as os.listdir)."""
    return [os.path.join(path, fname)
            for fname in os.listdir(path)
            if substring in fname]
+
+# E.g.
+# find_files('Untitled', '/Users/sebastian/Desktop/')
+# returns
+# ['/Users/sebastian/Desktop/Untitled0.ipynb']
\ No newline at end of file
diff --git a/useful_scripts/fix_tab_csv.ipynb b/useful_scripts/fix_tab_csv.ipynb
new file mode 100644
index 0000000..496f89f
--- /dev/null
+++ b/useful_scripts/fix_tab_csv.ipynb
@@ -0,0 +1,94 @@
+{
+ "metadata": {
+ "name": "",
+ "signature": "sha256:996358a25da6fc77c66d183e79209307af06bd2f9abb0656d3bb70cfc2fe597a"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Sebastian Raschka 05/09/2014"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Fixing CSV files"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We have a directory `../CSV_files_raw/` with CSV files where some of them have 'tab-separated' and some of them 'comma-separated' columns. \n",
+ "Here, we will 'fix' them, i.e., have them all comma-separated, and save them to a new directory `../CSV_fixed`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, we create a dictionary with the file basenames as keys. The values are lists of the file paths to the raw and new fixed CSV files. e.g., \n",
+ "\n",
+ " {\n",
+ " 'abc.csv': ['../CSV_files_raw/abc.csv', '../CSV_fixed/abc.csv'], \n",
+ " 'def.csv': ['../CSV_files_raw/def.csv', '../CSV_fixed/def.csv'], \n",
+ " ...\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "import sys\n",
+ "import os\n",
+ "\n",
+ "raw_dir = '../CSV_files_raw/'\n",
+ "fixed_dir = '../CSV_fixed'\n",
+ "\n",
+ "if not os.path.exists(fixed_dir):\n",
+ " os.mkdir(fixed_dir)\n",
+ "\n",
+ "f_dict = {os.path.basename(f):[os.path.join(raw_dir, f),\n",
+ " os.path.join(fixed_dir, f)]\n",
+ " for f in os.listdir(raw_dir) if f.endswith('.csv')} "
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 8
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, we can replace the tabs with commas for the new files very easily:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "collapsed": false,
+ "input": [
+ "for f in f_dict.keys():\n",
+ " with open(f_dict[f][0], 'r') as raw, open(f_dict[f][1], 'w') as fixed:\n",
+ " for line in raw:\n",
+ " line = line.strip().split('\\t')\n",
+ " fixed.write(','.join(line) + '\\n')"
+ ],
+ "language": "python",
+ "metadata": {},
+ "outputs": [],
+ "prompt_number": 11
+ }
+ ],
+ "metadata": {}
+ }
+ ]
+}
\ No newline at end of file
diff --git a/useful_scripts/large_csv_to_sqlite.py b/useful_scripts/large_csv_to_sqlite.py
new file mode 100644
index 0000000..9932f9c
--- /dev/null
+++ b/useful_scripts/large_csv_to_sqlite.py
@@ -0,0 +1,48 @@
# Workaround snippet for reading a very large CSV file that exceeds the
# machine's memory and dumping it into an SQLite database using pandas.
#
# Sebastian Raschka, 2015
#
# Tested in Python 3.4.2 and pandas 0.15.2

import sqlite3

import pandas as pd

# In- and output file paths
in_csv = '../data/my_large.csv'
out_sqlite = '../data/my.sqlite'

table_name = 'my_table'  # name for the SQLite database table
chunksize = 100000       # number of CSV lines to process per iteration

# column names assigned to the (header-less) CSV file
columns = ['molecule_id', 'charge', 'db', 'drugsnow', 'hba', 'hbd',
           'loc', 'nrb', 'smiles']

cnx = sqlite3.connect(out_sqlite)
try:
    # Stream the CSV in a single forward pass via `chunksize`.  The original
    # loop used `skiprows=i`, which re-scans the file from the top on every
    # iteration (quadratic overall runtime) and required an external
    # `wc -l` subprocess call just to know when to stop.
    # Use header=0 instead of header=None if the CSV has a header row.
    for chunk in pd.read_csv(in_csv,
                             header=None,   # no header; names supplied below
                             names=columns,
                             chunksize=chunksize):
        # DataFrame.to_sql replaces the deprecated pandas.io.sql.to_sql;
        # index_label was dropped -- it has no effect with index=False.
        chunk.to_sql(name=table_name,
                     con=cnx,
                     index=False,          # don't store the DataFrame index
                     if_exists='append')   # keep adding to the same table
finally:
    cnx.close()  # close the connection even if reading/writing fails
diff --git a/useful_scripts/prepend_python_shebang.sh b/useful_scripts/prepend_python_shebang.sh
new file mode 100644
index 0000000..686225f
--- /dev/null
+++ b/useful_scripts/prepend_python_shebang.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# Sebastian Raschka 05/21/2014
+# Shell script that prepends a Python shebang
+# '#!/usr/bin/env python' to all
+# Python script files in the current directory
+# so that script files can be executed via
+# >> myscript.py
+# instead of
+# >> python myscript.py
+
+# prepends '#!/usr/bin/env python' to all .py files
+
+find ./ -maxdepth 1 -name "*.py" -exec sed -i.bak '1i\
+#!/usr/bin/env python
+' {} \;
+
+# removes only the temporary .bak backups created by sed above;
+# -maxdepth 1 matches the find call above so unrelated .bak files in
+# subdirectories are left alone, and plain `rm -f` suffices for files
+find . -maxdepth 1 -name "*.bak" -exec rm -f {} \;
+
+# makes Python scripts executable
+chmod ug+x *.py
diff --git a/useful_scripts/preprocess_first_last_names.py b/useful_scripts/preprocess_first_last_names.py
new file mode 100644
index 0000000..b0957c2
--- /dev/null
+++ b/useful_scripts/preprocess_first_last_names.py
@@ -0,0 +1,84 @@
+# Sebastian Raschka 2014
+#
+# A Python function to generalize first and last names.
+# The typical use case of such a function is to merge data that have been collected
+# from different sources (e.g., names of soccer players as shown in the doctest.)
+#
+
+import unicodedata
+import string
+import re
+
+def preprocess_names(name, output_sep=' ', firstname_output_letters=1):
+    """
+    Function that outputs a person's name in the format
+    <last_name><separator><firstname letter(s)> (all lowercase)
+
+    >>> preprocess_names("Samuel Eto'o")
+    'etoo s'
+
+    >>> preprocess_names("Eto'o, Samuel")
+    'etoo s'
+
+    >>> preprocess_names("Eto'o,Samuel")
+    'etoo s'
+
+    >>> preprocess_names('Xavi')
+    'xavi'
+
+    >>> preprocess_names('Yaya Touré')
+    'toure y'
+
+    >>> preprocess_names('José Ángel Pozo')
+    'pozo j'
+
+    >>> preprocess_names('Pozo, José Ángel')
+    'pozo j'
+
+    >>> preprocess_names('Pozo, José Ángel', firstname_output_letters=2)
+    'pozo jo'
+
+    >>> preprocess_names("Eto'o, Samuel", firstname_output_letters=2)
+    'etoo sa'
+
+    >>> preprocess_names("Eto'o, Samuel", firstname_output_letters=0)
+    'etoo'
+
+    >>> preprocess_names("Eto'o, Samuel", output_sep=', ')
+    'etoo, s'
+
+    """
+
+    # set first and last name positions
+    last, first = 'last', 'first'
+    last_pos = -1
+
+    # a comma means the name is written as "Last, First", so swap the
+    # group roles and the position of the last-name token
+    if ',' in name:
+        last, first = first, last
+        name = name.replace(',', ' ')
+        last_pos = 1
+
+    # drop middle names: keep only the first token and the last-name token
+    spl = name.split()
+    if len(spl) > 2:
+        name = '%s %s' % (spl[0], spl[last_pos])
+
+    # remove accents
+    name = ''.join(x for x in unicodedata.normalize('NFKD', name) if x in string.ascii_letters+' ')
+
+    # get first and last name if applicable
+    # (raw string with named groups restored; the angle-bracketed group
+    # names had been stripped, leaving an invalid pattern)
+    m = re.match(r'(?P<first>\w+)\W+(?P<last>\w+)', name)
+    if m:
+        output = '%s%s%s' % (m.group(last), output_sep, m.group(first)[:firstname_output_letters])
+    else:
+        output = name
+    return output.lower().strip()
+
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
diff --git a/useful_scripts/principal_eigenvector.py b/useful_scripts/principal_eigenvector.py
new file mode 100644
index 0000000..913cf62
--- /dev/null
+++ b/useful_scripts/principal_eigenvector.py
@@ -0,0 +1,20 @@
+# Select a principal eigenvector via NumPy
+# to be used as a template (copy & paste) script
+
+import numpy as np
+
+# set A to be your matrix
+A = np.array([[1, 2, 3],
+              [4, 5, 6],
+              [7, 8, 9]])
+
+
+vals, vecs = np.linalg.eig(A)
+order = np.argsort(np.absolute(vals))[::-1]  # eigenvalue magnitudes, high to low
+sorted_eig_vals = vals[order]
+sorted_eig_vecs = vecs[:, order]
+
+principal_eig_vec = sorted_eig_vecs[:, 0]  # eigvec with the largest |eigval|
+
+normalized_pr_eig_vec = np.real(principal_eig_vec / principal_eig_vec.sum())
+print(normalized_pr_eig_vec)  # eigvec rescaled so its entries sum to one
diff --git a/useful_scripts/random_string_generator.py b/useful_scripts/random_string_generator.py
new file mode 100644
index 0000000..15cfe51
--- /dev/null
+++ b/useful_scripts/random_string_generator.py
@@ -0,0 +1,22 @@
+import string
+import random
+
+def rand_string(length):
+    """ Generates a random string of numbers, lower- and uppercase chars.
+
+    NOTE: uses the `random` module, so the output is NOT suitable for
+    security purposes (passwords, tokens); use the `secrets` module there.
+    """
+    # ascii_letters == ascii_lowercase + ascii_uppercase
+    alphabet = string.ascii_letters + string.digits
+    return ''.join(random.choice(alphabet) for i in range(length))
+
+if __name__ == '__main__':
+    print("Example1:", rand_string(length=4))
+    print("Example2:", rand_string(length=8))
+    print("Example3:", rand_string(length=16))  # was mislabeled "Example2"
+
+
+    # Example1: 5bVL
+    # Example2: oIIg37xl
+    # Example3: 7IqDbrf506TatFO9
diff --git a/useful_scripts/sparsify_matrix.py b/useful_scripts/sparsify_matrix.py
new file mode 100644
index 0000000..ef5e141
--- /dev/null
+++ b/useful_scripts/sparsify_matrix.py
@@ -0,0 +1,38 @@
+# Sebastian Raschka 2014
+#
+# Sparsifying a matrix by Zeroing out all elements but the top k elements in a row.
+# The matrix could be a distance or similarity matrix (e.g., kernel matrix in kernel PCA),
+# where we are interested to keep the top k neighbors.
+
+import numpy as np
+
+print('Sparsify a matrix by zeroing all elements but the top 2 values in a row.\n')
+
+A = np.array([[1,2,3,4,5],[9,8,6,4,5],[3,1,7,8,9]])
+
+print('Before:\n%s\n' %A)
+
+
+k = 2 # keep top k neighbors
+for row in A:
+    # indices ordered high -> low; everything past the first k gets zeroed
+    drop_idx = np.argsort(row)[::-1][k:]
+    row[drop_idx] = 0
+
+print('After:\n%s\n' %A)
+
+
+"""
+Sparsify a matrix by zeroing all elements but the top 2 values in a row.
+
+Before:
+[[1 2 3 4 5]
+ [9 8 6 4 5]
+ [3 1 7 8 9]]
+
+After:
+[[0 0 0 4 5]
+ [9 8 0 0 0]
+ [0 0 0 8 9]]
+
+"""
\ No newline at end of file