Numpy Tutorial
Need for Numpy :
If we have two lists and simply want to add their elements, we have to iterate over both lists and add the elements pairwise. Just using the "+" operator will concatenate the lists instead.
# Two plain Python lists of equal length.
a = [1, 2, 3, 4]
b = [5, 6, 7, 8]

# "+" on lists concatenates them; it does not add element by element.
print(a + b)

# Element-wise addition has to pair the items up explicitly.
r = [x + y for x, y in zip(a, b)]
print(r)
Numpy :
import numpy as np
# Array Creation
# New arrays can be created with the np.array function.
# It takes a (possibly nested) list and builds an N-dimensional array from it.
# NOTE: the bare expressions in this cell only display a value in an
# interactive session or notebook; run as a script they produce no output.
# One-dimensional array
ar1 = np.array([1,2,3])
# Two-dimensional array (a list of equal-length lists)
ar2 = np.array([[1,2,3], [4,5,6]])
# Other ways to create new arrays
# arange([start,] stop[, step])
# Similar to Python's range function: evenly spaced values, stop excluded.
np.arange(5)
# OUT : array([0, 1, 2, 3, 4])
np.arange(2,10,2) # Excludes the stop position element
# OUT : array([2, 4, 6, 8])
# linspace(start, stop, num)
# Creates an array from a number of points instead of a step size.
np.linspace(0,6,3)
# Creates 3 evenly spaced elements between the start and stop points.
# Note that the stop value is included by default.
# OUT : array([0., 3., 6.])
np.linspace(0,6,3,endpoint=False) # Excludes the stop position element
# OUT : array([0., 2., 4.])
# Type of array
a = np.array([1, 2, 3, 4, 5])
type(a)
# Data type: every element of the array has this single type
# (an integer dtype here; the exact width depends on the platform)
a.dtype
# Another array, this time with floating-point members
f = np.array([1.2, 33.5, 6.4, 7.8, 8.6])
f.dtype
a[0] = 10
a
# The fractional part is truncated because every element must have the
# array's (integer) dtype: 6.88 is stored as 6
a[0] = 6.88
a
# Number of dimensions
a.ndim
# Tuple giving the number of elements along each dimension
a.shape
# Total number of elements
a.size
# Bytes per element
print(a.itemsize)
print(f.itemsize)
# Bytes used by the data portion of the array (size * itemsize)
a.nbytes
# Multi-Dimensional Arrays
# A 2-D array is built from a list of lists
# A 3-D array is built from a list of lists of lists
a = np.array([[1, 2, 3, 4],
[5, 6, 7, 8]])
print("Array is :\n",a)
print("\n")
print("Size is : ",a.size)
print("Dimension is : ",a.ndim)
print("Shape is : ",a.shape)
# NOTE :
# Dimension (axis) 0 runs over the rows and dimension 1 over the columns.
# Here the shape is the tuple (2, 4): 2 rows with 4 elements in each row.
The following picture illustrates the dimensions/axes of a multi-dimensional array :-
Source : http://physics.cornell.edu/
# Retrieving/setting an individual element of a 2-D array :-
# Syntax : Array[row, column]  (both indexes are zero-based)
# This cell works on the 2-row, 4-column array `a` built in the previous cell.
print("Element in first row and fourth column is ",a[0,3])
print("Element in second row and first column is ",a[1,0])
# Retrieving all elements of a row :-
# If only the first index (the row) is given, the whole row is returned
print("Elements in second row", a[1])
# Setting a single element
a[1,0] = 10
print("Array after changing first element of second row :\n", a)
# Setting a whole row at once: the list must have one value per column
a[1] = [10, 11 ,12 ,13]
print("Array after changing second row :\n", a)
Array Slicing :-
An array is sliced with the following syntax, which extracts a sequence based on a lower and an upper bound:
Array[start:stop:step]
Note that the lower (start) bound element is included but the upper (stop) bound element is not. The step value defines the stride.
Just like Python lists, an array can be indexed from both directions. A sequence can be extracted with the above syntax using any combination of positive and negative indices.
NOTE : If a boundary is omitted, it defaults to the start (or the end) of the array.
a = np.array([10, 11, 12, 13, 14, 15])
# The string below is only a diagram: the element values, followed by their
# positive indexes and then their negative indexes.
"""
+---+---+---+---+---+---+
| 10| 11| 12| 13| 14| 15|
+---+---+---+---+---+---+
0 1 2 3 4 5
-6 -5 -4 -3 -2 -1
"""
# Extract the second, third and fourth element (indexes 1, 2 and 3)
print("Second, third and fourth element : a[1:4] :-", a[1:4])
# Positive and negative bounds can be mixed freely; all of these name the same span
print("\nSame elements can be extracted with following notations as well ...")
print("a[-5:-2] :- ", a[-5:-2])
print("a[-5:4] :- ", a[-5:4])
print("a[1:-2] :- ", a[1:-2])
# Extract first three elements (omitted start means "from the beginning")
print("\nExtract first three elements ...")
print("a[:3] :- ", a[:3])
# Extract last three elements (omitted stop means "to the end")
print("\nExtract last three elements ...")
print("a[-3:] :- ", a[-3:])
# a[3:] gives the same result only because this array has exactly six elements
print("a[3:] :- ", a[3:])
# Extract every other element (step of 2)
print("\nExtract every other element ...")
print("a[::2] :- ", a[::2])
# Assigning to a slice overwrites those elements in place (here the last four);
# it does not insert new elements or change the array's length
a[2:] = [2, 3, 4, 5]
print(a)
# More on slicing with an example using 2-D data
# 0..24 arranged as 5 rows by 5 columns
a = np.arange(25).reshape(5,5)
print("Original Array")
print(a)
print("\n")
# A single index selects a whole row: here the row at index 4 (the last one)
print(a[4])
print("\n")
# All rows, every second column starting at column index 1 (columns 1 and 3)
print(a[:,1::2])
print("\n")
# Rows 1 and 3, restricted to columns 0 and 2 (every second column before index 3)
print(a[1::2,:3:2])
# A slice of an array is a view onto the same data, not an independent copy
a = np.array([1, 2, 3, 4, 5])
b = a[:3]
# Writing through the view therefore changes the original array as well
b[0] = -1
print(a)
# copy() creates an independent array that does not share data with `a`
c = a.copy()
Fancy Indexing :-
To understand this better, let's say we have an array (1-D for simplicity) with some random numbers.
# Fancy Indexing - Index Based : (1-D)
# To understand this better, let's say we have an array (1-D for simplicity) with some random numbers
a = np.random.randint(1, 100, 15) # 15 random integers from 1 up to, but not including, 100
print (a)
# Now, say we need the elements at indexes 1, 5 and 13. One way is to extract each item individually
print("Element at index {} is {}".format(1, a[1]))
print("Element at index {} is {}".format(5, a[5]))
print("Element at index {} is {}".format(13, a[13]))
# However, NumPy offers another approach: pass a list of the indexes whose
# elements we want, and they are all returned together as a new array
index = [1, 5, 13]
print("Elements at indexes {} are {}".format(index, a[index]))
# Fancy Indexing - Boolean Array Indexing (1-D)
# Consider an array holding both odd and even numbers.
# Our task is to pick out the even elements
a = np.array([1, 3, 4, 5, 6, 8, 10, 3, 1])
# NumPy lets us index with a boolean array of the same length: only the
# elements whose mask entry is True are returned.
# The comparison is evaluated element by element, so `mask` is itself an array.
mask = (a % 2 == 0)
print("Original array is {}".format(a))
print("Mask array is {}".format(mask))
print("Even numbers are ")
print(a[mask])
# Fancy Indexing - Index Based : (2-D)
# Fancy indexing can also be performed on a 2-D array
a = np.arange(25).reshape(5,5)
# With a 2-D array, a single index returns the entire row at that index
print("Original array is \n {}".format(a))
print("\n")
print("Contents at index (or row) {} is {}".format(3,a[3]))
print("Contents at index (or row) {} is {}".format(2,a[2]))
# Two integers separated by a comma select one element: a[row, column].
# Both indexes are zero-based, so a[3,2] is the element at row index 3, column index 2.
print("a[3,2] : This will print the element at 3rd row and 2nd column :")
print(a[3,2])
print("\n")
# A list of indexes selects those entire rows, in the order listed
print("a[[3,2]] : This will print the contents for 3rd and 2nd row :")
print(a[[3,2]])
print("\n")
# Two index lists are paired up position by position: (0,0), (2,3) and (4,0)
print("a[[0,2,4],[0,3,0]] : This will print elements at (0,0), (2,3) & (4,0) :")
print(a[[0,2,4],[0,3,0]])
print("\n")
# The trailing [:] selects everything from the row selection, so both prints
# below show the same three rows
print("a[[0,2,4]][:] : This will print all elements at row 0, 2 & 4 :")
print(a[[0,2,4]][:])
print(a[[0,2,4]])
print("\n")
# Chained indexing: first pick rows 0, 2 and 4, then pick columns 0, 1 and 3
# from that intermediate result
print("a[[0,2,4]][:,[0,1,3]] : This will print elements at row 0, 2 & 4 AND columns 0, 1 & 3 :")
print(a[[0,2,4]][:,[0,1,3]])
# None of the selections above modified `a`
print(a)
# Boolean Array Indexing (2-D)
# Boolean array indexing works the same way as in 1-D. Using the 5x5 array `a`
# from the previous cell, find all odd elements.
# The mask has the same shape as `a`; the selected elements come back as a 1-D array.
mask = (a % 2 != 0)
print("Original array is \n {} \n".format(a))
print("Mask array is \n {} \n".format(mask))
print("Odd numbers are ")
print(a[mask])
Vectorization :
In high-level languages, the term vectorization refers to the use of pre-compiled, optimized code written in a language like C to perform mathematical operations over a sequence of data. Basically, this is done without writing a "for" loop in Python.
A native Python list allows its elements to be of different data types, whereas all elements of a Numpy array must have the same data type. This property allows mathematical operations to be delegated to pre-compiled code written in C to gain performance improvements.
# Two identical 4x4 arrays holding 0..15
a = np.arange(16).reshape(4,4)
b = np.arange(16).reshape(4,4)
print("Array a is \n",a,"\n\n","Array b is \n",b, "\n")
# Element-wise operations with a scalar: the operation is applied to every element.
# Note that "/" is true division, so a/2 yields floating-point values.
print("a + 2 : Adding 2 to each element :- ","\n",a+2, "\n")
print("a - 2 : Subtracting 2 from each element :- ","\n",a-2, "\n")
print("a / 2 : Dividing 2 from each element :- ","\n",a/2, "\n")
print("a * 2 : Multiplying 2 from each element :- ","\n",a*2, "\n")
# Array-based operations: elements at the same position are combined.
# "*" is element-wise multiplication, not matrix multiplication.
print("a + b : Adding each element of array 'a' to element of 'b' :- ","\n",a+b, "\n")
print("a - b : Subtracting each element of array 'a' to element of 'b' :- ","\n",a-b, "\n")
print("a * b : Multiplying each element of array 'a' to element of 'b' :- ","\n",a*b, "\n")
# Sequence-based operations
# Element-wise functions such as np.add are called ufuncs (universal functions).
# np.sum is a reduction: it combines elements along an axis instead of
# returning one result per element.
print(np.sum(a)) # Sum of every element in the array
print(np.sum(a,axis=0)) # Sum along axis 0 (down the rows): one total per column
print(np.sum(a,axis=1)) # Sum along axis 1 (across the columns): one total per row
Broadcasting :-
When we perform arithmetic operations on two arrays, the operations are performed element-wise. Normally, such operations require the two arrays to have the same shape.
Broadcasting is a technique used by Numpy to perform operations on arrays of different shapes.
Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes. Broadcasting provides a means of vectorizing array operations so that looping occurs in C instead of Python. It does this without making needless copies of data and usually leads to efficient algorithm implementations. [From Numpy.org]
# Baseline: both arrays have the same shape, so the addition is purely
# element-wise and no broadcasting is needed
a = np.array([1, 2, 3])
b = np.array([2, 2, 2])
print(a+b)
Example 1 :-
In the example below, we perform an operation between an array and a scalar. A scalar can be thought of as a zero-dimensional array. If you look at the result, it is identical to the example above (where b is a 1-D array of three elements, each equal to 2).
In this case, the smaller array (the scalar) is broadcast across the larger array, i.e. it behaves as if it were duplicated to match the dimensions and size of the larger array.
(Image Source : Numpy.org)
# The scalar 2 is broadcast against every element of `a`, giving the same
# result as adding an array of three 2s
a = np.array([1, 2, 3])
b = 2
print(a+b)
Example 2 :-
In this example, a 1-D array is added to a 2-D array.
(Image Source : Numpy.org)
# A 2-D array of shape (4, 3) ...
a = np.array([[0, 0, 0],
              [10, 10, 10],
              [20, 20, 20],
              [30, 30, 30]])
# ... and a genuinely 1-D array of shape (3,), as the accompanying text describes.
# Broadcasting pads its shape to (1, 3) and then repeats it for each of the 4 rows.
b = np.array([0, 1, 2])
print(a + b)
Broadcasting Rules :-
Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.
Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.
Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.
# Example 1 : a 2-D array combined with a 1-D array
a = np.ones((2, 4))
b = np.arange(4)

# Each heading is followed by its value on the next line.
print("Array a is :-", a, sep="\n")
print("\nArray b is :-", b, sep="\n")
print("\nShape of array a is :-", a.shape, sep="\n")  # => (2, 4)
print("\nShape of array b is :-", b.shape, sep="\n")  # => (4,)

# How the broadcasting rules play out for these shapes:
#   Rule 1 - b has fewer dimensions, so its shape is padded with a 1 on the left
#            a.shape => (2, 4)    b.shape => (1, 4)
#   Rule 2 - b has size 1 along axis 0, so it is stretched to match a
#            a.shape => (2, 4)    b.shape => (2, 4)
#   Rule 3 - every dimension now matches, so no error is raised
print("\n a+b is :-", a + b, sep="\n")

# np.broadcast_arrays returns both operands as they look after broadcasting
x, y = np.broadcast_arrays(a, b)
print("\nArray a after broadcasting is (before addition) :-", x, sep="\n")
print("\nArray b after broadcasting is (before addition) :-", y, sep="\n")
# Example 2 : a column (4, 1) combined with a 1-D array (4,)
a = np.arange(4).reshape(4, 1)
b = np.arange(4)

# Each heading is followed by its value on the next line.
print("Array a is :-", a, sep="\n")
print("\nArray b is :-", b, sep="\n")
print("\nShape of array a is :-", a.shape, sep="\n")  # => (4, 1)
print("\nShape of array b is :-", b.shape, sep="\n")  # => (4,)

# How the broadcasting rules play out for these shapes:
#   Rule 1 - b has fewer dimensions, so its shape is padded with a 1 on the left
#            a.shape => (4, 1)    b.shape => (1, 4)
#   Rule 2 - each array has size 1 where the other has size 4, so BOTH are
#            stretched: a along axis 1, b along axis 0
#            a.shape => (4, 4)    b.shape => (4, 4)
#   Rule 3 - every dimension now matches, so no error is raised
print("\n a+b is :-", a + b, sep="\n")

# np.broadcast_arrays returns both operands as they look after broadcasting
x, y = np.broadcast_arrays(a, b)
print("\nArray a after broadcasting is (before addition) :-", x, sep="\n")
print("\nArray b after broadcasting is (before addition) :-", y, sep="\n")
# Example 3 : shapes that cannot be broadcast together
a = np.ones((4, 3))
b = np.arange(4)
print("Array a is :-")
print(a)
print("\nArray b is :-")
print(b)
print("\nShape of array a is :-")
print(a.shape)  # => (4, 3)
print("\nShape of array b is :-")
print(b.shape)  # => (4,)

# How the broadcasting rules play out for these shapes:
#   Rule 1 - b has fewer dimensions, so its shape is padded with a 1 on the left
#            a.shape => (4, 3)    b.shape => (1, 4)
#   Rule 2 - b has size 1 along axis 0, so it is stretched to match a
#            a.shape => (4, 3)    b.shape => (4, 4)
#   Rule 3 - the last dimension still disagrees (3 vs 4) and neither size is 1,
#            so the operation fails
print("\n a+b is :-")
# The addition raises ValueError; print the message instead of stopping the script.
try:
    print(a + b)
except ValueError as e:
    print(e)

# View broadcasted arrays - this fails for the same reason, so only the
# shape-mismatch error is caught and reported.
try:
    x, y = np.broadcast_arrays(a, b)
except ValueError as e:
    print(e)
else:
    print("\nArray a after broadcasting is (before addition) :-")
    print(x)
    print("\nArray b after broadcasting is (before addition) :-")
    print(y)
Shape Operations
reshape(array, newshape)
Takes an array as an argument with newshape (integer or tuple). The newshape should be compatible with existing shape.
ravel(array)
Takes an array as an argument and returns a flattened contiguous array (1-D).
# Build a 2x3 array holding 0..5 ...
a = np.arange(6).reshape((2, 3))
print(a)
# ... and flatten that same array back to 1-D. np.ravel returns the elements
# in row order as a contiguous 1-D array.
b = np.ravel(a)
print(b)