Commit dc9b5633 authored by Brian McMahan

updating notebooks to include the OMP_NUM_THREADS environment variable; DRAMATIC SPEED UP

parent 7cfbf065
%% Cell type:code id: tags:
``` python
import os
os.environ['OMP_NUM_THREADS'] = '4'  # set before importing numpy/torch so OpenMP picks it up

from argparse import Namespace
from annoy import AnnoyIndex
import numpy as np
import torch
from tqdm import tqdm_notebook
```
%% Cell type:markdown id: tags:
These pre-trained word embeddings come from the [GloVe project](https://nlp.stanford.edu/projects/glove/). For more details about how the embeddings were generated, see [this paper](https://nlp.stanford.edu/pubs/glove.pdf).
%% Cell type:code id: tags:
``` python
args = Namespace(
    glove_filename='../data/glove.6B.100d.txt'
)
```
%% Cell type:code id: tags:
``` python
def load_word_vectors(filename):
    """
    A helper function to load word vectors from a file.
    """
    word_to_index = {}
    word_vectors = []
    with open(filename) as fp:
        for line in tqdm_notebook(fp.readlines(), leave=False):
            # each line is a word followed by its vector components, space-separated
            line = line.split(" ")
            word = line[0]
            word_to_index[word] = len(word_to_index)
            vec = np.array([float(x) for x in line[1:]])
            word_vectors.append(vec)
    return word_to_index, word_vectors
```
%% Cell type:code id: tags:
``` python
class PreTrainedEmbeddings(object):
    """
    A helper class to use standalone pre-trained embeddings
    """
    def __init__(self, glove_filename):
        self.word_to_index, self.word_vectors = load_word_vectors(glove_filename)
        self.word_vector_size = len(self.word_vectors[0])
        self.index_to_word = {v: k for k, v in self.word_to_index.items()}
        self.index = AnnoyIndex(self.word_vector_size, metric='euclidean')
        print('Building Index')
        for _, i in tqdm_notebook(self.word_to_index.items(), leave=False):
            self.index.add_item(i, self.word_vectors[i])
        self.index.build(50)  # build the approximate-nearest-neighbor index with 50 trees
        print('Finished!')

    def get_embedding(self, word):
        return self.word_vectors[self.word_to_index[word]]

    def closest(self, word, n=1):
        """
        Find the top-n closest words (in the embedding space) to a given word.
        """
        vector = self.get_embedding(word)
        nn_indices = self.index.get_nns_by_vector(vector, n)
        return [self.index_to_word[neighbor] for neighbor in nn_indices]

    def closest_v(self, vector, n=1):
        nn_indices = self.index.get_nns_by_vector(vector, n)
        return [self.index_to_word[neighbor] for neighbor in nn_indices]

    def sim(self, w1, w2):
        """
        Compute the similarity between two words as the dot product of
        their embeddings. The higher the score, the more similar the words.
        """
        return np.dot(self.get_embedding(w1), self.get_embedding(w2))
```
%% Cell type:code id: tags:
``` python
glove = PreTrainedEmbeddings(args.glove_filename)
```
%% Output
Building Index
Finished!
%% Cell type:code id: tags:
``` python
glove.closest('apple', n=5)
```
%% Output
['apple', 'microsoft', 'dell', 'pc', 'compaq']
%% Cell type:code id: tags:
``` python
glove.closest('plane', n=5)
```
%% Output
['plane', 'airplane', 'jet', 'flight', 'crashed']
%% Cell type:code id: tags:
``` python
glove.sim('beer', 'wine'), glove.sim('beer', 'gasoline')
```
%% Output
(26.873448266652, 16.501491855324)
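%% Cell type:markdown id: tags:
The scores above are raw dot products, so they grow with the magnitudes of the vectors. A magnitude-invariant alternative is cosine similarity; here is a minimal sketch (the `cosine_sim` helper is ours, not part of the class above):
%% Cell type:code id: tags:
``` python
def cosine_sim(emb, w1, w2):
    v1, v2 = emb.get_embedding(w1), emb.get_embedding(w2)
    # normalize by the vector magnitudes to get a score in [-1, 1]
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))

cosine_sim(glove, 'beer', 'wine'), cosine_sim(glove, 'beer', 'gasoline')
```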
%% Cell type:markdown id: tags:
**A study of lexical relationships uncovered by word embeddings**
Traditionally, many of these relationships were hand-coded. See, for example, [the WordNet project](https://wordnet.princeton.edu/).
%% Cell type:code id: tags:
``` python
def SAT_analogy(w1, w2, w3):
    '''
    Solves problems of the type:
    w1 : w2 :: w3 : __
    '''
    closest_words = []
    try:
        w1v = glove.get_embedding(w1)
        w2v = glove.get_embedding(w2)
        w3v = glove.get_embedding(w3)
        # apply the analogy offset: w4 = w3 + (w2 - w1)
        w4v = w3v + (w2v - w1v)
        closest_words = glove.closest_v(w4v, n=5)
        closest_words = [w for w in closest_words if w not in [w1, w2, w3]]
    except KeyError:  # one of the words is out of vocabulary
        pass
    if len(closest_words) == 0:
        print(':-(')
    else:
        the_closest_word = closest_words[0]
        print('{} : {} :: {} : {}'.format(w1, w2, w3, the_closest_word))
```
%% Cell type:markdown id: tags:
**Pronouns**
%% Cell type:code id: tags:
``` python
SAT_analogy('man', 'he', 'woman')
```
%% Output
man : he :: woman : she
%% Cell type:markdown id: tags:
**Verb-Noun relationships**
%% Cell type:code id: tags:
``` python
SAT_analogy('fly', 'plane', 'sail')
```
%% Output
fly : plane :: sail : ship
%% Cell type:markdown id: tags:
**Noun-Noun relationships**
%% Cell type:code id: tags:
``` python
SAT_analogy('cat', 'kitten', 'dog')
```
%% Output
cat : kitten :: dog : pug
%% Cell type:code id: tags:
``` python
SAT_analogy('human', 'baby', 'dog')
```
%% Output
human : baby :: dog : puppy
%% Cell type:code id: tags:
``` python
SAT_analogy('human', 'babies', 'dog')
```
%% Output
human : babies :: dog : puppies
%% Cell type:markdown id: tags:
**Hypernymy**
%% Cell type:code id: tags:
``` python
SAT_analogy('blue', 'color', 'dog')
```
%% Output
blue : color :: dog : animal
%% Cell type:markdown id: tags:
**Meronymy**
%% Cell type:code id: tags:
``` python
SAT_analogy('leg', 'legs', 'hand')
```
%% Output
leg : legs :: hand : hands
%% Cell type:markdown id: tags:
**Troponymy**
%% Cell type:code id: tags:
``` python
SAT_analogy('talk', 'communicate', 'read')
```
%% Output
talk : communicate :: read : correctly
%% Cell type:markdown id: tags:
**Metonymy**
%% Cell type:code id: tags:
``` python
SAT_analogy('blue', 'democrat', 'red')
```
%% Output
blue : democrat :: red : republican
%% Cell type:markdown id: tags:
**Misc**
%% Cell type:code id: tags:
``` python
SAT_analogy('man', 'doctor', 'woman')
```
%% Output
man : doctor :: woman : nurse
%% Cell type:code id: tags:
``` python
SAT_analogy('man', 'leader', 'woman')
```
%% Output
man : leader :: woman : opposition
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
......
%% Cell type:markdown id: tags:
# PyTorch Basics
%% Cell type:code id: tags:
``` python
import os
os.environ['OMP_NUM_THREADS'] = '4'
import torch
import numpy as np
torch.manual_seed(1234)
```
%% Output
<torch._C.Generator at 0x7f0bc00aa430>
%% Cell type:markdown id: tags:
## Tensors
%% Cell type:markdown id: tags:
* A scalar is a single number.
* A vector is an array of numbers.
* A matrix is a 2-D array of numbers.
* Tensors are N-D arrays of numbers.
%% Cell type:markdown id: tags:
#### Creating Tensors
%% Cell type:markdown id: tags:
You can create tensors by specifying the shape as arguments. Here is a tensor with 2 rows and 3 columns; note that its values are whatever happened to be in the uninitialized memory.
%% Cell type:code id: tags:
``` python
def describe(x):
    print("Type: {}".format(x.type()))
    print("Shape/size: {}".format(x.shape))
    print("Values: \n{}".format(x))
```
%% Cell type:code id: tags:
``` python
describe(torch.Tensor(2, 3))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 4.0624e+05, 4.5696e-41, 7.3640e-32],
[ 3.0700e-41, 4.4842e-44, 0.0000e+00]])
%% Cell type:code id: tags:
``` python
describe(torch.randn(2, 3))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0.0461, 0.4024, -1.0115],
[ 0.2167, -0.6123, 0.5036]])
%% Cell type:markdown id: tags:
It's common in prototyping to create a tensor with random numbers of a specific shape.
%% Cell type:code id: tags:
``` python
x = torch.rand(2, 3)
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0.7749, 0.8208, 0.2793],
[ 0.6817, 0.2837, 0.6567]])
%% Cell type:markdown id: tags:
You can also initialize tensors of ones or zeros.
%% Cell type:code id: tags:
``` python
describe(torch.zeros(2, 3))
x = torch.ones(2, 3)
describe(x)
x.fill_(5)
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0., 0., 0.],
[ 0., 0., 0.]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 1., 1., 1.],
[ 1., 1., 1.]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 5., 5., 5.],
[ 5., 5., 5.]])
%% Cell type:markdown id: tags:
Tensors can be initialized and then filled in place.
Note: operations that end in an underscore (`_`) are in-place operations.
%% Cell type:code id: tags:
``` python
x = torch.Tensor(3,4).fill_(5)
print(x.type())
print(x.shape)
print(x)
```
%% Output
torch.FloatTensor
torch.Size([3, 4])
tensor([[ 5., 5., 5., 5.],
[ 5., 5., 5., 5.],
[ 5., 5., 5., 5.]])
%% Cell type:markdown id: tags:
Tensors can be initialized from a list of lists
%% Cell type:code id: tags:
``` python
x = torch.Tensor([[1, 2],
                  [2, 4]])
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values:
tensor([[ 1., 2.],
[ 2., 4.]])
%% Cell type:markdown id: tags:
Tensors can be initialized from numpy matrices
%% Cell type:code id: tags:
``` python
npy = np.random.rand(2, 3)
describe(torch.from_numpy(npy))
print(npy.dtype)
```
%% Output
Type: torch.DoubleTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0.6607, 0.2072, 0.1046],
[ 0.8505, 0.8420, 0.9717]], dtype=torch.float64)
float64
%% Cell type:markdown id: tags:
#### Tensor Types
%% Cell type:markdown id: tags:
The FloatTensor is the default tensor type that we have been creating all along.
%% Cell type:code id: tags:
``` python
import torch
x = torch.arange(6).view(2, 3)
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0., 1., 2.],
[ 3., 4., 5.]])
%% Cell type:code id: tags:
``` python
x = torch.FloatTensor([[1, 2, 3],
                       [4, 5, 6]])
describe(x)

x = x.long()
describe(x)

x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]], dtype=torch.int64)
describe(x)
x = x.float()
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 1., 2., 3.],
[ 4., 5., 6.]])
Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 1, 2, 3],
[ 4, 5, 6]])
Type: torch.LongTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 1, 2, 3],
[ 4, 5, 6]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 1., 2., 3.],
[ 4., 5., 6.]])
%% Cell type:code id: tags:
``` python
x = torch.randn(2, 3)
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 1.5385, -0.9757, 1.5769],
[ 0.3840, -0.6039, -0.5240]])
%% Cell type:code id: tags:
``` python
describe(torch.add(x, x))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 3.0771, -1.9515, 3.1539],
[ 0.7680, -1.2077, -1.0479]])
%% Cell type:code id: tags:
``` python
describe(x + x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 3.0771, -1.9515, 3.1539],
[ 0.7680, -1.2077, -1.0479]])
%% Cell type:code id: tags:
``` python
x = torch.arange(6)
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([6])
Values:
tensor([ 0., 1., 2., 3., 4., 5.])
%% Cell type:code id: tags:
``` python
x = x.view(2, 3)
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0., 1., 2.],
[ 3., 4., 5.]])
%% Cell type:code id: tags:
``` python
describe(torch.sum(x, dim=0))
describe(torch.sum(x, dim=1))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([3])
Values:
tensor([ 3., 5., 7.])
Type: torch.FloatTensor
Shape/size: torch.Size([2])
Values:
tensor([ 3., 12.])
%% Cell type:code id: tags:
``` python
describe(torch.transpose(x, 0, 1))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([3, 2])
Values:
tensor([[ 0., 3.],
[ 1., 4.],
[ 2., 5.]])
%% Cell type:code id: tags:
``` python
import torch
x = torch.arange(6).view(2, 3)
describe(x)
describe(x[:1, :2])
describe(x[0, 1])
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0., 1., 2.],
[ 3., 4., 5.]])
Type: torch.FloatTensor
Shape/size: torch.Size([1, 2])
Values:
tensor([[ 0., 1.]])
Type: torch.FloatTensor
Shape/size: torch.Size([])
Values:
1.0
%% Cell type:code id: tags:
``` python
indices = torch.LongTensor([0, 2])
describe(torch.index_select(x, dim=1, index=indices))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values:
tensor([[ 0., 2.],
[ 3., 5.]])
%% Cell type:code id: tags:
``` python
indices = torch.LongTensor([0, 0])
describe(torch.index_select(x, dim=0, index=indices))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0., 1., 2.],
[ 0., 1., 2.]])
%% Cell type:code id: tags:
``` python
row_indices = torch.arange(2).long()
col_indices = torch.LongTensor([0, 1])
describe(x[row_indices, col_indices])
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2])
Values:
tensor([ 0., 4.])
%% Cell type:markdown id: tags:
Long Tensors are used for indexing operations and mirror the `int64` numpy type
%% Cell type:code id: tags:
``` python
x = torch.LongTensor([[1, 2, 3],
                      [4, 5, 6],
                      [7, 8, 9]])
describe(x)
print(x.dtype)
print(x.numpy().dtype)
```
%% Output
Type: torch.LongTensor
Shape/size: torch.Size([3, 3])
Values:
tensor([[ 1, 2, 3],
[ 4, 5, 6],
[ 7, 8, 9]])
torch.int64
int64
%% Cell type:markdown id: tags:
You can convert a FloatTensor to a LongTensor
%% Cell type:code id: tags:
``` python
x = torch.FloatTensor([[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]])
x = x.long()
describe(x)
```
%% Output
Type: torch.LongTensor
Shape/size: torch.Size([3, 3])
Values:
tensor([[ 1, 2, 3],
[ 4, 5, 6],
[ 7, 8, 9]])
%% Cell type:markdown id: tags:
### Special Tensor initializations
%% Cell type:markdown id: tags:
We can create a vector of incremental numbers
%% Cell type:code id: tags:
``` python
x = torch.arange(0, 10)
print(x)
```
%% Output
tensor([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
%% Cell type:markdown id: tags:
Sometimes it's useful to have an integer-based arange for indexing
%% Cell type:code id: tags:
``` python
x = torch.arange(0, 10).long()
print(x)
```
%% Output
tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
%% Cell type:markdown id: tags:
## Operations
Using tensors to do linear algebra is a foundation of modern deep learning practice.
%% Cell type:markdown id: tags:
Reshaping lets you rearrange a tensor's numbers into a different shape while preserving their order. In PyTorch, reshaping is done with `view`.
%% Cell type:code id: tags:
``` python
x = torch.arange(0, 20)
print(x.view(1, 20))
print(x.view(2, 10))
print(x.view(4, 5))
print(x.view(5, 4))
print(x.view(10, 2))
print(x.view(20, 1))
```
%% Output
tensor([[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.,
10., 11., 12., 13., 14., 15., 16., 17., 18., 19.]])
tensor([[ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
[ 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.]])
tensor([[ 0., 1., 2., 3., 4.],
[ 5., 6., 7., 8., 9.],
[ 10., 11., 12., 13., 14.],
[ 15., 16., 17., 18., 19.]])
tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.],
[ 12., 13., 14., 15.],
[ 16., 17., 18., 19.]])
tensor([[ 0., 1.],
[ 2., 3.],
[ 4., 5.],
[ 6., 7.],
[ 8., 9.],
[ 10., 11.],
[ 12., 13.],
[ 14., 15.],
[ 16., 17.],
[ 18., 19.]])
tensor([[ 0.],
[ 1.],
[ 2.],
[ 3.],
[ 4.],
[ 5.],
[ 6.],
[ 7.],
[ 8.],
[ 9.],
[ 10.],
[ 11.],
[ 12.],
[ 13.],
[ 14.],
[ 15.],
[ 16.],
[ 17.],
[ 18.],
[ 19.]])
%% Cell type:markdown id: tags:
We can use `view` to add size-1 dimensions, which is useful for combining with other tensors: in elementwise operations, PyTorch automatically expands size-1 dimensions to match the other tensor's shape. This is called broadcasting.
%% Cell type:code id: tags:
``` python
x = torch.arange(12).view(3, 4)
y = torch.arange(4).view(1, 4)
z = torch.arange(3).view(3, 1)
print(x)
print(y)
print(z)
print(x + y)
print(x + z)
```
%% Output
tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
tensor([[ 0., 1., 2., 3.]])
tensor([[ 0.],
[ 1.],
[ 2.]])
tensor([[ 0., 2., 4., 6.],
[ 4., 6., 8., 10.],
[ 8., 10., 12., 14.]])
tensor([[ 0., 1., 2., 3.],
[ 5., 6., 7., 8.],
[ 10., 11., 12., 13.]])
%% Cell type:markdown id: tags:
`unsqueeze` and `squeeze` add and remove size-1 dimensions.
%% Cell type:code id: tags:
``` python
x = torch.arange(12).view(3, 4)
print(x.shape)
x = x.unsqueeze(dim=1)
print(x.shape)
x = x.squeeze()
print(x.shape)
```
%% Output
torch.Size([3, 4])
torch.Size([3, 1, 4])
torch.Size([3, 4])
%% Cell type:markdown id: tags:
All of the standard mathematical operations apply (such as `add` below).
%% Cell type:code id: tags:
``` python
x = torch.rand(3,4)
print("x: \n", x)
print("--")
print("torch.add(x, x): \n", torch.add(x, x))
print("--")
print("x+x: \n", x + x)
```
%% Output
x:
tensor([[ 0.6662, 0.3343, 0.7893, 0.3216],
[ 0.5247, 0.6688, 0.8436, 0.4265],
[ 0.9561, 0.0770, 0.4108, 0.0014]])
--
torch.add(x, x):
tensor([[ 1.3324, 0.6686, 1.5786, 0.6433],
[ 1.0494, 1.3377, 1.6872, 0.8530],
[ 1.9123, 0.1540, 0.8216, 0.0028]])
--
x+x:
tensor([[ 1.3324, 0.6686, 1.5786, 0.6433],
[ 1.0494, 1.3377, 1.6872, 0.8530],
[ 1.9123, 0.1540, 0.8216, 0.0028]])
%% Cell type:markdown id: tags:
The convention of `_` indicating in-place operations continues:
%% Cell type:code id: tags:
``` python
x = torch.arange(12).reshape(3, 4)
print(x)
print(x.add_(x))
```
%% Output
tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
tensor([[ 0., 2., 4., 6.],
[ 8., 10., 12., 14.],
[ 16., 18., 20., 22.]])
%% Cell type:markdown id: tags:
There are many operations that reduce a dimension, such as `sum`. The `dim` argument names the dimension that gets collapsed: `dim=0` collapses the rows (giving one sum per column), and `dim=1` collapses the columns (giving one sum per row):
%% Cell type:code id: tags:
``` python
x = torch.arange(12).reshape(3, 4)
print("x: \n", x)
print("---")
print("Summing across rows (dim=0): \n", x.sum(dim=0))
print("---")
print("Summing across columns (dim=1): \n", x.sum(dim=1))
```
%% Output
x:
tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
---
Summing across rows (dim=0):
tensor([ 12., 15., 18., 21.])
---
Summing across columns (dim=1):
tensor([ 6., 22., 38.])
%% Cell type:markdown id: tags:
#### Indexing, Slicing, Joining and Mutating
%% Cell type:code id: tags:
``` python
x = torch.arange(6).view(2, 3)
print("x: \n", x)
print("---")
print("x[:2, :2]: \n", x[:2, :2])
print("---")
print("x[0][1]: \n", x[0][1])
print("---")
print("Setting [0][1] to be 8")
x[0][1] = 8
print(x)
```
%% Output
x:
tensor([[ 0., 1., 2.],
[ 3., 4., 5.]])
---
x[:2, :2]:
tensor([[ 0., 1.],
[ 3., 4.]])
---
x[0][1]:
tensor(1.)
---
Setting [0][1] to be 8
tensor([[ 0., 8., 2.],
[ 3., 4., 5.]])
%% Cell type:markdown id: tags:
We can select a subset of a tensor using `index_select`:
%% Cell type:code id: tags:
``` python
x = torch.arange(9).view(3,3)
print(x)
print("---")
indices = torch.LongTensor([0, 2])
print(torch.index_select(x, dim=0, index=indices))
print("---")
indices = torch.LongTensor([0, 2])
print(torch.index_select(x, dim=1, index=indices))
```
%% Output
tensor([[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.]])
---
tensor([[ 0., 1., 2.],
[ 6., 7., 8.]])
---
tensor([[ 0., 2.],
[ 3., 5.],
[ 6., 8.]])
%% Cell type:markdown id: tags:
We can also use numpy-style advanced indexing:
%% Cell type:code id: tags:
``` python
x = torch.arange(9).view(3,3)
indices = torch.LongTensor([0, 2])
print(x[indices])
print("---")
print(x[indices, :])
print("---")
print(x[:, indices])
```
%% Output
tensor([[ 0., 1., 2.],
[ 6., 7., 8.]])
---
tensor([[ 0., 1., 2.],
[ 6., 7., 8.]])
---
tensor([[ 0., 2.],
[ 3., 5.],
[ 6., 8.]])
%% Cell type:markdown id: tags:
We can combine tensors by concatenating them along the rows (`dim=0`), along the columns (`dim=1`), or by stacking them on a new dimension:
%% Cell type:code id: tags:
``` python
x = torch.arange(6).view(2,3)
describe(x)
describe(torch.cat([x, x], dim=0))
describe(torch.cat([x, x], dim=1))
describe(torch.stack([x, x]))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0., 1., 2.],
[ 3., 4., 5.]])
Type: torch.FloatTensor
Shape/size: torch.Size([4, 3])
Values:
tensor([[ 0., 1., 2.],
[ 3., 4., 5.],
[ 0., 1., 2.],
[ 3., 4., 5.]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 6])
Values:
tensor([[ 0., 1., 2., 0., 1., 2.],
[ 3., 4., 5., 3., 4., 5.]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 2, 3])
Values:
tensor([[[ 0., 1., 2.],
[ 3., 4., 5.]],
[[ 0., 1., 2.],
[ 3., 4., 5.]]])
%% Cell type:markdown id: tags:
We can concatenate along the second dimension (`dim=1`), i.e. the columns:
%% Cell type:code id: tags:
``` python
x = torch.arange(9).view(3,3)
print(x)
print("---")
new_x = torch.cat([x, x, x], dim=1)
print(new_x.shape)
print(new_x)
```
%% Output
tensor([[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.]])
---
torch.Size([3, 9])
tensor([[ 0., 1., 2., 0., 1., 2., 0., 1., 2.],
[ 3., 4., 5., 3., 4., 5., 3., 4., 5.],
[ 6., 7., 8., 6., 7., 8., 6., 7., 8.]])
%% Cell type:markdown id: tags:
We can also concatenate on a new 0th dimension to "stack" the tensors:
%% Cell type:code id: tags:
``` python
x = torch.arange(9).view(3,3)
print(x)
print("---")
new_x = torch.stack([x, x, x])
print(new_x.shape)
print(new_x)
```
%% Output
tensor([[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.]])
---
torch.Size([3, 3, 3])
tensor([[[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.]],
[[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.]],
[[ 0., 1., 2.],
[ 3., 4., 5.],
[ 6., 7., 8.]]])
%% Cell type:markdown id: tags:
#### Linear Algebra Tensor Functions
%% Cell type:markdown id: tags:
Transposing allows you to swap two dimensions, so that rows become columns and vice versa.
%% Cell type:code id: tags:
``` python
x = torch.arange(0, 12).view(3,4)
print("x: \n", x)
print("---")
print("x.tranpose(1, 0): \n", x.transpose(1, 0))
```
%% Output
x:
tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
---
x.transpose(1, 0):
tensor([[ 0., 4., 8.],
[ 1., 5., 9.],
[ 2., 6., 10.],
[ 3., 7., 11.]])
%% Cell type:markdown id: tags:
A three-dimensional tensor might represent a batch of sequences, where each sequence item has a feature vector. It is common to switch the batch and sequence dimensions so that we can more easily index into the sequence in a sequence model.
Note: transpose will only let you swap 2 axes. `permute` (in the next cell) allows reordering any number of them.
%% Cell type:code id: tags:
``` python
batch_size = 3
seq_size = 4
feature_size = 5
x = torch.arange(batch_size * seq_size * feature_size).view(batch_size, seq_size, feature_size)
print("x.shape: \n", x.shape)
print("x: \n", x)
print("-----")
print("x.transpose(1, 0).shape: \n", x.transpose(1, 0).shape)
print("x.transpose(1, 0): \n", x.transpose(1, 0))
```
%% Output
x.shape:
torch.Size([3, 4, 5])
x:
tensor([[[ 0., 1., 2., 3., 4.],
[ 5., 6., 7., 8., 9.],
[ 10., 11., 12., 13., 14.],
[ 15., 16., 17., 18., 19.]],
[[ 20., 21., 22., 23., 24.],
[ 25., 26., 27., 28., 29.],
[ 30., 31., 32., 33., 34.],
[ 35., 36., 37., 38., 39.]],
[[ 40., 41., 42., 43., 44.],
[ 45., 46., 47., 48., 49.],
[ 50., 51., 52., 53., 54.],
[ 55., 56., 57., 58., 59.]]])
-----
x.transpose(1, 0).shape:
torch.Size([4, 3, 5])
x.transpose(1, 0):
tensor([[[ 0., 1., 2., 3., 4.],
[ 20., 21., 22., 23., 24.],
[ 40., 41., 42., 43., 44.]],
[[ 5., 6., 7., 8., 9.],
[ 25., 26., 27., 28., 29.],
[ 45., 46., 47., 48., 49.]],
[[ 10., 11., 12., 13., 14.],
[ 30., 31., 32., 33., 34.],
[ 50., 51., 52., 53., 54.]],
[[ 15., 16., 17., 18., 19.],
[ 35., 36., 37., 38., 39.],
[ 55., 56., 57., 58., 59.]]])
%% Cell type:markdown id: tags:
`permute` is a more general version of `transpose`:
%% Cell type:code id: tags:
``` python
batch_size = 3
seq_size = 4
feature_size = 5
x = torch.arange(batch_size * seq_size * feature_size).view(batch_size, seq_size, feature_size)
print("x.shape: \n", x.shape)
print("x: \n", x)
print("-----")
print("x.permute(1, 0, 2).shape: \n", x.permute(1, 0, 2).shape)
print("x.permute(1, 0, 2): \n", x.permute(1, 0, 2))
```
%% Output
x.shape:
torch.Size([3, 4, 5])
x:
tensor([[[ 0., 1., 2., 3., 4.],
[ 5., 6., 7., 8., 9.],
[ 10., 11., 12., 13., 14.],
[ 15., 16., 17., 18., 19.]],
[[ 20., 21., 22., 23., 24.],
[ 25., 26., 27., 28., 29.],
[ 30., 31., 32., 33., 34.],
[ 35., 36., 37., 38., 39.]],
[[ 40., 41., 42., 43., 44.],
[ 45., 46., 47., 48., 49.],
[ 50., 51., 52., 53., 54.],
[ 55., 56., 57., 58., 59.]]])
-----
x.permute(1, 0, 2).shape:
torch.Size([4, 3, 5])
x.permute(1, 0, 2):
tensor([[[ 0., 1., 2., 3., 4.],
[ 20., 21., 22., 23., 24.],
[ 40., 41., 42., 43., 44.]],
[[ 5., 6., 7., 8., 9.],
[ 25., 26., 27., 28., 29.],
[ 45., 46., 47., 48., 49.]],
[[ 10., 11., 12., 13., 14.],
[ 30., 31., 32., 33., 34.],
[ 50., 51., 52., 53., 54.]],
[[ 15., 16., 17., 18., 19.],
[ 35., 36., 37., 38., 39.],
[ 55., 56., 57., 58., 59.]]])
%% Cell type:markdown id: tags:
Matrix multiplication is `mm`:
%% Cell type:code id: tags:
``` python
x1 = torch.arange(6).view(2, 3).float()
describe(x1)
x2 = torch.ones(3, 2)
x2[:, 1] += 1
describe(x2)
describe(torch.mm(x1, x2))
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 3])
Values:
tensor([[ 0., 1., 2.],
[ 3., 4., 5.]])
Type: torch.FloatTensor
Shape/size: torch.Size([3, 2])
Values:
tensor([[ 1., 2.],
[ 1., 2.],
[ 1., 2.]])
Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values:
tensor([[ 3., 6.],
[ 12., 24.]])
%% Cell type:code id: tags:
``` python
x = torch.arange(0, 12).view(3,4).float()
print(x)
x2 = torch.ones(4, 2)
x2[:, 1] += 1
print(x2)
print(x.mm(x2))
```
%% Output
tensor([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
tensor([[ 1., 2.],
[ 1., 2.],
[ 1., 2.],
[ 1., 2.]])
tensor([[ 6., 12.],
[ 22., 44.],
[ 38., 76.]])
%% Cell type:markdown id: tags:
See the [PyTorch Math Operations Documentation](https://pytorch.org/docs/stable/torch.html#math-operations) for more!
%% Cell type:markdown id: tags:
## Computing Gradients
%% Cell type:code id: tags:
``` python
x = torch.tensor([[2,3]], requires_grad=True, dtype=torch.float32)
z = 3 * x
print(z)
```
%% Output
tensor([[ 6, 9]])
%% Cell type:markdown id: tags:
In this small snippet, you can see the gradient computations at work. We create a tensor and multiply it by 3. Then we create a scalar output using `sum()`; a scalar output is needed to act as the loss variable. Calling `backward` on the loss computes its rate of change with respect to the inputs. Since the scalar was created with `sum`, each position in `z` and `x` contributes to the loss independently.
The rate of change of the output with respect to each element of `x` is just the constant 3 that we multiplied `x` by.
%% Cell type:code id: tags:
``` python
x = torch.tensor([[2,3]], requires_grad=True, dtype=torch.float32)
print("x: \n", x)
print("---")
z = 3 * x
print("z = 3*x: \n", z)
print("---")
loss = z.sum()
print("loss = z.sum(): \n", loss)
print("---")
loss.backward()
print("after loss.backward(), x.grad: \n", x.grad)
```
%% Output
x:
tensor([[ 2, 3]])
---
z = 3*x:
tensor([[ 6, 9]])
---
loss = z.sum():
tensor(15)
---
after loss.backward(), x.grad:
tensor([[ 3, 3]])
%% Cell type:markdown id: tags:
### Example: Computing a conditional gradient
$$ \text{Find the gradient of } f(x) \text{ at } x=1 $$
$$ f(x)=\left\{
\begin{array}{ll}
\sin(x) & \text{if } x>0 \\
\cos(x) & \text{otherwise}
\end{array}
\right.$$
%% Cell type:code id: tags:
``` python
def f(x):
    # FILL THIS PART IN
    y = x
    return y
```
%% Cell type:code id: tags:
``` python
x = torch.tensor([1.0], requires_grad=True)
y = f(x)
y.backward()
print(x.grad)
```
%% Output
tensor([ 0.5403])
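%% Cell type:markdown id: tags:
One possible `f`, consistent with the gradient printed above ($\cos(1) \approx 0.5403$), branches on the tensor as a whole; this is only a sketch, and as we will see it breaks down for mixed-sign inputs:
%% Cell type:code id: tags:
``` python
def f(x):
    # branch on the tensor as a whole -- NOT elementwise
    if (x > 0).all():
        y = torch.sin(x)
    else:
        y = torch.cos(x)
    return y
```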
%% Cell type:markdown id: tags:
We could apply this to a larger vector too, but we need to make sure the output is a scalar:
%% Cell type:code id: tags:
``` python
x = torch.tensor([1.0, 0.5], requires_grad=True)
y = f(x)
try:
    y.backward()
    print(x.grad)
except RuntimeError as re:
    print("THIS ERROR WAS EXPECTED: ", re)
```
%% Output
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-33-48e74398a3f8> in <module>()
1 x = torch.tensor([1.0, 0.5], requires_grad=True)
2 y = f(x)
----> 3 y.backward()
4 print(x.grad)
~/anaconda3/envs/pytorch04/lib/python3.6/site-packages/torch/tensor.py in backward(self, gradient, retain_graph, create_graph)
91 products. Defaults to ``False``.
92 """
---> 93 torch.autograd.backward(self, gradient, retain_graph, create_graph)
94
95 def register_hook(self, hook):
~/anaconda3/envs/pytorch04/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables)
81 grad_tensors = list(grad_tensors)
82
---> 83 grad_tensors = _make_grads(tensors, grad_tensors)
84 if retain_graph is None:
85 retain_graph = create_graph
~/anaconda3/envs/pytorch04/lib/python3.6/site-packages/torch/autograd/__init__.py in _make_grads(outputs, grads)
25 if out.requires_grad:
26 if out.numel() != 1:
---> 27 raise RuntimeError("grad can be implicitly created only for scalar outputs")
28 new_grads.append(torch.ones_like(out))
29 else:
RuntimeError: grad can be implicitly created only for scalar outputs
%% Cell type:markdown id: tags:
Making the output a scalar:
%% Cell type:code id: tags:
``` python
x = torch.tensor([1.0, 0.5], requires_grad=True)
y = f(x)
y.sum().backward()
print(x.grad)
```
%% Output
tensor([ 0.5403, 0.8776])
%% Cell type:markdown id: tags:
But there is an issue: this isn't right for this edge case:
%% Cell type:code id: tags:
``` python
x = torch.tensor([1.0, -1], requires_grad=True)
y = f(x)
y.sum().backward()
print(x.grad)
```
%% Output
tensor([-0.8415, 0.8415])
%% Cell type:code id: tags:
``` python
x = torch.tensor([-0.5, -1], requires_grad=True)
y = f(x)
y.sum().backward()
print(x.grad)
```
%% Output
tensor([ 0.4794, 0.8415])
%% Cell type:markdown id: tags:
This is because we aren't doing the boolean check and the subsequent application of cos and sin on an elementwise basis. To solve this, it is common to use masking (a sketch follows the output of the next cell):
%% Cell type:code id: tags:
``` python
def f2(x):
    # WRITE SOLUTION HERE
    y = x
    return y

x = torch.tensor([1.0, -1], requires_grad=True)
y = f2(x)
y.sum().backward()
print(x.grad)
```
%% Output
tensor([ 0.5403, 0.8415])
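%% Cell type:markdown id: tags:
A masked implementation sketch for `f2`, consistent with the elementwise definition and with the gradient printed above:
%% Cell type:code id: tags:
``` python
def f2(x):
    # boolean mask selecting the sin branch, applied elementwise
    mask = (x > 0).float()
    y = mask * torch.sin(x) + (1 - mask) * torch.cos(x)
    return y
```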
%% Cell type:code id: tags:
``` python
def describe_grad(x):
    if x.grad is None:
        print("No gradient information")
    else:
        print("Gradient: \n{}".format(x.grad))
        print("Gradient Function: {}".format(x.grad_fn))
```
%% Cell type:code id: tags:
``` python
import torch
x = torch.ones(2, 2, requires_grad=True)
describe(x)
describe_grad(x)
y = (x + 2) * (x + 5) + 3
describe(y)
z = y.mean()
describe(z)
describe_grad(x)
z.backward(create_graph=True, retain_graph=True)
describe_grad(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values:
tensor([[ 1., 1.],
[ 1., 1.]])
No gradient information
Type: torch.FloatTensor
Shape/size: torch.Size([2, 2])
Values:
tensor([[ 21., 21.],
[ 21., 21.]])
Type: torch.FloatTensor
Shape/size: torch.Size([])
Values:
21.0
No gradient information
Gradient:
tensor([[ 2.2500, 2.2500],
[ 2.2500, 2.2500]])
Gradient Function: None
%% Cell type:markdown id: tags:
### Exploring gradients
Below is essentially a scratch pad :)
%% Cell type:code id: tags:
``` python
x = torch.ones(2, 2, requires_grad=True)
```
%% Cell type:code id: tags:
``` python
y = x + 2
```
%% Cell type:code id: tags:
``` python
y.grad_fn
```
%% Output
<AddBackward0 at 0x7f662cce4e10>
%% Cell type:code id: tags:
``` python
z = y * y * 3
out = z.mean()
```
%% Cell type:code id: tags:
``` python
out.backward()
```
%% Cell type:code id: tags:
``` python
torch.is_tensor(out.grad_fn)
```
%% Output
False
%% Cell type:code id: tags:
``` python
hasattr(out.grad_fn, 'variable')
```
%% Output
False
%% Cell type:code id: tags:
``` python
out.grad_fn.next_functions[0][0].next_functions[0][0].next_functions[0][0].next_functions[0][0].next_functions
```
%% Output
()
%% Cell type:markdown id: tags:
### CUDA Tensors
%% Cell type:markdown id: tags:
PyTorch operations can be used seamlessly on either the GPU or the CPU. There are a couple of basic operations for moving tensors between the two.
%% Cell type:code id: tags:
``` python
print(torch.cuda.is_available())
```
%% Output
True
%% Cell type:code id: tags:
``` python
x = torch.rand(3,3)
describe(x)
```
%% Output
Type: torch.FloatTensor
Shape/size: torch.Size([3, 3])
Values:
tensor([[ 0.7453, 0.8045, 0.2022],
[ 0.7781, 0.0699, 0.7157],
[ 0.5425, 0.7718, 0.0086]])
%% Cell type:code id: tags:
``` python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
```
%% Output
cuda
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
x = torch.rand(3, 3).to(device)
describe(x)
print(x.device)
```
%% Output
Type: torch.cuda.FloatTensor
Shape/size: torch.Size([3, 3])
Values:
tensor([[ 0.6902, 0.7547, 0.5336],
[ 0.1381, 0.0748, 0.5548],
[ 0.0975, 0.0272, 0.5688]], device='cuda:0')
cuda:0
%% Cell type:code id: tags:
``` python
cpu_device = torch.device("cpu")
```
%% Cell type:code id: tags:
``` python
y = torch.rand(3, 3)
x + y
```
%% Output
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-11-a34211793e6e> in <module>()
1 y = torch.rand(3, 3)
----> 2 x + y
RuntimeError: Expected object of type torch.cuda.FloatTensor but found type torch.FloatTensor for argument #3 'other'
%% Cell type:code id: tags:
``` python
y = y.to(cpu_device)
x = x.to(cpu_device)
x + y
```
%% Output
tensor([[ 0.7159, 1.0685, 1.3509],
[ 0.3912, 0.2838, 1.3202],
[ 0.2967, 0.0420, 0.6559]])
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
if torch.cuda.is_available():  # only if a GPU is available
    a = torch.rand(3, 3).to(device='cuda:0')  # CUDA tensor
    print(a)
    b = torch.rand(3, 3).cuda()
    print(b)
    print(a + b)
    a = a.cpu()  # error expected below: a is now on the CPU, b is still on the GPU
    print(a + b)
```
%% Output
tensor([[ 0.2388, 0.7313, 0.6012],
[ 0.3043, 0.2548, 0.6294],
[ 0.9665, 0.7399, 0.4517]], device='cuda:0')
tensor([[ 0.4757, 0.7842, 0.1525],
[ 0.6662, 0.3343, 0.7893],
[ 0.3216, 0.5247, 0.6688]], device='cuda:0')
tensor([[ 0.7145, 1.5155, 0.7537],
[ 0.9706, 0.5891, 1.4187],
[ 1.2882, 1.2647, 1.1206]], device='cuda:0')
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-6-0cfe18366dba> in <module>()
9
10 a = a.cpu() # Error expected
---> 11 print(a + b)
RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #3 'other'
%% Cell type:markdown id: tags:
### Exercises
Some of these exercises require operations not covered in the notebook. You will have to look at [the documentation](https://pytorch.org/docs/) (on purpose!)
(Answers are at the bottom)
%% Cell type:markdown id: tags:
#### Exercise 1
Create a 2D tensor and then add a dimension of size 1 inserted at the 0th axis.
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 2
Remove the extra dimension you just added to the previous tensor.
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 3
Create a random tensor of shape 5x3 in the interval [3, 7)
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 4
Create a tensor with values from a normal distribution (mean=0, std=1).
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 5
Retrieve the indices of all the non-zero elements in the tensor `torch.Tensor([1, 1, 1, 0, 1])`.
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 6
Create a random tensor of size (3,1) and then horizontally stack 4 copies together.
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 7
Return the batch matrix-matrix product of two 3-dimensional matrices (`a=torch.rand(3,4,5)`, `b=torch.rand(3,5,4)`).
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 8
Return the batch matrix-matrix product of a 3D matrix and a 2D matrix (a=torch.rand(3,4,5), b=torch.rand(5,4)).
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
Answers below
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
Answers still below... keep going!
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
#### Exercise 1
Create a 2D tensor and then add a dimension of size 1 inserted at the 0th axis.
%% Cell type:code id: tags:
``` python
a = torch.rand(3,3)
a = a.unsqueeze(0)
print(a)
print(a.shape)
```
%% Output
tensor([[[ 0.7077, 0.4189, 0.0655],
[ 0.8839, 0.8083, 0.7528],
[ 0.8988, 0.6839, 0.7658]]])
torch.Size([1, 3, 3])
%% Cell type:markdown id: tags:
#### Exercise 2
Remove the extra dimension you just added to the previous tensor.
%% Cell type:code id: tags:
``` python
a = a.squeeze(0)
print(a.shape)
```
%% Output
torch.Size([3, 3])
%% Cell type:markdown id: tags:
#### Exercise 3
Create a random tensor of shape 5x3 in the interval [3, 7)
%% Cell type:code id: tags:
``` python
3 + torch.rand(5, 3) * 4
```
%% Output
tensor([[ 6.6597, 4.5970, 3.4402],
[ 4.0164, 4.7330, 4.7802],
[ 4.9864, 6.1461, 5.6416],
[ 3.5212, 4.3992, 4.5295],
[ 6.2172, 4.2744, 4.1632]])
%% Cell type:markdown id: tags:
#### Exercise 4
Create a tensor with values from a normal distribution (mean=0, std=1).
%% Cell type:code id: tags:
``` python
a = torch.rand(3,3)
a.normal_(mean=0, std=1)
```
%% Output
tensor([[-0.2107, 1.1399, -2.5122],
[ 1.3823, 0.9847, 1.4719],
[ 0.3100, 1.5829, 0.2351]])
%% Cell type:markdown id: tags:
#### Exercise 5
Retrieve the indices of all the non-zero elements in the tensor `torch.Tensor([1, 1, 1, 0, 1])`.
%% Cell type:code id: tags:
``` python
a = torch.Tensor([1, 1, 1, 0, 1])
torch.nonzero(a)
```
%% Output
tensor([[ 0],
[ 1],
[ 2],
[ 4]])
%% Cell type:markdown id: tags:
#### Exercise 6
Create a random tensor of size (3,1) and then horizontally stack 4 copies together.
%% Cell type:code id: tags:
``` python
a = torch.rand(3,1)
a.expand(3,4)
```
%% Output
tensor([[ 0.7595, 0.7595, 0.7595, 0.7595],
[ 0.5311, 0.5311, 0.5311, 0.5311],
[ 0.6449, 0.6449, 0.6449, 0.6449]])
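%% Cell type:markdown id: tags:
Note that `expand` returns a view that repeats the size-1 dimension without copying memory. If you want an explicit copy instead, one option is concatenation:
%% Cell type:code id: tags:
``` python
# explicit-copy alternative to expand
torch.cat([a, a, a, a], dim=1)
```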
%% Cell type:markdown id: tags:
#### Exercise 7
Return the batch matrix-matrix product of two 3-dimensional matrices (`a=torch.rand(3,4,5)`, `b=torch.rand(3,5,4)`).
%% Cell type:code id: tags:
``` python
a = torch.rand(3,4,5)
b = torch.rand(3,5,4)
torch.bmm(a, b)
```
%% Output
tensor([[[ 1.7768, 1.5815, 1.7667, 0.9918],
[ 0.7049, 0.7050, 0.6055, 0.3455],
[ 1.5937, 1.3627, 1.6757, 1.0042],
[ 1.2478, 0.9978, 0.8067, 1.1299]],
[[ 1.4816, 1.4685, 1.7443, 1.6224],
[ 0.8311, 1.1861, 1.2165, 0.9788],
[ 1.3339, 1.3306, 1.7348, 1.3621],
[ 1.3177, 1.5979, 1.5706, 1.3298]],
[[ 1.8358, 0.8323, 1.2206, 1.3237],
[ 2.0028, 1.5150, 1.5610, 1.2854],
[ 2.2775, 1.6230, 1.9977, 1.8435],
[ 2.5799, 1.6463, 1.8448, 1.7839]]])
%% Cell type:markdown id: tags:
#### Exercise 8
Return the batch matrix-matrix product of a 3D matrix and a 2D matrix (a=torch.rand(3,4,5), b=torch.rand(5,4)).
%% Cell type:code id: tags:
``` python
a = torch.rand(3,4,5)
b = torch.rand(5,4)
torch.bmm(a, b.unsqueeze(0).expand(a.size(0), *b.size()))
```
%% Output
tensor([[[ 1.7612, 1.5625, 1.6402, 1.3563],
[ 1.8779, 1.5361, 1.5207, 1.1499],
[ 1.0956, 0.6416, 0.6545, 0.7713],
[ 1.7553, 1.0487, 1.0097, 0.9493]],
[[ 1.0501, 0.7754, 0.7437, 0.6454],
[ 1.4964, 1.3167, 1.3392, 1.0311],
[ 1.9306, 1.4569, 1.5652, 1.4622],
[ 2.1869, 1.7187, 1.5579, 1.1575]],
[[ 1.1794, 0.8222, 0.7826, 0.7259],
[ 1.3814, 0.9908, 0.9205, 0.7621],
[ 2.9413, 2.0567, 1.9665, 1.7345],
[ 2.1376, 1.4964, 1.4937, 1.4069]]])
%% Cell type:markdown id: tags:
### END
......
%% Cell type:markdown id: tags:
# Classify names with character n-grams
%% Cell type:code id: tags:
``` python
from argparse import Namespace
import os
os.environ['OMP_NUM_THREADS'] = '4'
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm_notebook
%matplotlib inline
plt.style.use('fivethirtyeight')
plt.rcParams['figure.figsize'] = (14, 6)
```
%% Cell type:markdown id: tags:
## Overview of Data/Task
- Data compiled by [Sean Robertson](https://github.com/spro)
- Predict nationality from names.
- The data consist of 20,074 names across 18 categories.
- Russian names are dominant (skewing the labels).
- We downsample Russian names to minimize the skew (a sketch of this kind of downsampling follows the data-loading cell below). Check out the RNN tutorial for a different approach to handling label bias.
```
2750 names_test.csv
10994 names_train.csv
```
%% Cell type:markdown id: tags:
### Args for this example
%% Cell type:code id: tags:
``` python
args = Namespace(
    surname_csv="../data/surnames.csv",
    model_filename="names_mlp_model.pth",
    cuda=False,
    num_epochs=100
)

# Check CUDA
if not torch.cuda.is_available():
    args.cuda = False
print("Using CUDA: {}".format(args.cuda))

args.device = torch.device("cuda" if args.cuda else "cpu")
args.device
```
%% Output
Using CUDA: False
device(type='cpu')
%% Cell type:markdown id: tags:
## Load names
%% Cell type:code id: tags:
``` python
name_data = pd.read_csv(args.surname_csv)
```
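%% Cell type:markdown id: tags:
A minimal sketch of the kind of per-class downsampling described in the overview above (the exact procedure used to build the CSVs is not shown here, and the cap value is hypothetical):
%% Cell type:code id: tags:
``` python
cap = 3000  # hypothetical per-nationality cap
balanced = (name_data.groupby('nationality', group_keys=False)
                     .apply(lambda g: g.sample(min(len(g), cap), random_state=0)))
```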
%% Cell type:markdown id: tags:
## Class Breakdown
%% Cell type:code id: tags:
``` python
sns.catplot(data=name_data, y='nationality',
            kind='count', height=5, aspect=3);
plt.title("Counts per Nationality in the Surnames Dataset");
```
%% Output