#blstm
#lstm
#tensorflow
#MNIST
#batch size
#unstack
#tf.unstack
#RNN
#Perceptron
#MLP
#Activation Function
#Logistic Regression
#attention
#attention is all you need
#paper
#review
#Optimizer
#Gradient Descent
#Regularization
#Dropout
#dataset
#data
#dataset splitting
#train, validation, test set
#Backpropagation
#Vanishing Gradient
#LSTM