#blstm
   #lstm
   #tensorflow
   #MNIST
   #batch size
   #unstack
   #tf.unstack
   #RNN
   #Perceptron
   #MLP
   #Activation Function
   #Logistic Regression
      #attention
   #attention is all you need
   #paper
   #review
   #Optimizer
   #Gradient Descent
   #Regularization
   #Dropout
   #dataset
   #data
   #dataset splitting
   #train, validation, test set
   #Backpropagation
   #Vanishing Gradient
   #LSTM