/PaddleRec/youtube_dnn/train.py

https://github.com/PaddlePaddle/models · Python · 78 lines · 65 code · 10 blank · 3 comment · 7 complexity · f44a726c46206477d2ccef83febaa79f MD5 · raw file

  1. import numpy as np
  2. import pandas as pd
  3. import os
  4. import random
  5. import paddle.fluid as fluid
  6. from youtubednn import YoutubeDNN
  7. import paddle
  8. import args
  9. import logging
  10. import time
  11. logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
  12. logger = logging.getLogger("fluid")
  13. logger.setLevel(logging.INFO)
  14. def train(args):
  15. youtube_model = YoutubeDNN()
  16. inputs = youtube_model.input_data(args.watch_vec_size, args.search_vec_size, args.other_feat_size)
  17. loss, acc, l3 = youtube_model.net(inputs, args.output_size, layers=[128, 64, 32])
  18. sgd = fluid.optimizer.SGD(learning_rate=args.base_lr)
  19. sgd.minimize(loss)
  20. place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
  21. exe = fluid.Executor(place)
  22. exe.run(fluid.default_startup_program())
  23. # Build a random data set.
  24. sample_size = 100
  25. watch_vecs = []
  26. search_vecs = []
  27. other_feats = []
  28. labels = []
  29. for i in range(sample_size):
  30. watch_vec = np.random.rand(args.batch_size, args.watch_vec_size)
  31. search_vec = np.random.rand(args.batch_size, args.search_vec_size)
  32. other_feat = np.random.rand(args.batch_size, args.other_feat_size)
  33. watch_vecs.append(watch_vec)
  34. search_vecs.append(search_vec)
  35. other_feats.append(other_feat)
  36. label = np.random.randint(args.output_size, size=(args.batch_size, 1))
  37. labels.append(label)
  38. for epoch in range(args.epochs):
  39. for i in range(sample_size):
  40. begin = time.time()
  41. loss_data, acc_val = exe.run(fluid.default_main_program(),
  42. feed={
  43. "watch_vec": watch_vecs[i].astype('float32'),
  44. "search_vec": search_vecs[i].astype('float32'),
  45. "other_feat": other_feats[i].astype('float32'),
  46. "label": np.array(labels[i]).reshape(args.batch_size, 1)
  47. },
  48. return_numpy=True,
  49. fetch_list=[loss.name, acc.name])
  50. end = time.time()
  51. logger.info("epoch_id: {}, batch_time: {:.5f}s, loss: {:.5f}, acc: {:.5f}".format(
  52. epoch, end-begin, float(np.array(loss_data)), np.array(acc_val)[0]))
  53. #save model
  54. model_dir = os.path.join(args.model_dir, 'epoch_' + str(epoch + 1), "checkpoint")
  55. feed_var_names = ["watch_vec", "search_vec", "other_feat"]
  56. fetch_vars = [l3]
  57. fluid.io.save_inference_model(model_dir, feed_var_names, fetch_vars, exe)
  58. #save all video vector
  59. video_array = np.array(fluid.global_scope().find_var('l4_weight').get_tensor())
  60. video_vec = pd.DataFrame(video_array)
  61. video_vec.to_csv(args.video_vec_path, mode="a", index=False, header=0)
  62. if __name__ == "__main__":
  63. args = args.parse_args()
  64. logger.info("use_gpu: {}, batch_size: {}, epochs: {}, watch_vec_size: {}, search_vec_size: {}, other_feat_size: {}, output_size: {}, model_dir: {}, test_epoch: {}, base_lr: {}, video_vec_path: {}".format(
  65. args.use_gpu, args.batch_size, args.epochs, args.watch_vec_size, args.search_vec_size, args.other_feat_size, args.output_size, args.model_dir, args.test_epoch, args.base_lr, args.video_vec_path))
  66. if(os.path.exists(args.video_vec_path)):
  67. os.system("rm " + args.video_vec_path)
  68. train(args)