/PaddleRec/youtube_dnn/train.py
https://github.com/PaddlePaddle/models · Python · 78 lines · 65 code · 10 blank · 3 comment · 7 complexity · f44a726c46206477d2ccef83febaa79f MD5 · raw file
- import numpy as np
- import pandas as pd
- import os
- import random
- import paddle.fluid as fluid
- from youtubednn import YoutubeDNN
- import paddle
- import args
- import logging
- import time
# Console logger for training progress: timestamped INFO-level messages.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT)
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
def train(args):
    """Train the YouTubeDNN model on a synthetic random dataset.

    Builds the network, optimizes it with SGD for ``args.epochs`` epochs
    over ``sample_size`` pre-generated random batches, saves an inference
    model after every epoch, and finally appends the learned video
    embedding matrix (the last FC layer's weight) to ``args.video_vec_path``.

    Args:
        args: parsed command-line namespace; must provide watch_vec_size,
            search_vec_size, other_feat_size, output_size, batch_size,
            epochs, base_lr, use_gpu, model_dir and video_vec_path.
    """
    youtube_model = YoutubeDNN()
    inputs = youtube_model.input_data(args.watch_vec_size,
                                      args.search_vec_size,
                                      args.other_feat_size)
    loss, acc, l3 = youtube_model.net(inputs, args.output_size,
                                      layers=[128, 64, 32])

    sgd = fluid.optimizer.SGD(learning_rate=args.base_lr)
    sgd.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Build a random data set once, with dtypes fixed up front.  The
    # original code cast every batch with astype() inside the training
    # loop, repeating the same conversion sample_size * epochs times.
    sample_size = 100
    watch_vecs = [
        np.random.rand(args.batch_size, args.watch_vec_size).astype('float32')
        for _ in range(sample_size)
    ]
    search_vecs = [
        np.random.rand(args.batch_size, args.search_vec_size).astype('float32')
        for _ in range(sample_size)
    ]
    other_feats = [
        np.random.rand(args.batch_size, args.other_feat_size).astype('float32')
        for _ in range(sample_size)
    ]
    # fluid's cross-entropy expects int64 labels; np.random.randint's
    # default dtype is platform-dependent (int32 on Windows), so cast
    # explicitly.  Shape is already (batch_size, 1) — no reshape needed.
    labels = [
        np.random.randint(args.output_size,
                          size=(args.batch_size, 1)).astype('int64')
        for _ in range(sample_size)
    ]

    for epoch in range(args.epochs):
        for i in range(sample_size):
            begin = time.time()
            loss_data, acc_val = exe.run(
                fluid.default_main_program(),
                feed={
                    "watch_vec": watch_vecs[i],
                    "search_vec": search_vecs[i],
                    "other_feat": other_feats[i],
                    "label": labels[i],
                },
                return_numpy=True,
                fetch_list=[loss.name, acc.name])
            end = time.time()
            logger.info("epoch_id: {}, batch_time: {:.5f}s, loss: {:.5f}, acc: {:.5f}".format(
                epoch, end - begin, float(np.array(loss_data)), np.array(acc_val)[0]))

        # Save an inference model checkpoint for this epoch (feeds exclude
        # the label; fetch the user-vector layer l3 for serving).
        model_dir = os.path.join(args.model_dir, 'epoch_' + str(epoch + 1), "checkpoint")
        feed_var_names = ["watch_vec", "search_vec", "other_feat"]
        fetch_vars = [l3]
        fluid.io.save_inference_model(model_dir, feed_var_names, fetch_vars, exe)

    # Save all video vectors: the weight matrix of the final FC layer
    # ('l4_weight') is the per-video embedding table used for retrieval.
    video_array = np.array(fluid.global_scope().find_var('l4_weight').get_tensor())
    video_vec = pd.DataFrame(video_array)
    video_vec.to_csv(args.video_vec_path, mode="a", index=False, header=False)
if __name__ == "__main__":
    args = args.parse_args()
    logger.info("use_gpu: {}, batch_size: {}, epochs: {}, watch_vec_size: {}, search_vec_size: {}, other_feat_size: {}, output_size: {}, model_dir: {}, test_epoch: {}, base_lr: {}, video_vec_path: {}".format(
        args.use_gpu, args.batch_size, args.epochs, args.watch_vec_size, args.search_vec_size, args.other_feat_size, args.output_size, args.model_dir, args.test_epoch, args.base_lr, args.video_vec_path))

    # Remove any stale embedding file so train()'s to_csv(mode="a")
    # starts from a clean slate.  os.remove replaces the original
    # os.system("rm " + path), which was Unix-only and broke on paths
    # containing spaces or shell metacharacters.
    if os.path.exists(args.video_vec_path):
        os.remove(args.video_vec_path)
    train(args)