/ReverseTrainSecondLayer.py
from keras.layers import Input, Dense
import numpy as np
from keras.models import Model, load_model
from keras import optimizers, activations
from keras.callbacks import ModelCheckpoint
from keras.callbacks import CSVLogger
import glob

# Element-wise logistic sigmoid, used later to push the phenotype embeddings
# through the decoder weights manually with NumPy.
def sigmo(x):
    return 1 / (1 + np.exp(-x))
# Each .txt file holds the expression profile (genes x samples) of one
# phenotype; the files must have a .txt extension.
yykt = glob.glob("Path to the folder having all the files of phenotype-associated gene expression profiles/*.txt")
yykt = sorted(yykt)
count = 0
# Identity matrix: column "count" is the one-hot label of the count-th phenotype file.
b = np.identity(len(yykt), dtype=float)
for fl in yykt:
    if count == 0:
        UXU = np.loadtxt(fl)
        yead1e = UXU.shape
        Trg0t = np.repeat(b[:, count], repeats=yead1e[1], axis=0).reshape(len(yykt), yead1e[1])
    else:
        VVV = np.loadtxt(fl)
        yead1e = VVV.shape
        Trgft = np.repeat(b[:, count], repeats=yead1e[1], axis=0).reshape(len(yykt), yead1e[1])
        Trg0t = np.append(Trg0t, Trgft, axis=1)
        UXU = np.append(UXU, VVV, axis=1)
    count = count + 1
# Shuffle the pooled samples (columns) with one common permutation.
qfwqwcv = np.random.permutation(UXU.shape[1])
Trg0t = Trg0t[:, qfwqwcv]
UXU = UXU[:, qfwqwcv]
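# Sanity check (not in the original script): the pooled expression matrix and the
# one-hot phenotype matrix must describe the same samples (columns).
assert UXU.shape[1] == Trg0t.shape[1]
assert Trg0t.shape[0] == len(yykt)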
n_genes = 20848  # replace with the number of genes in the expression profile
# The trained autoencoder is needed, e.g. an output of Autoencoder3layer.py.
autoencoder = load_model('MicroarrayDeep512_512_512_AE20K.h5')
# Make the second encoder layer linear before it is reused below.
autoencoder.layers[2].activation = activations.linear
yrts = np.repeat([1], n_genes).reshape(1, n_genes)
# Rebuild the first two encoder layers as a stand-alone model; the layers are
# renamed so they do not clash with the loaded model's layer names.
input_SigF = Input(shape=(n_genes,))
laye1r = autoencoder.get_layer("dense_1")
laye1r.name = "D1"
L1 = laye1r(input_SigF)
laye2r = autoencoder.get_layer("dense_2")
laye2r.name = "D2"
L2 = laye2r(L1)
autoencodeL2r = Model(input_SigF, L2)
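# Optional check (not in the original script): the truncated encoder's output width
# is assumed to be 512 for MicroarrayDeep512_512_512_AE20K.h5, matching the com_dim
# value set below.
# assert autoencodeL2r.output_shape[-1] == 512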
# Project all pooled samples into the second-layer embedding space.
yytrk = autoencodeL2r.predict(np.transpose(UXU))
Etr = np.transpose(yytrk)
Dtr = np.transpose(Trg0t)
# Centre each sample's embedding by subtracting its mean activation.
ttxrmi = Etr.mean(axis=0)
Etr = Etr - ttxrmi[None, :]
Etr = np.transpose(Etr)
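# Shape check (not in the original script): after the transposes, Dtr and Etr are
# sample-major, with one row per pooled sample.
assert Dtr.shape[0] == Etr.shape[0] == UXU.shape[1]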
# When loading MicroarrayDeep1024_1024_1024_AE20K.h5 or MicroarrayDeep256_256_256_AE20K.h5,
# set com_dim = 1024 or com_dim = 256 respectively.
com_dim = 512
# Small network mapping a one-hot phenotype vector to the centred second-layer embedding.
input_SigS = Input(shape=(len(yykt),))
LL1 = Dense(com_dim, activation='sigmoid', name='X1')(input_SigS)
LLf = Dense(com_dim, activation='linear', name='X2')(LL1)
DeasMSDeepNN = Model(input_SigS, LLf)
admO = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=1e-6)
DeasMSDeepNN.compile(optimizer=admO, loss='mean_absolute_error')
new_filename1 = "L2ConvS" + str(com_dim) + ".csv"
csv_logger = CSVLogger(new_filename1, append=False, separator='\t')
DeasMSDeepNN.fit(Dtr, Etr, epochs=5000, batch_size=32, shuffle=True, callbacks=[csv_logger])
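# ModelCheckpoint is imported above but never used; if intermediate weights are
# wanted, a checkpoint callback could be passed to the fit call above, e.g.
# (sketch only; the file name "L2ConvS_best.h5" is a hypothetical choice):
# ckpt = ModelCheckpoint("L2ConvS_best.h5", monitor='loss', save_best_only=True)
# DeasMSDeepNN.fit(Dtr, Etr, epochs=5000, batch_size=32, shuffle=True,
#                  callbacks=[csv_logger, ckpt])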
zoz = DeasMSDeepNN.get_weights()
yy = autoencoder.get_weights()
# Match the variance of the next autoencoder layer's kernel (yy[4]) and bias (yy[5])
# to the variance of the phenotype network's output layer.
yy[4] = (np.var(zoz[2]) / np.var(yy[4])) * yy[4]
yy[5] = (np.var(zoz[3]) / np.var(yy[5])) * yy[5]
# Predict an embedding for each pure phenotype (the identity matrix b), then push it
# through the remaining autoencoder weights manually with NumPy.
yfecfa = DeasMSDeepNN.predict(b)
reswf = np.matmul(yfecfa, yy[4])
yfuj = reswf.shape
# Repeat the bias so that every phenotype row receives the same bias vector.
zzosk = np.tile(yy[5], yfuj[0]).reshape(yfuj[0], yfuj[1])
L3inS = reswf + zzosk
L3otS = sigmo(L3inS)
reswMidf = np.transpose(np.matmul(L3otS, yy[6]))
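# Equivalence note (not in the original script): the explicit bias tiling above is
# the same as NumPy broadcasting, so the decoder pass can be written in one line.
reswMidf_alt = np.transpose(np.matmul(sigmo(reswf + yy[5]), yy[6]))
assert np.allclose(reswMidf, reswMidf_alt)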
# Reference file listing the genes on which the autoencoder was trained;
# RefEntrezMicroarray.txt and RefEntrezRNAseq.txt correspond to microarray and
# RNA-seq data respectively.
ytre = np.genfromtxt('RefEntrezMicroarray.txt', dtype='str')
# Rank genes per phenotype by the absolute value of the reconstructed score.
yytrv = np.argsort(-np.absolute(reswMidf), axis=0)
entNe = ytre[yytrv]
np.savetxt("OrderDisease.txt", yykt, delimiter='\t', fmt='%s')
np.savetxt("DeepAE_DiseaseGene.txt", entNe, delimiter='\t', fmt='%s')