/ReverseTrainSecondLayer.py
from keras.layers import Input, Dense
import numpy as np
from keras.models import Model, load_model
from keras import optimizers, activations
from keras.callbacks import ModelCheckpoint
from keras.callbacks import CSVLogger
import glob

# Element-wise logistic sigmoid, used later to push the phenotype embeddings
# through the decoder weights manually with NumPy.
def sigmo(x):
    return 1 / (1 + np.exp(-x))
# Each .txt file holds the expression profile (genes x samples) of one
# phenotype; the files must have a .txt extension.
yykt = glob.glob("Path to the folder having all the files of phenotype-associated gene expression profiles/*.txt")
yykt = sorted(yykt)
count = 0
# Identity matrix: column "count" is the one-hot label of the count-th phenotype file.
b = np.identity(len(yykt), dtype=float)
for fl in yykt:
    if count == 0:
        UXU = np.loadtxt(fl)
        yead1e = UXU.shape
        Trg0t = np.repeat(b[:, count], repeats=yead1e[1], axis=0).reshape(len(yykt), yead1e[1])
    else:
        VVV = np.loadtxt(fl)
        yead1e = VVV.shape
        Trgft = np.repeat(b[:, count], repeats=yead1e[1], axis=0).reshape(len(yykt), yead1e[1])
        Trg0t = np.append(Trg0t, Trgft, axis=1)
        UXU = np.append(UXU, VVV, axis=1)
    count = count + 1
# Shuffle the pooled samples (columns) with one common permutation.
qfwqwcv = np.random.permutation(UXU.shape[1])
Trg0t = Trg0t[:, qfwqwcv]
UXU = UXU[:, qfwqwcv]
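# Sanity check (not in the original script): the pooled expression matrix and the
# one-hot phenotype matrix must describe the same samples (columns).
assert UXU.shape[1] == Trg0t.shape[1]
assert Trg0t.shape[0] == len(yykt)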
n_genes = 20848  # replace with the number of genes in the expression profile
# The trained autoencoder is needed, e.g. an output of Autoencoder3layer.py.
autoencoder = load_model('MicroarrayDeep512_512_512_AE20K.h5')
# Make the second encoder layer linear before it is reused below.
autoencoder.layers[2].activation = activations.linear
yrts = np.repeat([1], n_genes).reshape(1, n_genes)
# Rebuild the first two encoder layers as a stand-alone model; the layers are
# renamed so they do not clash with the loaded model's layer names.
input_SigF = Input(shape=(n_genes,))
laye1r = autoencoder.get_layer("dense_1")
laye1r.name = "D1"
L1 = laye1r(input_SigF)
laye2r = autoencoder.get_layer("dense_2")
laye2r.name = "D2"
L2 = laye2r(L1)
autoencodeL2r = Model(input_SigF, L2)
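# Optional check (not in the original script): the truncated encoder's output width
# is assumed to be 512 for MicroarrayDeep512_512_512_AE20K.h5, matching the com_dim
# value set below.
# assert autoencodeL2r.output_shape[-1] == 512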
# Project all pooled samples into the second-layer embedding space.
yytrk = autoencodeL2r.predict(np.transpose(UXU))
Etr = np.transpose(yytrk)
Dtr = np.transpose(Trg0t)
# Centre each sample's embedding by subtracting its mean activation.
ttxrmi = Etr.mean(axis=0)
Etr = Etr - ttxrmi[None, :]
Etr = np.transpose(Etr)
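# Shape check (not in the original script): after the transposes, Dtr and Etr are
# sample-major, with one row per pooled sample.
assert Dtr.shape[0] == Etr.shape[0] == UXU.shape[1]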
# When loading MicroarrayDeep1024_1024_1024_AE20K.h5 or MicroarrayDeep256_256_256_AE20K.h5,
# set com_dim = 1024 or com_dim = 256 respectively.
com_dim = 512
# Small network mapping a one-hot phenotype vector to the centred second-layer embedding.
input_SigS = Input(shape=(len(yykt),))
LL1 = Dense(com_dim, activation='sigmoid', name='X1')(input_SigS)
LLf = Dense(com_dim, activation='linear', name='X2')(LL1)
DeasMSDeepNN = Model(input_SigS, LLf)
admO = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=1e-6)
DeasMSDeepNN.compile(optimizer=admO, loss='mean_absolute_error')
new_filename1 = "L2ConvS" + str(com_dim) + ".csv"
csv_logger = CSVLogger(new_filename1, append=False, separator='\t')
DeasMSDeepNN.fit(Dtr, Etr, epochs=5000, batch_size=32, shuffle=True, callbacks=[csv_logger])
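# ModelCheckpoint is imported above but never used; if intermediate weights are
# wanted, a checkpoint callback could be passed to the fit call above, e.g.
# (sketch only; the file name "L2ConvS_best.h5" is a hypothetical choice):
# ckpt = ModelCheckpoint("L2ConvS_best.h5", monitor='loss', save_best_only=True)
# DeasMSDeepNN.fit(Dtr, Etr, epochs=5000, batch_size=32, shuffle=True,
#                  callbacks=[csv_logger, ckpt])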
zoz = DeasMSDeepNN.get_weights()
yy = autoencoder.get_weights()
# Match the variance of the next autoencoder layer's kernel (yy[4]) and bias (yy[5])
# to the variance of the phenotype network's output layer.
yy[4] = (np.var(zoz[2]) / np.var(yy[4])) * yy[4]
yy[5] = (np.var(zoz[3]) / np.var(yy[5])) * yy[5]
# Predict an embedding for each pure phenotype (the identity matrix b), then push it
# through the remaining autoencoder weights manually with NumPy.
yfecfa = DeasMSDeepNN.predict(b)
reswf = np.matmul(yfecfa, yy[4])
yfuj = reswf.shape
# Repeat the bias so that every phenotype row receives the same bias vector.
zzosk = np.tile(yy[5], yfuj[0]).reshape(yfuj[0], yfuj[1])
L3inS = reswf + zzosk
L3otS = sigmo(L3inS)
reswMidf = np.transpose(np.matmul(L3otS, yy[6]))
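# Equivalence note (not in the original script): the explicit bias tiling above is
# the same as NumPy broadcasting, so the decoder pass can be written in one line.
reswMidf_alt = np.transpose(np.matmul(sigmo(reswf + yy[5]), yy[6]))
assert np.allclose(reswMidf, reswMidf_alt)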
# Reference file listing the genes on which the autoencoder was trained;
# RefEntrezMicroarray.txt and RefEntrezRNAseq.txt correspond to microarray and
# RNA-seq data respectively.
ytre = np.genfromtxt('RefEntrezMicroarray.txt', dtype='str')
# Rank genes per phenotype by the absolute value of the reconstructed score.
yytrv = np.argsort(-np.absolute(reswMidf), axis=0)
entNe = ytre[yytrv]
np.savetxt("OrderDisease.txt", yykt, delimiter='\t', fmt='%s')
np.savetxt("DeepAE_DiseaseGene.txt", entNe, delimiter='\t', fmt='%s')