% MakeTemplates: Find templates from seed sequences for probability of
%                position occurrence of 0.0, 0.1, 0.2, ... , 0.9, and 1.0.
%
% Scott F. Smith
% Department of Electrical and Computer Engineering
% Boise State University
% SFSmith@BoiseState.edu
%
% 19 January 2005
%
% Reads an aligned set of sequences from a text file, computes for every
% alignment column the fraction of gap characters ('.' or ' ') and the mean
% per-residue measure value, and writes the column means to an output .m
% file as vectors t_all, t_90, t_80, ..., t_10, t_0.  Each successive vector
% keeps only the columns whose gap fraction is below the next (smaller)
% threshold; t_0 keeps only the columns with no gaps at all.

% Choose set of twenty measure values for amino acids
% Choices are: MeasuresHydro1 (Kyte and Doolittle)
%              MeasuresHydro2 (Engelman, Steitz, and Goldman)
%              MeasuresVolume (Zamyatin)
%              MeasuresCharge
% NOTE(review): the chosen script is presumably what defines the vector
% `measures` (20 entries, ordered as in residueCodes below) -- confirm.
MeasuresCharge

% Choose input and output files
inputName  = 'Data/7tm.txt';
outputName = 'TemplatesCharge/Templates7tm.m';

% Read in sequences.  The input is opened and fully read BEFORE the output
% file is created, so a missing input does not truncate an existing output.
[fileIn, openMsg] = fopen(inputName);
if fileIn == -1
    error('MakeTemplates:openInput', ...
          'Cannot open input file %s: %s', inputName, openMsg);
end
rows = {};
while 1
    FileLine = fgetl(fileIn);
    if ~ischar(FileLine)
        break                      % fgetl returns -1 at end of file
    end
    % The first 23 characters of every line are skipped
    % (presumably a name/label field -- TODO confirm against the data file).
    residues = FileLine(24:length(FileLine));
    if ~isempty(residues)          % lines with nothing past column 23 add no row
        rows{end+1} = residues;
    end
end
fclose(fileIn);

% All rows must have the same length to form a character matrix.
rowLengths = cellfun('length', rows);
if ~isempty(rows) && any(rowLengths ~= rowLengths(1))
    error('MakeTemplates:raggedInput', ...
          'Sequences in %s do not all have the same length.', inputName);
end
Sequences = [];
if ~isempty(rows)
    Sequences = vertcat(rows{:});
end
Sequences = upper(Sequences);
[NumbSeq, NumbPos] = size(Sequences);

% Find fraction of sequences with a gap character at each position
gap_count = sum(Sequences == '.' | Sequences == ' ', 1) / NumbSeq;

% Find mean measure value of each column.  Characters that are not one of
% the twenty residue codes (including gaps) contribute zero to the sum,
% and the sum is divided by the total number of sequences.
residueCodes = 'ACDEFGHIKLMNPQRSTVWY';   % position k pairs with measures(k)
[isResidue, codeIndex] = ismember(Sequences, residueCodes);
contrib = zeros(NumbSeq, NumbPos);
contrib(isResidue) = measures(codeIndex(isResidue));
hydro = sum(contrib, 1) / NumbSeq;

% Print out fraction present and mean hydrophobicities
%gap_count
%hydro

% Output mean values to file for each probability threshold
[fidOut, openMsg] = fopen(outputName, 'w');
if fidOut == -1
    error('MakeTemplates:openOutput', ...
          'Cannot open output file %s: %s', outputName, openMsg);
end

fprintf(fidOut,'t_all = [');
fprintf(fidOut,'%f ',hydro);
fprintf(fidOut,']; \r\n\r\n');

% Thresholds are applied cumulatively in decreasing order, narrowing
% hydro and gap_count at every step.
labels = [90  80  70  60  50  40  30  20  10];
limits = [0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1];
for k = 1:length(limits)
    new_positions = find(gap_count < limits(k));
    hydro = hydro(new_positions);
    gap_count = gap_count(new_positions);
    fprintf(fidOut,'t_%d = [',labels(k));
    fprintf(fidOut,'%f ',hydro);
    fprintf(fidOut,']; \r\n\r\n');
end

% Final template: only columns with no gaps at all
new_positions = find(gap_count == 0.0);
hydro = hydro(new_positions);
gap_count = gap_count(new_positions);
fprintf(fidOut,'t_0 = [');
fprintf(fidOut,'%f ',hydro);
fprintf(fidOut,']; \r\n\r\n');

fclose(fidOut);