% MakeTemplate: Find hydrophobicity templates from
%   seed sequences for probability of position occurrence
%   of 0.0, 0.1, 0.2, ... ,0.9, and 1.0.
%
% Reads aligned seed sequences from Data/HCV/plain.txt, computes for
% every alignment column the fraction of gap characters and the mean
% normalized hydrophobicity, and writes the hydrophobicity vectors
% t_all, t_90, t_80, ..., t_10, t_0 to Data/HCV/Templates.txt.
% t_XX holds the columns whose gap fraction is below XX/100; t_0 holds
% only the columns that have no gaps at all.
%
% Scott F. Smith
% Department of Electrical and Computer Engineering
% Boise State University
% SFSmith@BoiseState.edu
%
% 19 January 2005

% Read in sequences
inputPath = 'Data/HCV/plain.txt';
[fileIn, openMsg] = fopen(inputPath);
if fileIn < 0
    % Fail here with a clear message instead of inside fgetl
    error('MakeTemplate:openFailed', ...
          'Cannot open input file %s: %s', inputPath, openMsg);
end

SeqLines = {};
while true
    FileLine = fgetl(fileIn);
    if ~ischar(FileLine)
        break   % fgetl returns -1 at end of file
    end
    % The sequence text starts at character 24 of each line; the first
    % 23 characters are skipped (presumably a name/label field --
    % confirm against the data file format).
    SeqText = FileLine(24:end);
    if ~isempty(SeqText)
        % Blank or short lines carry no sequence and are skipped
        SeqLines{end+1} = SeqText;
    end
end
fclose(fileIn);

if isempty(SeqLines)
    % Stop before the output file is overwritten with empty templates
    error('MakeTemplate:noSequences', ...
          'No sequence data found in %s', inputPath);
end

% char() pads shorter rows with blanks so lines of unequal length no
% longer break the concatenation; blanks are counted as gaps below.
Sequences = upper(char(SeqLines));
[NumbSeq, NumbPos] = size(Sequences);

% Find fraction of sequences that have a gap ('.' or ' ') at each
% position
gap_count = sum(Sequences == '.' | Sequences == ' ', 1) / NumbSeq;

% Find mean hydrophobicity of each column.
% Per-residue values (these match the Kyte-Doolittle hydropathy index);
% any character not listed here contributes zero.
Residues   = 'ACDEFGHIKLMNPQRSTVWY';
HydroScale = [ 1.8  2.5 -3.5 -3.5  2.8 -0.4 -3.2  4.5 -3.9  3.8 ...
               1.9 -3.5 -1.6 -3.5 -4.5 -0.8 -0.7  4.2 -0.9 -1.3];

HydroValues = zeros(NumbSeq, NumbPos);
for k = 1:length(Residues)
    HydroValues(Sequences == Residues(k)) = HydroScale(k);
end

% Accumulate one sequence at a time so each column is summed in the
% same order as an element-by-element loop over the rows would.
hydro = zeros(1,NumbPos);
for i = 1:NumbSeq
    hydro = hydro + HydroValues(i,:);
end

% Average over all sequences (gaps count as zero) and divide by 4.5,
% the largest magnitude in the table, so values lie in [-1, 1].
hydro = hydro / (NumbSeq * 4.5);

% Print out fraction present and mean hydrophobicities
%gap_count
%hydro

% Output mean hydrophobicities to file for each probability
%   threshold
outputPath = 'Data/HCV/Templates.txt';
[fidOut, openMsg] = fopen(outputPath,'w');
if fidOut < 0
    error('MakeTemplate:openFailed', ...
          'Cannot open output file %s: %s', outputPath, openMsg);
end

% All columns, regardless of gap fraction
fprintf(fidOut,'t_all = [');
fprintf(fidOut,'%f ',hydro);
fprintf(fidOut,']; \r\n\r\n');

% Successively stricter gap-fraction limits. The filtering is
% cumulative: each pass keeps a subset of the previous pass.
GapLimits   = [0.9 0.8 0.7 0.6 0.5 0.4 0.3 0.2 0.1];
LimitLabels = [ 90  80  70  60  50  40  30  20  10];
for k = 1:length(GapLimits)
    new_positions = find(gap_count < GapLimits(k));
    hydro = hydro(new_positions);
    gap_count = gap_count(new_positions);
    fprintf(fidOut,'t_%d = [',LimitLabels(k));
    fprintf(fidOut,'%f ',hydro);
    fprintf(fidOut,']; \r\n\r\n');
end

% Columns present in every sequence. gap_count is an integer count
% divided by NumbSeq, so a gap-free column is exactly 0.
new_positions = find(gap_count == 0.0);
hydro = hydro(new_positions);
gap_count = gap_count(new_positions);
fprintf(fidOut,'t_0 = [');
fprintf(fidOut,'%f ',hydro);
fprintf(fidOut,']; \r\n\r\n');

fclose(fidOut);