% SurfFind: Find the animo acids at the surface of a protein % % Scott F. Smith % Department of Electrical and Computer Engineering % Boise State University % SFSmith@BoiseState.edu % % 17 February 2004 % % Input file should have 4 columns: % column #1 - One character amino acid code % column #2 - X-axis position of amino acide alpha-C % column #3 - Y-axis position of amino acide alpha-C % column #4 - Z-axis position of amino acide alpha-C % Filename to read (fname.txt) fname = '1SW6'; % Window size is 2*K + 1 K = 2; fid = fopen([fname '.txt']); FileData = fscanf(fid,'%s %f %f %f',[4 Inf]); fclose(fid); ProtString = char(FileData(1,:)); Xlocs = FileData(2,:).'; Ylocs = FileData(3,:).'; Zlocs = FileData(4,:).'; % Calculate locations relative to center Xlocs = Xlocs - mean(Xlocs); Ylocs = Ylocs - mean(Ylocs); Zlocs = Zlocs - mean(Zlocs); % Convert all to lower case ProtString = lower(ProtString); % Precalulate squares of locations Xloc2 = Xlocs .* Xlocs; Yloc2 = Ylocs .* Ylocs; Zloc2 = Zlocs .* Zlocs; % Precalculate sums of squares X2Y2 = Xloc2 + Yloc2; X2Z2 = Xloc2 + Zloc2; Y2Z2 = Yloc2 + Zloc2; X2Y2Z2 = Xloc2 + Yloc2 + Zloc2; %%% Precalculate quadrants % X and Y XpYp = find((Xlocs > 0) & (Ylocs > 0)); XpYn = find((Xlocs > 0) & (Ylocs < 0)); XnYp = find((Xlocs < 0) & (Ylocs > 0)); XnYn = find((Xlocs < 0) & (Ylocs < 0)); % X and Z XpZp = find((Xlocs > 0) & (Zlocs > 0)); XpZn = find((Xlocs > 0) & (Zlocs < 0)); XnZp = find((Xlocs < 0) & (Zlocs > 0)); XnZn = find((Xlocs < 0) & (Zlocs < 0)); % Y and Z YpZp = find((Ylocs > 0) & (Zlocs > 0)); YpZn = find((Ylocs > 0) & (Zlocs < 0)); YnZp = find((Ylocs < 0) & (Zlocs > 0)); YnZn = find((Ylocs < 0) & (Zlocs < 0)); % X, Y, and Z XpYpZp = find((Xlocs > 0) & (Ylocs > 0) & (Zlocs > 0)); XpYpZn = find((Xlocs > 0) & (Ylocs > 0) & (Zlocs < 0)); XpYnZp = find((Xlocs > 0) & (Ylocs < 0) & (Zlocs > 0)); XpYnZn = find((Xlocs > 0) & (Ylocs < 0) & (Zlocs < 0)); XnYpZp = find((Xlocs < 0) & (Ylocs > 0) & (Zlocs > 0)); XnYpZn = find((Xlocs < 0) & (Ylocs > 0) & (Zlocs < 0)); XnYnZp = find((Xlocs < 0) & (Ylocs < 0) & (Zlocs > 0)); XnYnZn = find((Xlocs < 0) & (Ylocs < 0) & (Zlocs < 0)); %%% Convert surface amino acids to upper case %% Single variable cases % case #1 [MaxVal, MaxIndex] = max(Xlocs); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #2 [MaxVal, MaxIndex] = min(Xlocs); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #3 [MaxVal, MaxIndex] = max(Ylocs); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #4 [MaxVal, MaxIndex] = min(Ylocs); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #5 [MaxVal, MaxIndex] = max(Zlocs); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #6 [MaxVal, MaxIndex] = min(Zlocs); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); %% X and Y cases % case #7 [MaxVal, MaxIndex] = max(X2Y2(XpYp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #8 [MaxVal, MaxIndex] = max(X2Y2(XpYn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #9 [MaxVal, MaxIndex] = max(X2Y2(XnYp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #10 [MaxVal, MaxIndex] = max(X2Y2(XnYn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); %% X and Z cases % case #11 [MaxVal, MaxIndex] = max(X2Z2(XpZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #12 [MaxVal, MaxIndex] = max(X2Z2(XpZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #13 [MaxVal, MaxIndex] = max(X2Z2(XnZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #14 [MaxVal, MaxIndex] = max(X2Z2(XnZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); %% Y and Z cases % case #15 [MaxVal, MaxIndex] = max(Y2Z2(YpZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #16 [MaxVal, MaxIndex] = max(Y2Z2(YpZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #17 [MaxVal, MaxIndex] = max(Y2Z2(YnZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #18 [MaxVal, MaxIndex] = max(Y2Z2(YnZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); %% Three variable cases % case #19 [MaxVal, MaxIndex] = max(X2Y2Z2(XpYpZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #20 [MaxVal, MaxIndex] = max(X2Y2Z2(XpYpZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #21 [MaxVal, MaxIndex] = max(X2Y2Z2(XpYnZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #22 [MaxVal, MaxIndex] = max(X2Y2Z2(XpYnZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #23 [MaxVal, MaxIndex] = max(X2Y2Z2(XnYpZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #24 [MaxVal, MaxIndex] = max(X2Y2Z2(XnYpZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #25 [MaxVal, MaxIndex] = max(X2Y2Z2(XnYnZp)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % case #26 [MaxVal, MaxIndex] = max(X2Y2Z2(XnYnZn)); WinLow = max([1 MaxIndex-K]); WinHigh = min([length(ProtString) MaxIndex+K]); ProtString(WinLow:WinHigh) = upper(ProtString(WinLow:WinHigh)); % Output surface coded amino acid string ProtString fidOut = fopen([fname '_Surf.txt'],'w'); fprintf(fid,'%s',ProtString); fclose(fidOut);