Spaces:
Runtime error
Runtime error
| #This software is a free software. Thus, it is licensed under GNU General Public License. | |
| #Python implementation to Smith-Waterman Algorithm for Homework 1 of Bioinformatics class. | |
| #Forrest Bao, Sept. 26 <http://fsbao.net> <forrest.bao aT gmail.com> | |
| # zeros() was origianlly from NumPy. | |
| # This version is implemented by alevchuk 2011-04-10 | |
| def zeros(shape): | |
| retval = [] | |
| for x in range(shape[0]): | |
| retval.append([]) | |
| for y in range(shape[1]): | |
| retval[-1].append(0) | |
| return retval | |
| match_award = 10 | |
| mismatch_penalty = -5 | |
| gap_penalty = -5 # both for opening and extanding | |
| def match_score(alpha, beta, gap_symbol = "-"): | |
| if alpha == beta: | |
| return match_award | |
| elif alpha == gap_symbol or beta == gap_symbol: | |
| return gap_penalty | |
| else: | |
| return mismatch_penalty | |
| def finalize(align1, align2, gap_symbol = "-"): | |
| align1 = align1[::-1] #reverse sequence 1 | |
| align2 = align2[::-1] #reverse sequence 2 | |
| i,j = 0,0 | |
| #calcuate identity, score and aligned sequeces | |
| symbol = '' | |
| for i in range(0,len(align1)): | |
| # if two AAs are the same, then output the letter | |
| if align1[i] == align2[i]: | |
| symbol = symbol + align1[i] | |
| # if they are not identical and none of them is gap | |
| elif align1[i] != align2[i] and align1[i] != gap_symbol and align2[i] != gap_symbol: | |
| symbol += ' ' | |
| found = 0 | |
| #if one of them is a gap, output a space | |
| elif align1[i] == gap_symbol or align2[i] == gap_symbol: | |
| symbol += ' ' | |
| return align1, align2 | |
| def needle(seq1, seq2, gap_symbol = "-"): | |
| m, n = len(seq1), len(seq2) # length of two sequences | |
| # Generate DP table and traceback path pointer matrix | |
| score = zeros((m+1, n+1)) # the DP table | |
| # Calculate DP table | |
| for i in range(0, m + 1): | |
| score[i][0] = gap_penalty * i | |
| for j in range(0, n + 1): | |
| score[0][j] = gap_penalty * j | |
| for i in range(1, m + 1): | |
| for j in range(1, n + 1): | |
| match = score[i - 1][j - 1] + match_score(seq1[i-1], seq2[j-1]) | |
| delete = score[i - 1][j] + gap_penalty | |
| insert = score[i][j - 1] + gap_penalty | |
| score[i][j] = max(match, delete, insert) | |
| # Traceback and compute the alignment | |
| align1, align2 = '', '' | |
| i,j = m,n # start from the bottom right cell | |
| while i > 0 and j > 0: # end toching the top or the left edge | |
| score_current = score[i][j] | |
| score_diagonal = score[i-1][j-1] | |
| score_up = score[i][j-1] | |
| score_left = score[i-1][j] | |
| if score_current == score_diagonal + match_score(seq1[i-1], seq2[j-1]): | |
| align1 += seq1[i-1] | |
| align2 += seq2[j-1] | |
| i -= 1 | |
| j -= 1 | |
| elif score_current == score_left + gap_penalty: | |
| align1 += seq1[i-1] | |
| align2 += gap_symbol | |
| i -= 1 | |
| elif score_current == score_up + gap_penalty: | |
| align1 += gap_symbol | |
| align2 += seq2[j-1] | |
| j -= 1 | |
| # Finish tracing up to the top left cell | |
| while i > 0: | |
| align1 += seq1[i-1] | |
| align2 += gap_symbol | |
| i -= 1 | |
| while j > 0: | |
| align1 += gap_symbol | |
| align2 += seq2[j-1] | |
| j -= 1 | |
| return finalize(align1, align2) |