Introduction | Numbering | Sequences | Structures | Modelling | Macros | Publications | Links
Coloring | Sequence Statistics | Renumbering | Accessibility | Torsion Angles | Structural Variability | Hydrogen Bonds | Downloads

Macro AA_Frequency

Counts the occurrence of each type of amino acid in every column of the sequence alignment, generating a table of absolute frequencies. From this table, a second table is generated, which contains the relative frequencies in percent. The summary lines list the consensus sequence, the percentage of sequences which agree with the consensus in this position, the number of sequences which contain any amino acid in this position, and the sequence variability according to Kabat.

The sequence variability according to Kabat is defined as the number of different amino acids in a given position divided by the relative frequency (expressed as a fraction between 0 and 1) of the most abundant amino acid in this position. This formula yields values between 1 (absolutely conserved: 1 / 1) and 400 (all 20 amino acids occurring with equal frequency: 20 / 0.05).

Usage:

  • Select the cells containing sequences to be incorporated in the analysis
  • Choose "Macro" from the "Tools" menu
  • Run macro AA_Frequency

Caution:

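Overwrites the 55 rows below the last line of the sequence alignment. In those rows, the column immediately to the left of the selection is overwritten as well (it receives the row labels), so the selection must not start in the first column of the sheet.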

Sub AA_Frequency()

'Determines absolute and relative frequencies of the amino acids occurring at
'each position (column) of the selected sequence alignment.
'
'Input : the current selection on the active sheet; every cell is expected to
'        hold a single one-letter residue code. Cells that match none of the
'        codes in RESIDUES (gaps, blanks, ...) are not counted.
'Output: written into the 55 rows below the selection; row labels are placed
'        in the column immediately to the left of the selection.
'
'   rows i2+3  .. i2+25   absolute count of each residue type
'   row  i2+26            "sum"        : number of counted residues in the column
'   rows i2+28 .. i2+50   relative frequency of each residue type in percent
'   row  i2+52            "Consensus"  : most frequent residue ("." if none)
'   row  i2+53            "% Agree"    : percentage of the consensus residue
'   row  i2+54            "# of Seq."  : number of counted residues in the column
'   row  i2+55            "Variability": (number of different residue types) /
'                                        (fraction of the most frequent type)
'
'(i2 is the last row of the selection.)
'
    'One-letter codes in the order in which they are listed in both tables
    Const RESIDUES As String = "DEKRHTSNQGACPVILMFYWBZX"

    Dim i1 As Long, i2 As Long          'first / last row of the selection
    Dim j1 As Long, j2 As Long          'first / last column of the selection
    Dim i As Long, j As Long, k As Long
    Dim labelCol As Long                'column receiving the row labels
    Dim absFirst As Long, absLast As Long, sumRow As Long
    Dim relFirst As Long, relLast As Long
    Dim consRow As Long, agreeRow As Long, nSeqRow As Long, varRow As Long
    Dim bestRow As Long                 'row of the most frequent residue
    Dim nKinds As Long                  'number of different residue types
    Dim maxPct As Double                'highest relative frequency (percent)

    'Selection.Columns.Count can never be 0; the real "nothing selected" case
    'is a selection that is not a cell range at all (chart, shape, ...)
    If TypeName(Selection) <> "Range" Then
        MsgBox Prompt:="No cells selected, please select the sequences you wish to process"
        Exit Sub
    End If

'get extent of current selection
    i1 = Selection.Row
    i2 = i1 + Selection.Rows.Count - 1
    j1 = Selection.Column
    j2 = j1 + Selection.Columns.Count - 1

'layout of the result block
    labelCol = j1 - 1
    absFirst = i2 + 3
    absLast = absFirst + Len(RESIDUES) - 1
    sumRow = absLast + 1
    relFirst = sumRow + 2
    relLast = relFirst + Len(RESIDUES) - 1
    consRow = relLast + 2
    agreeRow = consRow + 1
    nSeqRow = consRow + 2
    varRow = consRow + 3

    'The labels need a column to the left of the selection
    If labelCol < 1 Then
        MsgBox Prompt:="The selection must not start in column A: the column to its left is needed for the row labels"
        Exit Sub
    End If

    'The result block must fit on the sheet
    If varRow > Rows.Count Then
        MsgBox Prompt:="Not enough rows below the selection to write the results"
        Exit Sub
    End If

'row labels
    For k = 1 To Len(RESIDUES)
        Cells(absFirst + k - 1, labelCol) = Mid$(RESIDUES, k, 1)
        Cells(relFirst + k - 1, labelCol) = Mid$(RESIDUES, k, 1)
    Next k
    Cells(sumRow, labelCol) = "sum"
    Cells(consRow, labelCol) = "Consensus"
    Cells(agreeRow, labelCol) = "% Agree"
    Cells(nSeqRow, labelCol) = "# of Seq."
    Cells(varRow, labelCol) = "Variability"

'reset the absolute counts (including the sum row)
    For i = absFirst To sumRow
        For j = j1 To j2
            Cells(i, j) = 0
        Next j
    Next i

'absolute frequencies: compare every alignment cell with every residue label
    For j = j1 To j2
        For i = i1 To i2
            For k = absFirst To absLast
                If Cells(i, j) Like Cells(k, labelCol) Then
                    Cells(k, j) = Cells(k, j) + 1
                    Cells(sumRow, j) = Cells(sumRow, j) + 1
                End If
            Next k
        Next i
    Next j

'relative frequencies in percent (all 0 for a column without any residue)
    For j = j1 To j2
        For k = relFirst To relLast
            If Cells(sumRow, j) > 0 Then
                Cells(k, j) = 100 * Cells(k - (relFirst - absFirst), j) / Cells(sumRow, j)
            Else
                Cells(k, j) = 0
            End If
        Next k
    Next j

'consensus sequence
    For j = j1 To j2
        bestRow = 0
        maxPct = 0
        For i = relFirst To relLast
            'strict comparison: on a tie the residue listed first wins
            If maxPct < Cells(i, j) Then
                maxPct = Cells(i, j)
                bestRow = i
            End If
        Next i
        If bestRow <> 0 Then
            Cells(consRow, j) = Cells(bestRow, labelCol)
            Cells(agreeRow, j) = Cells(bestRow, j)
        Else
            Cells(consRow, j) = "."
            Cells(agreeRow, j) = 0
        End If
        'always written, so that no stale value survives in an empty column
        Cells(nSeqRow, j) = Cells(sumRow, j)
    Next j

'variability
    For j = j1 To j2
        nKinds = 0
        maxPct = 0
        For k = relFirst To relLast
            If IsNumeric(Cells(k, j)) Then
                If Cells(k, j) <> 0 Then
                    nKinds = nKinds + 1
                    If Cells(k, j) > maxPct Then
                        maxPct = Cells(k, j)
                    End If
                End If
            End If
        Next k
        If nKinds > 0 And maxPct > 0 Then
            'maxPct is in percent, hence the factor 100
            Cells(varRow, j) = 100 * nKinds / maxPct
        Else
            'undefined for a column without residues; remove any old value
            Cells(varRow, j).ClearContents
        End If
    Next j

End Sub

						
AAAAA Homepage Zürich University Dept. of Biochemistry Plückthun Group Annemarie Honegger

Last Modified by A.Honegger Wednesday, January 26, 2005