Logomaker example with RFAM

import FASTX
import PythonCall
import Logomaker
using Statistics: mean
using LogExpFunctions: xlogx

Fetch RNA family alignment RF00162 from RFAM, trimmed by removing insertions.

fasta_path = Logomaker.__example_fasta()
records = collect(FASTX.FASTA.Reader(open(fasta_path)))
seqs = FASTX.sequence.(records)
 Downloading artifact: RF00162_trimmed

RNA nucleotides

NTs = "ACGU-"

One-hot representation

function onehot(s::AbstractString)
    return reshape(collect(s), 1, length(s)) .== collect(NTs)
end
X = reshape(reduce(hcat, onehot.(seqs)), length(NTs), :, length(seqs))

Compute conservation scores

p = reshape(mean(X; dims=3), size(X, 1), size(X, 2))
H = sum(-xlogx.(p) / log(2); dims=1)
cons = p .* (log2(5) .- H)

Plot sequence logo!

color_scheme = Logomaker.color_scheme('A' => "blue", 'C' => "orange", 'G' => "green", 'U' => "red", '-' => "black")
logo = Logomaker.Logo(cons, collect(NTs); color_scheme)
logo.ax.set_ylim(0, log2(5))
logo.fig
Example block output

This page was generated using Literate.jl.