Logomaker example with RFAM
import FASTX
import PythonCall
import Logomaker
using Statistics: mean
using LogExpFunctions: xlogx
Fetch RNA family alignment RF00162 from RFAM, trimmed by removing insertions.
fasta_path = Logomaker.__example_fasta()
records = collect(FASTX.FASTA.Reader(open(fasta_path)))
seqs = FASTX.sequence.(records)
Downloading artifact: RF00162_trimmed
RNA nucleotides
NTs = "ACGU-"
One-hot representation
function onehot(s::AbstractString)
return reshape(collect(s), 1, length(s)) .== collect(NTs)
end
X = reshape(reduce(hcat, onehot.(seqs)), length(NTs), :, length(seqs))
Compute conservation scores
p = reshape(mean(X; dims=3), size(X, 1), size(X, 2))
H = sum(-xlogx.(p) / log(2); dims=1)
cons = p .* (log2(5) .- H)
Plot sequence logo!
color_scheme = Logomaker.color_scheme('A' => "blue", 'C' => "orange", 'G' => "green", 'U' => "red", '-' => "black")
logo = Logomaker.Logo(cons, collect(NTs); color_scheme)
logo.ax.set_ylim(0, log2(5))
logo.fig
This page was generated using Literate.jl.