Position Weight Matrix

This tutorial shows how to compute motif scores for a set of reads using a Position Weight Matrix (PWM).

A motif PWM is first read from file, and a PositionWeightMatrix is created from its alphabet and counts. It is then used to score every valid window of each read.

import numpy as np
import bionumpy as bnp
from bionumpy.io.motifs import read_motif
from bionumpy.sequence.position_weight_matrix import get_motif_scores


def read_motif_scores(reads_filename: str, motif_filename: str) -> np.ndarray:
    """Build a histogram of the best motif score found in each read.

    Args:
        reads_filename: Path of the reads file, opened with ``bnp.open``.
        motif_filename: Path of the motif file, loaded with ``read_motif``.

    Returns:
        The result of ``bnp.histogram`` applied to the per-read maximum
        motif scores.
    """
    # Read the alphabet and counts from jaspar file
    pwm = read_motif(motif_filename)

    # Open the reads through a context manager so the underlying file handle
    # is closed deterministically rather than left to garbage collection.
    with bnp.open(reads_filename) as reads_file:
        entries = reads_file.read()

        # Calculate the motif score for each valid window. Done while the
        # reader is still open, in case the entries are loaded lazily.
        scores = get_motif_scores(entries.sequence, pwm)

    # Get a histogram of the max-score for each read
    return bnp.histogram(scores.max(axis=-1))


if __name__ == "__main__":
    # Run the tutorial on the example reads and the example JASPAR motif.
    read_motif_scores(
        reads_filename="example_data/big.fq.gz",
        motif_filename="example_data/MA0080.1.jaspar",
    )