Source code for teilab.seedmatch

# coding: utf-8
import re

import pandas as pd
import requests

from .utils._config import SEEDMATCH_URL


def get_matched_mRNAs(seedseq: str, *, timeout: float = 60.0) -> pd.DataFrame:
    """Get a list of mRNAs with ``seedseq`` in 3'UTR

    The query is sent as an HTTP POST (form field ``seedseq``) to
    ``SEEDMATCH_URL`` and the text response is parsed into a data frame.

    Args:
        seedseq (str): A query sequence. It is lower-cased before being sent.
        timeout (float, optional): Seconds to wait for the server before giving up.
            Passed straight to ``requests.post``; ``None`` waits indefinitely.
            Defaults to ``60.0``.

    Returns:
        pd.DataFrame: A data frame with a column named ``"SystematicName"`` meaning
        Accession numbers for each mRNA and a column named ``"NumHits"`` meaning how
        many ``seedseq`` sequences are in its 3'UTR. Both columns hold the strings
        captured from the response text (no numeric conversion is performed).

    Raises:
        requests.HTTPError: If the server answers with a 4xx / 5xx status code.
        requests.Timeout: If the server does not respond within ``timeout`` seconds.

    Examples:
        >>> from teilab.seedmatch import get_matched_mRNAs
        >>> df_matched_mRNAs = get_matched_mRNAs(seedseq="gagttca")
        >>> print(df_matched_mRNAs.to_markdown())
        |      | SystematicName   |   NumHits |
        |-----:|:-----------------|----------:|
        |    0 | NM_001004713     |         1 |
        |    1 | NM_173860        |         1 |
        |    2 | NM_001005493     |         1 |
        |    : | :                |         : |
        | 3643 | NM_015139        |         1 |
        | 3644 | NM_015463        |         1 |
        | 3645 | NM_007189        |         1 |
        >>> from teilab.utils import now_str
        >>> now_str()
        '2021-06-10@23.36.12'

    You can also get the data with the command like ``curl`` .

    .. code-block:: shell

        $ curl -d "seedseq=gagttca" <SEEDMATCH_URL>
    """
    # Without an explicit timeout ``requests`` waits forever on a stalled server.
    ret = requests.post(
        url=SEEDMATCH_URL,
        data={"seedseq": seedseq.lower()},
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of parsing an error page, which would
    # otherwise be indistinguishable from a query with no hits.
    ret.raise_for_status()
    # Each record is a line preceded by a newline: "<name>\t<digits>".
    # ``.+`` is greedy, so the name runs up to the last tab that is followed by
    # digits on that line. A record on the very first line of the response
    # (no preceding newline) is never matched.
    return pd.DataFrame(
        data=re.findall(pattern=r"\n(.+)\t([0-9]+)", string=ret.text),
        columns=["SystematicName", "NumHits"],
    )