SEQ 3. pfam数据库的注释及本地分析 (pfam_scan) - ToB企服应用市场:ToB评测及商务社交产业平台

# Fetch the three Pfam current-release files and decompress each one.
pfam_base=ftp://ftp.ebi.ac.uk:21/pub/databases/Pfam/current_release
for pfam_file in Pfam-A.hmm Pfam-A.hmm.dat active_site.dat; do
  # Unpack only the archive that was just downloaded (and only if the download
  # succeeded), so unrelated .gz files in this directory are left untouched.
  wget "${pfam_base}/${pfam_file}.gz" && gunzip "${pfam_file}.gz"
done

复制代码

# Download, build, test and install HMMER 3.2 from source.
wget http://eddylab.org/software/hmmer/hmmer-3.2.tar.gz
# Extract the archive fetched above; the file name must match the download.
tar -xzvf hmmer-3.2.tar.gz
cd hmmer-3.2
./configure
make
make check
make install
# Add /usr/local/bin to PATH: open ~/.bashrc and append the export line below.
vim ~/.bashrc
export PATH=/usr/local/bin:$PATH
# Reload ~/.bashrc so the PATH change takes effect immediately.
source ~/.bashrc

复制代码

# Create a dedicated conda environment and install pfam_scan into it.
conda create -n pfam_scan
source activate pfam_scan
# pfam_scan is distributed through the bioconda channel; name the channels
# explicitly so the install works without prior channel configuration.
conda install -c conda-forge -c bioconda pfam_scan

复制代码

# Run hmmpress on the decompressed Pfam-A.hmm library before searching with pfam_scan.pl.
hmmpress Pfam-A.hmm

复制代码

# Print the pfam_scan.pl usage summary and option list.
pfam_scan.pl -h
pfam_scan.pl: search a FASTA file against a library of Pfam HMMs
Usage: pfam_scan.pl -fasta <fasta_file> -dir <directory location of Pfam files>
Additonal options:
-h : show this help
-outfile <file> : output file, otherwise send to STDOUT
-clan_overlap : show overlapping hits within clan member families (applies to Pfam-A families only)
-align : show the HMM-sequence alignment for each match
-e_seq <n> : specify hmmscan evalue sequence cutoff for Pfam-A searches (default Pfam defined)
-e_dom <n> : specify hmmscan evalue domain cutoff for Pfam-A searches (default Pfam defined)
-b_seq <n> : specify hmmscan bit score sequence cutoff for Pfam-A searches (default Pfam defined)
-b_dom <n> : specify hmmscan bit score domain cutoff for Pfam-A searches (default Pfam defined)
-as : predict active site residues for Pfam-A matches
-json [pretty] : write results in JSON format. If the optional value "pretty" is given,
the JSON output will be formatted using the "pretty" option in the JSON
module
-cpu <n> : number of parallel CPU workers to use for multithreads (default all)
-translate [mode] : treat sequence as DNA and perform six-frame translation before searching. If the
optional value "mode" is given it must be either "all", to translate everything
and produce no individual ORFs, or "orf", to report only ORFs with length greater
than 20. If "-translate" is used without a "mode" value, the default is to
report ORFs (default no translation)
For more help, check the perldoc:
shell% perldoc pfam_scan.pl

复制代码

>sp|O95905|ECD_HUMAN Protein ecdysoneless homolog OS=Homo sapiens OX=9606 GN=ECD PE=1 SV=1
MEETMKLATMEDTVEYCLFLIPDESRDSDKHKEILQKYIERIITRFAPMLVPYIWQNQPF
NLKYKPGKGGVPAHMFGVTKFGDNIEDEWFIVYVIKQITKEFPELVARIEDNDGEFLLIE
AADFLPKWLDPENSTNRVFFCHGELCIIPAPRKSGAESWLPTTPPTIPQALNIITAHSEK
ILASESIRAAVNRRIRGYPEKIQASLHRAHCFLPAGIVAVLKQRPRLVAAAVQAFYLRDP
IDLRACRVFKTFLPETRIMTSVTFTKCLYAQLVQQRFVPDRRSGYRLPPPSDPQYRAHEL
GMKLAHGFEILCSKCSPHFSDCKKSLVTASPLWASFLESLKKNDYFKGLIEGSAQYRERL
EMAENYFQLSVDWPESSLAMSPGEEILTLLQTIPFDIEDLKKEAANLPPEDDDQWLDLSP
DQLDQLLQEAVGKKESESVSKEEKEQNYDLTEVSESMKAFISKVSTHKGAELPREPSEAP
ITFDADSFLNYFDKILGPRPNESDSDDLDDEDFECLDSDDDLDFETHEPGEEASLKGTLD
NLKSYMAQMDQELAHTCISKSFTTRNQVEPVSQTTDNNSDEEDSGTGESVMAPVDVDLNL
VSNILESYSSQAGLAGPASNLLQSMGVQLPDNTDHRPTSKPTKN

复制代码

# Search test.fa against the Pfam HMMs and write the hits to test_result.xls;
# -as additionally predicts active-site residues for Pfam-A matches.
# NOTE(review): -dir must be the directory holding the Pfam files — confirm they are in hmmer-3.2/.
pfam_scan.pl -fasta ./test.fa -dir hmmer-3.2/ -outfile test_result.xls -as
#[42] [94] [42] [110] [66] [398] [363] [272] [294] [392] [294] [134] [291] [212] [64] [549][567] [562][580] [515][533] [539][557] [513][531] [513][531] [546][564] [537][555] [550][568] [184] [184] [329] [152] [376][394] [193][211] [293] [273] [614,680] [90] [76] [90] [76] [76] [90] [73] [123] [25] [39] [213] [30] [204] [51] [225] [81] [255] [39] [213] [39] [213] [71] [245] [71] [245] [18,106,126] [18,106,126] [18,106,126] [86] [18,106,126] [67] [210,343] [210,343] [210,343] [85] [184,191] [483,532,633] [173]

复制代码