######################################
# Connect to server
######################################
# Windows users: download PuTTY from the internet
# PuTTY configuration: IP address 129.24.144.224, pw: test
# Mac users: open Terminal
> ssh sbuddenb@129.24.144.224
# pw: test
######################################
# File viewing and manipulation
######################################
# Look at the beginning of the file to see what it looks like
> more 454_testreads.fa
# Search for a specific sequence and return the 30 lines after it
> grep HJ4YRIA01CSMNU -A 30 454_testreads.fa
# How could you remove the header?
> sed 's/HJ4YRIA.*//' 454_testreads.fa > 454_testreads_seqonly
# How could you remove parts of the header?
> sed 's/rank.*$//g' 454_testreads.fa > 454_testreads_shortheader
# How could you change parts of the header if you want?
> perl -pi -w -e 's/SEARCH_FOR/REPLACE_WITH/g;' *.txt
# Example:
> perl -pi -e 's/HJ4YRIA/SarahisSweet/g;' *.txt
# How many individual reads are there?
> grep -c '^>' 454_testreads.fa
# What is the average sequence length?
> grep -v '^>' 454_testreads.fa | wc --chars
# Note: this prints the total number of characters on the sequence lines (newlines included); divide it by the number of reads to get the average length
######################################
# SEQCLEAN: filter raw reads
######################################
# Seqclean uses Megablast, so we need to make a blastable database of our genome on the server
# Go to location of BLAST program
> cd /teraraid23/sarah/blast
# Using blast+
> makeblastdb -in testjunk.fa -input_type fasta -dbtype nucl -out testjunk.fa
# Run seqclean
> ./seqclean 454_testreads.fa -c 1 -s /teraraid23/sarah/blast/testjunk.fa
# Rename output file
> mv 454_testreads.fa.clean 454_testreads_filtered.fa
######################################
# GMAP: transcripts x genes/genomes
######################################
# Build database for Capsaspora genome
> ./gmap_build -d /teraraid23/sarah/bin/CapsasporaGenome_short.fa /teraraid23/sarah/bin/CapsasporaGenome_short.fa
# Align 454_testreads_filtered.fa x CapsasporaGenome_short.fa, then look at the resulting file
> ./gmap -D /teraraid23/sarah/bin/ -d CapsasporaGenome_short.fa /teraraid23/sarah/seqclean/seqclean-x86_64/454_testreads_filtered.fa --npaths=1 -Z > 454_testreads_filtered_xCapsasporaGenome 2>stderr.txt
# GMAP gives you all matches, good and bad, so we need to choose a cutoff based on %identity and %coverage
> perl perl_forZ_gmap.pl 454_testreads_filtered_xCapsasporaGenome 95 95
######################################
# BLASTn: transcripts x transcripts
######################################
# Make blastable database of snail_ESTs.fa
> makeblastdb -in snail_ESTs.fa -input_type fasta -dbtype nucl -parse_seqids -out snail_ESTs.fa
# Align 454_testreads_filtered.fa x snail_ESTs.fa (snaildb, genbank ESTs, vectorbase)
> blastn -db /teraraid23/sarah/blast/snail_ESTs.fa -query /teraraid23/sarah/seqclean/seqclean-x86_64/454_testreads_filtered.fa -task blastn -outfmt 6 -evalue 1e-50 -max_target_seqs 1 -num_threads 16 -out 454_filterd_xsnailESTs_e-50
https://main.g2.bx.psu.edu/