# Outline
# Lecture 14
# April 18, 2006
#
# I. Introduction to Perl
#    A. Writing Files
#    B. Conditional Statements
#    C. Pattern Matching and Regular Expressions
#    D. Subroutines and program modules
#    E. BioPerl
#
# NOTE(review): the numbered programs below look like independent example
# scripts (several end with `exit;` and Program 1 documents its own command
# line) -- presumably each is meant to be saved and run as its own file.


# Program 1: writing to a file
###################################
#
# use: perl strip_cr.pl input.pdb output.pdb
#
# Copies the input file to the output file, replacing every carriage
# return (CR, "\r") with a line feed (LF, "\n").

# both file names are required; stop early with a usage hint otherwise
die "usage: perl strip_cr.pl input.pdb output.pdb\n" if @ARGV < 2;

# read the input file name : first argument
$in_name = $ARGV[0];
# read the output file name : second argument
$out_name = $ARGV[1];

print "Striping CR from file $in_name and writing to $out_name \n";

# open the file and associate a file handle
# (three-argument open: the file name can never be mistaken for a mode)
open(my $infile, '<', $in_name) or die " can not open $in_name \n";

# open an output file and associate a file handle
open(my $outfile, '>', $out_name) or die " can not create $out_name \n";

# now read the file in a loop
while ($line = <$infile>) {
    # replace each carriage return (CR) with a line feed (LF)
    $line =~ s/\r/\n/g;
    print $outfile $line;
}

# close the files because we are done; buffered write errors only show up
# when the output handle is closed, so that close is checked
close $infile;
close $outfile or die " can not finish writing $out_name \n";
exit;


# Program 2: logical tests
############################
# Shows how the result of a comparison behaves as a boolean, a number
# and a string.

$a = (10 > 1);
#$a = (10 < 1);
print "here is a logical test\n";
print "the logical ($a) is";
if ($a)       { print " true " }
if (!$a)      { print " false " }
if ($a == 0)  { print " zero " }
if ($a == 1)  { print " one " }
if ($a eq "") { print " blank " }
if ($a ne "") { print " not blank" }


# Program 3: Conditional statements
###################################
# note: this program has some non-standard word characters

print "Enter your age: ";
$age = <>;
chomp $age;
if ($age < 21) {
    print "You are too young for this kind of work!\n";
    die "too young";
} # if end
print "You are old enough to know better!\n";


# Program 4: Fun with Strings
###################################
#
# NOTE(review): the spacing inside this record may have been collapsed when
# the handout was converted -- confirm against the original PDB line.
$line = 'SEQRES 1 A 153 ALA LYS HIS VAL LEU';
print "$line \n\n";

# position of the 'A' itself: index() finds the space in front of it
$location = index($line, ' A ') + 1;
# take the character at the computed position so that the reported string
# and the reported location always agree, whatever the column spacing is
$string = substr($line, $location, 1);
print "The string variable $string is found at location $location \n";

# Check for occurrence of a string
$b = 'ALA';
if ($line =~ /$b/) {
    print "The string contains ALA \n";
}

if ($line =~ /^SEQ/) {
    print "The string starts with SEQ \n";
}
else {
    print "But the String did not start with SEQ \n";
}

# split the string at one or more white spaces into an array
print "Splitting the string into an array \n";
@array = split(/\s+/, $line);
for $a (@array) {
    print $a, "\n";
}


# Program 5: hash example
###############################
# Maps three letter amino acid codes (upper case) to one letter codes.
# The lookup below upper-cases the input, so every key must be upper case.

%three_to_one = (
    ALA => 'A',
    CYS => 'C',
    ASP => 'D',
    GLU => 'E',
    PHE => 'F',
    GLY => 'G',
    HIS => 'H',
    ILE => 'I',
    LYS => 'K',
    LEU => 'L',
    MET => 'M',
    ASN => 'N',
    PRO => 'P',
    GLN => 'Q',
    ARG => 'R',
    SER => 'S',
    THR => 'T',
    VAL => 'V',
    TRP => 'W',
    TYR => 'Y',
);

print "Input your three letter amino acid code: ";
while ($code = <>) {
    chomp $code;
    $code = uc $code;
    if (exists $three_to_one{$code}) {
        print "\n The code for $code is $three_to_one{$code} \n";
    }
    else {
        print "Bad codon \"$code\"!! \n";
    }
    print "\nInput your three letter amino acid code: ";
}


# Program 6: Subroutines
################################################
#!/usr/local/bin/perl

$a = 6;
$b = 10;
print "Average $a and $b \n";
$average = calc_ave($a, $b);    # subroutine invocation.
print "The average is $average\n";
exit;

# subroutine definition starts here
# calc_ave($x, $y): returns the arithmetic mean of its two arguments.
sub calc_ave {
    my ($x, $y) = @_;
    my ($result);
    $result = ($x + $y) / 2;
    return $result;
}


# Program 7: including a program module
#############################

# print out subroutine include path
print "The path for included subroutines is:\n";
print @INC, "\n";

# add a path
print "Adding an extra directory to the include path \n";
push(@INC, 'C:\Perl\programs');
print @INC, "\n";

# Load the new program module.
# `use BeginPerlBioinfo;` would be resolved at compile time, i.e. before the
# push above has run, so the extra directory would never be searched.
# `require` followed by `->import` does the same work at run time, after
# @INC has been extended.
require BeginPerlBioinfo;
BeginPerlBioinfo->import;

pause();
$line = "ALA PHE LEU ILE PRO MET\nALA PHE PRO PRO";
print "The input sequence is\n", $line, "\n";

$out = iub3to1($line);
print "The translated sequence is\n", $out, "\n";
pause();
exit;

# pause(): prints a prompt and waits until the user presses enter.
sub pause {
    print "\n Pausing - press enter to continue ?";
    my $input = <STDIN>;
}


# Installing BioPerl
####################################################
# (commands typed at the ppm prompt, not Perl code)
#
# ppm> rep add Bioperl http://bioperl.org/DIST
# ppm> rep add Kobes http://theoryx5.uwinnipeg.ca/ppms
# ppm> rep add Bribes http://www.Bribes.org/perl/ppm
# ppm> search Bioperl
# ppm> install
# ppm> query bioperl
# ppm> exit


# Program 8: Bioperl
####################################################
use Bio::Perl;

# this script will only work if you have an internet connection on the
# computer you're using, the databases you can get sequences from
# are 'swiss', 'genbank', 'genpept', 'embl', and 'refseq'

$seq_object = get_sequence('swiss', "ROA1_HUMAN");
write_sequence(">roa1.fasta", 'fasta', $seq_object);

print "Sequence name is ", $seq_object->display_id, "\n";
print "Sequence acc is ", $seq_object->accession_number, "\n";
print "The sequence is \n", $seq_object->seq(), "\n";


# Program 9: Bioperl
####################################################
use Bio::Perl;

$seq = get_sequence('swiss', "ROA1_HUMAN");

# uses the default database - nr in this case
$blast_result = blast_sequence($seq);
write_blast(">roa1.blast", $blast_result);