#!/usr/local/perl/bin/perl -w
# PICO Assembler ver2.01
# Time-stamp: <2003-06-24 17:27:00 kaneko>
# report bugs to kaneko@am.ics.keio.ac.jp
#
# 1. "pasm foo.s" outputs foo.o
# 2. output format can be either for verilog $readmemh() or sfl script file
# 3. everything is case insensitive
# 4. hex (0x??) and decimal are accepted as immediates (#)
# 5. '#' starts a comment
# 6. one instruction per line; a line break starts a new instruction
# 7. '@' is the address directive, written in either decimal or hex

use strict;
use warnings;

############################################################################
# -- USER DEFINED VARIABLES -->
# comment either one out
#my $target = "verilog";
my $target = "sfl";
# 8 if the memory word size is a single byte, 16 if it is a double byte
my $word_size = 8;
# address increment (usually 1 or 2)
my $addr_inc = 2;
# name of the instruction memory module, only needed for sfl output
my $sfl_memory = "/IMEM/memory";
# <-- USER DEFINED VARIABLES --
############################################################################

# An example of valid pasm input, which does the bubble sort
#
# @0
#         ldli r2, #0x12      # tail of the data memory address
#         ldli r1, #0x09      # sorting ten items
# loop0:                      # outer loop {
#         mv   r4, r2
#         ldli r3, #0x09
# loop1:                      # inner loop {
#         ld   r5, (r4)
#         subi r4, #0x02
#         ld   r6, (r4)
#         sub  r6, r5
#         bmi  r6, noswap
#                             # swap {
#         ld   r6, (r4)
#         st   (r4), r5
#         addi r4, #0x02
#         st   (r4), r6
#         subi r4, #0x02
#                             # } swap
# noswap:
#         subi r3, #0x01
#         bnez r3, loop1
#                             # } inner loop
#         subi r1, #0x01
#         bnez r1, loop0
#                             # } outer loop
#         ldli r2, #0x00
# endloop:
#         beqz r2, endloop

# -- MNEMONIC DEFINITIONS -->
# D: destination reg, S: source reg, I: 8-bit immediate, O: 11-bit offset
my %encode = (
    # R-type
    "nop",  "00000_000_000_00000",
    "mv",   "00000_D_S_00001",
    "and",  "00000_D_S_00010",
    "or",   "00000_D_S_00011",
    "xor",  "00000_D_S_00100",
    "not",  "00000_D_S_00101",
    "add",  "00000_D_S_00110",
    "sub",  "00000_D_S_00111",
    "ld",   "00000_D_S_01000",
    "st",   "00000_D_S_01001",
    "lb",   "00000_D_S_01010",
    "sb",   "00000_D_S_01011",
    "sl",   "00000_D_S_01100",
    "sr",   "00000_D_S_01101",
    "rfi",  "00000_000000_10000",
    "eint", "00000_000000_10001",
    "liar", "00000_D_000_10010",
    "siar", "00000_000_S_10011",
    "dint", "00000_000000_10100",
    # I-type
    "addi", "00110_D_I",
    "subi", "00111_D_I",
    "andi", "00010_D_I",
    "ori",  "00011_D_I",
    "xori", "00100_D_I",
    "jalr", "01000_D_00000000",
    "bnez", "01001_D_I",
    "beqz", "01010_D_I",
    "bmi",  "01011_D_I",
    "bpl",  "01100_D_I",
    "jr",   "01110_D_00000000",
    "ldli", "11100_D_I",
    "ldhi", "11101_D_I",
    # J-type
    "jal",  "01101_O",
    "jmp",  "01111_O",
);

# branch operations that use the relative address instead of the absolute
my $relative_branch = "^((01001)|(01010)|(01011)|(01100))";

# Source line currently being processed; used as the prefix of every
# diagnostic.  During label resolution it is reset to the line of the
# instruction being resolved.
my $line_num = 0;

# -- SUBROUTINES -->

# Print "line N: <message>" and terminate with exit status 1.
sub fail {
    my ($message) = @_;
    print "line $line_num: $message\n";
    exit 1;
}

# Convert a "0x.." literal (prefix and digits in either case) to its decimal
# value.  Anything else, including a malformed hex literal, is returned
# unchanged so that the caller can decide whether it is a number or a label.
sub hex2dec {
    my ($text) = @_;
    if ($text =~ m/^0x([0-9a-f]+)$/i) {
        return hex $1;
    }
    return $text;
}

# Convert a decimal or hex literal to a string of the $bits least
# significant bits of its 32-bit two's complement representation.
# Terminates the program when the value is not an integer literal.
sub dec2bin {
    my ($value, $bits) = @_;
    my $dec = hex2dec($value);
    unless ($dec =~ m/^[+-]?\d+$/) {
        fail("$dec, invalid decimal given");
    }
    my $word = unpack("B32", pack("N", $dec));
    return substr $word, (32 - $bits);
}

# Convert a string of binary digits (at most 32 are used) to lower-case hex,
# zero padded to at least four digits.
sub bin2hex {
    my ($bin) = @_;
    # left-pad to a 32-bit word, then read that word back as a number
    my $padded = substr(("0" x 32) . $bin, -32);
    return sprintf "%04x", unpack("N", pack("B32", $padded));
}

# Return the 3-bit field for a register token ("r0" .. "r7", any case), or
# undef when the token is missing or does not start with 'r'.  A token that
# starts with 'r' but does not name one of the eight registers is an error:
# the field is only three bits wide, so e.g. r8 would silently become r0.
sub register_bits {
    my ($token) = @_;
    return unless defined($token) && $token =~ m/^r(.*)$/i;
    my $number = hex2dec($1);
    unless ($number =~ m/^\d+$/ && $number <= 7) {
        fail("$token, invalid register (r0-r7 expected)");
    }
    return dec2bin($number, 3);
}

# Interpret an immediate/offset operand.  Returns (bit string, undef) for a
# numeric operand and (undef, lower-cased label name) for anything else; the
# label is resolved once every address is known.
sub immediate_or_label {
    my ($token, $width) = @_;
    (my $operand = $token) =~ s/\#//;    # the '#' prefix is optional
    if (hex2dec($operand) =~ m/^[+-]?\d+$/) {
        return (dec2bin($operand, $width), undef);
    }
    return (undef, lc $operand);
}

# Assemble the given source lines and return the lines of the output file
# (each terminated by a newline).  Any error in the source terminates the
# program through fail().
sub assemble {
    my @source = @_;

    # absorb the grammatical difference between verilog and sfl
    my ($comment, $addr_prefix, $hex_pre);
    if ($target eq "verilog") {
        ($comment, $addr_prefix, $hex_pre) = ("//", "\@", "");
    }
    elsif ($target eq "sfl") {
        ($comment, $addr_prefix, $hex_pre) = ("\#", "memset $sfl_memory X", "X");
    }
    else {
        die "unknown target \"$target\" (use \"verilog\" or \"sfl\")\n";
    }

    my @insns;     # one record per instruction: bits, text, label, line
    my @addr;      # address of each instruction, by instruction index
    my %labels;    # lower-cased label name -> instruction index
    $addr[0]  = 0;
    $line_num = 0;

    # ---- pass 1: parse, encode everything that does not need a label ----
    foreach my $raw (@source) {
        my $line = $raw;
        chomp $line;
        $line_num++;

        $line =~ s/[(),]/ /g;    # parentheses and commas are just separators

        # A whole-line comment must be recognised before the label check,
        # otherwise "#note: text" would define a label named "#note".
        next if $line =~ m/^\s*\#/;

        my $index = scalar @insns;

        # labeling information; the label is stripped for further parsing
        if ($line =~ s/^\s*(\S+)\s*\://) {
            $labels{lc $1} = $index;
        }

        # address directive
        if ($line =~ m/^\s*\@\s*(\S+)/) {
            my $directed_addr = hex2dec($1);
            fail("syntax error [\@]") unless $directed_addr =~ m/^\d+$/;
            $addr[$index] = $directed_addr;
            next;
        }

        # trailing comment after a label, or nothing left at all
        next if $line =~ m/^\s*(?:\#|$)/;

        $line =~ s/^\s+//;
        my @tokens   = split /\s+/, $line;
        my $mnemonic = lc $tokens[0];
        fail("no such opecode $mnemonic") unless exists $encode{$mnemonic};

        my $template = $encode{$mnemonic};
        my $bits     = $template;                   # encoding for pico
        my $text     = sprintf "%-4s", $mnemonic;   # human readable comment
        my $label;                                  # pending label, if any

        if ($template =~ m/D/) {
            my $rd = register_bits($tokens[1]);
            fail("syntax error [D]") unless defined $rd;
            $text .= " " . $tokens[1];
            $bits =~ s/D/$rd/;
        }

        if ($template =~ m/S/) {
            my $rs = register_bits($tokens[2]);
            if (defined $rs) {                      # all others
                $text .= ", " . $tokens[2];
            }
            elsif ($template !~ m/D/
                   && defined($rs = register_bits($tokens[1]))) {    # SIAR
                $text .= " " . $tokens[1];
            }
            else {
                fail("syntax error [S]");
            }
            $bits =~ s/S/$rs/;
        }

        if ($template =~ m/I/) {
            fail("syntax error [E]") unless defined $tokens[2];
            $text .= ", " . $tokens[2];
            my ($imm, $name) = immediate_or_label($tokens[2], 8);
            if (defined $imm) {
                $bits =~ s/I/$imm/;
            }
            else {
                $label = $name;    # 'I' stays in place until pass 2
            }
        }

        if ($template =~ m/O/) {
            fail("syntax error [O]") unless defined $tokens[1];
            $text .= " " . $tokens[1];
            my ($offset, $name) = immediate_or_label($tokens[1], 11);
            if (defined $offset) {
                $bits =~ s/O/$offset/;
            }
            else {
                $label = $name;    # 'O' stays in place until pass 2
            }
        }

        unless (defined $addr[$index]) {
            $addr[$index] = $addr[$index - 1] + $addr_inc;
        }

        push @insns, {
            bits  => $bits,
            text  => $text,
            label => $label,
            line  => $line_num,
        };
    }

    # A label placed after the last instruction refers to the next free
    # address (unless an address directive has already fixed it).
    my $count = scalar @insns;
    unless (defined $addr[$count]) {
        $addr[$count] = $addr[$count - 1] + $addr_inc;
    }

    # label names grouped by the instruction they precede, in stable order
    my %names_at;
    foreach my $name (sort keys %labels) {
        push @{ $names_at{ $labels{$name} } }, $name;
    }

    # ---- pass 2: label substitution and output ----
    my @output;
    for my $n (0 .. $count - 1) {
        my $insn = $insns[$n];
        my $bits = $insn->{bits};
        $line_num = $insn->{line};

        if (defined $insn->{label}) {
            my $name = $insn->{label};
            fail("undefined label $name") unless exists $labels{$name};
            my $label_addr = $addr[ $labels{$name} ];
            my $next_addr  = $addr[$n] + $addr_inc;

            if ($bits =~ m/O/) {
                # the jump offset is always relative
                my $offset = dec2bin($label_addr - $next_addr, 11);
                $bits =~ s/O/$offset/;
            }
            else {
                # displacement for branches, absolute value for the rest
                my $value = ($bits =~ m/$relative_branch/)
                          ? $label_addr - $next_addr
                          : $label_addr;
                my $imm = dec2bin($value, 8);
                $bits =~ s/I/$imm/;
            }
        }

        foreach my $name (@{ $names_at{$n} || [] }) {
            push @output,
                sprintf("%s %s \@%s%x\n", $comment, $name, $hex_pre, $addr[$n]);
        }

        $bits =~ s/_//g;
        my $hex_inst = bin2hex($bits);
        if ($word_size == 8) {
            $hex_inst =~ s/(\S{2})(\S{2})/$hex_pre$1 $hex_pre$2/g;
        }
        elsif ($word_size == 16) {
            $hex_inst = $hex_pre . $hex_inst;
        }

        # The instruction text is passed as an argument, never as part of
        # the format, so a '%' typed in the source cannot be misread.
        push @output,
            sprintf("%s%04x %s %s %s\n",
                    $addr_prefix, $addr[$n], $hex_inst, $comment, $insn->{text});
    }

    return @output;
}

# Command line entry point: assemble the single source file named in @args
# and write the result next to it.  Returns 0 on success.
sub main {
    my @args = @_;
    if (@args != 1) {
        print "usage: pasm.pl src_file.s\n";
        exit 1;
    }

    my $src_file = $args[0];
    open my $src, '<', $src_file
        or die "file $src_file could not be opened\n";
    my @source = <$src>;
    close $src;

    # Assemble first: the output file is only touched when the whole source
    # is valid, so a failed run never destroys an earlier result.
    my @output = assemble(@source);

    # foo.s -> foo.o; a name without the .s suffix simply gets .o appended,
    # so the output can never be the source file itself.
    my $bin_file = $src_file;
    unless ($bin_file =~ s/\.s$/.o/i) {
        $bin_file .= ".o";
    }

    open my $bin, '>', $bin_file
        or die "file $bin_file could not be opened\n";
    print {$bin} @output;
    close $bin
        or die "file $bin_file could not be written\n";

    return 0;
}

############################################################################
# -- STARTING POINT -->
# Run only when executed as a script, so the subroutines above can also be
# loaded (require) without side effects.
exit(main(@ARGV)) unless caller;

1;