/****************************************** Simple Pipeline Processor with cache memory(SP/1C) V1.0 Synthesizable SFL source code. (C)Copyright by Naohiko Shimizu, 1998. All rights are reserved. Contact information: Dr. Naohiko Shimizu School of Engineering, Tokai University 1117 Kitakaname, Hiratsuka-city, Kanagawa, 259-1292, Japan email: nshimizu@et.u-tokai.ac.jp URL: http://shimizu-lab.et.u-tokai.ac.jp/ The above URL is the primary distribution site for SP/1C. You can get latest copy of sp1c.sfl and patch information (if available). SP/1C uses the same instruction set with SP/1. You can use full or part of this file for your project, if and only if the copyright notice is preserved on the every file and the every product which uses SP/1C or derivatives. You can synthesize this processor with PARTHENON. See the URL: http://www.kecl.ntt.co.jp/car/parthe for more information about PARTHENON. You also can down-load the Linux or FreeBSD or SUN version of PARTHENON from the URL: http://www.kecl.ntt.co.jp/car/parthe/html/package/ Features of the processor: Data 8bit, Instruction 16bit, Harvard architecture, 8bit data/inst address spaces, load/store architecture, It has two types of instruction, i.e. R-type and I-type. R-type instructions |OP|r2|r3|r1| - | ... r1 <- r2 op r3 The instructions belong to the R-type are: ADD addition AND logical and SLT set less than (like MIPS architecture) NOT logical not SR logical shift right by one RI Return from interrupt ex. ri I-type instructions |OP|r1|r2| I | ... r1 <- r2(I) The instructions belong to the I-type are: LD load from memory ST store to memory LDA load address(or add immediate) BZ branch on r1 is zero BAL branch and link IN input OUT output There are four general registers. Each of them can be used for R-type and I-type destination or R-type sources, but only $1 through $3 can be used for I-type index register. (like the System/360) There are four internal registers. They are mapped to the IO space, X00 through X03: ir0: Control and provide about interrupt and counter operations. <0>: interrupt mask <1>: counter run <6>: counter interruption occurred <7>: external interruption occurred ir1: The free run down counter. If the counter hit 0x00 and the interrupt mask is 0b1 then an interrupt will be occurred. ir2: The interrupt new PC. ir3: The interrupt old PC. Pipeline operations: 5 stage pipeline IF|ID|EX|ME|WB Stall condition will be detected during the ID stage, and wait the resource availability within the stage. There are three forwarding path within SP/1C. You may stall up to 1 cycle for the I type instruction result on the case of cache hit. On the case of pending loads exist, you can still execute independent instructions in the following pipeline. The load-use hazard will be detected within the ME and ID stage. The ID stage will stall but the ME stage will retry the instruction on the case of hazard. There are two load buffers and two store buffers. There also is a simple branch predicting mechanism for minimize the taken branch stall. C to SP/1 translation sample: ABI: procedure return value : $1 procedure argument (1st): $1 procedure return address : $2 frame pointer : in memory at 'sp' global pointer : in memory at 'gp' size of integer: 8bit Consider the following C code. int foo(int k) { if(k == 0) return(0); return(k+foo(k-1)); } You can translate this code into the SP/1 assembler code like following: .entry foo foo: ld $3,sp ; load frame pointer lda $3,-2($3) ; reserve two integers st $3,sp ; update frame pointer st $2,0($3) ; save return address st $1,1($3) ; save local argument 'k' bz $1,foo_1 ; if 'k' is equal to 0, return lda $1,-1($1) ; make 'k-1' bal $2,foo ; call foo ld $3,sp ; load frame pointer ld $2,1($3) ; load local argument 'k' add $1,$1,$2 ; calculate 'k+return value of foo' foo_1: ld $2,0($3) ; load return address lda $3,2($3) ; calculate original frame pointer st $3,sp ; update frame pointer lda $0,0($0) ; make a zero in $0 bz $0,0($2) ; return from procedure .end Update informations: 13-Nov-1998: $0 forwarding bug corrected. OPCODE for BZ,BAL,RI was changed. These are feed backs from sp/1. 08-Nov-1998: Brush up the code and slightly change the architecture. $0 is now usable for general purpose, internal registers are mapped to the IO space, and interrupts are now change to the level sensitive. 05-Nov-1998: First working code for SP/1C, derieved from SP/1 of 29-Oct-1998 version. ******************************************/ %i %i %i %i /* R-type instructions |OP|r2|r3|r1| - | ... r1 <- r2 op r3 */ %d ADD 0x0 %d AND 0x1 %d SLT 0x3 %d NOT 0x4 %d SR 0x6 /* I-type instructions |OP|r1|r2| I | ... r1 <- r2(I) */ %d LD 0x8 %d ST 0x9 %d LDA 0xa %d RI 0xb %d IN 0xc %d OUT 0xd %d BAL 0xe %d BZ 0xf %d ITYPE op<15> %d OPCODE op<15:12> %d R2 op<11:10> %d R3 op<9:8> %d R1 op<7:6> %d IRSEL op<1:0> %d I op<7:0> %d eOPCODE eop<15:12> %d eR2 eop<11:10> %d eR3 eop<9:8> %d eR1 eop<7:6> %d eI eop<7:0> %d eITYPE eop<15> %d mOPCODE mop<15:12> %d mR2 mop<11:10> %d mR3 mop<9:8> %d mR1 mop<7:6> %d mI mop<7:0> %d mIRSEL mop<1:0> %d wOPCODE wop<15:12> %d wR2 wop<11:10> %d wR3 wop<9:8> %d wR1 wop<7:6> %d wI wop<7:0> %d wIRSEL wop<1:0> %d ALUADD 0b00001 %d ALUSLT 0b00010 %d ALUAND 0b00100 %d ALUNOT 0b01000 %d ALUSR 0b10000 declare sp1c { input inst<16> ; input dti<8> ; output dto<8> ; output iadrs<8> ; output adrs<8> ; instrin extint; instrout inst_read; instrout memory_read; instrout memory_write; } declare reg4 { input in<8> ; input inadr<2> ; input outadr<2> ; input outbadr<2> ; output out<8> ; output outb<8> ; instrin read; instrin readb; instrin write; instr_arg read(outadr); instr_arg readb(outbadr); instr_arg write(inadr,in); } module top { output dto<8>; output adrs<8>; sp1c cpu; r256_8 ram,iram1,iram2; instruct cpu.inst_read cpu.inst = iram1.read(cpu.iadrs).dout || iram2.read(cpu.iadrs).dout ; instruct cpu.memory_read cpu.dti = ram.read(cpu.adrs).dout ; instruct cpu.memory_write ram.write(cpu.adrs,cpu.dto); } module sp1c { input inst<16> ; input dti<8> ; output dto<8> ; output iadrs<8> ; output adrs<8> ; instrin extint; instrself start; instrself retry; instrself targetif; instrself setbtb; instrout inst_read; instrout memory_read; instrout memory_write; reg pc<8> ; /* program counter */ reg tpc<8> ;/* branch target program counter */ reg mrpc<8> ;/* retry target program counter */ reg_wr st0 ; /* power-on reset capable sequence registers */ reg st1 ; reg st2 ; reg inten, cntintflag, cnten ; reg ir1<8>,ir2<8>,ir3<8>; reg eforcenop, mforcenop; /* force NOP register */ reg op<16> ; reg md<8> ; /* stage registers */ reg dpc<8>, epc<8>, eop<16>, eop1<8>, eop2<8>; reg clasel1<4>, clasel2<4>, alusel<5>, euse0<2>, euse1<2>; reg eusemap<4>, musemap<4>; reg mpc<8>, mop<16>, malu<8>, msrc<8>, mar<8>, muse0<2>, muser1<2>; reg mstb0v, mstb0a<8>, mstb0d<8>; reg mstb1v, mstb1a<8>, mstb1d<8>; reg mldb0v, mldb0a<8>, mldb0r<2>, mldb0d<8>; reg mldb1v, mldb1a<8>, mldb1r<2>, mldb1d<8>; reg mrstagebsy, mst<3>; reg wop<2>, walu<8>, xalu<8>, btb<16>; reg ntif, nretry, dtif<3>, etif<3>, mretry, btbv; reg ru1<4>, ru2<4>, ru3<4>, streq; reg rst0<3>, rst1<3>, rst2<3>; reg rst0v, rst1v, rst2v; reg sst<3>; reg tst<3>; reg ust<3>; reg vst<3>; inc8 inc ; dec8 dec, mdec ; cla8 cla ; reg4 gr,dcdata,dctag; sel useR2, useR3, writeR1, writeR2; sel dtR2<8>, dtR3<8>, stsrc<8>, br_taken, npc<8>; sel dclasel1<4>, dclasel2<4>, dalusel<5>; sel_v er2f<8>, clain1<8>, clain2<8>, aluo<8>; sel stall_req, mretryreq, mtag<8>, mhit; sel mstbhit<2>, mcache<8>, mwbreg<2>; sel rst<3>; sel_v targetpc<8>, btbarg<16>; sel srfbusy; sel R2chk, R3chk; sel_v itif<3>, ipc<8>; instr_arg memory_write(dto); instr_arg targetif(targetpc); instr_arg setbtb(btbarg); stage_name initcore { task initt() ; } stage_name int { task intt() ; } stage_name pipectl { task pipet() ; } stage_name counter { task countert() ; } stage_name if { task ift(pc) ; } stage_name id { task idt(dpc,op,dtif) ; } stage_name exec { task ext(epc,eop,eop1,eop2,etif,clasel1,clasel2,alusel,eusemap) ; } stage_name mem { task mmt(mpc,mop,malu,msrc,mretry,musemap) ; } stage_name wrb { task wbt(wop,walu) ; } /* cache refill stages */ stage_name cachereq { task req(mst) ; } stage_name cachectl { task controlt() ; } stage_name srf { task srft(sst) ; } stage_name trf { task trft(tst) ; } stage_name urf { task urft(ust) ; } stage_name vrf { task vrft(vst) ; } stage_name dcache { task clear() ; } /* Common operations for every stages must be described here */ par{ st0 := 0b1 ; st1 := st0; st2 := st1; any { (st2 == 0b0) & (st1 == 0b1): start(); } } instruct extint generate int.intt(); instruct start generate initcore.initt(); instruct retry par { mforcenop := 0b1; mrpc := mdec.do(mpc).out; nretry := 0b1; } instruct targetif par { eforcenop := 0b1; ntif := 0b1; tpc := targetpc; } instruct setbtb par { btbv := 0b1; btb := btbarg; } stage counter { state_name cntrl; first_state cntrl; state cntrl any { cnten: ir1 := dec.do(ir1).out; cnten & (ir1 == 0x00): cntintflag := 0b1; } } stage int { state_name external_int; first_state external_int; state external_int finish; } stage pipectl { state_name cntrl; first_state cntrl; state cntrl par { /* The r2 field designates the data request for R-form instructions and ST and BZ instructions. While the r3 field designates for both R-form and I-form data requests. The data memory address bus should be stable right after the ST operation cycle, and it will be preserved by the stall request. */ useR2 = ( ^ITYPE | (OPCODE == OUT) | (OPCODE == BZ ) | (OPCODE == ST ) ); useR3 = ( (OPCODE == ADD) | (OPCODE == AND) | (OPCODE == SLT) | (ITYPE & ^(R3 == 0b00)) ); writeR1 = ^ITYPE; writeR2 = (OPCODE == LD ) | (OPCODE == LDA) | (OPCODE == BAL) | (OPCODE == IN); stall_req = (useR2 & R2chk) | (useR3 & R3chk) | streq; /* streq means that stall is requested from the memory refill stage. On the retry case, discard previous pipeline results. */ any { dtif<2>: par { ru2 := 0b0000; ru3 := 0b0000; } else: par { ru2 := ru1; ru3 := ru2; } } /* forwarding control logic for SP/1 */ R2chk = (( ru1<3> & ((R2 @ ru1<2:1>) == 0b00)) & ^dclasel1<1>) | ( mldb0v & ((R2 @ mldb0r) == 0b00)) | ( mldb1v & ((R2 @ mldb1r) == 0b00)) ; R3chk = (( ru1<3> & ((R3 @ ru1<2:1>) == 0b00)) & ^dclasel2<1>) | ( mldb0v & ((R3 @ mldb0r) == 0b00)) | ( mldb1v & ((R3 @ mldb1r) == 0b00)) ; alt { ((0b1 || R2 || 0b0) @ ru1) == 0x0: dclasel1 = 0b0010; ((0b1 || R2 ) @ ru2<3:1> ) == 0b000: dclasel1 = 0b0100; ((0b1 || R2 ) @ ru3<3:1> ) == 0b000: dclasel1 = 0b1000; else: dclasel1 = 0b0001; } alt { ITYPE & (R3 == 0b00): dclasel2 = 0x1; ((0b1 || R3 || 0b0) @ ru1) == 0x0: dclasel2 = 0b0010; ((0b1 || R3 ) @ ru2<3:1> ) == 0b000: dclasel2 = 0b0100; ((0b1 || R3 ) @ ru3<3:1> ) == 0b000: dclasel2 = 0b1000; else: dclasel2 = 0b0001; } any { ^stall_req & id.idt & writeR1 : ru1 := 0b1 || R1 || 0b0; ^stall_req & id.idt & writeR2 : ru1 := 0b1 || R2 || (OPCODE == LD ) | (OPCODE == IN ) | (OPCODE == BAL) ; else: ru1 := 0b0000; } } } stage if { state_name fetch,int1,int2; first_state fetch ; state fetch any { inten & (int.intt | cntintflag) : goto int1; else: par { /* ntif and tpc will be set in the exec stage on the case of taken branch. nretry will be set in the mem stage on the case of instruction retry for the memory operations. */ alt { nretry : par { npc = inc.do(mrpc).out; iadrs = mrpc; } ntif : par { npc = inc.do(tpc).out; iadrs = tpc; } else : par { npc = inc.do(pc).out; iadrs = pc; } } any { /* stall_req will be issued in the decode stage. Because there is no way to cancel the relayed nor generated stages, stall_req is not a latched signal. Be careful for the signal delay.*/ stall_req == 0b0: par { any { ntif : ntif := 0b0; nretry : nretry := 0b0; } /* any */ any { btbv & ((npc @ btb<15:8>) == 0x00): generate if.ift(btb<7:0>); else: generate if.ift(npc); } /* any */ relay id.idt(npc, inst_read().inst, nretry || (btbv & ((npc @ btb<15:8>) == 0x00)) || ntif) ; } /* par */ } /* any */ } /* par */ } /* any */ state int1 par { inten := 0b0; goto int2; } state int2 par { /* At this state the last instruction in pipe will sit on the mem stage, and the npc will show the real address for returning interrupt. */ targetif(ir2); ir3 := npc; goto fetch; } } stage id { state_name decode ; first_state decode ; state decode par { dtR2 = gr.read(R2).out; any { ITYPE & (R3 == 0b00):dtR3 = 0x00; else :dtR3 = gr.readb(R3).outb; } any { OPCODE == SLT : dalusel = ALUSLT; OPCODE == AND : dalusel = ALUAND; OPCODE == NOT : dalusel = ALUNOT; OPCODE == SR : dalusel = ALUSR; else : dalusel = ALUADD; } any { ^stall_req: relay exec.ext(dpc, op, dtR2, dtR3 ,dtif, dclasel1, dclasel2, dalusel, writeR1 || writeR2 || useR2 || useR3) ; } /* any */ } /* par */ } /* stage */ stage exec { state_name execute ; first_state execute ; state execute par { br_taken = (eOPCODE == BAL) | ((eOPCODE == BZ) & (er2f == 0x00)); /* alu input1 forwarding control */ any { eITYPE: clain1 = eI; else: clain1 = er2f; } /* alu input2 forwarding control */ any { clasel2<0>: clain2 = eop2; clasel2<1>: clain2 = malu; clasel2<2>: clain2 = walu; clasel2<3>: clain2 = xalu; } /* alu er2 forwarding control */ any { clasel1<0>: er2f = eop1; clasel1<1>: er2f = malu; clasel1<2>: er2f = walu; clasel1<3>: er2f = xalu; } any { alusel<0>: aluo = cla.do(0b0, clain1, clain2).out; alusel<1>: aluo = 0b0000000 || cla.do(0b1, clain1, ^clain2).out<7>; alusel<2> : aluo = clain1 & clain2; alusel<3> : aluo = ^clain1; alusel<4> : aluo = 0b0 || clain1<7:1> ; } any { (eforcenop == 0b0) | (etif<3> == 0b1) | (etif<0> == 0b1): par { any { (eOPCODE == RI): par { targetif(ir3); finish; } br_taken: par { setbtb(epc || cla.out); any { (eOPCODE == BZ): finish; else: relay mem.mmt( epc, eop, aluo, er2f, etif<2>, eusemap); } any { /* prediction miss or not predicted case */ ^etif<1> | ^((btb<15:8> @ epc) == 0x00) | ^((btb<7:0> @ cla.out) == 0x00) : targetif(cla.out); else: eforcenop := 0b0; } } /* par */ else: par { any { /* prediction miss not taken case */ etif<1> & ((btb<15:8> @ epc) == 0x00): par { targetif(epc); /* on the case of miss BTB will be discarded */ btbv := 0b0; } else: eforcenop := 0b0; } any { (eOPCODE == BZ): finish; else: relay mem.mmt( epc, eop, aluo, er2f, etif<2>, eusemap); } } /* par */ } /* any */ } /* par */ else: finish; } /* any */ } /* par */ } stage mem { state_name memop ; first_state memop ; state memop par { /* All information for the cache refill operation will be passed via the load/store buffers and the buffer will be designated by the 'mst' register. */ any { (mforcenop == 0b0) | (mretry == 0b1): par { /* retry at mem stage will be initiated on the following conditions: 1. R2 is in the pending load buffer 2. R3 is in the pending load buffer 3. destination is in the pending load buffer (WAW hazard) 4. miss hit LD and load buffer full 5. ST and store buffer full */ mretryreq = (musemap<1> & ( (mldb0v & ((mR2 @ mldb0r) == 0b00)) | (mldb1v & ((mR2 @ mldb1r) == 0b00)) )) | (musemap<0> & ( (mldb0v & ((mR3 @ mldb0r) == 0b00)) | (mldb1v & ((mR3 @ mldb1r) == 0b00)) )) | (musemap<3> & ( (mldb0v & ((mR1 @ mldb0r) == 0b00)) | (mldb1v & ((mR1 @ mldb1r) == 0b00)) )) | (musemap<2> & ( (mldb0v & ((mR2 @ mldb0r) == 0b00)) | (mldb1v & ((mR2 @ mldb1r) == 0b00)) )) ; any { musemap<3>: mwbreg = mR1; musemap<2>: mwbreg = mR2; } any { mretryreq: par { retry(); finish; } else: par { mtag = dctag.read(malu<1:0>).out; mhit = mtag<0> & ((mtag<7:2> @ malu<7:2>) == 0b000000); mstbhit = (mstb0v & ((malu @ mstb0a) == 0x00)) || (mstb1v & ((malu @ mstb1a) == 0x00)) ; par { any { mforcenop: mforcenop := 0b0; } any { mOPCODE == LD : par { mcache = dcdata.read(malu<1:0>).out; /* we should reffer store buffers first, and if not hit in the buffer then check the cache hit. */ alt { mstbhit<0>: relay wrb.wbt(mwbreg, mstb0d ) ; mstbhit<1>: relay wrb.wbt(mwbreg, mstb1d ) ; mhit: relay wrb.wbt(mwbreg, mcache ) ; else: alt { ^mldb0v: par{ mldb0v := 0b1; mldb0r := mR2; mldb0a := malu; relay cachereq.req(0b000); } ^mldb1v: par{ mldb1v := 0b1; mldb1r := mR2; mldb1a := malu; relay cachereq.req(0b001); } else: par { retry(); finish; } } /*alt*/ }/*alt*/ }/*par*/ mOPCODE == ST : par { alt { ^mstb0v: par{ mstb0v := 0b1; mstb0d := msrc; mstb0a := malu; relay cachereq.req(0b1 || mhit || 0b0); } ^mstb1v: par{ mstb1v := 0b1; mstb1d := msrc; mstb1a := malu; relay cachereq.req(0b1 || mhit || 0b1); } else: par { retry(); finish; } } } mOPCODE == BAL : relay wrb.wbt(mwbreg, mpc) ; mOPCODE == IN: any { malu == 0x00: relay wrb.wbt(mwbreg, int.intt || cntintflag || 0b0000 || cnten || inten ) ; malu == 0x01: relay wrb.wbt(mwbreg, ir1) ; malu == 0x02: relay wrb.wbt(mwbreg, ir2) ; malu == 0x03: relay wrb.wbt(mwbreg, ir3) ; } (mOPCODE == OUT) :par { any { malu == 0x00: par { cntintflag:=msrc<6>; cnten:=msrc<1>; inten:=msrc<0>; } malu == 0x01: ir1:=msrc; malu == 0x02: ir2:=msrc; malu == 0x03: ir3:=msrc; } finish; } else : relay wrb.wbt(mwbreg, malu) ; } /* any */ } /* par */ } /* par */ } /* any */ } /* par */ else: par { finish; } } /* any */ } /* par */ } stage wrb { state_name writeback ; first_state writeback ; state writeback par { xalu := walu; gr.write(wop,walu); finish; } } /* resetting registers */ stage initcore { state_name ini0,ini1,ini2; first_state ini0; state ini0 par { generate pipectl.pipet(); generate cachectl.controlt(); generate counter.countert(); generate dcache.clear(); inten := 0b0; cnten := 0b0; cntintflag := 0b0; btb := 0x0000; btbv := 0b0; op := 0x0000; eforcenop :=0b0; mforcenop :=0b0; mrstagebsy :=0b0; mldb0v :=0b0; mldb1v :=0b0; mstb0v :=0b0; mstb1v :=0b0; ntif :=0b0; dtif :=0b000; etif :=0b000; nretry :=0b0; ru1 :=0x0; ru2 := 0x0; ru3 := 0x0; streq :=0b0; rst0v := 0b0; rst1v := 0b0; rst2v := 0b0; goto ini1; } state ini1 goto ini2; state ini2 par { generate if.ift(0x00); finish; } } stage dcache { state_name cl0,cl1,cl2,cl3 ; first_state cl0 ; state cl0 par { dctag.write(0b00,0x00); goto cl1; } state cl1 par { dctag.write(0b01,0x00); goto cl2; } state cl2 par { dctag.write(0b10,0x00); goto cl3; } state cl3 par { dctag.write(0b11,0x00); finish; } } stage cachereq { state_name i; first_state i; state i par { finish; } } stage cachectl { state_name st ; first_state st ; state st par { any { /* PARTHENON does not drive a signal when a stage is not initiated, we now check the stage task and set the signal by ourself. */ ^srf.srft: srfbusy = 0b0; ^srfbusy | cachereq.req: alt { rst2v: par { rst = rst2; rst2v := cachereq.req; } rst1v: par { rst = rst1; rst2v := srfbusy & cachereq.req; rst1v := cachereq.req; } rst0v: par { rst = rst0; rst1v := srfbusy & cachereq.req; rst0v := cachereq.req; } else : par { rst = mst; rst0v := srfbusy & cachereq.req; } } } /* The streq makes a slot of mem stage on the following streams. On the store instruction, only cache hit case needs the streq. rst<2>: store/load reqest rst<1>: hit on the store operation(cache write requested) rst<0>: buffer 1/0 select The 'rst0' through 'rst2' makes a FIFO buffer. */ any { cachereq.req: par { alt { rst2v: par { rst0 := mst; rst1 := rst0; rst2 := rst1; } rst1v: par { rst0 := mst; rst1 := rst0; rst2 := rst1; } rst0v: par { rst0 := mst; rst1 := rst0; } else: par { rst0 := mst; } } } /* The load/store address must be stable for two cycles. Then when the srf task is empty and there are refill request or pending refill requests, we will start the following stages. If you need more cycles for memory operations, the srfbusy must be set properly. The 'streq' signal is feed to the ID stage, and it makes a slot in the following pipeline for using the resources in the cache pipeline on the case of miss load or hit store. */ ^srfbusy & (cachereq.req | rst0v): par { streq := ^rst<2> | rst<1> ; generate srf.srft(rst); any { (rst<2> || rst<0>)== 0b00: /* load buffer 0 */ mar := mldb0a; (rst<2> || rst<0>) == 0b01: /* load buffer 1 */ mar := mldb1a; (rst<2> || rst<0>) == 0b10: /* store buffer 0 */ mar := mstb0a; (rst<2> || rst<0>) == 0b11: /* store buffer 1 */ mar := mstb1a; }/* any */ } /* par */ } /* any */ adrs = mar; } /* par */ } stage srf { state_name st ; first_state st ; state st par { srfbusy = 0b1; any { sst<2> : any { ^sst<0>: memory_write(mstb0d); sst<0>: memory_write(mstb1d); } ^sst<2>: any { ^sst<0>: mldb0d := memory_read().dti ; sst<0>: mldb1d := memory_read().dti ; } streq: streq := 0b0; } any { /* On the case of load or hit store, cache must be maintained. */ ^sst<2> | sst<1> : relay trf.trft(sst); /* Not hit store will be terminated here. */ else: par { any { ^sst<0>: mstb0v := 0b0; sst<0>: mstb1v := 0b0; } finish; } } } } /* The stage trf is corresponding to the exec stage of the stalled instruction. */ stage trf { state_name st ; first_state st ; state st par { relay urf.urft(tst); } } /* The stage urf is corresponding to the mem stage of the stalled instruction. We can modify the cache safely. */ stage urf { state_name st ; first_state st ; state st par { any { ust<2> : par { finish; any { /* store request */ ^ust<0>: par { /* buffer 0 */ dcdata.write(mstb0a<1:0>, mstb0d); dctag.write(mstb0a<1:0>, mstb0a<7:2> || 0b01); mstb0v := 0b0; } ust<0>: par { /* buffer 1 */ dcdata.write(mstb1a<1:0>, mstb1d); dctag.write(mstb1a<1:0>, mstb1a<7:2> || 0b01); mstb1v := 0b0; } } } ^ust<2>: par { relay vrf.vrft(ust); any { /* load request */ ^ust<0>: par { /* buffer 0 */ dcdata.write(mldb0a<1:0>, mldb0d); dctag.write(mldb0a<1:0>, mldb0a<7:2> || 0b01); } ust<0>: par { /* buffer 1 */ dcdata.write(mldb1a<1:0>, mldb1d); dctag.write(mldb1a<1:0>, mldb1a<7:2> || 0b01); } } } } } } /* The stage vrf is corresponding to the wb stage of the stalled instruction. We can modify the register safely. */ stage vrf { state_name st ; first_state st ; state st par { any { ^ust<0>: par { /* buffer 0 */ mldb0v := 0b0; gr.write(mldb0r, mldb0d); } ust<0>: par { /* buffer 1 */ mldb1v := 0b0; gr.write(mldb1r, mldb1d); } } finish; } } } module reg4 { input in<8> ; input inadr<2> ; input outadr<2> ; input outbadr<2> ; output out<8> ; output outb<8> ; instrin read; instrin readb; instrin write; reg r0<8>,r1<8>,r2<8>,r3<8> ; instruct read any { outadr == 0b00: out = r0; outadr == 0b01: out = r1; outadr == 0b10: out = r2; outadr == 0b11: out = r3; } instruct readb any { outbadr == 0b00: outb = r0; outbadr == 0b01: outb = r1; outbadr == 0b10: outb = r2; outbadr == 0b11: outb = r3; } instruct write any { inadr == 0b00: r0 := in; inadr == 0b01: r1 := in; inadr == 0b10: r2 := in; inadr == 0b11: r3 := in; } }