在龙芯1C上移植硬浮点FPU到RT-Thread

来源:互联网 发布:点击进入f2c最新域名 编辑:程序博客网 时间:2024/06/06 15:22
已经将龙芯1C的FPU移植到裸机编程了,如果对龙芯1C的FPU不太了解,不妨先移步到《【龙芯1c库】移植硬浮点FPU》http://blog.csdn.net/caogos/article/details/77219853,该篇文章以《see mips run》为理论依据,并结合龙芯1C的实际,详细讲解了移植FPU的要点,包括移植到实时系统。

本文以RT-Thread为例,继续分享我是如何在上一篇文章的指导下一步一步移植FPU到RT-Thread的。

移植要点

先来回顾《【龙芯1c库】移植硬浮点FPU》中的移植要点
1,和裸机编程一样,需要初始化FPU,初始化的函数也是一样的
2,需要在中断和上下文切换时保存用于浮点运算的16个寄存器$f0,$f2,$f4, ...... ,$f28,$f30

其中,要点一说的FPU初始化函数如下,

/** * init hardware FPU */void rt_hw_fpu_init(void){    rt_uint32_t c0_status = 0;    rt_uint32_t c1_status = 0;    // 使能协处理器1--FPU    c0_status = read_c0_status();    c0_status |= (ST0_CU1 | ST0_FR);    write_c0_status(c0_status);    // 配置FPU    c1_status = read_c1_status();    c1_status |= (FPU_CSR_FS | FPU_CSR_FO | FPU_CSR_FN);    // set FS, FO, FN    c1_status &= ~(FPU_CSR_ALL_E);                          // disable exception    c1_status = (c1_status & (~FPU_CSR_RM)) | FPU_CSR_RN;   // set RN    write_c1_status(c1_status);    return ;}

只需在“bsp\ls1cdev\drivers\board.c”的函数rt_hw_board_init()中调用即可。
本文重点放在要点2上

在中断和上下文切换时保存用于浮点运算的浮点寄存器

分析中断时保存通用寄存器的SAVE_ALL和RESTORE_ALL_AND_RET

分析SAVE_ALL

先来看下SAVE_ALL的源码截图

宏SAVE_ALL有多个宏组成,第一个宏为SAVE_SOME,下面详细分析SAVE_SOME。
move    k1, sp  的功能是k1=sp,即sp的值赋给k1
move    k0, sp  的功能是k0=sp
PTR_SUBU sp, k1, PT_SIZE 的功能是 sp = k1 - PT_SIZE,即把栈指针sp向下移动
LONG_S    k0, PT_R29(sp)  的功能是 *(sp + PT_R29) = k0,即将k0(之前的sp值)压栈
LONG_S    $3, PT_R3(sp)   的功能是 *(sp + PT_R3) = $3,即将通用寄存器$3中的值压栈
……
后面的类似,都是压栈,包括后面的宏SAVE_AT,宏SAVE_TEMP,宏SAVE_STATIC。
汇编中用到的宏PT_R29、PT_R3等,表示相应的寄存器$29、$3等在栈中相对sp的偏移地址。宏PT_SIZE为所有寄存器的入栈后占用的总的空间大小。具体源码如下

/*
 * Offsets, relative to sp, of each saved general-purpose register in the
 * exception stack frame. Each slot is LONGSIZE bytes.
 */
#define PT_R0           (0)                         /* 0 */
#define PT_R1           ((PT_R0) + LONGSIZE)        /* 1 */
#define PT_R2           ((PT_R1) + LONGSIZE)        /* 2 */
#define PT_R3           ((PT_R2) + LONGSIZE)        /* 3 */
#define PT_R4           ((PT_R3) + LONGSIZE)        /* 4 */
#define PT_R5           ((PT_R4) + LONGSIZE)        /* 5 */
#define PT_R6           ((PT_R5) + LONGSIZE)        /* 6 */
#define PT_R7           ((PT_R6) + LONGSIZE)        /* 7 */
#define PT_R8           ((PT_R7) + LONGSIZE)        /* 8 */
#define PT_R9           ((PT_R8) + LONGSIZE)        /* 9 */
#define PT_R10          ((PT_R9) + LONGSIZE)        /* 10 */
#define PT_R11          ((PT_R10) + LONGSIZE)       /* 11 */
#define PT_R12          ((PT_R11) + LONGSIZE)       /* 12 */
#define PT_R13          ((PT_R12) + LONGSIZE)       /* 13 */
#define PT_R14          ((PT_R13) + LONGSIZE)       /* 14 */
#define PT_R15          ((PT_R14) + LONGSIZE)       /* 15 */
#define PT_R16          ((PT_R15) + LONGSIZE)       /* 16 */
#define PT_R17          ((PT_R16) + LONGSIZE)       /* 17 */
#define PT_R18          ((PT_R17) + LONGSIZE)       /* 18 */
#define PT_R19          ((PT_R18) + LONGSIZE)       /* 19 */
#define PT_R20          ((PT_R19) + LONGSIZE)       /* 20 */
#define PT_R21          ((PT_R20) + LONGSIZE)       /* 21 */
#define PT_R22          ((PT_R21) + LONGSIZE)       /* 22 */
#define PT_R23          ((PT_R22) + LONGSIZE)       /* 23 */
#define PT_R24          ((PT_R23) + LONGSIZE)       /* 24 */
#define PT_R25          ((PT_R24) + LONGSIZE)       /* 25 */
#define PT_R26          ((PT_R25) + LONGSIZE)       /* 26 */
#define PT_R27          ((PT_R26) + LONGSIZE)       /* 27 */
#define PT_R28          ((PT_R27) + LONGSIZE)       /* 28 */
#define PT_R29          ((PT_R28) + LONGSIZE)       /* 29 */
#define PT_R30          ((PT_R29) + LONGSIZE)       /* 30 */
#define PT_R31          ((PT_R30) + LONGSIZE)       /* 31 */

/*
 * Saved special registers
 */
#define PT_STATUS       ((PT_R31) + LONGSIZE)       /* 32 */
#define PT_HI           ((PT_STATUS) + LONGSIZE)    /* 33 */
#define PT_LO           ((PT_HI) + LONGSIZE)        /* 34 */
#define PT_BADVADDR     ((PT_LO) + LONGSIZE)        /* 35 */
#define PT_CAUSE        ((PT_BADVADDR) + LONGSIZE)  /* 36 */
#define PT_EPC          ((PT_CAUSE) + LONGSIZE)     /* 37 */

/* Total frame size, rounded up to a multiple of PTRSIZE. */
#define PT_SIZE         ((((PT_EPC) + LONGSIZE) + (PTRSIZE-1)) & ~(PTRSIZE-1))

比如,寄存器$0的偏移地址为0,所以有“#define PT_R0        (0)”,寄存器$1的偏移地址为寄存器$0的地址加上寄存器$0的大小(4字节),所以有“#define PT_R1        ((PT_R0) + LONGSIZE)”,其它的类似。

分析RESTORE_ALL_AND_RET

宏RESTORE_ALL_AND_RET的源码为

/*
 * RESTORE_ALL_AND_RET: expand the register-restore macros in order
 * (TEMP, STATIC, AT, SOME) and finish with RESTORE_SP_AND_RET.
 */
.macro  RESTORE_ALL_AND_RET
    RESTORE_TEMP
    RESTORE_STATIC
    RESTORE_AT
    RESTORE_SOME
    RESTORE_SP_AND_RET
.endm

从宏名字和执行顺序看,首先是将temp类的寄存器出栈,倒数第二才是RESTORE_SOME,最后才是RESTORE_SP_AND_RET。
宏RESTORE_SP_AND_RET的源码为

/*
 * RESTORE_SP_AND_RET: reload sp from its PT_R29 slot in the frame, then
 * return from the exception with eret.
 */
.macro  RESTORE_SP_AND_RET
    LONG_L  sp, PT_R29(sp)
    .set    mips3
    eret
    .set    mips0
.endm

真正的汇编指令就两条“LONG_L    sp, PT_R29(sp)”和“eret”。其中“LONG_L    sp, PT_R29(sp)”的功能是把sp的值从栈中弹出,“eret”为中断返回

依葫芦画瓢,实现SAVE_FPU和RESTORE_FPU

计算各个寄存器在栈中相对sp的偏移

寄存器$f0的偏移 = 0,
用代码表示为“#define PT_FPU_R0               (0)”
寄存器$f2的偏移 = $f0的偏移 + 8字节,
用代码表示为“#define PT_FPU_R2               ((PT_FPU_R0) + 2*LONGSIZE)”
寄存器$f4的偏移 = $f2的偏移 + 8字节
#define PT_FPU_R4               ((PT_FPU_R2) + 2*LONGSIZE)
以此类推
……
寄存器$f30的偏移 = $f28的偏移 + 8字节
#define PT_FPU_R30              ((PT_FPU_R28) + 2*LONGSIZE)
这16个寄存器在栈中占用的大小 = $f30的偏移 + 8字节
#define PT_FPU_SIZE             ((((PT_FPU_R30) + 2*LONGSIZE) + (2*PTRSIZE-1)) & ~(2*PTRSIZE-1))
为什么还要加上(2*PTRSIZE-1),然后& ~(2*PTRSIZE-1) ?
为了8字节对齐
完整的代码为

/*
 * Offsets, relative to the base of the FPU save area, of the even-numbered
 * floating-point registers $f0..$f30. Each slot is 2*LONGSIZE bytes.
 */
#define PT_FPU_R0               (0)
#define PT_FPU_R2               ((PT_FPU_R0) + 2*LONGSIZE)
#define PT_FPU_R4               ((PT_FPU_R2) + 2*LONGSIZE)
#define PT_FPU_R6               ((PT_FPU_R4) + 2*LONGSIZE)
#define PT_FPU_R8               ((PT_FPU_R6) + 2*LONGSIZE)
#define PT_FPU_R10              ((PT_FPU_R8) + 2*LONGSIZE)
#define PT_FPU_R12              ((PT_FPU_R10) + 2*LONGSIZE)
#define PT_FPU_R14              ((PT_FPU_R12) + 2*LONGSIZE)
#define PT_FPU_R16              ((PT_FPU_R14) + 2*LONGSIZE)
#define PT_FPU_R18              ((PT_FPU_R16) + 2*LONGSIZE)
#define PT_FPU_R20              ((PT_FPU_R18) + 2*LONGSIZE)
#define PT_FPU_R22              ((PT_FPU_R20) + 2*LONGSIZE)
#define PT_FPU_R24              ((PT_FPU_R22) + 2*LONGSIZE)
#define PT_FPU_R26              ((PT_FPU_R24) + 2*LONGSIZE)
#define PT_FPU_R28              ((PT_FPU_R26) + 2*LONGSIZE)
#define PT_FPU_R30              ((PT_FPU_R28) + 2*LONGSIZE)

/* Size of the whole FPU save area, rounded up to a multiple of 2*PTRSIZE. */
#define PT_FPU_SIZE             ((((PT_FPU_R30) + 2*LONGSIZE) + (2*PTRSIZE-1)) & ~(2*PTRSIZE-1))

实现SAVE_FPU

首先,记录一下当前sp值
move k1, sp
然后,判断当前sp是否是8字节对齐,不是,则向下移动sp(栈是向下生长的),使其8字节对齐
and k0, k1, 0xFFFFFFF8
将sp向下移动PT_FPU_SIZE字节,腾出空间来存放FPU的16个寄存器的值
PTR_SUBU sp, k0, PT_FPU_SIZE
然后依次将16个寄存器压栈
将$f0压栈
s.d $f0, PT_FPU_R0(sp)
将$f2压栈
s.d $f2, PT_FPU_R2(sp)
……
……
……
将$f30压栈
s.d $f30, PT_FPU_R30(sp)

完整的代码为

    /*
     * SAVE_FPU: store $f0, $f2, ..., $f30 with s.d into a PT_FPU_SIZE-byte
     * area located just below sp rounded down to an 8-byte boundary.
     * sp holds its incoming value again on exit; k0 and k1 are overwritten.
     */
    .macro SAVE_FPU
    .set push
    .set noreorder
    move k1, sp                     /* remember the incoming sp */
    and k0, k1, 0xFFFFFFF8          /* round down to 8-byte alignment */
    PTR_SUBU sp, k0, PT_FPU_SIZE    /* sp = base of the FPU save area */
    s.d $f0, PT_FPU_R0(sp)
    s.d $f2, PT_FPU_R2(sp)
    s.d $f4, PT_FPU_R4(sp)
    s.d $f6, PT_FPU_R6(sp)
    s.d $f8, PT_FPU_R8(sp)
    s.d $f10, PT_FPU_R10(sp)
    s.d $f12, PT_FPU_R12(sp)
    s.d $f14, PT_FPU_R14(sp)
    s.d $f16, PT_FPU_R16(sp)
    s.d $f18, PT_FPU_R18(sp)
    s.d $f20, PT_FPU_R20(sp)
    s.d $f22, PT_FPU_R22(sp)
    s.d $f24, PT_FPU_R24(sp)
    s.d $f26, PT_FPU_R26(sp)
    s.d $f28, PT_FPU_R28(sp)
    s.d $f30, PT_FPU_R30(sp)
    move sp, k1                     /* put the incoming sp back */
    .set reorder
    .set pop
    .endm


实现RESTORE_FPU

和使用s.d指令压栈相反,使用l.d指令则是将栈内指定单元的内容弹出。注意,这里弹出的不一定是栈顶,而是根据指令中sp的偏移决定。
比如,将$f0出栈
l.d $f0, PT_FPU_R0(sp)
将$f2出栈
l.d $f2, PT_FPU_R2(sp)

完整的代码为

    /*
     * RESTORE_FPU: reload $f0, $f2, ..., $f30 with l.d from the
     * PT_FPU_SIZE-byte area located just below sp rounded down to an
     * 8-byte boundary (the same address computation SAVE_FPU uses).
     * sp holds its incoming value again on exit; k0 and k1 are overwritten.
     */
    .macro RESTORE_FPU
    .set push
    .set noreorder
    move k1, sp                     /* remember the incoming sp */
    and k0, k1, 0xFFFFFFF8          /* round down to 8-byte alignment */
    PTR_SUBU sp, k0, PT_FPU_SIZE    /* sp = base of the FPU save area */
    l.d $f0, PT_FPU_R0(sp)
    l.d $f2, PT_FPU_R2(sp)
    l.d $f4, PT_FPU_R4(sp)
    l.d $f6, PT_FPU_R6(sp)
    l.d $f8, PT_FPU_R8(sp)
    l.d $f10, PT_FPU_R10(sp)
    l.d $f12, PT_FPU_R12(sp)
    l.d $f14, PT_FPU_R14(sp)
    l.d $f16, PT_FPU_R16(sp)
    l.d $f18, PT_FPU_R18(sp)
    l.d $f20, PT_FPU_R20(sp)
    l.d $f22, PT_FPU_R22(sp)
    l.d $f24, PT_FPU_R24(sp)
    l.d $f26, PT_FPU_R26(sp)
    l.d $f28, PT_FPU_R28(sp)
    l.d $f30, PT_FPU_R30(sp)
    move sp, k1                     /* put the incoming sp back */
    .set reorder
    .set pop
    .endm


注意:宏SAVE_FPU和宏RESTORE_FPU中使用的汇编指令s.d和l.d是双精度的,会被自动汇编成两条汇编指令,自动把相邻的奇数号也保存了。《see mips run》中的说明如下


不影响正常使用的小bug

按道理说,SAVE_FPU应该属于SAVE_ALL的一部分,应该追加在SAVE_ALL的最后。可是SAVE_ALL的源码位于RT-Thread的目录“libcpu\mips\common”内,也就是说可能其它mips的cpu也会使用SAVE_ALL,这就决定了不能随意修改这个源文件。假设在SAVE_ALL后追加了SAVE_FPU,某款mips cpu也调用了SAVE_ALL,可是该款cpu默认没有使能FPU(君正x1000默认就没有使能FPU),或者根本没有FPU(龙芯1B就没有FPU),这种情况下,执行浮点指令可能会异常。
所以,只有将龙芯1c的FPU的SAVE_FPU和RESTORE_FPU单独放在一个源文件中,并放在龙芯1c的目录内。
前面讨论的SAVE_FPU和RESTORE_FPU还不全,里面没有涉及将sp压栈和出栈。理论上应该在SAVE_FPU中将sp压栈,然后在RESTORE_FPU中将sp出栈。
假设成功启动后,RT-Thread第一次进行任务切换,首先调用SAVE_ALL和SAVE_FPU,这是没问题的;然后切换到另外一个任务(线程),调用RESTORE_FPU和RESTORE_ALL_AND_RET,这时候就有问题了,请问该线程什么时候把FPU的16个寄存器压栈了,即执行了SAVE_FPU,如果没有,那么现在执行RESTORE_FPU是不是不对呀?
考虑到,被抢占了cpu的任务,在未重新获得cpu之前,不会再次被抢占。即SAVE_ALL之后,在没有RESTORE_ALL_AND_RET之前,不会再SAVE_ALL。也就是没有嵌套的可能。既然这样,那么SAVE_FPU和RESTORE_FPU前后不移动sp,只压栈,出栈。即在执行SAVE_FPU后,重新将sp指针指向SAVE_FPU之前的位置,RESTORE_FPU也类似。虽然这样看起来有点怪,但是在SAVE_ALL后,该任务(线程)处于等待状态,不会压栈,也就不会踩到刚执行SAVE_FPU时保存的FPU寄存器信息,所以这种方案是可行的。
经过实际上机测试,RT-Thread能正常运转,浮点运算也正确,所以暂时就采用这种方案,虽然有点怪怪的。如有哪位大神有更好的方案,请直接修改后,直接提交到RT-Thread官方git上,能在后面留个言就更好了,谢谢!

小技巧

在c代码中,使用volatile unsigned int test_fpu_before_sp定义一个全局变量test_fpu_before_sp,然后在汇编代码中使用以下两条汇编指令

la k0, test_fpu_before_sp       /* k0 = address of the C global */
sw sp, 0(k0)                    /* test_fpu_before_sp = current sp */
就用这两条汇编指令就可以将当前sp保存到全局变量test_fpu_before_sp中,然后可以在c语言中将其打印出来。

源码清单

stackframe_fpu.h

libcpu\mips\loongson_1c\stackframe_fpu.h

/*
 * ls1c FPU stack frame.
 *
 * The original idea was to add this code to SAVE_ALL, RESTORE_ALL and
 * RESTORE_ALL_AND_RET in stackframe.h. Because "stackframe.h" lives in
 * the directory "libcpu/mips/common", changing it might affect other
 * MIPS CPUs, so the FPU macros are kept in this separate file instead.
 */
#ifndef __OPENLOONGSON_STACKFRAME_FPU_H
#define __OPENLOONGSON_STACKFRAME_FPU_H

#include "../common/asm.h"
#include "../common/mipsregs.h"
#include "../common/stackframe.h"

/*
 * Offsets, relative to the base of the FPU save area, of the even-numbered
 * floating-point registers $f0..$f30. Each slot is 2*LONGSIZE bytes.
 */
#define PT_FPU_R0               (0)
#define PT_FPU_R2               ((PT_FPU_R0) + 2*LONGSIZE)
#define PT_FPU_R4               ((PT_FPU_R2) + 2*LONGSIZE)
#define PT_FPU_R6               ((PT_FPU_R4) + 2*LONGSIZE)
#define PT_FPU_R8               ((PT_FPU_R6) + 2*LONGSIZE)
#define PT_FPU_R10              ((PT_FPU_R8) + 2*LONGSIZE)
#define PT_FPU_R12              ((PT_FPU_R10) + 2*LONGSIZE)
#define PT_FPU_R14              ((PT_FPU_R12) + 2*LONGSIZE)
#define PT_FPU_R16              ((PT_FPU_R14) + 2*LONGSIZE)
#define PT_FPU_R18              ((PT_FPU_R16) + 2*LONGSIZE)
#define PT_FPU_R20              ((PT_FPU_R18) + 2*LONGSIZE)
#define PT_FPU_R22              ((PT_FPU_R20) + 2*LONGSIZE)
#define PT_FPU_R24              ((PT_FPU_R22) + 2*LONGSIZE)
#define PT_FPU_R26              ((PT_FPU_R24) + 2*LONGSIZE)
#define PT_FPU_R28              ((PT_FPU_R26) + 2*LONGSIZE)
#define PT_FPU_R30              ((PT_FPU_R28) + 2*LONGSIZE)

/* Size of the whole FPU save area, rounded up to a multiple of 2*PTRSIZE. */
#define PT_FPU_SIZE             ((((PT_FPU_R30) + 2*LONGSIZE) + (2*PTRSIZE-1)) & ~(2*PTRSIZE-1))

    /*
     * SAVE_FPU: store $f0, $f2, ..., $f30 with s.d into a PT_FPU_SIZE-byte
     * area located just below sp rounded down to an 8-byte boundary.
     * sp holds its incoming value again on exit; k0 and k1 are overwritten.
     */
    .macro SAVE_FPU
    .set push
    .set noreorder
    move k1, sp                     /* remember the incoming sp */
    and k0, k1, 0xFFFFFFF8          /* round down to 8-byte alignment */
    PTR_SUBU sp, k0, PT_FPU_SIZE    /* sp = base of the FPU save area */
    s.d $f0, PT_FPU_R0(sp)
    s.d $f2, PT_FPU_R2(sp)
    s.d $f4, PT_FPU_R4(sp)
    s.d $f6, PT_FPU_R6(sp)
    s.d $f8, PT_FPU_R8(sp)
    s.d $f10, PT_FPU_R10(sp)
    s.d $f12, PT_FPU_R12(sp)
    s.d $f14, PT_FPU_R14(sp)
    s.d $f16, PT_FPU_R16(sp)
    s.d $f18, PT_FPU_R18(sp)
    s.d $f20, PT_FPU_R20(sp)
    s.d $f22, PT_FPU_R22(sp)
    s.d $f24, PT_FPU_R24(sp)
    s.d $f26, PT_FPU_R26(sp)
    s.d $f28, PT_FPU_R28(sp)
    s.d $f30, PT_FPU_R30(sp)
    move sp, k1                     /* put the incoming sp back */
    .set reorder
    .set pop
    .endm

    /*
     * RESTORE_FPU: reload $f0, $f2, ..., $f30 with l.d from the area that
     * SAVE_FPU addresses (same computation from the current sp).
     * sp holds its incoming value again on exit; k0 and k1 are overwritten.
     */
    .macro RESTORE_FPU
    .set push
    .set noreorder
    move k1, sp                     /* remember the incoming sp */
    and k0, k1, 0xFFFFFFF8          /* round down to 8-byte alignment */
    PTR_SUBU sp, k0, PT_FPU_SIZE    /* sp = base of the FPU save area */
    l.d $f0, PT_FPU_R0(sp)
    l.d $f2, PT_FPU_R2(sp)
    l.d $f4, PT_FPU_R4(sp)
    l.d $f6, PT_FPU_R6(sp)
    l.d $f8, PT_FPU_R8(sp)
    l.d $f10, PT_FPU_R10(sp)
    l.d $f12, PT_FPU_R12(sp)
    l.d $f14, PT_FPU_R14(sp)
    l.d $f16, PT_FPU_R16(sp)
    l.d $f18, PT_FPU_R18(sp)
    l.d $f20, PT_FPU_R20(sp)
    l.d $f22, PT_FPU_R22(sp)
    l.d $f24, PT_FPU_R24(sp)
    l.d $f26, PT_FPU_R26(sp)
    l.d $f28, PT_FPU_R28(sp)
    l.d $f30, PT_FPU_R30(sp)
    move sp, k1                     /* put the incoming sp back */
    .set reorder
    .set pop
    .endm

#endif

context_gcc.S

libcpu\mips\loongson_1c\context_gcc.S

/*
 * File      : context_gcc.S
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2006 - 2011, RT-Thread Development Team
 *
 * The license and distribution terms for this file may be
 * found in the file LICENSE in this distribution or at
 * http://www.rt-thread.org/license/LICENSE
 *
 * Change Logs:
 * Date           Author       Notes
 * 2010-05-17     swkyer       first version
 * 2010-09-11     bernard      port to Loongson SoC3210
 * 2011-08-08     lgnq         port to Loongson LS1B
 */

#include "../common/mips.inc"
#include "../common/stackframe.h"
#include "stackframe_fpu.h"

    .section ".text", "ax"
    .set noreorder

/*
 * rt_base_t rt_hw_interrupt_disable()
 *
 * Returns the current CP0 Status value in v0 and writes it back with
 * bit 0 cleared.
 */
    .globl rt_hw_interrupt_disable
rt_hw_interrupt_disable:
    mfc0    v0, CP0_STATUS
    and     v1, v0, 0xfffffffe
    mtc0    v1, CP0_STATUS
    jr      ra
    nop

/*
 * void rt_hw_interrupt_enable(rt_base_t level)
 *
 * Writes level (a0), with bit 0x800 set, to CP0 Status, then sets bit
 * 0x800000 in CP0 Cause.
 */
    .globl rt_hw_interrupt_enable
rt_hw_interrupt_enable:
    ori     a0, 0x00000800
    mtc0    a0, CP0_STATUS
    ehb
    mfc0    v0, CP0_CAUSE
    ehb
    or      v1, v0, 0x800000                 /* EBASE + 0x200 */
    mtc0    v1, CP0_CAUSE
    ehb
    jr      ra
    nop

/*
 * void rt_hw_context_switch(rt_uint32 from, rt_uint32 to)
 * a0 --> from
 * a1 --> to
 *
 * Saves the general-purpose and FPU context of the current thread,
 * stores its sp through a0, loads the new sp through a1 and restores
 * the FPU and general-purpose context found there.
 */
    .globl rt_hw_context_switch
rt_hw_context_switch:
    mtc0    ra, CP0_EPC
    SAVE_ALL
    SAVE_FPU

    sw      sp, 0(a0)       /* store sp in preempted tasks TCB */
    lw      sp, 0(a1)       /* get new task stack pointer */

    RESTORE_FPU
    RESTORE_ALL_AND_RET

/*
 * void rt_hw_context_switch_to(rt_uint32 to)
 * a0 --> to
 */
    .globl rt_hw_context_switch_to
rt_hw_context_switch_to:
    lw      sp, 0(a0)       /* get new task stack pointer */

    RESTORE_FPU
    RESTORE_ALL_AND_RET

/*
 * void rt_hw_context_switch_interrupt(rt_uint32 from, rt_uint32 to)
 *
 * Records a pending switch: sets rt_thread_switch_interrupt_flag and
 * rt_interrupt_from_thread the first time, and always updates
 * rt_interrupt_to_thread. The switch itself happens in mips_irq_handle.
 */
    .globl rt_thread_switch_interrupt_flag
    .globl rt_interrupt_from_thread
    .globl rt_interrupt_to_thread
    .globl rt_hw_context_switch_interrupt
rt_hw_context_switch_interrupt:
    la      t0, rt_thread_switch_interrupt_flag
    lw      t1, 0(t0)
    nop
    bnez    t1, _reswitch
    nop
    li      t1, 0x01                       /* set rt_thread_switch_interrupt_flag to 1 */
    sw      t1, 0(t0)
    la      t0, rt_interrupt_from_thread   /* set rt_interrupt_from_thread */
    sw      a0, 0(t0)
_reswitch:
    la      t0, rt_interrupt_to_thread     /* set rt_interrupt_to_thread */
    sw      a1, 0(t0)
    jr      ra
    nop

/*
 * mips_irq_handle
 *
 * Saves the general-purpose and FPU context, dispatches the interrupt on
 * the system stack, performs a pending thread switch if
 * rt_thread_switch_interrupt_flag is set, then restores the context and
 * returns from the exception.
 */
    .globl rt_interrupt_enter
    .globl rt_interrupt_leave
    .globl mips_irq_handle
mips_irq_handle:
    SAVE_ALL
    SAVE_FPU

    mfc0    t0, CP0_CAUSE
    and     t1, t0, 0xff
    bnez    t1, spurious_interrupt      /* check exception */
    nop

    /* let k0 keep the current context sp */
    move    k0, sp
    /* switch to kernel stack */
    li      sp, SYSTEM_STACK

    jal     rt_interrupt_enter
    nop
    jal     rt_interrupt_dispatch
    nop
    jal     rt_interrupt_leave
    nop

    /* switch sp back to thread's context */
    move    sp, k0

    /*
     * if rt_thread_switch_interrupt_flag is set, switch threads below;
     * otherwise go straight to the restore path
     */
    la      k0, rt_thread_switch_interrupt_flag
    lw      k1, 0(k0)
    beqz    k1, spurious_interrupt
    nop
    sw      zero, 0(k0)                     /* clear flag */
    nop

    /*
     * switch to the new thread
     */
    la      k0, rt_interrupt_from_thread
    lw      k1, 0(k0)
    nop
    sw      sp, 0(k1)                       /* store sp in preempted tasks's TCB */

    la      k0, rt_interrupt_to_thread
    lw      k1, 0(k0)
    nop
    lw      sp, 0(k1)                       /* get new task's stack pointer */
    j       spurious_interrupt
    nop

spurious_interrupt:
    RESTORE_FPU
    RESTORE_ALL_AND_RET

    .set reorder


board.c

bsp\ls1cdev\drivers\board.c

/*
 * File      : board.c
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2006-2012, RT-Thread Develop Team
 *
 * The license and distribution terms for this file may be
 * found in the file LICENSE in this distribution or at
 * http://www.rt-thread.org/license/LICENSE
 *
 * Change Logs:
 * Date           Author       Notes
 * 2010-06-25     Bernard      first version
 * 2011-08-08     lgnq            modified for Loongson LS1B
 * 2015-07-06     chinesebear  modified for Loongson LS1C
 */

#include <rtthread.h>
#include <rthw.h>

#include "board.h"
#include "uart.h"
#include "ls1c.h"

/**
 * @addtogroup Loongson LS1B
 */

/*@{*/

/**
 * This is the timer interrupt service routine.
 *
 * Rewrites the CP0 Compare register with its current value, resets the
 * CP0 Count register to 0 and advances the OS tick.
 */
void rt_hw_timer_handler(void)
{
    unsigned int count;

    count = read_c0_compare();
    write_c0_compare(count);    /* NOTE(review): presumably acknowledges the timer interrupt - confirm */
    write_c0_count(0);

    /* increase a OS tick */
    rt_tick_increase();
}

/**
 * This function will initial OS timer
 */
void rt_hw_timer_init(void)
{
    write_c0_compare(CPU_HZ/2/RT_TICK_PER_SECOND);
    write_c0_count(0);
}

/**
 * Initialize the hardware FPU (coprocessor 1).
 *
 * Sets ST0_CU1 and ST0_FR in the CP0 Status register, then programs the
 * FPU control/status register: FS, FO and FN set, all exception-enable
 * bits cleared, rounding-mode field replaced with FPU_CSR_RN.
 */
void rt_hw_fpu_init(void)
{
    rt_uint32_t c0_status = 0;
    rt_uint32_t c1_status = 0;

    /* enable coprocessor 1 (the FPU) */
    c0_status = read_c0_status();
    c0_status |= (ST0_CU1 | ST0_FR);
    write_c0_status(c0_status);

    /* configure the FPU */
    c1_status = read_c1_status();
    c1_status |= (FPU_CSR_FS | FPU_CSR_FO | FPU_CSR_FN);    /* set FS, FO, FN */
    c1_status &= ~(FPU_CSR_ALL_E);                          /* disable exceptions */
    c1_status = (c1_status & (~FPU_CSR_RM)) | FPU_CSR_RN;   /* set RN */
    write_c1_status(c1_status);

    return ;
}

/**
 * Board initialization: UART device and console (when configured),
 * OS timer, and hardware FPU. Prints the CP0 Status register afterwards.
 */
void rt_hw_board_init(void)
{
#ifdef RT_USING_UART
    /* init hardware UART device */
    rt_hw_uart_init();
#endif

#ifdef RT_USING_CONSOLE
    /* set console device */
    rt_console_set_device("uart2");
#endif

    /* init operating system timer */
    rt_hw_timer_init();

    /* init hardware fpu */
    rt_hw_fpu_init();

    rt_kprintf("current sr: 0x%08x\n", read_c0_status());
}

/*
 * Polled output of the NUL-terminated string at `ptr` on UART<unr>.
 * A '\r' is sent before every '\n'. Expects a `const char *ptr` in scope
 * and advances it to the terminating NUL.
 */
#define __raw_out_put(unr) \
    while (*ptr) \
    { \
        if (*ptr == '\n') \
        { \
            /* FIFO status, contain valid data */ \
            while (!(UART_LSR(UART##unr##_BASE) & (UARTLSR_TE | UARTLSR_TFE))); \
            /* write data */ \
            UART_DAT(UART##unr##_BASE) = '\r'; \
        } \
        /* FIFO status, contain valid data */ \
        while (!(UART_LSR(UART##unr##_BASE) & (UARTLSR_TE | UARTLSR_TFE))); \
        /* write data */ \
        UART_DAT(UART##unr##_BASE) = *ptr; \
        ptr ++; \
    }

/* UART line status register value */
#define UARTLSR_ERROR   (1 << 7)
#define UARTLSR_TE      (1 << 6)
#define UARTLSR_TFE     (1 << 5)
#define UARTLSR_BI      (1 << 4)
#define UARTLSR_FE      (1 << 3)
#define UARTLSR_PE      (1 << 2)
#define UARTLSR_OE      (1 << 1)
#define UARTLSR_DR      (1 << 0)

/**
 * Write a string to the console UART selected at build time
 * (RT_USING_UART0, RT_USING_UART2 or RT_USING_UART3); does nothing when
 * none of them is defined.
 */
void rt_hw_console_output(const char *ptr)
{
#if defined(RT_USING_UART0)
    __raw_out_put(0);
#elif defined(RT_USING_UART2)
    __raw_out_put(2);
#elif defined(RT_USING_UART3)
    __raw_out_put(3);
#endif
}

/*@}*/



application.c

bsp\ls1cdev\applications\application.c

/*
 * File      : application.c
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2006-2012, RT-Thread Develop Team
 *
 * The license and distribution terms for this file may be
 * found in the file LICENSE in this distribution or at
 * http://www.rt-thread.org/license/LICENSE
 *
 * Change Logs:
 * Date                Author         Notes
 * 2010-06-25          Bernard        first version
 * 2011-08-08          lgnq           modified for Loongson LS1B
 * 2015-07-06          chinesebear    modified for Loongson LS1C
 */

#include <rtthread.h>
#include <components.h>

#include "rthw.h"
#include "ls1c.h"
#include "ls1c_public.h"
#include "ls1c_gpio.h"
#include "mipsregs.h"

/* Test thread */
#define THREAD_TEST_PRIORITY                    (25)
#define THREAD_TEST_STACK_SIZE                  (4*1024)        /* 4k */
#define THREAD_TEST_TIMESLICE                   (100)

struct rt_thread thread_test;
ALIGN(8) rt_uint8_t thread_test_stack[THREAD_TEST_STACK_SIZE];

/*
 * Thread that exercises the hardware FPU.
 * It is preempted over and over by the other thread, to check whether a
 * task switch disturbs its floating-point computation.
 */
#define THREAD_TEST_FPU_PRIORITY                (26)    /* larger value = lower priority */
#define THREAD_TEST_FPU_STACK_SIZE              (2*1024)
#define THREAD_TEST_FPU_TIMESLICE               (100)

struct rt_thread thread_test_fpu;
ALIGN(8) rt_uint8_t thread_test_fpu_stack[THREAD_TEST_FPU_STACK_SIZE];

/* Upper bound of the for loop in each test case */
#define TEST_FPU_MAX_COUNT          (1000)

/* Bit-field view of a single-precision value: mantissa, biased exponent, sign. */
struct ieee754sp_kconst {
    unsigned mant:23;
    unsigned bexp:8;
    unsigned sign:1;
};

/* Print whether the CPU is big or little endian. */
void test_endian(void)
{
    unsigned short test = 0x1234;

    if (0x12 == *((unsigned char *)&test))
        rt_kprintf("[%s] big endian\n", __FUNCTION__);
    else
        rt_kprintf("[%s] little endian\n", __FUNCTION__);
}

/*
 * Print the encoding of a float: raw hex word, sign, exponent and mantissa.
 * NOTE(review): reads the float through unrelated pointer types, and the
 * bit-field order in struct ieee754sp_kconst is compiler/endianness
 * dependent - confirm for the target toolchain.
 */
void print_float(float value)
{
    struct ieee754sp_kconst *test_p = (struct ieee754sp_kconst *)&value;
    unsigned int *test_int = (unsigned int *)&value;

    rt_kprintf("[%s] 0x%x, sign=%d, bexp=0x%x, mant=0x%x\n",
                __FUNCTION__,
                *test_int,
                test_p->sign,
                test_p->bexp,
                test_p->mant);

    return ;
}

/* Floating-point addition on the hardware FPU; prints the raw bits of each partial sum. */
void test_fpu_add(void)
{
    unsigned int i = 0;
    float sum_f = 0.0;
    unsigned int *sum_p = (unsigned int *)&sum_f;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);
    for (i=0; i<TEST_FPU_MAX_COUNT; i++)
    {
        sum_f += 0.62113;
        rt_kprintf("[%s] *sum_p=0x%x\n", __FUNCTION__, *sum_p);
    }

    return ;
}

/* Floating-point subtraction on the hardware FPU; prints the raw bits of each result. */
void test_fpu_subtraction(void)
{
    unsigned int i = 0;
    float result_f = 252.731;
    unsigned int *result_p = (unsigned int *)&result_f;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);
    for (i=0; i<TEST_FPU_MAX_COUNT; i++)
    {
        result_f -= 0.62113;
        rt_kprintf("[%s] *result_p=0x%x\n", __FUNCTION__, *result_p);
    }

    return ;
}

/* Floating-point multiplication on the hardware FPU; prints the raw bits of each result. */
void test_fpu_multiplication(void)
{
    unsigned int i = 0;
    float result_f = 9.016;
    unsigned int *result_p = (unsigned int *)&result_f;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);
    for (i=1; i<TEST_FPU_MAX_COUNT; i++)
    {
        result_f *= 1.00001;
        rt_kprintf("[%s] *result_p=0x%x\n", __FUNCTION__, *result_p);
    }

    return ;
}

/* Floating-point division on the hardware FPU; prints the raw bits of each result. */
void test_fpu_division(void)
{
    unsigned int i = 0;
    float result_f = 723.801;
    unsigned int *result_p = (unsigned int *)&result_f;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);
    for (i=1; i<TEST_FPU_MAX_COUNT; i++)
    {
        result_f /= 1.00003;
        rt_kprintf("[%s] *result_p=0x%x\n", __FUNCTION__, *result_p);
    }

    return ;
}

/* Run the add, subtract, multiply and divide tests in turn. */
void test_fpu(void)
{
    /* floating-point addition */
    test_fpu_add();

    /* floating-point subtraction */
    test_fpu_subtraction();

    /* floating-point multiplication */
    test_fpu_multiplication();

    /* floating-point division */
    test_fpu_division();

    return ;
}

/* Debug globals; the article's assembly snippet stores sp into test_fpu_before_sp. */
volatile unsigned int test_fpu_before_sp = 0;
volatile unsigned int test_fpu_end_sp = 0;
volatile unsigned int test_fpu_save_sp = 0;

/*
 * Entry of the test thread. It does some double-precision arithmetic of
 * its own and sleeps 10 ticks per iteration, so it keeps preempting the
 * FPU test thread.
 */
void thread_test_entry(void *parameter)
{
    double test1 = 1.71;
    double test2 = 100.039;
    double result;
    int i = 0;

    rt_kprintf("[%s] test_fpu_save_sp=0x%x, test_fpu_before_sp=0x%x, test_fpu_end_sp=0x%x\n",
                __FUNCTION__,
                test_fpu_save_sp,
                test_fpu_before_sp,
                test_fpu_end_sp);

    while (1)
    {
        i++;
        test1 += i;
        result = test1 * test2;
        /*
         * NOTE(review): i grows roughly tenfold per iteration, so after
         * about ten iterations `i + result / 9` no longer fits in an int
         * and the double-to-int conversion is undefined behavior.
         */
        i = i + result / 9;

        rt_thread_delay(10);

        /* keep preempting the thread that does the FPU test; optionally print a message once scheduled */
//        rt_kprintf("[%s] ..........................\n", __FUNCTION__);
    }
}

/* Entry of the thread that performs the floating-point tests. */
void thread_test_fpu_entry(void *parameter)
{
    /*
     * Exercise add/sub/mul/div on the hardware FPU.
     * The run is interrupted many times by the other thread.
     */
    test_fpu();
    rt_kprintf("[%s] test fpu end.\n", __FUNCTION__);

    while (1)
    {
        rt_thread_delay(RT_TICK_PER_SECOND);
    }
}

void rt_init_thread_entry(void *parameter)
{
    /* initialization RT-Thread Components */
    rt_components_init();
}

/*
 * Create the "init" thread and start the two statically allocated test
 * threads. Returns 0 on success, -1 if either rt_thread_init() fails.
 */
int rt_application_init(void)
{
    rt_thread_t tid;
    rt_err_t result;

    /* create initialization thread */
    tid = rt_thread_create("init",
                            rt_init_thread_entry, RT_NULL,
                            4096, RT_THREAD_PRIORITY_MAX/3, 20);
    if (tid != RT_NULL)
        rt_thread_startup(tid);

    /* initialize the test thread */
    result = rt_thread_init(&thread_test,
                            "test",
                            thread_test_entry,
                            RT_NULL,
                            &thread_test_stack[0],
                            sizeof(thread_test_stack),
                            THREAD_TEST_PRIORITY,
                            THREAD_TEST_TIMESLICE);
    if (RT_EOK == result)
    {
        rt_thread_startup(&thread_test);
    }
    else
    {
        return -1;
    }

    /* initialize the FPU test thread */
    result = rt_thread_init(&thread_test_fpu,
                            "test_fpu",
                            thread_test_fpu_entry,
                            RT_NULL,
                            &thread_test_fpu_stack[0],
                            sizeof(thread_test_fpu_stack),
                            THREAD_TEST_FPU_PRIORITY,
                            THREAD_TEST_FPU_TIMESLICE);
    if (RT_EOK == result)
    {
        rt_thread_startup(&thread_test_fpu);
    }
    else
    {
        return -1;
    }

    return 0;
}

新建了两个线程来测试浮点运算是否正常。线程“test_fpu”不停的执行浮点运算,线程“test”的优先级比线程“test_fpu”高,线程“test”不断抢占cpu,触发任务切换,最后通过串口打印查看浮点运算结果是否正确。