在龙芯1C上移植硬浮点FPU到RT-Thread
来源:互联网 发布:点击进入f2c最新域名 编辑:程序博客网 时间:2024/06/06 15:22
本文以RT-Thread为例,继续分享我是如何在上一篇文章的指导下一步一步移植FPU到RT-Thread的。
移植要点
先来回顾《【龙芯1c库】移植硬浮点FPU》中的移植要点
1,和裸机编程一样,需要初始化FPU,初始化的函数也是一样的
2,需要在中断和上下文切换时保存用于浮点运算的16个寄存器$f0,$f2,$f4, ...... ,$f28,$f30
其中,要点一说的FPU初始化函数如下,
/** * init hardware FPU */void rt_hw_fpu_init(void){ rt_uint32_t c0_status = 0; rt_uint32_t c1_status = 0; // 使能协处理器1--FPU c0_status = read_c0_status(); c0_status |= (ST0_CU1 | ST0_FR); write_c0_status(c0_status); // 配置FPU c1_status = read_c1_status(); c1_status |= (FPU_CSR_FS | FPU_CSR_FO | FPU_CSR_FN); // set FS, FO, FN c1_status &= ~(FPU_CSR_ALL_E); // disable exception c1_status = (c1_status & (~FPU_CSR_RM)) | FPU_CSR_RN; // set RN write_c1_status(c1_status); return ;}
只需在“bsp\ls1cdev\drivers\board.c”的函数rt_hw_board_init()中调用即可。
本文重点放在要点2上
在中断和上下文切换时保存用于浮点运算的浮点寄存器
分析中断时保存通用寄存器的SAVE_ALL和RESTORE_ALL_AND_RET
分析SAVE_ALL
先来看下SAVE_ALL的源码截图
宏SAVE_ALL由多个宏组成,第一个宏为SAVE_SOME,下面详细分析SAVE_SOME。
move k1, sp 的功能是k1=sp,即sp的值赋给k1
move k0, sp 的功能是k0=sp
PTR_SUBU sp, k1, PT_SIZE 的功能是 sp = k1 - PT_SIZE,即把栈指针sp向下移动
LONG_S k0, PT_R29(sp) 的功能是 *(sp + PT_R29) = k0,即将k0(之前的sp值)压栈
LONG_S $3, PT_R3(sp) 的功能是 *(sp + PT_R3) = $3,即将通用寄存器$3中的值压栈
……
后面的类似,都是压栈,包括后面的宏SAVE_AT,宏SAVE_TEMP,宏SAVE_STATIC。
汇编中用到的宏PT_R29、PT_R3等,表示相应的寄存器$29、$3等在栈中相对sp的偏移地址。宏PT_SIZE为所有寄存器的入栈后占用的总的空间大小。具体源码如下
/*
 * Offsets, relative to sp, of each saved general-purpose register inside
 * the exception stack frame.  Each slot is LONGSIZE bytes wide and the
 * slots are laid out in register-number order.
 */
#define PT_R0           (0)                         /* 0 */
#define PT_R1           ((PT_R0) + LONGSIZE)        /* 1 */
#define PT_R2           ((PT_R1) + LONGSIZE)        /* 2 */
#define PT_R3           ((PT_R2) + LONGSIZE)        /* 3 */
#define PT_R4           ((PT_R3) + LONGSIZE)        /* 4 */
#define PT_R5           ((PT_R4) + LONGSIZE)        /* 5 */
#define PT_R6           ((PT_R5) + LONGSIZE)        /* 6 */
#define PT_R7           ((PT_R6) + LONGSIZE)        /* 7 */
#define PT_R8           ((PT_R7) + LONGSIZE)        /* 8 */
#define PT_R9           ((PT_R8) + LONGSIZE)        /* 9 */
#define PT_R10          ((PT_R9) + LONGSIZE)        /* 10 */
#define PT_R11          ((PT_R10) + LONGSIZE)       /* 11 */
#define PT_R12          ((PT_R11) + LONGSIZE)       /* 12 */
#define PT_R13          ((PT_R12) + LONGSIZE)       /* 13 */
#define PT_R14          ((PT_R13) + LONGSIZE)       /* 14 */
#define PT_R15          ((PT_R14) + LONGSIZE)       /* 15 */
#define PT_R16          ((PT_R15) + LONGSIZE)       /* 16 */
#define PT_R17          ((PT_R16) + LONGSIZE)       /* 17 */
#define PT_R18          ((PT_R17) + LONGSIZE)       /* 18 */
#define PT_R19          ((PT_R18) + LONGSIZE)       /* 19 */
#define PT_R20          ((PT_R19) + LONGSIZE)       /* 20 */
#define PT_R21          ((PT_R20) + LONGSIZE)       /* 21 */
#define PT_R22          ((PT_R21) + LONGSIZE)       /* 22 */
#define PT_R23          ((PT_R22) + LONGSIZE)       /* 23 */
#define PT_R24          ((PT_R23) + LONGSIZE)       /* 24 */
#define PT_R25          ((PT_R24) + LONGSIZE)       /* 25 */
#define PT_R26          ((PT_R25) + LONGSIZE)       /* 26 */
#define PT_R27          ((PT_R26) + LONGSIZE)       /* 27 */
#define PT_R28          ((PT_R27) + LONGSIZE)       /* 28 */
#define PT_R29          ((PT_R28) + LONGSIZE)       /* 29 */
#define PT_R30          ((PT_R29) + LONGSIZE)       /* 30 */
#define PT_R31          ((PT_R30) + LONGSIZE)       /* 31 */

/*
 * Saved special registers
 */
#define PT_STATUS       ((PT_R31) + LONGSIZE)       /* 32 */
#define PT_HI           ((PT_STATUS) + LONGSIZE)    /* 33 */
#define PT_LO           ((PT_HI) + LONGSIZE)        /* 34 */
#define PT_BADVADDR     ((PT_LO) + LONGSIZE)        /* 35 */
#define PT_CAUSE        ((PT_BADVADDR) + LONGSIZE)  /* 36 */
#define PT_EPC          ((PT_CAUSE) + LONGSIZE)     /* 37 */

/* total frame size, rounded up to a multiple of PTRSIZE */
#define PT_SIZE         ((((PT_EPC) + LONGSIZE) + (PTRSIZE-1)) & ~(PTRSIZE-1))
比如,寄存器$0的偏移地址为0,所以有“#define PT_R0 (0)”,寄存器$1的偏移地址为寄存器$0的地址加上寄存器$0的大小(4字节),所以有“#define PT_R1 ((PT_R0) + LONGSIZE)”,其它的类似。
分析RESTORE_ALL_AND_RET
宏RESTORE_ALL_AND_RET的源码为
/*
 * Pop the whole saved context and return from the exception.
 * Expands the restore macros in this order; RESTORE_SP_AND_RET comes last.
 */
.macro  RESTORE_ALL_AND_RET
    RESTORE_TEMP
    RESTORE_STATIC
    RESTORE_AT
    RESTORE_SOME
    RESTORE_SP_AND_RET
.endm
从宏名字和执行顺序看,首先是将temp类的寄存器出栈,倒数第二才是RESTORE_SOME,最后才是RESTORE_SP_AND_RET。
宏RESTORE_SP_AND_RET的源码为
/*
 * Reload sp from its slot in the saved frame, then return from the
 * exception with eret (assembled under ".set mips3").
 */
.macro  RESTORE_SP_AND_RET
    LONG_L  sp, PT_R29(sp)
    .set    mips3
    eret
    .set    mips0
.endm
真正的汇编指令就两条“LONG_L sp, PT_R29(sp)”和“eret”。其中“LONG_L sp, PT_R29(sp)”的功能是把sp的值从栈中弹出,“eret”为中断返回
依葫芦画瓢,实现SAVE_FPU和RESTORE_FPU
计算各个寄存器在栈中相对sp的偏移
寄存器$f0的偏移 = 0,
用代码表示为“#define PT_FPU_R0 (0)”
寄存器$f2的偏移 = $f0的偏移 + 8字节,
用代码表示为“#define PT_FPU_R2 ((PT_FPU_R0) + 2*LONGSIZE)”
寄存器$f4的偏移 = $f2的偏移 + 8字节
#define PT_FPU_R4 ((PT_FPU_R2) + 2*LONGSIZE)
以此类推
……
寄存器$f30的偏移 = $f28的偏移 + 8字节
#define PT_FPU_R30 ((PT_FPU_R28) + 2*LONGSIZE)
这16个寄存器在栈中占用的大小 = $f30的偏移 + 8字节
#define PT_FPU_SIZE ((((PT_FPU_R30) + 2*LONGSIZE) + (2*PTRSIZE-1)) & ~(2*PTRSIZE-1))
为什么还要加上(2*PTRSIZE-1),然后& ~(2*PTRSIZE-1) ?
为了8字节对齐
完整的代码为
/*
 * Offsets, relative to the base of the FPU save area, of the sixteen
 * even-numbered floating-point registers.  Each slot is 2*LONGSIZE bytes.
 */
#define PT_FPU_R0               (0)
#define PT_FPU_R2               ((PT_FPU_R0) + 2*LONGSIZE)
#define PT_FPU_R4               ((PT_FPU_R2) + 2*LONGSIZE)
#define PT_FPU_R6               ((PT_FPU_R4) + 2*LONGSIZE)
#define PT_FPU_R8               ((PT_FPU_R6) + 2*LONGSIZE)
#define PT_FPU_R10              ((PT_FPU_R8) + 2*LONGSIZE)
#define PT_FPU_R12              ((PT_FPU_R10) + 2*LONGSIZE)
#define PT_FPU_R14              ((PT_FPU_R12) + 2*LONGSIZE)
#define PT_FPU_R16              ((PT_FPU_R14) + 2*LONGSIZE)
#define PT_FPU_R18              ((PT_FPU_R16) + 2*LONGSIZE)
#define PT_FPU_R20              ((PT_FPU_R18) + 2*LONGSIZE)
#define PT_FPU_R22              ((PT_FPU_R20) + 2*LONGSIZE)
#define PT_FPU_R24              ((PT_FPU_R22) + 2*LONGSIZE)
#define PT_FPU_R26              ((PT_FPU_R24) + 2*LONGSIZE)
#define PT_FPU_R28              ((PT_FPU_R26) + 2*LONGSIZE)
#define PT_FPU_R30              ((PT_FPU_R28) + 2*LONGSIZE)

/* size of the save area, rounded up to a multiple of 2*PTRSIZE */
#define PT_FPU_SIZE             ((((PT_FPU_R30) + 2*LONGSIZE) + (2*PTRSIZE-1)) & ~(2*PTRSIZE-1))
实现SAVE_FPU
首先,记录一下当前sp值
move k1, sp
然后,判断当前sp是否是8字节对齐,不是,则向下移动sp(栈是向下生长的),使其8字节对齐
and k0, k1, 0xFFFFFFF8
将sp向下移动PT_FPU_SIZE字节,腾出空间来存放FPU的16个寄存器的值
PTR_SUBU sp, k0, PT_FPU_SIZE
然后依次将16个寄存器压栈
将$f0压栈
s.d $f0, PT_FPU_R0(sp)
将$f2压栈
s.d $f2, PT_FPU_R2(sp)
……
……
……
将$f30压栈
s.d $f30, PT_FPU_R30(sp)
完整的代码为
/*
 * Store $f0, $f2, ..., $f30 into a PT_FPU_SIZE-byte area located just
 * below the current sp (rounded down to an 8-byte boundary).
 *
 * Uses k0 and k1 as scratch.  sp is restored to its original value at the
 * end, so the save area is NOT reserved on the stack.
 * NOTE(review): anything pushed on this stack before the matching
 * RESTORE_FPU would overwrite the saved values.
 */
.macro SAVE_FPU
    .set push
    .set noreorder

    move        k1, sp                      /* remember the original sp */
    and         k0, k1, 0xFFFFFFF8          /* round down to an 8-byte boundary */
    PTR_SUBU    sp, k0, PT_FPU_SIZE         /* base of the FPU save area */
    s.d         $f0,  PT_FPU_R0(sp)
    s.d         $f2,  PT_FPU_R2(sp)
    s.d         $f4,  PT_FPU_R4(sp)
    s.d         $f6,  PT_FPU_R6(sp)
    s.d         $f8,  PT_FPU_R8(sp)
    s.d         $f10, PT_FPU_R10(sp)
    s.d         $f12, PT_FPU_R12(sp)
    s.d         $f14, PT_FPU_R14(sp)
    s.d         $f16, PT_FPU_R16(sp)
    s.d         $f18, PT_FPU_R18(sp)
    s.d         $f20, PT_FPU_R20(sp)
    s.d         $f22, PT_FPU_R22(sp)
    s.d         $f24, PT_FPU_R24(sp)
    s.d         $f26, PT_FPU_R26(sp)
    s.d         $f28, PT_FPU_R28(sp)
    s.d         $f30, PT_FPU_R30(sp)
    move        sp, k1                      /* put the original sp back */

    .set reorder
    .set pop
.endm
实现RESTORE_FPU
和使用s.d指令压栈相反,使用l.d指令则是将栈内指定单元的内容弹出。注意,这里弹出的不一定是栈顶,而是根据指令中sp的偏移决定。
比如,将$f0出栈
l.d $f0, PT_FPU_R0(sp)
将$f2出栈
l.d $f2, PT_FPU_R2(sp)
完整的代码为
/*
 * Reload $f0, $f2, ..., $f30 from the PT_FPU_SIZE-byte area located just
 * below the current sp (rounded down to an 8-byte boundary), i.e. the
 * same address computation SAVE_FPU uses.
 *
 * Uses k0 and k1 as scratch; sp is left unchanged on exit.
 */
.macro RESTORE_FPU
    .set push
    .set noreorder

    move        k1, sp                      /* remember the original sp */
    and         k0, k1, 0xFFFFFFF8          /* round down to an 8-byte boundary */
    PTR_SUBU    sp, k0, PT_FPU_SIZE         /* base of the FPU save area */
    l.d         $f0,  PT_FPU_R0(sp)
    l.d         $f2,  PT_FPU_R2(sp)
    l.d         $f4,  PT_FPU_R4(sp)
    l.d         $f6,  PT_FPU_R6(sp)
    l.d         $f8,  PT_FPU_R8(sp)
    l.d         $f10, PT_FPU_R10(sp)
    l.d         $f12, PT_FPU_R12(sp)
    l.d         $f14, PT_FPU_R14(sp)
    l.d         $f16, PT_FPU_R16(sp)
    l.d         $f18, PT_FPU_R18(sp)
    l.d         $f20, PT_FPU_R20(sp)
    l.d         $f22, PT_FPU_R22(sp)
    l.d         $f24, PT_FPU_R24(sp)
    l.d         $f26, PT_FPU_R26(sp)
    l.d         $f28, PT_FPU_R28(sp)
    l.d         $f30, PT_FPU_R30(sp)
    move        sp, k1                      /* put the original sp back */

    .set reorder
    .set pop
.endm
注意:宏SAVE_FPU和宏RESTORE_FPU中使用的汇编指令s.d和l.d是双精度的,会被自动汇编成两条汇编指令,自动把相邻的奇数号也保存了。《see mips run》中的说明如下
不影响正常使用的小bug
按道理说,SAVE_FPU应该属于SAVE_ALL的一部分,应该追加在SAVE_ALL的最后。可是SAVE_ALL的源码位于RT-Thread的目录“libcpu\mips\common”内,也就是说可能其它mips的cpu也会使用SAVE_ALL,这就决定了不能随意修改这个源文件。假设在SAVE_ALL后追加了SAVE_FPU,某款mips cpu也调用了SAVE_ALL,可是该款cpu默认没有使能FPU(君正x1000默认就没有使能FPU),或者根本没有FPU(龙芯1B就没有FPU),这种情况下,执行浮点指令可能会异常。
所以,只有将龙芯1c的FPU的SAVE_FPU和RESTORE_FPU单独放在一个源文件中,并放在龙芯1c的目录内。
前面讨论的SAVE_FPU和RESTORE_FPU还不全,里面没有涉及将sp压栈和出栈。理论上应该在SAVE_FPU中将sp压栈,然后在RESTORE_FPU中将sp出栈。
假设成功启动后,RT-Thread第一次进行任务切换,首先调用SAVE_ALL和SAVE_FPU,这是没问题的;然后切换到另外一个任务(线程),调用RESTORE_FPU和RESTORE_ALL_AND_RET,这时候就有问题了,请问该线程什么时候把FPU的16个寄存器压栈了,即执行了SAVE_FPU,如果没有,那么现在执行RESTORE_FPU是不是不对呀?
考虑到,被抢占了cpu的任务,在未重新获得cpu之前,不会再次被抢占。即SAVE_ALL之后,在没有RESTORE_ALL_AND_RET之前,不会再SAVE_ALL。也就是没有嵌套的可能。既然这样,那么SAVE_FPU和RESTORE_FPU前后不移动sp,只压栈,出栈。即在执行SAVE_FPU后,重新将sp指针指向SAVE_FPU之前的位置,RESTORE_FPU也类似。虽然这样看起来有点怪的,但是在SAVE_ALL后,该任务(线程)处于等待状态,不会压栈,也就不会踩到刚执行SAVE_FPU时保存的FPU寄存器信息,所以这种方案是可行的。
经过实际上机测试,RT-Thread能正常运转,浮点运算也正确,所以暂时就采用这种方案,虽然有点怪怪的。如有哪位大神有更好的方案,请直接修改后,直接提交到RT-Thread官方git上,能在后面留个言就更好了,谢谢!
小技巧
在c代码中,使用volatile unsigned int test_fpu_before_sp定义一个全局变量test_fpu_before_sp,然后在汇编代码中使用以下两条汇编指令
la k0, test_fpu_before_sp
sw sp, 0(k0)
用这两条汇编指令就可以将当前sp保存到全局变量test_fpu_before_sp中,然后可以在c语言中将其打印出来。
源码清单
stackframe_fpu.h
libcpu\mips\loongson_1c\stackframe_fpu.h
/*
 * ls1c FPU's stackframe
 *
 * The original plan was to add this code to SAVE_ALL, RESTORE_ALL and
 * RESTORE_ALL_AND_RET in stackframe.h.  That file lives in
 * "libcpu\mips\common", though, and changing it could affect other MIPS
 * CPUs, so the FPU save/restore macros live in this separate file.
 */
#ifndef __OPENLOONGSON_STACKFRAME_FPU_H
#define __OPENLOONGSON_STACKFRAME_FPU_H


#include "../common/asm.h"
#include "../common/mipsregs.h"
#include "../common/stackframe.h"


/*
 * Offsets, relative to the base of the FPU save area, of the sixteen
 * even-numbered floating-point registers.  Each slot is 2*LONGSIZE bytes.
 */
#define PT_FPU_R0               (0)
#define PT_FPU_R2               ((PT_FPU_R0) + 2*LONGSIZE)
#define PT_FPU_R4               ((PT_FPU_R2) + 2*LONGSIZE)
#define PT_FPU_R6               ((PT_FPU_R4) + 2*LONGSIZE)
#define PT_FPU_R8               ((PT_FPU_R6) + 2*LONGSIZE)
#define PT_FPU_R10              ((PT_FPU_R8) + 2*LONGSIZE)
#define PT_FPU_R12              ((PT_FPU_R10) + 2*LONGSIZE)
#define PT_FPU_R14              ((PT_FPU_R12) + 2*LONGSIZE)
#define PT_FPU_R16              ((PT_FPU_R14) + 2*LONGSIZE)
#define PT_FPU_R18              ((PT_FPU_R16) + 2*LONGSIZE)
#define PT_FPU_R20              ((PT_FPU_R18) + 2*LONGSIZE)
#define PT_FPU_R22              ((PT_FPU_R20) + 2*LONGSIZE)
#define PT_FPU_R24              ((PT_FPU_R22) + 2*LONGSIZE)
#define PT_FPU_R26              ((PT_FPU_R24) + 2*LONGSIZE)
#define PT_FPU_R28              ((PT_FPU_R26) + 2*LONGSIZE)
#define PT_FPU_R30              ((PT_FPU_R28) + 2*LONGSIZE)

/* size of the save area, rounded up to a multiple of 2*PTRSIZE */
#define PT_FPU_SIZE             ((((PT_FPU_R30) + 2*LONGSIZE) + (2*PTRSIZE-1)) & ~(2*PTRSIZE-1))


/*
 * Store $f0, $f2, ..., $f30 into a PT_FPU_SIZE-byte area located just
 * below the current sp (rounded down to an 8-byte boundary).
 * Uses k0 and k1 as scratch.  sp is restored to its original value at the
 * end, so the save area is NOT reserved on the stack.
 * NOTE(review): anything pushed on this stack before the matching
 * RESTORE_FPU would overwrite the saved values.
 */
    .macro SAVE_FPU
    .set push
    .set noreorder

    move        k1, sp                      /* remember the original sp */
    and         k0, k1, 0xFFFFFFF8          /* round down to an 8-byte boundary */
    PTR_SUBU    sp, k0, PT_FPU_SIZE         /* base of the FPU save area */
    s.d         $f0,  PT_FPU_R0(sp)
    s.d         $f2,  PT_FPU_R2(sp)
    s.d         $f4,  PT_FPU_R4(sp)
    s.d         $f6,  PT_FPU_R6(sp)
    s.d         $f8,  PT_FPU_R8(sp)
    s.d         $f10, PT_FPU_R10(sp)
    s.d         $f12, PT_FPU_R12(sp)
    s.d         $f14, PT_FPU_R14(sp)
    s.d         $f16, PT_FPU_R16(sp)
    s.d         $f18, PT_FPU_R18(sp)
    s.d         $f20, PT_FPU_R20(sp)
    s.d         $f22, PT_FPU_R22(sp)
    s.d         $f24, PT_FPU_R24(sp)
    s.d         $f26, PT_FPU_R26(sp)
    s.d         $f28, PT_FPU_R28(sp)
    s.d         $f30, PT_FPU_R30(sp)
    move        sp, k1                      /* put the original sp back */

    .set reorder
    .set pop
    .endm


/*
 * Reload $f0, $f2, ..., $f30 from the area SAVE_FPU wrote, using the same
 * address computation.  Uses k0 and k1 as scratch; sp is unchanged on exit.
 */
    .macro RESTORE_FPU
    .set push
    .set noreorder

    move        k1, sp                      /* remember the original sp */
    and         k0, k1, 0xFFFFFFF8          /* round down to an 8-byte boundary */
    PTR_SUBU    sp, k0, PT_FPU_SIZE         /* base of the FPU save area */
    l.d         $f0,  PT_FPU_R0(sp)
    l.d         $f2,  PT_FPU_R2(sp)
    l.d         $f4,  PT_FPU_R4(sp)
    l.d         $f6,  PT_FPU_R6(sp)
    l.d         $f8,  PT_FPU_R8(sp)
    l.d         $f10, PT_FPU_R10(sp)
    l.d         $f12, PT_FPU_R12(sp)
    l.d         $f14, PT_FPU_R14(sp)
    l.d         $f16, PT_FPU_R16(sp)
    l.d         $f18, PT_FPU_R18(sp)
    l.d         $f20, PT_FPU_R20(sp)
    l.d         $f22, PT_FPU_R22(sp)
    l.d         $f24, PT_FPU_R24(sp)
    l.d         $f26, PT_FPU_R26(sp)
    l.d         $f28, PT_FPU_R28(sp)
    l.d         $f30, PT_FPU_R30(sp)
    move        sp, k1                      /* put the original sp back */

    .set reorder
    .set pop
    .endm


#endif
context_gcc.S
libcpu\mips\loongson_1c\context_gcc.S
/*
 * File      : context_gcc.S
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2006 - 2011, RT-Thread Development Team
 *
 * The license and distribution terms for this file may be
 * found in the file LICENSE in this distribution or at
 * http://www.rt-thread.org/license/LICENSE
 *
 * Change Logs:
 * Date           Author       Notes
 * 2010-05-17     swkyer       first version
 * 2010-09-11     bernard      port to Loongson SoC3210
 * 2011-08-08     lgnq         port to Loongson LS1B
 */

#include "../common/mips.inc"
#include "../common/stackframe.h"
#include "stackframe_fpu.h"

    .section ".text", "ax"
    .set noreorder

/*
 * rt_base_t rt_hw_interrupt_disable()
 *
 * Returns the previous CP0 Status value in v0 and writes Status back
 * with bit 0 cleared.
 */
    .globl rt_hw_interrupt_disable
rt_hw_interrupt_disable:
    mfc0    v0, CP0_STATUS
    and     v1, v0, 0xfffffffe
    mtc0    v1, CP0_STATUS
    jr      ra
    nop

/*
 * void rt_hw_interrupt_enable(rt_base_t level)
 *
 * Writes level (a0) with bit 0x800 set into CP0 Status, then sets bit
 * 0x800000 in CP0 Cause.
 */
    .globl rt_hw_interrupt_enable
rt_hw_interrupt_enable:
    ori     a0, 0x00000800
    mtc0    a0, CP0_STATUS
    ehb
    mfc0    v0, CP0_CAUSE
    ehb
    or      v1, v0, 0x800000                /* EBASE + 0x200 */
    mtc0    v1, CP0_CAUSE
    ehb
    jr      ra
    nop

/*
 * void rt_hw_context_switch(rt_uint32 from, rt_uint32 to)
 * a0 --> from
 * a1 --> to
 *
 * EPC is loaded with ra first, so the final eret resumes at the caller.
 */
    .globl rt_hw_context_switch
rt_hw_context_switch:
    mtc0    ra, CP0_EPC
    SAVE_ALL
    SAVE_FPU

    sw      sp, 0(a0)       /* store sp in preempted tasks TCB */
    lw      sp, 0(a1)       /* get new task stack pointer */

    RESTORE_FPU
    RESTORE_ALL_AND_RET

/*
 * void rt_hw_context_switch_to(rt_uint32 to)
 * a0 --> to
 */
    .globl rt_hw_context_switch_to
rt_hw_context_switch_to:
    lw      sp, 0(a0)       /* get new task stack pointer */

    RESTORE_FPU
    RESTORE_ALL_AND_RET

/*
 * void rt_hw_context_switch_interrupt(rt_uint32 from, rt_uint32 to)
 *
 * Only records the request: sets rt_thread_switch_interrupt_flag and the
 * from/to variables.  The switch itself is done in mips_irq_handle.
 * "from" is written only when the flag was not already set.
 */
    .globl rt_thread_switch_interrupt_flag
    .globl rt_interrupt_from_thread
    .globl rt_interrupt_to_thread
    .globl rt_hw_context_switch_interrupt
rt_hw_context_switch_interrupt:
    la      t0, rt_thread_switch_interrupt_flag
    lw      t1, 0(t0)
    nop
    bnez    t1, _reswitch
    nop
    li      t1, 0x01                        /* set rt_thread_switch_interrupt_flag to 1 */
    sw      t1, 0(t0)
    la      t0, rt_interrupt_from_thread    /* set rt_interrupt_from_thread */
    sw      a0, 0(t0)
_reswitch:
    la      t0, rt_interrupt_to_thread      /* set rt_interrupt_to_thread */
    sw      a1, 0(t0)
    jr      ra
    nop

/*
 * mips_irq_handle
 *
 * Saves the full context (general-purpose and FPU registers), runs the
 * interrupt dispatcher on the system stack and, when
 * rt_thread_switch_interrupt_flag is set, swaps sp to the new thread
 * before restoring.
 */
    .globl rt_interrupt_enter
    .globl rt_interrupt_leave
    .globl mips_irq_handle
mips_irq_handle:
    SAVE_ALL
    SAVE_FPU

    mfc0    t0, CP0_CAUSE
    and     t1, t0, 0xff
    bnez    t1, spurious_interrupt          /* check exception */
    nop

    /* let k0 keep the current context sp */
    move    k0, sp
    /* switch to kernel stack */
    li      sp, SYSTEM_STACK

    jal     rt_interrupt_enter
    nop
    jal     rt_interrupt_dispatch
    nop
    jal     rt_interrupt_leave
    nop

    /* switch sp back to thread's context */
    move    sp, k0

    /*
     * if rt_thread_switch_interrupt_flag is not set, there is nothing to
     * switch: go straight to the restore path
     */
    la      k0, rt_thread_switch_interrupt_flag
    lw      k1, 0(k0)
    beqz    k1, spurious_interrupt
    nop
    sw      zero, 0(k0)                     /* clear flag */
    nop

    /*
     * switch to the new thread
     */
    la      k0, rt_interrupt_from_thread
    lw      k1, 0(k0)
    nop
    sw      sp, 0(k1)                       /* store sp in preempted tasks's TCB */

    la      k0, rt_interrupt_to_thread
    lw      k1, 0(k0)
    nop
    lw      sp, 0(k1)                       /* get new task's stack pointer */
    j       spurious_interrupt
    nop

spurious_interrupt:
    RESTORE_FPU
    RESTORE_ALL_AND_RET
    .set reorder
board.c
bsp\ls1cdev\drivers\board.c
/*
 * File      : board.c
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2006-2012, RT-Thread Develop Team
 *
 * The license and distribution terms for this file may be
 * found in the file LICENSE in this distribution or at
 * http://www.rt-thread.org/license/LICENSE
 *
 * Change Logs:
 * Date                Author         Notes
 * 2010-06-25          Bernard        first version
 * 2011-08-08          lgnq           modified for Loongson LS1B
 * 2015-07-06          chinesebear    modified for Loongson LS1C
 */

#include <rtthread.h>
#include <rthw.h>

#include "board.h"
#include "uart.h"
#include "ls1c.h"

/**
 * @addtogroup Loongson LS1B
 */

/*@{*/

/**
 * This is the timer interrupt service routine.
 *
 * Rewrites the CP0 Compare register with its own value, resets the CP0
 * Count register to 0 and advances the OS tick.
 */
void rt_hw_timer_handler(void)
{
    unsigned int count;

    count = read_c0_compare();
    write_c0_compare(count);
    write_c0_count(0);

    /* increase a OS tick */
    rt_tick_increase();
}

/**
 * This function will initial OS timer
 *
 * Compare is set to CPU_HZ/2/RT_TICK_PER_SECOND and Count is reset to 0.
 */
void rt_hw_timer_init(void)
{
    write_c0_compare(CPU_HZ/2/RT_TICK_PER_SECOND);
    write_c0_count(0);
}

/**
 * Initialise the hardware FPU (coprocessor 1).
 *
 * Sets ST0_CU1 and ST0_FR in the CP0 Status register, then programs the
 * FPU control/status register: FS, FO and FN are set, all exception-enable
 * bits (FPU_CSR_ALL_E) are cleared and the rounding-mode field is replaced
 * by FPU_CSR_RN.
 */
void rt_hw_fpu_init(void)
{
    rt_uint32_t c0_status = 0;
    rt_uint32_t c1_status = 0;

    /* enable coprocessor 1 -- the FPU */
    c0_status = read_c0_status();
    c0_status |= (ST0_CU1 | ST0_FR);
    write_c0_status(c0_status);

    /* configure the FPU */
    c1_status = read_c1_status();
    c1_status |= (FPU_CSR_FS | FPU_CSR_FO | FPU_CSR_FN);    /* set FS, FO, FN */
    c1_status &= ~(FPU_CSR_ALL_E);                          /* disable exceptions */
    c1_status = (c1_status & (~FPU_CSR_RM)) | FPU_CSR_RN;   /* set RN */
    write_c1_status(c1_status);

    return ;
}

/**
 * This function will initial the board: UART, console device, OS timer
 * and hardware FPU, then prints the current CP0 Status value.
 */
void rt_hw_board_init(void)
{
#ifdef RT_USING_UART
    /* init hardware UART device */
    rt_hw_uart_init();
#endif

#ifdef RT_USING_CONSOLE
    /* set console device */
    rt_console_set_device("uart2");
#endif

    /* init operating system timer */
    rt_hw_timer_init();

    /* init hardware fpu */
    rt_hw_fpu_init();

    rt_kprintf("current sr: 0x%08x\n", read_c0_status());
}

/*
 * Polling output of the NUL-terminated string at "ptr" on UART<unr>.
 * A '\r' is sent before every '\n'.  Before each byte the loop spins
 * until UARTLSR_TE or UARTLSR_TFE is set in the line status register.
 * Expects a variable named "ptr" in the expanding scope and advances it.
 */
#define __raw_out_put(unr) \
    while (*ptr) \
    { \
        if (*ptr == '\n') \
        { \
            /* wait for the line status register to report TE or TFE */ \
            while (!(UART_LSR(UART##unr##_BASE) & (UARTLSR_TE | UARTLSR_TFE))); \
            /* write data */ \
            UART_DAT(UART##unr##_BASE) = '\r'; \
        } \
        /* wait for the line status register to report TE or TFE */ \
        while (!(UART_LSR(UART##unr##_BASE) & (UARTLSR_TE | UARTLSR_TFE))); \
        /* write data */ \
        UART_DAT(UART##unr##_BASE) = *ptr; \
        ptr ++; \
    }

/* UART line status register value */
#define UARTLSR_ERROR   (1 << 7)
#define UARTLSR_TE      (1 << 6)
#define UARTLSR_TFE     (1 << 5)
#define UARTLSR_BI      (1 << 4)
#define UARTLSR_FE      (1 << 3)
#define UARTLSR_PE      (1 << 2)
#define UARTLSR_OE      (1 << 1)
#define UARTLSR_DR      (1 << 0)

/**
 * Low-level console output: writes the string directly to the UART chosen
 * at build time (RT_USING_UART0 / RT_USING_UART2 / RT_USING_UART3).
 * Does nothing when none of them is defined.
 *
 * @param ptr NUL-terminated string to print
 */
void rt_hw_console_output(const char *ptr)
{
#if defined(RT_USING_UART0)
    __raw_out_put(0);
#elif defined(RT_USING_UART2)
    __raw_out_put(2);
#elif defined(RT_USING_UART3)
    __raw_out_put(3);
#endif
}

/*@}*/
application.c
bsp\ls1cdev\applications\application.c
/*
 * File      : application.c
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2006-2012, RT-Thread Develop Team
 *
 * The license and distribution terms for this file may be
 * found in the file LICENSE in this distribution or at
 * http://www.rt-thread.org/license/LICENSE
 *
 * Change Logs:
 * Date                Author         Notes
 * 2010-06-25          Bernard        first version
 * 2011-08-08          lgnq           modified for Loongson LS1B
 * 2015-07-06          chinesebear    modified for Loongson LS1C
 */

#include <rtthread.h>
#include <components.h>

#include "rthw.h"
#include "ls1c.h"
#include "ls1c_public.h"
#include "ls1c_gpio.h"
#include "mipsregs.h"


/* plain test thread */
#define THREAD_TEST_PRIORITY                    (25)
#define THREAD_TEST_STACK_SIZE                  (4*1024)        /* 4k */
#define THREAD_TEST_TIMESLICE                   (100)

struct rt_thread thread_test;
ALIGN(8) rt_uint8_t thread_test_stack[THREAD_TEST_STACK_SIZE];


/*
 * Thread that exercises the hardware FPU.
 * It is preempted over and over by the other thread, to check whether a
 * task switch disturbs floating-point arithmetic.
 */
#define THREAD_TEST_FPU_PRIORITY                (26)            /* larger value = lower priority */
#define THREAD_TEST_FPU_STACK_SIZE              (2*1024)
#define THREAD_TEST_FPU_TIMESLICE               (100)

struct rt_thread thread_test_fpu;
ALIGN(8) rt_uint8_t thread_test_fpu_stack[THREAD_TEST_FPU_STACK_SIZE];


/* upper bound of the for loop in every test case */
#define TEST_FPU_MAX_COUNT                      (1000)


/* bit fields of a single-precision value: mantissa, biased exponent, sign */
struct ieee754sp_kconst {
    unsigned mant:23;
    unsigned bexp:8;
    unsigned sign:1;
};

/*
 * Views of the same 32 bits as a float, as a raw word and as bit fields.
 * Reading through a union avoids accessing a float object through an
 * incompatible pointer type.
 */
union float_view {
    float value;
    unsigned int bits;
    struct ieee754sp_kconst fields;
};

/* Return the raw bit pattern of a float. */
static unsigned int float_bits(float value)
{
    union float_view view;

    view.value = value;
    return view.bits;
}


/* Report whether the CPU is big or little endian. */
void test_endian(void)
{
    unsigned short test = 0x1234;

    if (0x12 == *((unsigned char *)&test))
        rt_kprintf("[%s] big endian\n", __FUNCTION__);
    else
        rt_kprintf("[%s] little endian\n", __FUNCTION__);
}


/* Print a float as raw hex, sign, exponent and mantissa. */
void print_float(float value)
{
    union float_view view;

    view.value = value;
    rt_kprintf("[%s] 0x%x, sign=%d, bexp=0x%x, mant=0x%x\n",
               __FUNCTION__,
               view.bits,
               view.fields.sign,
               view.fields.bexp,
               view.fields.mant);

    return ;
}


/* Floating-point addition on the hardware FPU. */
void test_fpu_add(void)
{
    unsigned int i = 0;
    float sum_f = 0.0;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);

    for (i=0; i<TEST_FPU_MAX_COUNT; i++)
    {
        sum_f += 0.62113;
        rt_kprintf("[%s] *sum_p=0x%x\n", __FUNCTION__, float_bits(sum_f));
    }

    return ;
}


/* Floating-point subtraction on the hardware FPU. */
void test_fpu_subtraction(void)
{
    unsigned int i = 0;
    float result_f = 252.731;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);

    for (i=0; i<TEST_FPU_MAX_COUNT; i++)
    {
        result_f -= 0.62113;
        rt_kprintf("[%s] *result_p=0x%x\n", __FUNCTION__, float_bits(result_f));
    }

    return ;
}


/* Floating-point multiplication on the hardware FPU. */
void test_fpu_multiplication(void)
{
    unsigned int i = 0;
    float result_f = 9.016;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);

    for (i=1; i<TEST_FPU_MAX_COUNT; i++)
    {
        result_f *= 1.00001;
        rt_kprintf("[%s] *result_p=0x%x\n", __FUNCTION__, float_bits(result_f));
    }

    return ;
}


/* Floating-point division on the hardware FPU. */
void test_fpu_division(void)
{
    unsigned int i = 0;
    float result_f = 723.801;

    rt_kprintf("\n\n----------------------%s-------------------\n", __FUNCTION__);

    for (i=1; i<TEST_FPU_MAX_COUNT; i++)
    {
        result_f /= 1.00003;
        rt_kprintf("[%s] *result_p=0x%x\n", __FUNCTION__, float_bits(result_f));
    }

    return ;
}


/* Run the add / subtract / multiply / divide tests in turn. */
void test_fpu(void)
{
    /* addition */
    test_fpu_add();

    /* subtraction */
    test_fpu_subtraction();

    /* multiplication */
    test_fpu_multiplication();

    /* division */
    test_fpu_division();

    return ;
}


/* debug variables; printed once by thread_test_entry */
volatile unsigned int test_fpu_before_sp = 0;
volatile unsigned int test_fpu_end_sp = 0;
volatile unsigned int test_fpu_save_sp = 0;


/*
 * Entry of the plain test thread.
 * Prints the debug sp variables once, then loops forever doing a little
 * double-precision arithmetic and sleeping for 10 ticks.
 * NOTE(review): i, test1 and result grow without bound; the double-to-int
 * conversion will eventually go out of range -- confirm this is acceptable
 * for a stress thread.
 */
void thread_test_entry(void *parameter)
{
    double test1 = 1.71;
    double test2 = 100.039;
    double result;
    int i = 0;

    rt_kprintf("[%s] test_fpu_save_sp=0x%x, test_fpu_before_sp=0x%x, test_fpu_end_sp=0x%x\n",
               __FUNCTION__, test_fpu_save_sp, test_fpu_before_sp, test_fpu_end_sp);

    while (1)
    {
        i++;
        test1 += i;
        result = test1 * test2;
        i = i + result / 9;

        rt_thread_delay(10);

        /* keeps preempting the thread that does the FPU test; optionally print a line each time */
//        rt_kprintf("[%s] ..........................\n", __FUNCTION__);
    }
}


/*
 * Entry of the FPU test thread.
 * Runs the whole FPU test once (it is interrupted by other threads many
 * times along the way), then idles.
 */
void thread_test_fpu_entry(void *parameter)
{
    test_fpu();

    rt_kprintf("[%s] test fpu end.\n", __FUNCTION__);

    while (1)
    {
        rt_thread_delay(RT_TICK_PER_SECOND);
    }
}


void rt_init_thread_entry(void *parameter)
{
    /* initialization RT-Thread Components */
    rt_components_init();
}


/*
 * Create the "init" thread and start the two statically allocated test
 * threads.
 *
 * @return 0 on success, -1 when either rt_thread_init() call fails
 */
int rt_application_init(void)
{
    rt_thread_t tid;
    rt_err_t result;

    /* create initialization thread */
    tid = rt_thread_create("init",
                           rt_init_thread_entry, RT_NULL,
                           4096, RT_THREAD_PRIORITY_MAX/3, 20);
    if (tid != RT_NULL)
        rt_thread_startup(tid);

    /* plain test thread */
    result = rt_thread_init(&thread_test,
                            "test",
                            thread_test_entry,
                            RT_NULL,
                            &thread_test_stack[0],
                            sizeof(thread_test_stack),
                            THREAD_TEST_PRIORITY,
                            THREAD_TEST_TIMESLICE);
    if (RT_EOK == result)
    {
        rt_thread_startup(&thread_test);
    }
    else
    {
        return -1;
    }

    /* FPU test thread */
    result = rt_thread_init(&thread_test_fpu,
                            "test_fpu",
                            thread_test_fpu_entry,
                            RT_NULL,
                            &thread_test_fpu_stack[0],
                            sizeof(thread_test_fpu_stack),
                            THREAD_TEST_FPU_PRIORITY,
                            THREAD_TEST_FPU_TIMESLICE);
    if (RT_EOK == result)
    {
        rt_thread_startup(&thread_test_fpu);
    }
    else
    {
        return -1;
    }

    return 0;
}
新建了两个线程来测试浮点运算是否正常。线程“test_fpu”不停的执行浮点运算,线程“test”的优先级比线程“test_fpu”高,线程“test”不断抢占cpu,触发任务切换,最后通过串口打印查看浮点运算结果是否正确。
- 在龙芯1C上移植硬浮点FPU到RT-Thread
- 【龙芯1c库】移植硬浮点FPU
- RT-Thread在STM32F100C8上的移植
- 在windows上搭建龙芯1c的RT-Thread开发环境
- 在龙芯1c上用RT-Thread打印hello
- 在RT-Thread上使用龙芯1c库中的硬件I2C接口
- 在龙芯1c上使用rt-thread统一标准的spi接口
- 在龙芯1c上使用RT-Thread统一标准的i2c接口
- 在龙芯1c上使用RT-Thread统一标准的gpio接口
- 在龙芯1c上用RT-Thread上的lwip NETCONN接口实现socket编程
- 移植 RT-Thread 到MB9BF218S
- uip 移植在rt-thread上的源码
- RT Thread 在STM32f10x 平台上的移植
- RT-Thread在S3C2410下的移植(1)
- 移植rt-thread到esp8266笔记
- 移植RT-THREAD+LUA到STM32F4
- 用龙芯1c库在RT-Thread下输出PWM
- rt-thread移植STM32F103C8
- python---pandas.merge使用
- dp 电子科大本部食堂的饭卡有一种很诡异的设计,即在购买之前判断余额。
- YARN资源管理的最佳实践
- Python日志查询
- 自己制作GitHub开源项目的代码仓库的经验——直接compile自己的代码包【开源】
- 在龙芯1C上移植硬浮点FPU到RT-Thread
- JeePlus后台JSP页面编写指南
- 面试
- luogu3856 [TJOI2008]公共子串(dp)
- Python数据库查询
- SVN工具使用问题及解答汇总
- WebView使用详解(一)——Native与JS相互调用(附JadX反编译)
- 表示数值的字符串
- EnterPrise JavaBeans