[BACK]Return to bcopy.S CVS log [TXT][DIR] Up to [cvs.NetBSD.org] / src / common / lib / libc / arch / mips / string

File: [cvs.NetBSD.org] / src / common / lib / libc / arch / mips / string / bcopy.S (download)

Revision 1.4, Sat Aug 27 13:23:52 2011 UTC (10 years, 3 months ago) by bouyer
Branch: MAIN
CVS Tags: yamt-pagecache-tag8, yamt-pagecache-base9, yamt-pagecache-base8, yamt-pagecache-base7, yamt-pagecache-base6, yamt-pagecache-base5, yamt-pagecache-base4, yamt-pagecache-base3, yamt-pagecache-base2, yamt-pagecache-base, yamt-pagecache, tls-maxphys-base, tls-maxphys-20171202, tls-maxphys, tls-earlyentropy-base, tls-earlyentropy, rmind-smpnet-nbase, rmind-smpnet-base, riastradh-xf86-video-intel-2-7-1-pre-2-21-15, riastradh-drm2-base3, riastradh-drm2-base2, riastradh-drm2-base1, riastradh-drm2-base, riastradh-drm2, prg-localcount2-base3, prg-localcount2-base2, prg-localcount2-base1, prg-localcount2-base, prg-localcount2, phil-wifi-base, phil-wifi-20200421, phil-wifi-20200411, phil-wifi-20200406, phil-wifi-20191119, phil-wifi-20190609, pgoyette-localcount-base, pgoyette-localcount-20170426, pgoyette-localcount-20170320, pgoyette-localcount-20170107, pgoyette-localcount-20161104, pgoyette-localcount-20160806, pgoyette-localcount-20160726, pgoyette-localcount, pgoyette-compat-merge-20190127, pgoyette-compat-base, pgoyette-compat-20190127, pgoyette-compat-20190118, pgoyette-compat-1226, pgoyette-compat-1126, pgoyette-compat-1020, pgoyette-compat-0930, pgoyette-compat-0906, pgoyette-compat-0728, pgoyette-compat-0625, pgoyette-compat-0521, pgoyette-compat-0502, pgoyette-compat-0422, pgoyette-compat-0415, pgoyette-compat-0407, pgoyette-compat-0330, pgoyette-compat-0322, pgoyette-compat-0315, pgoyette-compat, perseant-stdc-iso10646-base, perseant-stdc-iso10646, netbsd-9-base, netbsd-9-2-RELEASE, netbsd-9-1-RELEASE, netbsd-9-0-RELEASE, netbsd-9-0-RC2, netbsd-9-0-RC1, netbsd-9, netbsd-8-base, netbsd-8-2-RELEASE, netbsd-8-1-RELEASE, netbsd-8-1-RC1, netbsd-8-0-RELEASE, netbsd-8-0-RC2, netbsd-8-0-RC1, netbsd-8, netbsd-7-nhusb-base-20170116, netbsd-7-nhusb-base, netbsd-7-nhusb, netbsd-7-base, netbsd-7-2-RELEASE, netbsd-7-1-RELEASE, netbsd-7-1-RC2, netbsd-7-1-RC1, netbsd-7-1-2-RELEASE, netbsd-7-1-1-RELEASE, netbsd-7-1, netbsd-7-0-RELEASE, netbsd-7-0-RC3, netbsd-7-0-RC2, 
netbsd-7-0-RC1, netbsd-7-0-2-RELEASE, netbsd-7-0-1-RELEASE, netbsd-7-0, netbsd-7, netbsd-6-base, netbsd-6-1-RELEASE, netbsd-6-1-RC4, netbsd-6-1-RC3, netbsd-6-1-RC2, netbsd-6-1-RC1, netbsd-6-1-5-RELEASE, netbsd-6-1-4-RELEASE, netbsd-6-1-3-RELEASE, netbsd-6-1-2-RELEASE, netbsd-6-1-1-RELEASE, netbsd-6-1, netbsd-6-0-RELEASE, netbsd-6-0-RC2, netbsd-6-0-RC1, netbsd-6-0-6-RELEASE, netbsd-6-0-5-RELEASE, netbsd-6-0-4-RELEASE, netbsd-6-0-3-RELEASE, netbsd-6-0-2-RELEASE, netbsd-6-0-1-RELEASE, netbsd-6-0, netbsd-6, matt-nb8-mediatek-base, matt-nb8-mediatek, matt-nb6-plus-nbase, matt-nb6-plus-base, matt-nb6-plus, localcount-20160914, khorben-n900, is-mlppp-base, is-mlppp, cjep_sun2x-base1, cjep_sun2x-base, cjep_sun2x, cjep_staticlib_x-base1, cjep_staticlib_x-base, cjep_staticlib_x, bouyer-xenpvh-base2, bouyer-xenpvh-base1, bouyer-xenpvh-base, bouyer-xenpvh, bouyer-socketcan-base1, bouyer-socketcan-base, bouyer-socketcan, agc-symver-base, agc-symver, ad-namecache-base3, ad-namecache-base, ad-namecache, HEAD
Branch point for: phil-wifi
Changes since 1.3: +6 -2 lines

loongson2f support:
- Add some loongson2 definitions to cpuregs.h, from OpenBSD
- Make sure that the at register is useable before every jump register
  instruction (except when the register is k0 or k1) because -mfix-loongson2f-btb
  needs the at register for its workaround
- add code to mips_fixup.c to handle the instructions added by
  -mfix-loongson2f-btb
- Add a ls2-specific tlb miss handler: it doesn't have separate handler
  for the xtlbmiss exception.
- Fixes for some #ifdef MIPS3_LOONGSON2 assembly code (using the wrong
  register)

/*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/

/*
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	File:	mips_bcopy.s
 *	Author:	Chris Maeda
 *	Date:	June 1993
 *
 *	Fast copy routine.  Derived from aligned_block_copy.
 */


#include <mips/asm.h>
#ifndef _LOCORE
#define _LOCORE		/* XXX not really, just assembly-code source */
#endif
#include <machine/endian.h>


/*
 * Source identification, emitted only when LIBC_SCCS is defined and
 * lint is not.  The "#if 0" arm keeps the original CMU identification
 * string for reference and is never assembled; the NetBSD string in
 * the "#else" arm is the one handed to RCSID.
 * NOTE(review): RCSID is not defined in this file; presumably it is
 * provided by <mips/asm.h> -- confirm.
 */
#if defined(LIBC_SCCS) && !defined(lint)
#if 0
	RCSID("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
#else
	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
#endif
#endif /* LIBC_SCCS and not lint */

/*
 *	Entry point and register roles, selected at build time:
 *
 *	(default)  bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *			a0 = src address, a1 = dst address
 *	MEMCOPY    memcpy:  a0 = dst address, a1 = src address
 *	MEMMOVE    memmove: a0 = dst address, a1 = src address
 *
 *	a2 holds the length in every variant.  If both MEMCOPY and
 *	MEMMOVE are defined, MEMCOPY wins and the routine is named memcpy.
 */

#define	SIZEREG		a2

#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#define	SRCREG		a1
#define	DSTREG		a0
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#define	SRCREG		a1
#define	DSTREG		a0
#else
#define	FUNCTION	bcopy
#define	SRCREG		a0
#define	DSTREG		a1
#endif

/*
 * FUNCTION -- copy SIZEREG bytes from SRCREG to DSTREG.
 *
 * Which entry point this becomes (bcopy, memcpy or memmove) and which
 * argument registers SRCREG/DSTREG name are chosen by the preprocessor
 * definitions earlier in this file.  The memcpy/memmove builds also
 * return the original destination pointer in v0.
 *
 * Direction:
 *	SRCREG >= DSTREG (unsigned)	copy forwards from the start
 *	SRCREG <  DSTREG		copy backwards from the end (label 6)
 * Copying away from the side the destination overlaps on means source
 * bytes are always read before an overlapping store can replace them.
 *
 * Each direction uses the same three strategies:
 *	- destination (its end, when going backwards) not SZREG-aligned:
 *	  everything is copied one byte at a time;
 *	- destination and source both aligned: 8 registers per iteration,
 *	  then one register per iteration, then the leftover bytes;
 *	- destination aligned, source not: REG_LHI/REG_LLO loads paired
 *	  with aligned stores, then the leftover bytes.
 *	  NOTE(review): REG_LHI/REG_LLO are defined outside this file;
 *	  presumably the endian-selected unaligned-load pair -- confirm.
 *
 * Registers written: t0-t3, v1, a3 and AT (plus v0 in the memcpy/memmove
 * builds); the three argument registers are modified as the copy runs.
 *
 * The body is assembled under ".set noreorder": the instruction that
 * follows each branch is its delay slot and executes whether or not the
 * branch is taken.  Several delay slots below deliberately hold the
 * first instruction of the fall-through path, so instruction order here
 * must not be changed casually.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.  The andi below the next
	 *	comment sits in this branch's delay slot; the backward path
	 *	recomputes t1 after adjusting the pointers, so executing it
	 *	on the way there is harmless.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
	bne	t0,zero,6f		# copy backwards

	/*
	 *	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 *	aligned   -> aligned		(mostly)
	 *	unaligned -> aligned		(sometimes)
	 *	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	andi		t1,DSTREG,(SZREG-1)	# get last bits of dest
	bne		t1,zero,3f		# dest unaligned: bytecopy it all
	andi		t0,SRCREG,(SZREG-1)	# (delay slot) get last bits of src
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8 words at a time.
	 *	The li below is the delay slot of the branch to 5 above;
	 *	AT is not used on that path, so it is harmless there.
	 */
98:
	li		AT,-(SZREG*8)
	and		t0,SIZEREG,AT		# count truncated to multiples
	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
	sltu		AT,SRCREG,a3		# any work to do?
	beq		AT,zero,2f
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body: the first four registers are loaded and stored
	 *	at positive offsets, then, once each pointer has been
	 *	advanced, the second four at negative offsets.
	 */
1:	# cp
	REG_L		t3,(0*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	PTR_ADDU	SRCREG,SZREG*8
	REG_S		t3,(0*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t3,(-4*SZREG)(SRCREG)
	PTR_ADDU	DSTREG,SZREG*8
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(-4*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,(SZREG-1)	# leftover bytes (count mod SZREG)
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_ADDU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# only the leftover bytes remain
1:
	REG_L		t3,0(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,t0,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,0(SRCREG)
	PTR_ADDU	SRCREG,1
	sb		t3,0(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_ADDU	DSTREG,1		# (delay slot)

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# bytes to move as whole words
	beq		a3,zero,3b		# less than one word: bytecopy
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_ADDU	a3,SRCREG,a3		# stop point

	/*
	 *	Pointer updates use PTR_ADDU, the non-trapping add used by
	 *	every other pointer update in this routine, rather than the
	 *	overflow-trapping PTR_ADDI form.
	 */
1:
	REG_LHI		t3,0(SRCREG)
	REG_LLO		t3,SZREG-1(SRCREG)
	PTR_ADDU	SRCREG,SZREG
	REG_S		t3,0(DSTREG)
	bne		SRCREG,a3,1b
	PTR_ADDU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish leftover bytes
	nop

	/*
	 *	Backward copy.  Both pointers are first moved one past the
	 *	end of their regions; every access below then uses negative
	 *	offsets or pre-decremented pointers.  Numeric labels 1-5
	 *	are reused, so the 2f/3f/5f and 3b references from here on
	 *	resolve to the backward versions.
	 */
6:	# backcopy -- based on above
	PTR_ADDU	SRCREG,SIZEREG
	PTR_ADDU	DSTREG,SIZEREG
	andi		t1,DSTREG,SZREG-1	# get last bits of dest end
	bne		t1,zero,3f		# dest end unaligned: bytecopy it all
	andi		t0,SRCREG,SZREG-1	# (delay slot) get last bits of src end
	bne		t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Backward aligned->aligned copy, 8 words at a time.
	 *	The li below is the delay slot of the branch to 5 above.
	 */
	li		AT,(-8*SZREG)
	and		t0,SIZEREG,AT		# count truncated to multiple of 8*SZREG
	beq		t0,zero,2f		# any work to do?
	PTR_SUBU	SIZEREG,t0		# (delay slot) bytes left after fast loop
	PTR_SUBU	a3,SRCREG,t0		# run fast loop down to this addr

	/*
	 *	loop body: mirror image of the forward loop -- the top four
	 *	registers at negative offsets first, then, once each pointer
	 *	has been moved down, the lower four at positive offsets.
	 */
1:	# cp
	REG_L		t3,(-4*SZREG)(SRCREG)
	REG_L		v1,(-3*SZREG)(SRCREG)
	REG_L		t0,(-2*SZREG)(SRCREG)
	REG_L		t1,(-1*SZREG)(SRCREG)
	PTR_SUBU	SRCREG,8*SZREG
	REG_S		t3,(-4*SZREG)(DSTREG)
	REG_S		v1,(-3*SZREG)(DSTREG)
	REG_S		t0,(-2*SZREG)(DSTREG)
	REG_S		t1,(-1*SZREG)(DSTREG)
	REG_L		t1,(3*SZREG)(SRCREG)
	REG_L		t0,(2*SZREG)(SRCREG)
	REG_L		v1,(1*SZREG)(SRCREG)
	REG_L		t3,(0*SZREG)(SRCREG)
	PTR_SUBU	DSTREG,8*SZREG
	REG_S		t1,(3*SZREG)(DSTREG)
	REG_S		t0,(2*SZREG)(DSTREG)
	REG_S		v1,(1*SZREG)(DSTREG)
	bne		SRCREG,a3,1b
	REG_S		t3,(0*SZREG)(DSTREG)	# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi		t2,SIZEREG,SZREG-1	# leftover bytes (count mod SZREG)
	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
	beq		t2,zero,3f
	PTR_SUBU	t0,SRCREG,t2		# (delay slot) stop at t0
	PTR_SUBU	SIZEREG,SIZEREG,t2	# only the leftover bytes remain
1:
	REG_L		t3,-SZREG(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,t0,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

3:	# bytecopy
	beq		SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,1
	sb		t3,-1(DSTREG)
	PTR_SUBU	SIZEREG,1
	bgtz		SIZEREG,1b
	PTR_SUBU	DSTREG,1		# (delay slot)

4:	# copydone
	.set at		#-mfix-loongson2f-btb
	j	ra
	nop
	.set noat

	/*
	 *	Copy backwards from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
	PTR_SUBU	a3,SIZEREG,t0		# bytes to move as whole words
	beq		a3,zero,3b		# less than one word: bytecopy
	nop
	move		SIZEREG,t0		# this many to do after we are done
	PTR_SUBU	a3,SRCREG,a3		# stop point

1:
	REG_LHI		t3,-SZREG(SRCREG)
	REG_LLO		t3,-1(SRCREG)
	PTR_SUBU	SRCREG,SZREG
	REG_S		t3,-SZREG(DSTREG)
	bne		SRCREG,a3,1b
	PTR_SUBU	DSTREG,SZREG		# (delay slot)

	b		3b			# finish leftover bytes
	nop

	.set	reorder
	.set	at
	END(FUNCTION)