kernel-fxtec-pro1x/arch/mn10300/lib/do_csum.S

/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

        .section .text
        .balign	L1_CACHE_BYTES

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, size_t len)
#
# Add up the len bytes at buff as 16-bit quantities with end-around carry and
# return the total folded down to 16 bits.
#
# On entry:	d0 = buff, d1 = len
# On exit:	d0 = folded 16-bit sum (upper 16 bits clear)
#
# Register use inside the routine:
#	a0	read pointer, advanced as data is consumed
#	d1	32-bit accumulator
#	d2	bytes still to be summed
#	d3	the constant 32 during the main loop, then trailing-byte scratch
#	d0, e0, e1, e3	load scratch
#
# d2 and d3 are saved on entry and restored by the final ret.  a0, e0, e1 and
# e3 are overwritten and not restored.
#
# A len that is zero, or that has its top bit set (negative when treated as a
# signed value), yields 0 without touching the buffer.
#
# A leading byte at an odd address is added into bits 15:8 rather than bits
# 7:0; the rest of the buffer is then summed from the even address and the two
# bytes of the folded result are exchanged at the end to compensate.
#
###############################################################################
	.globl	do_csum
	.type	do_csum,@function
do_csum:
	movm	[d2,d3],(sp)			# saved here, restored by the ret
	mov	d0,(12,sp)			# keep buff for the odd-address test
	mov	d1,(16,sp)			# len is stored but not read back here
	mov	d1,d2				# count
	mov	d0,a0				# buff
	clr	d1				# accumulator

	# Signed test, so that a count with the top bit set (such as a negative
	# int handed in by a caller) is rejected together with zero instead of
	# being walked as a buffer of 2GB or more.
	cmp	+0,d2
	ble	do_csum_done			# nothing to sum

	# 4-byte align the buffer pointer
	btst	+3,a0
	beq	do_csum_now_4b_aligned

	btst	+1,a0
	beq	do_csum_addr_not_odd
	movbu	(a0),d0				# leading byte at an odd address
	inc	a0
	asl	+8,d0				# place it in bits 15:8 (see header)
	add	d0,d1
	addc	+0,d1
	add	-1,d2
do_csum_addr_not_odd:

	# a0 is even from here on; d2 may already be down to 0 or 1
	cmp	+2,d2
	bcs	do_csum_fewer_than_4		# under 2 bytes left
	btst	+2,a0
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0			# one halfword reaches 4-byte alignment
	add	d0,d1
	addc	+0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder		# 4-byte aligned remainder

	add	-32,d2				# bias the count so the sub below
	mov	+32,d3				#  borrows once under 32 bytes remain

do_csum_loop:
	# The carry out of each add/addc feeds the next addc, so nothing placed
	# between the first add and the closing addc +0 may change the carry.
	mov	(a0+),d0
	add	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	mov	(a0+),d0
	addc	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1				# fold the last carry back in

	sub	d3,d2
	bcc	do_csum_loop			# no borrow: 32 more bytes available

	add	d3,d2				# undo the bias: d2 is now 0..31
	beq	do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0
	add	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1				# fold the last carry back in
	add	-16,d2
	beq	do_csum_done

do_csum_fewer_than_16:
	# copy the remaining whole words
	# (12-15 bytes enter at the top and sum three words, 8-11 sum two,
	# 4-7 sum one; the entry points fall through into each other)
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1

do_csum_fewer_than_4:
	and	+3,d2				# whole words are done; keep 0..3
	beq	do_csum_done
	xor_cmp	d0,d0,+2,d2			# clear d0 and compare d2 with 2
	bcs	do_csum_fewer_than_2
	movhu	(a0+),d0			# 2 or 3 left: take a halfword
do_csum_fewer_than_2:
	and	+1,d2
	beq	do_csum_add_last_bit
	movbu	(a0),d3				# final odd byte, added to bits 7:0
	add	d3,d0
do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1

do_csum_done:
	# compress the checksum down to 16 bits
	mov	+0xffff0000,d2
	and	d1,d2				# d2 = high half, still in bits 31:16
	asl	+16,d1				# d1 = low half moved up to bits 31:16
	add	d2,d1,d0			# d0 = sum of both halves in bits 31:16
	addc	+0xffff,d0			# a carry ripples through 15:0 into bit 16
	lsr	+16,d0

	# flip the halves of the word result if the buffer was oddly aligned
	mov	(12,sp),d1			# buff as saved at entry
	and	+1,d1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0				# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8

do_csum_end:
	.size	do_csum, do_csum_end-do_csum
mn10300: add the MN10300/AM33 architecture to the kernel

Add architecture support for the MN10300/AM33 CPUs produced by MEI to the kernel.

This patch also adds board support for the ASB2303 with the ASB2308 daughter board, and the ASB2305.

The only processor supported is the MN103E010, which is an AM33v2 core plus on-chip devices.

[akpm@linux-foundation.org: nuke cvs control strings]
Signed-off-by: Masakazu Urade <urade.masakazu@jp.panasonic.com>
Signed-off-by: Koichi Yasutake <yasutake.koichi@jp.panasonic.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 05:19:31 -07:00
			`/* Optimised simple memory checksum`
			`*`
			`* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.`
			`* Written by David Howells (dhowells@redhat.com)`
			`*`
			`* This program is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU General Public Licence`
			`* as published by the Free Software Foundation; either version`
			`* 2 of the Licence, or (at your option) any later version.`
			`*/`
			`#include <asm/cache.h>`

			`.section .text`
			`.balign L1_CACHE_BYTES`

			`###############################################################################`
			`#`
			`# unsigned int do_csum(const unsigned char *buff, size_t len)`
			`#`
			`###############################################################################`
			`.globl do_csum`
			`.type do_csum,@function`
			`do_csum:`
			`movm [d2,d3],(sp)`
			`mov d0,(12,sp)`
			`mov d1,(16,sp)`
			`mov d1,d2 # count`
			`mov d0,a0 # buff`
			`clr d1 # accumulator`

			`cmp +0,d2`
			`beq do_csum_done # return if zero-length buffer`

			`# 4-byte align the buffer pointer`
			`btst +3,a0`
			`beq do_csum_now_4b_aligned`

			`btst +1,a0`
			`beq do_csum_addr_not_odd`
			`movbu (a0),d0`
			`inc a0`
			`asl +8,d0`
			`add d0,d1`
			`addc +0,d1`
			`add -1,d2`
			`do_csum_addr_not_odd:`

			`cmp +2,d2`
			`bcs do_csum_fewer_than_4`
			`btst +2,a0`
			`beq do_csum_now_4b_aligned`
			`movhu (a0+),d0`
			`add d0,d1`
			`addc +0,d1`
			`add -2,d2`
			`cmp +4,d2`
			`bcs do_csum_fewer_than_4`

			`do_csum_now_4b_aligned:`
			`# we want to checksum as much as we can in chunks of 32 bytes`
			`cmp +31,d2`
			`bls do_csum_remainder # 4-byte aligned remainder`

			`add -32,d2`
			`mov +32,d3`

			`do_csum_loop:`
			`mov (a0+),d0`
			`add d0,d1`
			`mov (a0+),e0`
			`addc e0,d1`
			`mov (a0+),e1`
			`addc e1,d1`
			`mov (a0+),e3`
			`addc e3,d1`
			`mov (a0+),d0`
			`addc d0,d1`
			`mov (a0+),e0`
			`addc e0,d1`
			`mov (a0+),e1`
			`addc e1,d1`
			`mov (a0+),e3`
			`addc e3,d1`
			`addc +0,d1`

			`sub d3,d2`
			`bcc do_csum_loop`

			`add d3,d2`
			`beq do_csum_done`

			`do_csum_remainder:`
			`# cut 16-31 bytes down to 0-15`
			`cmp +16,d2`
			`bcs do_csum_fewer_than_16`
			`mov (a0+),d0`
			`add d0,d1`
			`mov (a0+),e0`
			`addc e0,d1`
			`mov (a0+),e1`
			`addc e1,d1`
			`mov (a0+),e3`
			`addc e3,d1`
			`addc +0,d1`
			`add -16,d2`
			`beq do_csum_done`

			`do_csum_fewer_than_16:`
			`# copy the remaining whole words`
			`cmp +4,d2`
			`bcs do_csum_fewer_than_4`
			`cmp +8,d2`
			`bcs do_csum_one_word`
			`cmp +12,d2`
			`bcs do_csum_two_words`
			`mov (a0+),d0`
			`add d0,d1`
			`addc +0,d1`
			`do_csum_two_words:`
			`mov (a0+),d0`
			`add d0,d1`
			`addc +0,d1`
			`do_csum_one_word:`
			`mov (a0+),d0`
			`add d0,d1`
			`addc +0,d1`

			`do_csum_fewer_than_4:`
			`and +3,d2`
			`beq do_csum_done`
			`xor_cmp d0,d0,+2,d2`
			`bcs do_csum_fewer_than_2`
			`movhu (a0+),d0`
			`do_csum_fewer_than_2:`
			`and +1,d2`
			`beq do_csum_add_last_bit`
			`movbu (a0),d3`
			`add d3,d0`
			`do_csum_add_last_bit:`
			`add d0,d1`
			`addc +0,d1`

			`do_csum_done:`
			`# compress the checksum down to 16 bits`
			`mov +0xffff0000,d2`
			`and d1,d2`
			`asl +16,d1`
			`add d2,d1,d0`
			`addc +0xffff,d0`
			`lsr +16,d0`

			`# flip the halves of the word result if the buffer was oddly aligned`
			`mov (12,sp),d1`
			`and +1,d1`
			`beq do_csum_not_oddly_aligned`
			`swaph d0,d0 # exchange bits 15:8 with 7:0`

			`do_csum_not_oddly_aligned:`
			`ret [d2,d3],8`

			`do_csum_end:`
			`.size do_csum, do_csum_end-do_csum`